[gjs] byteArray: Refactor functionality into Encoding
- From: Philip Chimento <pchimento src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gjs] byteArray: Refactor functionality into Encoding
- Date: Fri, 2 Jul 2021 04:25:25 +0000 (UTC)
commit 4b02a015e20145c760743f3d2cb0493e631a4e5b
Author: Evan Welsh <contact evanwelsh com>
Date: Sun Apr 25 13:14:58 2021 -0700
byteArray: Refactor functionality into Encoding
Copies common functionality from byteArray into an Encoding native
module. This refactor is the basis for the WHATWG Encoding work.
gjs/byteArray.cpp | 111 +-----------------------------------
gjs/context.cpp | 3 +
gjs/text-encoding.cpp | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++
gjs/text-encoding.h | 23 ++++++++
meson.build | 1 +
5 files changed, 184 insertions(+), 108 deletions(-)
---
diff --git a/gjs/byteArray.cpp b/gjs/byteArray.cpp
index 1635e902..60826ab2 100644
--- a/gjs/byteArray.cpp
+++ b/gjs/byteArray.cpp
@@ -28,6 +28,7 @@
#include "gjs/deprecation.h"
#include "gjs/jsapi-util-args.h"
#include "gjs/jsapi-util.h"
+#include "gjs/text-encoding.h"
#include "util/misc.h" // for _gjs_memdup2
/* Callbacks to use with JS::NewExternalArrayBuffer() */
@@ -42,112 +43,6 @@ static void bytes_unref_arraybuffer(void* contents [[maybe_unused]],
g_bytes_unref(gbytes);
}
-GJS_JSAPI_RETURN_CONVENTION
-bool to_string_impl_slow(JSContext* cx, uint8_t* data, uint32_t len,
- const char* encoding, JS::MutableHandleValue rval) {
- size_t bytes_written;
- GError* error = nullptr;
- GjsAutoChar u16_str = g_convert(reinterpret_cast<char*>(data), len,
- // Make sure the bytes of the UTF-16 string are laid out in memory
- // such that we can simply reinterpret_cast<char16_t> them.
-#if G_BYTE_ORDER == G_LITTLE_ENDIAN
- "UTF-16LE",
-#else
- "UTF-16BE",
-#endif
- encoding, nullptr, /* bytes read */
- &bytes_written, &error);
- if (!u16_str)
- return gjs_throw_gerror_message(cx, error); // frees GError
-
- // bytes_written should be bytes in a UTF-16 string so should be a multiple
- // of 2
- g_assert((bytes_written % 2) == 0);
-
- // g_convert 0-terminates the string, although the 0 isn't included in
- // bytes_written
- JSString* s =
- JS_NewUCStringCopyZ(cx, reinterpret_cast<char16_t*>(u16_str.get()));
- if (!s)
- return false;
-
- rval.setString(s);
- return true;
-}
-
-/* implement toString() with an optional encoding arg */
-GJS_JSAPI_RETURN_CONVENTION
-static bool to_string_impl(JSContext* context, JS::HandleObject byte_array,
- const char* encoding, JS::MutableHandleValue rval) {
- if (!JS_IsUint8Array(byte_array)) {
- gjs_throw(context,
- "Argument to ByteArray.toString() must be a Uint8Array");
- return false;
- }
-
- bool encoding_is_utf8;
- uint8_t* data;
-
- if (encoding) {
- /* maybe we should be smarter about utf8 synonyms here.
- * doesn't matter much though. encoding_is_utf8 is
- * just an optimization anyway.
- */
- encoding_is_utf8 = (strcmp(encoding, "UTF-8") == 0);
- } else {
- encoding_is_utf8 = true;
- }
-
- uint32_t len;
- bool is_shared_memory;
- js::GetUint8ArrayLengthAndData(byte_array, &len, &is_shared_memory, &data);
-
- if (len == 0) {
- rval.setString(JS_GetEmptyString(context));
- return true;
- }
-
- if (!encoding_is_utf8)
- return to_string_impl_slow(context, data, len, encoding, rval);
-
- // optimization, avoids iconv overhead and runs libmozjs hardwired
- // utf8-to-utf16
-
- // If there are any 0 bytes, including the terminating byte, stop at the
- // first one
- if (data[len - 1] == 0 || memchr(data, 0, len)) {
- if (!gjs_string_from_utf8(context, reinterpret_cast<char*>(data), rval))
- return false;
- } else {
- if (!gjs_string_from_utf8_n(context, reinterpret_cast<char*>(data), len,
- rval))
- return false;
- }
-
- uint8_t* current_data;
- uint32_t current_len;
- bool ignore_val;
-
- // If a garbage collection occurs between when we call
- // js::GetUint8ArrayLengthAndData and return from gjs_string_from_utf8, a
- // use-after-free corruption can occur if the garbage collector shifts the
- // location of the Uint8Array's private data. To mitigate this we call
- // js::GetUint8ArrayLengthAndData again and then compare if the length and
- // pointer are still the same. If the pointers differ, we use the slow path
- // to ensure no data corruption occurred. The shared-ness of an array cannot
- // change between calls, so we ignore it.
- js::GetUint8ArrayLengthAndData(byte_array, &current_len, &ignore_val,
- &current_data);
-
- // Ensure the private data hasn't changed
- if (current_len == len && current_data == data)
- return true;
-
- // This was the UTF-8 optimized path, so we explicitly pass the encoding
- return to_string_impl_slow(context, current_data, current_len, "UTF-8",
- rval);
-}
-
GJS_JSAPI_RETURN_CONVENTION
static bool to_string_func(JSContext* cx, unsigned argc, JS::Value* vp) {
JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
@@ -158,7 +53,7 @@ static bool to_string_func(JSContext* cx, unsigned argc, JS::Value* vp) {
&byte_array, "encoding", &encoding))
return false;
- return to_string_impl(cx, byte_array, encoding.get(), args.rval());
+ return bytearray_to_string(cx, byte_array, encoding.get(), args.rval());
}
/* Workaround to keep existing code compatible. This function is tacked onto
@@ -176,7 +71,7 @@ static bool instance_to_string_func(JSContext* cx, unsigned argc,
if (!gjs_parse_call_args(cx, "toString", args, "|s", "encoding", &encoding))
return false;
- return to_string_impl(cx, this_obj, encoding.get(), args.rval());
+ return bytearray_to_string(cx, this_obj, encoding.get(), args.rval());
}
GJS_JSAPI_RETURN_CONVENTION
diff --git a/gjs/context.cpp b/gjs/context.cpp
index 948ca8c6..41e3297e 100644
--- a/gjs/context.cpp
+++ b/gjs/context.cpp
@@ -77,6 +77,7 @@
#include "gjs/objectbox.h"
#include "gjs/profiler-private.h"
#include "gjs/profiler.h"
+#include "gjs/text-encoding.h"
#include "modules/modules.h"
#include "util/log.h"
@@ -319,6 +320,8 @@ gjs_context_class_init(GjsContextClass *klass)
}
gjs_register_native_module("_byteArrayNative", gjs_define_byte_array_stuff);
+ gjs_register_native_module("_encodingNative",
+ gjs_define_text_encoding_stuff);
gjs_register_native_module("_gi", gjs_define_private_gi_stuff);
gjs_register_native_module("gi", gjs_define_repo);
diff --git a/gjs/text-encoding.cpp b/gjs/text-encoding.cpp
new file mode 100644
index 00000000..98d3dca9
--- /dev/null
+++ b/gjs/text-encoding.cpp
@@ -0,0 +1,154 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; -*- */
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: 2010 litl, LLC
+// SPDX-FileCopyrightText: 2021 Evan Welsh
+
+#include <config.h>
+
+#include <stdint.h>
+#include <string.h> // for strcmp, memchr, strlen
+
+#include <algorithm>
+#include <vector>
+
+#include <gio/gio.h>
+#include <girepository.h>
+#include <glib-object.h>
+#include <glib.h>
+
+#include <js/ArrayBuffer.h>
+#include <js/CallArgs.h>
+#include <js/CharacterEncoding.h>
+#include <js/GCAPI.h> // for AutoCheckCannotGC
+#include <js/PropertySpec.h>
+#include <js/RootingAPI.h>
+#include <js/TypeDecls.h>
+#include <js/Utility.h> // for UniqueChars
+#include <jsapi.h> // for JS_DefineFunctionById, JS_DefineFun...
+#include <jsfriendapi.h> // for JS_NewUint8ArrayWithBuffer, GetUint...
+
+#include "gi/boxed.h"
+#include "gjs/atoms.h"
+#include "gjs/context-private.h"
+#include "gjs/deprecation.h"
+#include "gjs/jsapi-util-args.h"
+#include "gjs/jsapi-util.h"
+#include "gjs/text-encoding.h"
+
+GJS_JSAPI_RETURN_CONVENTION
+static bool to_string_impl_slow(JSContext* cx, uint8_t* data, uint32_t len,
+ const char* encoding,
+ JS::MutableHandleValue rval) {
+ size_t bytes_written;
+ GError* error = nullptr;
+ GjsAutoChar u16_str = g_convert(reinterpret_cast<char*>(data), len,
+ // Make sure the bytes of the UTF-16 string are laid out in memory
+ // such that we can simply reinterpret_cast<char16_t> them.
+#if G_BYTE_ORDER == G_LITTLE_ENDIAN
+ "UTF-16LE",
+#else
+ "UTF-16BE",
+#endif
+ encoding, nullptr, /* bytes read */
+ &bytes_written, &error);
+ if (!u16_str)
+ return gjs_throw_gerror_message(cx, error); // frees GError
+
+ // bytes_written should be bytes in a UTF-16 string so should be a multiple
+ // of 2
+ g_assert((bytes_written % 2) == 0);
+
+ // g_convert 0-terminates the string, although the 0 isn't included in
+ // bytes_written
+ JSString* s =
+ JS_NewUCStringCopyZ(cx, reinterpret_cast<char16_t*>(u16_str.get()));
+ if (!s)
+ return false;
+
+ rval.setString(s);
+ return true;
+}
+
+// implement ByteArray.toString() with an optional encoding arg
+bool bytearray_to_string(JSContext* context, JS::HandleObject byte_array,
+ const char* encoding, JS::MutableHandleValue rval) {
+ if (!JS_IsUint8Array(byte_array)) {
+ gjs_throw(context,
+ "Argument to ByteArray.toString() must be a Uint8Array");
+ return false;
+ }
+
+ bool encoding_is_utf8;
+ uint8_t* data;
+
+ if (encoding) {
+ /* maybe we should be smarter about utf8 synonyms here.
+ * doesn't matter much though. encoding_is_utf8 is
+ * just an optimization anyway.
+ */
+ encoding_is_utf8 = (strcmp(encoding, "UTF-8") == 0);
+ } else {
+ encoding_is_utf8 = true;
+ }
+
+ uint32_t len;
+ bool is_shared_memory;
+ js::GetUint8ArrayLengthAndData(byte_array, &len, &is_shared_memory, &data);
+
+ if (len == 0) {
+ rval.setString(JS_GetEmptyString(context));
+ return true;
+ }
+
+ if (!encoding_is_utf8)
+ return to_string_impl_slow(context, data, len, encoding, rval);
+
+ // optimization, avoids iconv overhead and runs libmozjs hardwired
+ // utf8-to-utf16
+
+ // If there are any 0 bytes, including the terminating byte, stop at the
+ // first one
+ if (data[len - 1] == 0 || memchr(data, 0, len)) {
+ if (!gjs_string_from_utf8(context, reinterpret_cast<char*>(data), rval))
+ return false;
+ } else {
+ if (!gjs_string_from_utf8_n(context, reinterpret_cast<char*>(data), len,
+ rval))
+ return false;
+ }
+
+ uint8_t* current_data;
+ uint32_t current_len;
+ bool ignore_val;
+
+ // If a garbage collection occurs between when we call
+ // js::GetUint8ArrayLengthAndData and return from gjs_string_from_utf8, a
+ // use-after-free corruption can occur if the garbage collector shifts the
+ // location of the Uint8Array's private data. To mitigate this we call
+ // js::GetUint8ArrayLengthAndData again and then compare if the length and
+ // pointer are still the same. If the pointers differ, we use the slow path
+ // to ensure no data corruption occurred. The shared-ness of an array cannot
+ // change between calls, so we ignore it.
+ js::GetUint8ArrayLengthAndData(byte_array, &current_len, &ignore_val,
+ &current_data);
+
+ // Ensure the private data hasn't changed
+ if (current_len == len && current_data == data)
+ return true;
+
+ // This was the UTF-8 optimized path, so we explicitly pass the encoding
+ return to_string_impl_slow(context, current_data, current_len, "UTF-8",
+ rval);
+}
+
+static JSFunctionSpec gjs_text_encoding_module_funcs[] = {JS_FS_END};
+
+bool gjs_define_text_encoding_stuff(JSContext* cx,
+ JS::MutableHandleObject module) {
+ JSObject* new_obj = JS_NewPlainObject(cx);
+ if (!new_obj)
+ return false;
+ module.set(new_obj);
+
+ return JS_DefineFunctions(cx, module, gjs_text_encoding_module_funcs);
+}
diff --git a/gjs/text-encoding.h b/gjs/text-encoding.h
new file mode 100644
index 00000000..e9425392
--- /dev/null
+++ b/gjs/text-encoding.h
@@ -0,0 +1,23 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; -*- */
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: 2021 Evan Welsh
+
+#pragma once
+
+#include <config.h>
+
+#include <stddef.h> // for size_t
+
+#include <glib.h>
+
+#include <js/TypeDecls.h>
+
+#include "gjs/macros.h"
+
+GJS_JSAPI_RETURN_CONVENTION
+bool bytearray_to_string(JSContext* cx, JS::HandleObject uint8array,
+ const char* encoding, JS::MutableHandleValue rval);
+
+GJS_JSAPI_RETURN_CONVENTION
+bool gjs_define_text_encoding_stuff(JSContext* cx,
+ JS::MutableHandleObject module);
diff --git a/meson.build b/meson.build
index 267ab9ad..1a8a0b63 100644
--- a/meson.build
+++ b/meson.build
@@ -406,6 +406,7 @@ libgjs_sources = [
'gjs/native.cpp', 'gjs/native.h',
'gjs/objectbox.cpp', 'gjs/objectbox.h',
'gjs/profiler.cpp', 'gjs/profiler-private.h',
+ 'gjs/text-encoding.cpp', 'gjs/text-encoding.h',
'gjs/stack.cpp',
'modules/console.cpp', 'modules/console.h',
'modules/modules.cpp', 'modules/modules.h',
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]