[gjs/ewlsh/text-encoding] modules: Implement WHATWG Encoding specification
- From: Evan Welsh <ewlsh src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gjs/ewlsh/text-encoding] modules: Implement WHATWG Encoding specification
- Date: Wed, 2 Jun 2021 02:46:35 +0000 (UTC)
commit 1ed3db31e9eccc5a1242ba38e6e4fd8ec298234d
Author: Evan Welsh <contact evanwelsh com>
Date: Tue Jun 1 22:44:57 2021 -0400
modules: Implement WHATWG Encoding specification
.eslintignore | 2 +
.eslintrc.yml | 2 +
gjs/byteArray.cpp | 81 ++++-
gjs/jsapi-util-string.cpp | 48 ++-
gjs/jsapi-util.h | 5 +
gjs/text-encoding.cpp | 558 +++++++++++++++++++++--------
gjs/text-encoding.h | 17 +-
installed-tests/js/meson.build | 1 +
installed-tests/js/testEncoding.js | 661 +++++++++++++++++++++++++++++++++++
js.gresource.xml | 3 +
modules/core/_encodings.js | 305 ++++++++++++++++
modules/core/_singleByteEncodings.js | 40 +++
modules/core/_text.js | 189 ++++++++++
modules/script/_bootstrap/default.js | 13 +
modules/script/byteArray.js | 28 +-
15 files changed, 1767 insertions(+), 186 deletions(-)
---
diff --git a/.eslintignore b/.eslintignore
index 9ee950d3..8f8f93ff 100644
--- a/.eslintignore
+++ b/.eslintignore
@@ -3,4 +3,6 @@
installed-tests/js/jasmine.js
installed-tests/js/modules/badOverrides/WarnLib.js
+# Until ESLint merges class fields.
+modules/core/_text.js
modules/script/jsUnit.js
diff --git a/.eslintrc.yml b/.eslintrc.yml
index 733db371..6887f1cb 100644
--- a/.eslintrc.yml
+++ b/.eslintrc.yml
@@ -253,5 +253,7 @@ globals:
print: readonly
printerr: readonly
window: readonly
+ TextEncoder: readonly
+ TextDecoder: readonly
parserOptions:
ecmaVersion: 2020
diff --git a/gjs/byteArray.cpp b/gjs/byteArray.cpp
index 341df69e..5bef1585 100644
--- a/gjs/byteArray.cpp
+++ b/gjs/byteArray.cpp
@@ -37,6 +37,26 @@ static void bytes_unref_arraybuffer(void* contents [[maybe_unused]],
g_bytes_unref(gbytes);
}
+// toString() module function: decodes the given byte array into a JS string.
+// The optional second argument names the encoding; "utf-8" is used when it is
+// omitted. Decoding is requested as ZERO_TERMINATED (stop at the first zero
+// byte) and fatal (invalid input raises an exception rather than being
+// replaced).
+GJS_JSAPI_RETURN_CONVENTION
+static bool to_string_func(JSContext* cx, unsigned argc, JS::Value* vp) {
+ JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
+ JS::UniqueChars encoding;
+ JS::RootedObject byte_array(cx);
+
+ if (!gjs_parse_call_args(cx, "toString", args, "o|s", "byteArray",
+ &byte_array, "encoding", &encoding))
+ return false;
+
+ // The encoding argument is optional; default to UTF-8 when it is absent.
+ const char* actual_encoding = encoding ? encoding.get() : "utf-8";
+ JS::RootedString str(cx, gjs_decode_from_uint8array(
+ cx, byte_array, actual_encoding,
+ GjsStringTermination::ZERO_TERMINATED, true));
+ if (!str)
+ return false;
+
+ args.rval().setString(str);
+ return true;
+}
/* Workaround to keep existing code compatible. This function is tacked onto
* any Uint8Array instances created in situations where previously a ByteArray
@@ -53,24 +73,61 @@ static bool instance_to_string_func(JSContext* cx, unsigned argc,
if (!gjs_parse_call_args(cx, "toString", args, "|s", "encoding", &encoding))
return false;
- return to_string_impl(cx, this_obj, encoding.get(), args.rval());
+ const char* actual_encoding = encoding ? encoding.get() : "utf-8";
+ JS::RootedString str(cx, gjs_decode_from_uint8array(
+ cx, this_obj, actual_encoding,
+ GjsStringTermination::ZERO_TERMINATED, true));
+ if (!str)
+ return false;
+
+ args.rval().setString(str);
+ return true;
}
+/* fromString() function implementation.
+ *
+ * Encodes the string passed as first argument into a new Uint8Array, using
+ * the encoding named by the optional second argument ("utf-8" when omitted or
+ * undefined). The resulting array gets the legacy toString() instance method
+ * defined on it before it is returned.
+ */
GJS_JSAPI_RETURN_CONVENTION
-static bool define_to_string_func(JSContext* context, unsigned argc, JS::Value* vp) {
- JS::CallArgs argv = JS::CallArgsFromVp (argc, vp);
-
+static bool from_string_func(JSContext* cx, unsigned argc, JS::Value* vp) {
JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
- JS::RootedObject obj(context);
- if (!gjs_parse_call_args(context, "defineToString", args, "o", "obj", &obj))
+
+ if (!args.requireAtLeast(cx, "fromString", 1))
return false;
- const GjsAtoms& atoms = GjsContextPrivate::atoms(context);
- if (!JS_DefineFunctionById(context, obj, atoms.to_string(),
+ if (!args[0].isString()) {
+ gjs_throw(cx, "First argument for fromString() should be a string.");
+ return false;
+ }
+
+ // Only check the argument type if there is more than one argument.
+ if (args.length() > 1 && !args[1].isString() && !args[1].isUndefined()) {
+ gjs_throw(cx,
+ "Second argument for fromString() should be a string or "
+ "undefined.");
+ return false;
+ }
+
+ GjsAutoChar encoding = nullptr;
+ // If more than one argument is passed and it is a string,
+ // convert it to an encoding label.
+ if (args.length() > 1 && args[1].isString()) {
+ JS::RootedString encoding_str(cx, args[1].toString());
+ JS::UniqueChars chars(JS_EncodeStringToUTF8(cx, encoding_str));
+ // A null result means the conversion failed; bail out instead of
+ // silently falling back to the default encoding.
+ if (!chars)
+ return false;
+ encoding = g_strdup(chars.get());
+ }
+
+ JS::RootedString str(cx, args[0].toString());
+ JS::RootedObject uint8array(
+ cx,
+ gjs_encode_to_uint8array(cx, str, encoding ? encoding.get() : "utf-8",
+ GjsStringTermination::ZERO_TERMINATED));
+ if (!uint8array)
+ return false;
+
+ const GjsAtoms& atoms = GjsContextPrivate::atoms(cx);
+ if (!JS_DefineFunctionById(cx, uint8array, atoms.to_string(),
instance_to_string_func, 1, 0))
return false;
- argv.rval().setUndefined();
+ args.rval().setObject(*uint8array);
return true;
}
@@ -80,7 +137,7 @@ from_gbytes_func(JSContext *context,
unsigned argc,
JS::Value *vp)
{
- JS::CallArgs argv = JS::CallArgsFromVp (argc, vp);
+ JS::CallArgs argv = JS::CallArgsFromVp(argc, vp);
JS::RootedObject bytes_obj(context);
GBytes *gbytes;
@@ -160,9 +217,9 @@ GByteArray* gjs_byte_array_get_byte_array(JSObject* obj) {
}
+// Native functions exposed on the byteArray module object. The numeric
+// argument after each callback is the function's declared argument count.
static JSFunctionSpec gjs_byte_array_module_funcs[] = {
+ JS_FN("fromString", from_string_func, 2, 0),
JS_FN("fromGBytes", from_gbytes_func, 1, 0),
- JS_FN("defineToString", define_to_string_func, 1, 0),
- JS_FS_END};
+ JS_FN("toString", to_string_func, 2, 0), JS_FS_END};
bool
gjs_define_byte_array_stuff(JSContext *cx,
diff --git a/gjs/jsapi-util-string.cpp b/gjs/jsapi-util-string.cpp
index 5fc1164a..21a643ea 100644
--- a/gjs/jsapi-util-string.cpp
+++ b/gjs/jsapi-util-string.cpp
@@ -98,6 +98,47 @@ JS::UniqueChars gjs_string_to_utf8(JSContext* cx, const JS::Value value) {
return JS_EncodeStringToUTF8(cx, str);
}
+/**
+ * gjs_lossy_string_from_utf8:
+ *
+ * @brief Converts a zero-terminated array of UTF-8 characters to a JS string.
+ * Instead of throwing, any invalid sequences are converted by SpiderMonkey's
+ * lossy decoder to its replacement character.
+ *
+ * @param cx the current #JSContext
+ * @param utf8_string a zero-terminated array of UTF-8 characters
+ *
+ * @returns the new string, or nullptr if the conversion or the string
+ * allocation failed
+ */
+JSString* gjs_lossy_string_from_utf8(JSContext* cx, const char* utf8_string) {
+ JS::ConstUTF8CharsZ chars(utf8_string, strlen(utf8_string));
+ size_t len;
+ // LossyUTF8CharsToNewTwoByteCharsZ() returns a newly allocated buffer.
+ // Own it with UniqueTwoByteChars so it is freed after being copied below,
+ // rather than leaked.
+ JS::UniqueTwoByteChars twobyte_chars(
+ JS::LossyUTF8CharsToNewTwoByteCharsZ(cx, chars, &len, js::MallocArena)
+ .get());
+
+ if (!twobyte_chars)
+ return nullptr;
+
+ return JS_NewUCStringCopyN(cx, twobyte_chars.get(), len);
+}
+
+/**
+ * gjs_lossy_string_from_utf8_n:
+ *
+ * @brief Provides the same conversion behavior as gjs_lossy_string_from_utf8
+ * with a fixed length. See gjs_lossy_string_from_utf8()
+ *
+ * @param cx the current #JSContext
+ * @param utf8_string an array of UTF-8 characters, not necessarily
+ * zero-terminated
+ * @param len the number of bytes of @utf8_string to convert
+ *
+ * @returns the new string, or nullptr if the conversion or the string
+ * allocation failed
+ */
+JSString* gjs_lossy_string_from_utf8_n(JSContext* cx, const char* utf8_string,
+ size_t len) {
+ JS::UTF8Chars chars(utf8_string, len);
+ size_t outlen;
+ // Own the newly allocated UTF-16 buffer so it is freed after being copied
+ // into the JSString, rather than leaked.
+ JS::UniqueTwoByteChars twobyte_chars(
+ JS::LossyUTF8CharsToNewTwoByteCharsZ(cx, chars, &outlen,
+ js::MallocArena)
+ .get());
+ if (!twobyte_chars)
+ return nullptr;
+
+ return JS_NewUCStringCopyN(cx, twobyte_chars.get(), outlen);
+}
+
bool
gjs_string_from_utf8(JSContext *context,
const char *utf8_string,
@@ -105,10 +146,11 @@ gjs_string_from_utf8(JSContext *context,
{
JS::ConstUTF8CharsZ chars(utf8_string, strlen(utf8_string));
JS::RootedString str(context, JS_NewStringCopyUTF8Z(context, chars));
- if (str)
- value_p.setString(str);
+ if (!str)
+ return false;
- return str != nullptr;
+ value_p.setString(str);
+ return true;
}
bool
diff --git a/gjs/jsapi-util.h b/gjs/jsapi-util.h
index 4e399f25..0728f404 100644
--- a/gjs/jsapi-util.h
+++ b/gjs/jsapi-util.h
@@ -429,6 +429,11 @@ void gjs_warning_reporter(JSContext*, JSErrorReport* report);
GJS_JSAPI_RETURN_CONVENTION
JS::UniqueChars gjs_string_to_utf8(JSContext* cx, const JS::Value string_val);
GJS_JSAPI_RETURN_CONVENTION
+JSString* gjs_lossy_string_from_utf8(JSContext* cx, const char* utf8_string);
+GJS_JSAPI_RETURN_CONVENTION
+JSString* gjs_lossy_string_from_utf8_n(JSContext* cx, const char* utf8_string,
+ size_t len);
+GJS_JSAPI_RETURN_CONVENTION
bool gjs_string_from_utf8(JSContext *context,
const char *utf8_string,
JS::MutableHandleValue value_p);
diff --git a/gjs/text-encoding.cpp b/gjs/text-encoding.cpp
index 5e706a83..5b2d22a6 100644
--- a/gjs/text-encoding.cpp
+++ b/gjs/text-encoding.cpp
@@ -41,86 +41,240 @@ static void gfree_arraybuffer_contents(void* contents, void*) {
g_free(contents);
}
-GJS_JSAPI_RETURN_CONVENTION
-bool to_string_impl_slow(JSContext* cx, uint8_t* data, uint32_t len,
- const char* encoding, JS::MutableHandleValue rval) {
- size_t bytes_written;
- GError* error = nullptr;
- GjsAutoChar u16_str = g_convert(reinterpret_cast<char*>(data), len,
- // Make sure the bytes of the UTF-16 string are laid out in memory
- // such that we can simply reinterpret_cast<char16_t> them.
+// Throws a JS TypeError whose message is taken from @error, then frees
+// @error. Returns early without throwing when @error is null.
+static void gjs_throw_type_error_from_gerror(JSContext* cx, GError* error) {
+ g_return_if_fail(error);
+ gjs_throw_custom(cx, JSProto_TypeError, nullptr, "%s", error->message);
+ g_error_free(error);
+}
+
+// UTF16_CODESET is used to encode and decode UTF-16 buffers with
+// iconv. To ensure the output of iconv is laid out in memory correctly
+// we have to use UTF-16LE on little endian systems and UTF-16BE on big
+// endian systems.
+//
+// This ensures we can simply reinterpret_cast<char16_t> iconv's output.
#if G_BYTE_ORDER == G_LITTLE_ENDIAN
- "UTF-16LE",
+static const char* UTF16_CODESET = "UTF-16LE";
#else
- "UTF-16BE",
+static const char* UTF16_CODESET = "UTF-16BE";
#endif
- encoding, nullptr, /* bytes read */
- &bytes_written, &error);
- if (!u16_str)
- return gjs_throw_gerror_message(cx, error); // frees GError
- // bytes_written should be bytes in a UTF-16 string so should be a multiple
- // of 2
- g_assert((bytes_written % 2) == 0);
+// Lossy decoder: converts bytes_len bytes of from_codeset-encoded data to a
+// JS string through a GCharsetConverter that targets UTF16_CODESET. Input the
+// converter reports as invalid or incomplete is skipped one byte at a time and
+// replaced with U+FFFD instead of aborting. Returns nullptr with a TypeError
+// pending if the converter cannot be created or an unexpected conversion
+// error occurs.
+static JSString* gjs_lossy_decode_from_uint8array_slow(
+ JSContext* cx, uint8_t* bytes, size_t bytes_len, const char* from_codeset) {
+ GError* error = nullptr;
+ GjsAutoUnref<GCharsetConverter> converter(
+ g_charset_converter_new(UTF16_CODESET, from_codeset, &error));
- // g_convert 0-terminates the string, although the 0 isn't included in
- // bytes_written
- JSString* s =
- JS_NewUCStringCopyZ(cx, reinterpret_cast<char16_t*>(u16_str.get()));
- if (!s)
- return false;
+ // This should only throw if an encoding is not available.
+ if (error) {
+ gjs_throw_type_error_from_gerror(cx, error);
+ return nullptr;
+ }
- rval.setString(s);
- return true;
+ // TODO: We can likely be more intelligent about our initial
+ // allocation and allocate based on bytes_len
+ int buffer_size = 1024;
+
+ // Cast data to correct input types
+ const char* input = reinterpret_cast<const char*>(bytes);
+ size_t input_len = bytes_len;
+
+ // The base string that we'll append to.
+ std::u16string output_str = u"";
+
+ do {
+ // Create a buffer to convert into.
+ // NOTE(review): buffer_size is a runtime value, so this is a
+ // variable-length array, which is a compiler extension rather than
+ // standard C++ - consider a heap-allocated buffer instead.
+ char buffer[buffer_size];
+ size_t bytes_written = 0, bytes_read = 0;
+
+ g_converter_convert(G_CONVERTER(converter.get()), input, input_len,
+ buffer, buffer_size, G_CONVERTER_INPUT_AT_END,
+ &bytes_read, &bytes_written, &error);
+
+ // If bytes were read, adjust input.
+ if (bytes_read > 0) {
+ input += bytes_read;
+ input_len -= bytes_read;
+ }
+
+ // If bytes were written, append the buffer contents to our string
+ // accumulator
+ if (bytes_written > 0) {
+ char16_t* utf16_buffer = reinterpret_cast<char16_t*>(buffer);
+ // Each UTF-16 code unit is exactly 2 bytes wide.
+ output_str.append(utf16_buffer, bytes_written / 2);
+ } else if (error) {
+ // A PARTIAL_INPUT error can only occur if the user does not provide
+ // the full sequence for a multi-byte character, we skip over the
+ // next character and insert a unicode fallback.
+
+ // An INVALID_DATA error occurs when there is no way to decode a
+ // given byte into UTF-16 or the given byte does not exist in the
+ // source encoding.
+ if (g_error_matches(error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA) ||
+ g_error_matches(error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT)) {
+ // If we're already at the end of the string, don't insert a
+ // fallback.
+ if (input_len > 0) {
+ // Skip the next byte and reduce length by one.
+ input += 1;
+ input_len -= 1;
+
+ // Append the unicode fallback character to the output
+ output_str.append(u"\ufffd", 1);
+ }
+
+ // Clear the error.
+ g_clear_error(&error);
+ } else if (g_error_matches(error, G_IO_ERROR,
+ G_IO_ERROR_NO_SPACE)) {
+ // If the buffer was full increase the buffer
+ // size and re-try the conversion.
+ buffer_size += 512;
+
+ // Clear the error.
+ g_clear_error(&error);
+ }
+ }
+
+ // Stop decoding if an unknown error occurs.
+ } while (input_len > 0 && !error);
+
+ // An unexpected error occurred.
+ if (error) {
+ gjs_throw_type_error_from_gerror(cx, error);
+ return nullptr;
+ }
+
+ // Copy the accumulator's data into a JSString of Unicode (UTF-16) chars.
+ return JS_NewUCStringCopyN(cx, output_str.c_str(), output_str.size());
}
-/* implement toString() with an optional encoding arg */
-GJS_JSAPI_RETURN_CONVENTION
-bool to_string_impl(JSContext* context, JS::HandleObject byte_array,
- const char* encoding, JS::MutableHandleValue rval) {
- if (!JS_IsUint8Array(byte_array)) {
- gjs_throw(context,
- "Argument to ByteArray.toString() must be a Uint8Array");
- return false;
+// Decodes input_len bytes of @encoding-encoded data into a JS string using
+// GLib's iconv-based conversion. When @fatal is false the work is delegated to
+// the lossy decoder; otherwise any g_convert() error is thrown as a TypeError
+// and nullptr is returned.
+[[nodiscard]] JSString* gjs_decode_from_uint8array_slow(JSContext* cx,
+ uint8_t* input,
+ uint32_t input_len,
+ const char* encoding,
+ bool fatal) {
+ // If the decoding is not fatal we use the lossy decoder.
+ if (!fatal)
+ return gjs_lossy_decode_from_uint8array_slow(cx, input, input_len,
+ encoding);
+
+ size_t bytes_written, bytes_read;
+ GError* error = nullptr;
+
+ GjsAutoChar bytes =
+ g_convert(reinterpret_cast<char*>(input), input_len, UTF16_CODESET,
+ encoding, &bytes_read, &bytes_written, &error);
+
+ if (error) {
+ gjs_throw_type_error_from_gerror(cx, error);
+ return nullptr;
}
- bool encoding_is_utf8;
- uint8_t* data;
+ // bytes_written should be bytes in a UTF-16 string so should be a
+ // multiple of 2
+ g_assert((bytes_written % 2) == 0);
- if (encoding) {
- /* maybe we should be smarter about utf8 synonyms here.
- * doesn't matter much though. encoding_is_utf8 is
- * just an optimization anyway.
- */
- encoding_is_utf8 = (strcmp(encoding, "UTF-8") == 0);
- } else {
- encoding_is_utf8 = true;
+ // Cast g_convert's output to char16_t and copy the data.
+ const char16_t* unicode_bytes = reinterpret_cast<char16_t*>(bytes.get());
+ return JS_NewUCStringCopyN(cx, unicode_bytes, bytes_written / 2);
+}
+
+// Returns true when @encoding is the label "utf-8" or "utf8", compared
+// case-insensitively and ignoring leading/trailing whitespace.
+// @encoding is passed straight to strcasecmp(), so it must not be null.
+[[nodiscard]] static bool is_utf8_label(const char* encoding) {
+ // We could be smarter about utf8 synonyms here.
+ // For now, we handle any casing and trailing/leading
+ // whitespace.
+ //
+ // is_utf8_label is only an optimization, so if a label
+ // doesn't match we just use the slower path.
+ if (strcasecmp(encoding, "utf-8") == 0 || strcasecmp(encoding, "utf8") == 0)
+ return true;
+
+ // No exact match: retry on a copy with surrounding whitespace stripped.
+ GjsAutoChar stripped(g_strdup(encoding));
+ return strcasecmp(g_strstrip(stripped), "utf-8") == 0 ||
+ strcasecmp(stripped, "utf8") == 0;
+}
+
+// Finds the length of a given data array, stopping at the first 0 byte.
+// Returns the index of the first 0 byte within the first @len bytes of @data,
+// or @len itself when no 0 byte is present.
+[[nodiscard]] static uint32_t zero_terminated_length(uint8_t* data,
+ uint32_t len) {
+ uint8_t *start = data, *end = data + (len * sizeof(uint8_t));
+ uint8_t* found = std::find(start, end, 0);
+ if (found != end)
+ return std::distance(data, found) / sizeof(uint8_t);
+
+ return len;
+}
+
+// decode() function implementation
+JSString* gjs_decode_from_uint8array(JSContext* cx, JS::HandleObject byte_array,
+ const char* encoding,
+ GjsStringTermination string_termination,
+ bool fatal) {
+ if (!JS_IsUint8Array(byte_array)) {
+ gjs_throw(cx, "Argument to decode() must be a Uint8Array");
+ return nullptr;
}
+ uint8_t* data;
uint32_t len;
bool is_shared_memory;
js::GetUint8ArrayLengthAndData(byte_array, &len, &is_shared_memory, &data);
- if (len == 0) {
- rval.setString(JS_GetEmptyString(context));
- return true;
- }
-
+ // If the desired behavior is zero-terminated, calculate the
+ // zero-terminated length of the given data. If the original
+ // length, len, is smaller than the zero-terminated length,
+ // use it.
+ if (len && string_termination == GjsStringTermination::ZERO_TERMINATED)
+ len = std::min(len, zero_terminated_length(data, len));
+
+ // If the calculated length is 0 we can just return an empty string.
+ if (len == 0)
+ return JS_GetEmptyString(cx);
+
+ // Optimization, only use glib's iconv-based converters if we're dealing
+ // with a non-UTF8 encoding. SpiderMonkey has highly optimized UTF-8 decoder
+ // and encoders.
+ bool encoding_is_utf8 = is_utf8_label(encoding);
if (!encoding_is_utf8)
- return to_string_impl_slow(context, data, len, encoding, rval);
+ return gjs_decode_from_uint8array_slow(cx, data, len, encoding, fatal);
- // optimization, avoids iconv overhead and runs libmozjs hardwired
- // utf8-to-utf16
-
- // If there are any 0 bytes, including the terminating byte, stop at the
- // first one
- if (data[len - 1] == 0 || memchr(data, 0, len)) {
- if (!gjs_string_from_utf8(context, reinterpret_cast<char*>(data), rval))
- return false;
+ JS::RootedString decoded(cx);
+ if (!fatal) {
+ decoded.set(gjs_lossy_string_from_utf8_n(
+ cx, reinterpret_cast<char*>(data), len));
} else {
- if (!gjs_string_from_utf8_n(context, reinterpret_cast<char*>(data), len,
- rval))
- return false;
+ JS::UTF8Chars chars(reinterpret_cast<char*>(data), len);
+ JS::RootedString str(cx, JS_NewStringCopyUTF8N(cx, chars));
+ decoded.set(str);
+
+ // If an exception occurred, we need to check if the
+ // exception was an InternalError. Unfortunately,
+ // SpiderMonkey's decoder can throw InternalError for some
+ // invalid UTF-8 sources, we have to convert this into a
+ // TypeError to match the Encoding specification.
+ if (!str) {
+ if (!JS_IsExceptionPending(cx))
+ return nullptr;
+ JS::RootedValue exc(cx);
+
+ if (!JS_GetPendingException(cx, &exc) || !exc.isObject())
+ return nullptr;
+
+ JS::RootedObject exc_obj(cx, &exc.toObject());
+ const JSClass* internal_error =
+ js::ProtoKeyToClass(JSProto_InternalError);
+ if (JS_InstanceOf(cx, exc_obj, internal_error, nullptr)) {
+ // Clear the existing exception.
+ JS_ClearPendingException(cx);
+ gjs_throw_custom(
+ cx, JSProto_TypeError, nullptr,
+ "The provided encoded data was not valid UTF-8");
+ }
+
+ return nullptr;
+ }
}
uint8_t* current_data;
@@ -128,78 +282,96 @@ bool to_string_impl(JSContext* context, JS::HandleObject byte_array,
bool ignore_val;
// If a garbage collection occurs between when we call
- // js::GetUint8ArrayLengthAndData and return from gjs_string_from_utf8, a
- // use-after-free corruption can occur if the garbage collector shifts the
- // location of the Uint8Array's private data. To mitigate this we call
- // js::GetUint8ArrayLengthAndData again and then compare if the length and
- // pointer are still the same. If the pointers differ, we use the slow path
- // to ensure no data corruption occurred. The shared-ness of an array cannot
- // change between calls, so we ignore it.
+ // js::GetUint8ArrayLengthAndData and return from
+ // gjs_decode_from_uint8array, a use-after-free corruption can occur if the
+ // garbage collector shifts the location of the Uint8Array's private data.
+ // To mitigate this we call js::GetUint8ArrayLengthAndData again and then
+ // compare if the length and pointer are still the same. If the pointers
+ // differ, we use the slow path to ensure no data corruption occurred. The
+ // shared-ness of an array cannot change between calls, so we ignore it.
js::GetUint8ArrayLengthAndData(byte_array, &current_len, &ignore_val,
&current_data);
// Ensure the private data hasn't changed
- if (current_len == len && current_data == data)
- return true;
+ if (current_data == data)
+ return decoded;
+
+ // Length shouldn't change across calls but recalculate
+ // based on the moved data to be sure.
+ if (current_len &&
+ string_termination == GjsStringTermination::ZERO_TERMINATED) {
+ current_len = std::min(
+ current_len, zero_terminated_length(current_data, current_len));
+ }
// This was the UTF-8 optimized path, so we explicitly pass the encoding
- return to_string_impl_slow(context, current_data, current_len, "UTF-8",
- rval);
+ return gjs_decode_from_uint8array_slow(cx, current_data, current_len,
+ "UTF-8", fatal);
}
+// decode() module function: decodes the Uint8Array given as first argument
+// using the encoding label given as second argument. The optional third
+// argument selects fatal decoding (defaults to false). The whole array is
+// decoded (EXPLICIT_LENGTH); zero bytes do not terminate the input.
GJS_JSAPI_RETURN_CONVENTION
-static bool to_string_func(JSContext* cx, unsigned argc, JS::Value* vp) {
+static bool gjs_decode(JSContext* cx, unsigned argc, JS::Value* vp) {
JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
- JS::UniqueChars encoding;
- JS::RootedObject byte_array(cx);
- if (!gjs_parse_call_args(cx, "toString", args, "o|s", "byteArray",
- &byte_array, "encoding", &encoding))
+ JS::RootedObject byte_array(cx);
+ JS::UniqueChars encoding;
+ bool fatal = false;
+ if (!gjs_parse_call_args(cx, "decode", args, "os|b", "byteArray",
+ &byte_array, "encoding", &encoding, "fatal",
+ &fatal))
return false;
- return to_string_impl(cx, byte_array, encoding.get(), args.rval());
+ JS::RootedString decoded(
+ cx, gjs_decode_from_uint8array(cx, byte_array, encoding.get(),
+ GjsStringTermination::EXPLICIT_LENGTH,
+ fatal));
+ // A null result means decoding failed and an exception is pending;
+ // propagate the failure instead of storing a null string in rval.
+ if (!decoded)
+ return false;
+
+ args.rval().setString(decoded);
+ return true;
}
-
-/* fromString() function implementation */
+// Encodes @str as UTF-8 into a newly allocated buffer.
+//
+// On success, *output receives the buffer and *output_len the number of UTF-8
+// bytes, excluding the zero terminator that is always appended so that callers
+// may safely run strlen() on the result. The string may itself contain
+// embedded zero bytes, so *output_len can exceed strlen(*output).
+// On failure, returns false with an exception pending.
GJS_JSAPI_RETURN_CONVENTION
-static bool
-from_string_func(JSContext *context,
- unsigned argc,
- JS::Value *vp)
-{
- JS::CallArgs argv = JS::CallArgsFromVp (argc, vp);
- JS::UniqueChars encoding;
- JS::UniqueChars utf8;
- bool encoding_is_utf8;
- JS::RootedObject obj(context), array_buffer(context);
+static bool gjs_string_to_utf8_n(JSContext* cx, JS::HandleString str,
+ char** output, size_t* output_len) {
+ JSLinearString* linear = JS_EnsureLinearString(cx, str);
+ if (!linear)
+ return false;
- if (!gjs_parse_call_args(context, "fromString", argv, "s|s",
- "string", &utf8,
- "encoding", &encoding))
+ size_t length = JS::GetDeflatedUTF8StringLength(linear);
+ // Reserve one extra byte for the zero terminator.
+ char* bytes = js_pod_malloc<char>(length + 1);
+ if (!bytes) {
+ // js_pod_malloc() has no context to report to, so report the
+ // failure here to honor the return convention.
+ JS_ReportOutOfMemory(cx);
return false;
+ }
- if (argc > 1) {
- /* maybe we should be smarter about utf8 synonyms here.
- * doesn't matter much though. encoding_is_utf8 is
- * just an optimization anyway.
- */
- encoding_is_utf8 = (strcmp(encoding.get(), "UTF-8") == 0);
- } else {
- encoding_is_utf8 = true;
- }
+ size_t deflated_length =
+ JS::DeflateStringToUTF8Buffer(linear, mozilla::Span(bytes, length));
+ g_assert(deflated_length == length);
+
+ // Terminate the buffer; the terminator is not counted in *output_len.
+ bytes[length] = '\0';
+
+ *output_len = length;
+ *output = bytes;
+ return true;
+}
+// encode() function implementation
+JSObject* gjs_encode_to_uint8array(JSContext* cx, JS::HandleString str,
+ const char* encoding,
+ GjsStringTermination string_termination) {
+ JS::RootedObject array_buffer(cx);
+
+ bool encoding_is_utf8 = is_utf8_label(encoding);
if (encoding_is_utf8) {
- /* optimization? avoids iconv overhead and runs
- * libmozjs hardwired utf16-to-utf8.
- */
- size_t len = strlen(utf8.get());
- array_buffer =
- JS::NewArrayBufferWithContents(context, len, utf8.release());
+ char* utf8;
+ size_t utf8_len;
+
+ if (!gjs_string_to_utf8_n(cx, str, &utf8, &utf8_len))
+ return nullptr;
+
+ if (string_termination == GjsStringTermination::ZERO_TERMINATED) {
+ utf8_len = std::min(utf8_len, strlen(utf8));
+ }
+
+ array_buffer = JS::NewArrayBufferWithContents(cx, utf8_len, utf8);
} else {
- JSString *str = argv[0].toString(); /* Rooted by argv */
- GError *error = NULL;
- char *encoded = NULL;
+ GError* error = nullptr;
+ char* encoded = nullptr;
gsize bytes_written;
/* Scope for AutoCheckCannotGC, will crash if a GC is triggered
@@ -209,51 +381,157 @@ from_string_func(JSContext *context,
size_t len;
if (JS_StringHasLatin1Chars(str)) {
- const JS::Latin1Char *chars =
- JS_GetLatin1StringCharsAndLength(context, nogc, str, &len);
- if (chars == NULL)
- return false;
-
- encoded = g_convert((char *) chars, len,
- encoding.get(), // to_encoding
- "LATIN1", /* from_encoding */
- NULL, /* bytes read */
- &bytes_written, &error);
+ const JS::Latin1Char* chars =
+ JS_GetLatin1StringCharsAndLength(cx, nogc, str, &len);
+ if (!chars)
+ return nullptr;
+
+ encoded =
+ g_convert(reinterpret_cast<const char*>(chars), len,
+ /* to_encoding */ encoding,
+ /* from_encoding */ "LATIN1",
+ /* bytes read */ nullptr, &bytes_written, &error);
} else {
- const char16_t *chars =
- JS_GetTwoByteStringCharsAndLength(context, nogc, str, &len);
- if (chars == NULL)
- return false;
-
- encoded = g_convert((char *) chars, len * 2,
- encoding.get(), // to_encoding
- "UTF-16", /* from_encoding */
- NULL, /* bytes read */
- &bytes_written, &error);
+ const char16_t* chars =
+ JS_GetTwoByteStringCharsAndLength(cx, nogc, str, &len);
+ if (!chars)
+ return nullptr;
+
+ encoded =
+ g_convert(reinterpret_cast<const char*>(chars), len * 2,
+ encoding, // to_encoding
+ "UTF-16", /* from_encoding */
+ nullptr, /* bytes read */
+ &bytes_written, &error);
}
}
- if (!encoded)
- return gjs_throw_gerror_message(context, error); // frees GError
+ if (!encoded) {
+ gjs_throw_type_error_from_gerror(cx, error); // frees GError
+ return nullptr;
+ }
- array_buffer =
- JS::NewExternalArrayBuffer(context, bytes_written, encoded,
- gfree_arraybuffer_contents, nullptr);
+ array_buffer = JS::NewExternalArrayBuffer(
+ cx, bytes_written, encoded, gfree_arraybuffer_contents, nullptr);
}
if (!array_buffer)
+ return nullptr;
+
+ return JS_NewUint8ArrayWithBuffer(cx, array_buffer, 0, -1);
+}
+
+// encodeInto() implementation: encodes @str as UTF-8 directly into the
+// storage of @uint8array and stores a plain object with "read" and "written"
+// properties in @rval, holding the pair of counts reported by
+// JS_EncodeStringToUTF8BufferPartial(). Throws if @uint8array is not a
+// Uint8Array or is backed by shared memory.
+GJS_JSAPI_RETURN_CONVENTION
+bool gjs_encode_into_uint8array(JSContext* cx, JS::HandleString str,
+ JS::HandleObject uint8array,
+ JS::MutableHandleValue rval) {
+ if (!JS_IsUint8Array(uint8array)) {
+ gjs_throw_custom(cx, JSProto_TypeError, nullptr,
+ "Argument to encodeInto() must be a Uint8Array");
+ return false;
+ }
+
+ auto len = JS_GetTypedArrayByteLength(uint8array);
+ bool shared = JS_GetTypedArraySharedness(uint8array);
+
+ if (shared) {
+ gjs_throw(cx, "Cannot encode data into shared memory.");
+ return false;
+ }
+
+ mozilla::Maybe<mozilla::Tuple<size_t, size_t>> results;
+
+ // The raw data pointer is only used inside this no-GC scope.
+ {
+ JS::AutoCheckCannotGC nogc(cx);
+ uint8_t* data = JS_GetUint8ArrayData(uint8array, &shared, nogc);
+
+ // We already checked for sharedness with JS_GetTypedArraySharedness
+ g_assert(!shared);
+
+ results = JS_EncodeStringToUTF8BufferPartial(
+ cx, str, mozilla::AsWritableChars(mozilla::Span(data, len)));
+ }
+
+ // An empty result is reported to the caller as out-of-memory.
+ if (!results) {
+ JS_ReportOutOfMemory(cx);
+ return false;
+ }
+
+ size_t read, written;
+
+ mozilla::Tie(read, written) = *results;
+
+ g_assert(written <= len);
+
+ JS::RootedObject result(cx, JS_NewPlainObject(cx));
+ if (!result)
+ return false;
+
+ JS::RootedValue readv(cx, JS::NumberValue(read)),
+ writtenv(cx, JS::NumberValue(written));
+
+ if (!JS_SetProperty(cx, result, "read", readv) ||
+ !JS_SetProperty(cx, result, "written", writtenv)) {
+ return false;
+ }
+
+ rval.setObject(*result);
+ return true;
+}
+
+// encode() module function: encodes the string given as first argument into
+// a new Uint8Array using the encoding label given as second argument. Both
+// arguments are required. The full string is encoded (EXPLICIT_LENGTH).
+GJS_JSAPI_RETURN_CONVENTION static bool gjs_encode(JSContext* cx, unsigned argc,
+ JS::Value* vp) {
+ JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
+ if (!args.requireAtLeast(cx, "encode", 2))
+ return false;
+
+ if (!args[0].isString()) {
+ gjs_throw(cx, "First argument for encode() should be a string.");
+ return false;
+ }
+
+ JS::RootedString str(cx, args[0].toString());
+ JS::UniqueChars encoding = gjs_string_to_utf8(cx, args[1]);
+ if (!encoding)
+ return false;
+
+ JS::RootedObject uint8array(
+ cx, gjs_encode_to_uint8array(cx, str, encoding.get(),
+ GjsStringTermination::EXPLICIT_LENGTH));
+ if (!uint8array)
return false;
- obj = JS_NewUint8ArrayWithBuffer(context, array_buffer, 0, -1);
- argv.rval().setObject(*obj);
+ args.rval().setObject(*uint8array);
return true;
}
+// encodeInto() module function: validates that the first argument is a string
+// and the second an object, then delegates to gjs_encode_into_uint8array(),
+// which writes its result object into the call's return value.
+GJS_JSAPI_RETURN_CONVENTION static bool gjs_encode_into(JSContext* cx,
+ unsigned argc,
+ JS::Value* vp) {
+ JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
+ if (!args.requireAtLeast(cx, "encodeInto", 2))
+ return false;
+
+ if (!args[0].isString()) {
+ gjs_throw(cx, "First argument for encodeInto() should be a string.");
+ return false;
+ }
+
+ if (!args[1].isObject()) {
+ gjs_throw(cx, "Second argument for encodeInto() should be an object.");
+ return false;
+ }
+
+ JS::RootedString str(cx, args[0].toString());
+ JS::RootedObject uint8array(cx, &args[1].toObject());
+
+ return gjs_encode_into_uint8array(cx, str, uint8array, args.rval());
+}
+// Native functions exposed on the text-encoding module object.
static JSFunctionSpec gjs_text_encoding_module_funcs[] = {
- JS_FN("fromString", from_string_func, 2, 0),
- JS_FN("toString", to_string_func, 2, 0),
- JS_FS_END};
+ JS_FN("decode", gjs_decode, 3, 0),
+ JS_FN("encodeInto", gjs_encode_into, 2, 0),
+ JS_FN("encode", gjs_encode, 2, 0), JS_FS_END};
bool gjs_define_text_encoding_stuff(JSContext* cx,
JS::MutableHandleObject module) {
diff --git a/gjs/text-encoding.h b/gjs/text-encoding.h
index b389acf2..54a780b2 100644
--- a/gjs/text-encoding.h
+++ b/gjs/text-encoding.h
@@ -15,11 +15,20 @@
#include "gjs/macros.h"
-[[nodiscard]] bool to_string_impl(JSContext* cx, JS::HandleObject uint8array,
- const char* encoding,
- JS::MutableHandleValue rval);
+// Selects how the encode/decode helpers determine where the data ends.
+enum class GjsStringTermination {
+ // The data ends at the first zero byte, if there is one.
+ ZERO_TERMINATED,
+ // The data's full length is used; zero bytes are ordinary content.
+ EXPLICIT_LENGTH,
+};
-[[nodiscard]] bool gjs_define_text_encoding_stuff(
+[[nodiscard]] JSString* gjs_decode_from_uint8array(
+ JSContext* cx, JS::HandleObject uint8array, const char* encoding,
+ GjsStringTermination string_termination, bool fatal);
+
+[[nodiscard]] JSObject* gjs_encode_to_uint8array(
+ JSContext* cx, JS::HandleString str, const char* encoding,
+ GjsStringTermination string_termination);
+
+GJS_JSAPI_RETURN_CONVENTION bool gjs_define_text_encoding_stuff(
JSContext* cx, JS::MutableHandleObject module);
#endif // GJS_TEXT_ENCODING_H_
diff --git a/installed-tests/js/meson.build b/installed-tests/js/meson.build
index e11f1418..cca525c1 100644
--- a/installed-tests/js/meson.build
+++ b/installed-tests/js/meson.build
@@ -94,6 +94,7 @@ subdir('libgjstesttools')
jasmine_tests = [
'self',
'ByteArray',
+ 'Encoding',
'Exceptions',
'Format',
'Fundamental',
diff --git a/installed-tests/js/testEncoding.js b/installed-tests/js/testEncoding.js
new file mode 100644
index 00000000..af9ca06a
--- /dev/null
+++ b/installed-tests/js/testEncoding.js
@@ -0,0 +1,661 @@
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: Copyright 2018-2020 the Deno authors. All rights reserved.
+
+// Modified from https://github.com/denoland/deno/blob/923214c53725651792f6d55c5401bf6b475622ea/op_crates/web/08_text_encoding.js
+// Data originally from https://encoding.spec.whatwg.org/encodings.json
+
+describe('Text Encoding', function () {
+ it('textDecoder', function () {
+ const fixture = new Uint8Array([
+ 0xf0, 0x9d, 0x93, 0xbd, 0xf0, 0x9d, 0x93, 0xae, 0xf0, 0x9d, 0x94, 0x81, 0xf0, 0x9d, 0x93, 0xbd,
+ ]);
+ const decoder = new TextDecoder();
+ expect(decoder.decode(fixture)).toBe('𝓽𝓮𝔁𝓽');
+ });
+
+ it('textDecoderIgnoreBOM', function () {
+ const fixture = new Uint8Array([
+ 0xef, 0xbb, 0xbf, 0xf0, 0x9d, 0x93, 0xbd, 0xf0, 0x9d, 0x93, 0xae, 0xf0, 0x9d, 0x94, 0x81, 0xf0, 0x9d, 0x93, 0xbd,
+ ]);
+ const decoder = new TextDecoder('utf-8', {ignoreBOM: true});
+ expect(decoder.decode(fixture)).toBe('𝓽𝓮𝔁𝓽');
+ });
+
+ it('textDecoderNotBOM', function () {
+ const fixture = new Uint8Array([
+ 0xef, 0xbb, 0x89, 0xf0, 0x9d, 0x93, 0xbd, 0xf0, 0x9d, 0x93, 0xae, 0xf0, 0x9d, 0x94, 0x81, 0xf0,
0x9d, 0x93, 0xbd,
+ ]);
+ const decoder = new TextDecoder('utf-8', {ignoreBOM: true});
+ expect(decoder.decode(fixture)).toBe('ﻉ𝓽𝓮𝔁𝓽');
+ });
+
+ it('textDecoderASCII', function () {
+ const fixture = new Uint8Array([0x89, 0x95, 0x9f, 0xbf]);
+ const decoder = new TextDecoder('ascii');
+ expect(decoder.decode(fixture)).toBe('‰•Ÿ¿');
+ });
+
+ it('textDecoderErrorEncoding', function () {
+ expect(() => new TextDecoder('Foo')).toThrowError("Invalid encoding label: 'Foo'");
+ });
+
+ it('textDecoderHandlesUndefined', function () {
+ const fixture = undefined;
+ const decoder = new TextDecoder();
+ expect(decoder.decode(fixture)).toBe('');
+ });
+
+ it('textDecoderThrowsOnEmpty', function () {
+ const fixture = '';
+ const decoder = new TextDecoder();
+
+ expect(() => decoder.decode(fixture))
+ .toThrowError('Provided input cannot be converted to ArrayBufferView or ArrayBuffer');
+ });
+
+ it('textDecoderThrowsOnNull', function () {
+ const fixture = null;
+ const decoder = new TextDecoder();
+
+ expect(() => decoder.decode(fixture))
+ .toThrowError('Provided input cannot be converted to ArrayBufferView or ArrayBuffer');
+ });
+
+ it('textEncoder', function () {
+ const fixture = '𝓽𝓮𝔁𝓽';
+ const encoder = new TextEncoder();
+
+ expect(Array.from(encoder.encode(fixture))).toEqual([
+ 0xf0, 0x9d, 0x93, 0xbd, 0xf0, 0x9d, 0x93, 0xae, 0xf0, 0x9d, 0x94, 0x81, 0xf0, 0x9d, 0x93, 0xbd,
+ ]);
+ });
+
+ it('textEncodeInto', function () {
+ const fixture = 'text';
+ const encoder = new TextEncoder();
+ const bytes = new Uint8Array(5);
+ const result = encoder.encodeInto(fixture, bytes);
+ expect(result.read).toBe(4);
+ expect(result.written).toBe(4);
+
+ expect(Array.from(bytes)).toEqual([0x74, 0x65, 0x78, 0x74, 0x00]);
+ });
+
+ it('textEncodeInto2', function () {
+ const fixture = '𝓽𝓮𝔁𝓽';
+ const encoder = new TextEncoder();
+ const bytes = new Uint8Array(17);
+ const result = encoder.encodeInto(fixture, bytes);
+ expect(result.read).toBe(8);
+ expect(result.written).toBe(16);
+
+ expect(Array.from(bytes)).toEqual([
+ 0xf0, 0x9d, 0x93, 0xbd, 0xf0, 0x9d, 0x93, 0xae, 0xf0, 0x9d, 0x94, 0x81, 0xf0, 0x9d, 0x93, 0xbd,
0x00,
+ ]);
+ });
+
+ it('textEncodeInto3', function () {
+ const fixture = '𝓽𝓮𝔁𝓽';
+ const encoder = new TextEncoder();
+ const bytes = new Uint8Array(5);
+ const result = encoder.encodeInto(fixture, bytes);
+ expect(result.read).toBe(2);
+ expect(result.written).toBe(4);
+
+ expect(Array.from(bytes)).toEqual([0xf0, 0x9d, 0x93, 0xbd, 0x00]);
+ });
+
+ xit('textDecoderSharedUint8Array', function () {
+ const ab = new SharedArrayBuffer(6);
+ const dataView = new DataView(ab);
+ const charCodeA = 'A'.charCodeAt(0);
+ for (let i = 0; i < ab.byteLength; i++)
+ dataView.setUint8(i, charCodeA + i);
+
+ const ui8 = new Uint8Array(ab);
+ const decoder = new TextDecoder();
+ const actual = decoder.decode(ui8);
+ expect(actual).toBe('ABCDEF');
+ });
+
+ xit('textDecoderSharedInt32Array', function () {
+ const ab = new SharedArrayBuffer(8);
+ const dataView = new DataView(ab);
+ const charCodeA = 'A'.charCodeAt(0);
+ for (let i = 0; i < ab.byteLength; i++)
+ dataView.setUint8(i, charCodeA + i);
+
+ const i32 = new Int32Array(ab);
+ const decoder = new TextDecoder();
+ const actual = decoder.decode(i32);
+ expect(actual).toBe('ABCDEFGH');
+ });
+
+ it('toStringShouldBeWebCompatibility', function () {
+ const encoder = new TextEncoder();
+
+ expect(encoder.toString()).toBe('[object TextEncoder]');
+
+ const decoder = new TextDecoder();
+ expect(decoder.toString()).toBe('[object TextDecoder]');
+ });
+
+ describe('singleByteEncodings', function () {
+ // Straight from https://encoding.spec.whatwg.org/encodings.json
+ const encodingsTable = [
+ {
+ encodings: [
+ {
+ labels: [
+ 'unicode-1-1-utf-8',
+ 'unicode11utf8',
+ 'unicode20utf8',
+ 'utf-8',
+ 'utf8',
+ 'x-unicode20utf8',
+ ],
+ name: 'UTF-8',
+ },
+ ],
+ heading: 'The Encoding',
+ },
+ {
+ encodings: [
+ {
+ labels: ['866', 'cp866', 'csibm866', 'ibm866'],
+ name: 'IBM866',
+ },
+ {
+ labels: [
+ 'csisolatin2',
+ 'iso-8859-2',
+ 'iso-ir-101',
+ 'iso8859-2',
+ 'iso88592',
+ 'iso_8859-2',
+ 'iso_8859-2:1987',
+ 'l2',
+ 'latin2',
+ ],
+ name: 'ISO-8859-2',
+ },
+ {
+ labels: [
+ 'csisolatin3',
+ 'iso-8859-3',
+ 'iso-ir-109',
+ 'iso8859-3',
+ 'iso88593',
+ 'iso_8859-3',
+ 'iso_8859-3:1988',
+ 'l3',
+ 'latin3',
+ ],
+ name: 'ISO-8859-3',
+ },
+ {
+ labels: [
+ 'csisolatin4',
+ 'iso-8859-4',
+ 'iso-ir-110',
+ 'iso8859-4',
+ 'iso88594',
+ 'iso_8859-4',
+ 'iso_8859-4:1988',
+ 'l4',
+ 'latin4',
+ ],
+ name: 'ISO-8859-4',
+ },
+ {
+ labels: [
+ 'csisolatincyrillic',
+ 'cyrillic',
+ 'iso-8859-5',
+ 'iso-ir-144',
+ 'iso8859-5',
+ 'iso88595',
+ 'iso_8859-5',
+ 'iso_8859-5:1988',
+ ],
+ name: 'ISO-8859-5',
+ },
+ {
+ labels: [
+ 'arabic',
+ 'asmo-708',
+ 'csiso88596e',
+ 'csiso88596i',
+ 'csisolatinarabic',
+ 'ecma-114',
+ 'iso-8859-6',
+ 'iso-8859-6-e',
+ 'iso-8859-6-i',
+ 'iso-ir-127',
+ 'iso8859-6',
+ 'iso88596',
+ 'iso_8859-6',
+ 'iso_8859-6:1987',
+ ],
+ name: 'ISO-8859-6',
+ },
+ {
+ labels: [
+ 'csisolatingreek',
+ 'ecma-118',
+ 'elot_928',
+ 'greek',
+ 'greek8',
+ 'iso-8859-7',
+ 'iso-ir-126',
+ 'iso8859-7',
+ 'iso88597',
+ 'iso_8859-7',
+ 'iso_8859-7:1987',
+ 'sun_eu_greek',
+ ],
+ name: 'ISO-8859-7',
+ },
+ {
+ labels: [
+ 'csiso88598e',
+ 'csisolatinhebrew',
+ 'hebrew',
+ 'iso-8859-8',
+ 'iso-8859-8-e',
+ 'iso-ir-138',
+ 'iso8859-8',
+ 'iso88598',
+ 'iso_8859-8',
+ 'iso_8859-8:1988',
+ 'visual',
+ ],
+ name: 'ISO-8859-8',
+ },
+ {
+ labels: ['csiso88598i', 'iso-8859-8-i', 'logical'],
+ name: 'ISO-8859-8-I',
+ },
+ {
+ labels: [
+ 'csisolatin6',
+ 'iso-8859-10',
+ 'iso-ir-157',
+ 'iso8859-10',
+ 'iso885910',
+ 'l6',
+ 'latin6',
+ ],
+ name: 'ISO-8859-10',
+ },
+ {
+ labels: ['iso-8859-13', 'iso8859-13', 'iso885913'],
+ name: 'ISO-8859-13',
+ },
+ {
+ labels: ['iso-8859-14', 'iso8859-14', 'iso885914'],
+ name: 'ISO-8859-14',
+ },
+ {
+ labels: [
+ 'csisolatin9',
+ 'iso-8859-15',
+ 'iso8859-15',
+ 'iso885915',
+ 'iso_8859-15',
+ 'l9',
+ ],
+ name: 'ISO-8859-15',
+ },
+ {
+ labels: ['iso-8859-16'],
+ name: 'ISO-8859-16',
+ },
+ {
+ labels: ['cskoi8r', 'koi', 'koi8', 'koi8-r', 'koi8_r'],
+ name: 'KOI8-R',
+ },
+ {
+ labels: ['koi8-ru', 'koi8-u'],
+ name: 'KOI8-U',
+ },
+ {
+ labels: ['csmacintosh', 'mac', 'macintosh', 'x-mac-roman'],
+ name: 'macintosh',
+ },
+ {
+ labels: [
+ 'dos-874',
+ 'iso-8859-11',
+ 'iso8859-11',
+ 'iso885911',
+ 'tis-620',
+ 'windows-874',
+ ],
+ name: 'windows-874',
+ },
+ {
+ labels: ['cp1250', 'windows-1250', 'x-cp1250'],
+ name: 'windows-1250',
+ },
+ {
+ labels: ['cp1251', 'windows-1251', 'x-cp1251'],
+ name: 'windows-1251',
+ },
+ {
+ labels: [
+ 'ansi_x3.4-1968',
+ 'ascii',
+ 'cp1252',
+ 'cp819',
+ 'csisolatin1',
+ 'ibm819',
+ 'iso-8859-1',
+ 'iso-ir-100',
+ 'iso8859-1',
+ 'iso88591',
+ 'iso_8859-1',
+ 'iso_8859-1:1987',
+ 'l1',
+ 'latin1',
+ 'us-ascii',
+ 'windows-1252',
+ 'x-cp1252',
+ ],
+ name: 'windows-1252',
+ },
+ {
+ labels: ['cp1253', 'windows-1253', 'x-cp1253'],
+ name: 'windows-1253',
+ },
+ {
+ labels: [
+ 'cp1254',
+ 'csisolatin5',
+ 'iso-8859-9',
+ 'iso-ir-148',
+ 'iso8859-9',
+ 'iso88599',
+ 'iso_8859-9',
+ 'iso_8859-9:1989',
+ 'l5',
+ 'latin5',
+ 'windows-1254',
+ 'x-cp1254',
+ ],
+ name: 'windows-1254',
+ },
+ {
+ labels: ['cp1255', 'windows-1255', 'x-cp1255'],
+ name: 'windows-1255',
+ },
+ {
+ labels: ['cp1256', 'windows-1256', 'x-cp1256'],
+ name: 'windows-1256',
+ },
+ {
+ labels: ['cp1257', 'windows-1257', 'x-cp1257'],
+ name: 'windows-1257',
+ },
+ {
+ labels: ['cp1258', 'windows-1258', 'x-cp1258'],
+ name: 'windows-1258',
+ },
+ {
+ labels: ['x-mac-cyrillic', 'x-mac-ukrainian'],
+ name: 'x-mac-cyrillic',
+ },
+ ],
+ heading: 'Legacy single-byte encodings',
+ },
+ {
+ encodings: [
+ {
+ labels: [
+ 'chinese',
+ 'csgb2312',
+ 'csiso58gb231280',
+ 'gb2312',
+ 'gb_2312',
+ 'gb_2312-80',
+ 'gbk',
+ 'iso-ir-58',
+ 'x-gbk',
+ ],
+ name: 'GBK',
+ },
+ {
+ labels: ['gb18030'],
+ name: 'gb18030',
+ },
+ ],
+ heading: 'Legacy multi-byte Chinese (simplified) encodings',
+ },
+ {
+ encodings: [
+ {
+ labels: ['big5', 'big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'],
+ name: 'Big5',
+ },
+ ],
+ heading: 'Legacy multi-byte Chinese (traditional) encodings',
+ },
+ {
+ encodings: [
+ {
+ labels: ['cseucpkdfmtjapanese', 'euc-jp', 'x-euc-jp'],
+ name: 'EUC-JP',
+ },
+ {
+ labels: ['csiso2022jp', 'iso-2022-jp'],
+ name: 'ISO-2022-JP',
+ },
+ {
+ labels: [
+ 'csshiftjis',
+ 'ms932',
+ 'ms_kanji',
+ 'shift-jis',
+ 'shift_jis',
+ 'sjis',
+ 'windows-31j',
+ 'x-sjis',
+ ],
+ name: 'Shift_JIS',
+ },
+ ],
+ heading: 'Legacy multi-byte Japanese encodings',
+ },
+ {
+ encodings: [
+ {
+ labels: [
+ 'cseuckr',
+ 'csksc56011987',
+ 'euc-kr',
+ 'iso-ir-149',
+ 'korean',
+ 'ks_c_5601-1987',
+ 'ks_c_5601-1989',
+ 'ksc5601',
+ 'ksc_5601',
+ 'windows-949',
+ ],
+ name: 'EUC-KR',
+ },
+ ],
+ heading: 'Legacy multi-byte Korean encodings',
+ },
+ {
+ encodings: [
+ {
+ labels: [
+ 'csiso2022kr',
+ 'hz-gb-2312',
+ 'iso-2022-cn',
+ 'iso-2022-cn-ext',
+ 'iso-2022-kr',
+ 'replacement',
+ ],
+ name: 'replacement',
+ },
+ {
+ labels: ['unicodefffe', 'utf-16be'],
+ name: 'UTF-16BE',
+ },
+ {
+ labels: [
+ 'csunicode',
+ 'iso-10646-ucs-2',
+ 'ucs-2',
+ 'unicode',
+ 'unicodefeff',
+ 'utf-16',
+ 'utf-16le',
+ ],
+ name: 'UTF-16LE',
+ },
+ {
+ labels: ['x-user-defined'],
+ name: 'x-user-defined',
+ },
+ ],
+ heading: 'Legacy miscellaneous encodings',
+ },
+ ];
+
+ const singleByteEncodings = encodingsTable.filter(group => {
+ return group.heading === 'Legacy single-byte encodings';
+ })[0].encodings;
+
+ // https://encoding.spec.whatwg.org/indexes.json
+ const singleByteIndexes = {
+ 'IBM866': [
+ 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054,
1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072,
1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 9617, 9618, 9619,
9474, 9508, 9569, 9570, 9558, 9557, 9571, 9553, 9559, 9565, 9564, 9563, 9488, 9492, 9524, 9516, 9500, 9472,
9532, 9566, 9567, 9562, 9556, 9577, 9574, 9568, 9552, 9580, 9575, 9576, 9572, 9573, 9561, 9560, 9554, 9555,
9579, 9578, 9496, 9484, 9608, 9604, 9612, 9616, 9600, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096,
1097, 1098, 1099, 1100, 1101, 1102, 1103, 1025, 1105, 1028, 1108, 1031, 1111, 1038, 1118, 176, 8729, 183,
8730, 8470, 164, 9632, 160,
+ ],
+ 'ISO-8859-2': [
+ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 728, 321, 164, 317, 346, 167,
168, 352, 350, 356, 377, 173, 381, 379, 176, 261, 731, 322, 180, 318, 347, 711, 184, 353, 351, 357, 378, 733,
382, 380, 340, 193, 194, 258, 196, 313, 262, 199, 268, 201, 280, 203, 282, 205, 206, 270, 272, 323, 327, 211,
212, 336, 214, 215, 344, 366, 218, 368, 220, 221, 354, 223, 341, 225, 226, 259, 228, 314, 263, 231, 269, 233,
281, 235, 283, 237, 238, 271, 273, 324, 328, 243, 244, 337, 246, 247, 345, 367, 250, 369, 252, 253, 355, 729,
+ ],
+ 'ISO-8859-3': [
+ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 294, 728, 163, 164, null, 292,
167, 168, 304, 350, 286, 308, 173, null, 379, 176, 295, 178, 179, 180, 181, 293, 183, 184, 305, 351, 287,
309, 189, null, 380, 192, 193, 194, null, 196, 266, 264, 199, 200, 201, 202, 203, 204, 205, 206, 207, null,
209, 210, 211, 212, 288, 214, 215, 284, 217, 218, 219, 220, 364, 348, 223, 224, 225, 226, null, 228, 267,
265, 231, 232, 233, 234, 235, 236, 237, 238, 239, null, 241, 242, 243, 244, 289, 246, 247, 285, 249, 250,
251, 252, 365, 349, 729,
+ ],
+ 'ISO-8859-4': [
+ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 312, 342, 164, 296, 315, 167,
168, 352, 274, 290, 358, 173, 381, 175, 176, 261, 731, 343, 180, 297, 316, 711, 184, 353, 275, 291, 359, 330,
382, 331, 256, 193, 194, 195, 196, 197, 198, 302, 268, 201, 280, 203, 278, 205, 206, 298, 272, 325, 332, 310,
212, 213, 214, 215, 216, 370, 218, 219, 220, 360, 362, 223, 257, 225, 226, 227, 228, 229, 230, 303, 269, 233,
281, 235, 279, 237, 238, 299, 273, 326, 333, 311, 244, 245, 246, 247, 248, 371, 250, 251, 252, 361, 363, 729,
+ ],
+ 'ISO-8859-5': [
+ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 1025, 1026, 1027, 1028, 1029,
1030, 1031, 1032, 1033, 1034, 1035, 1036, 173, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065,
1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083,
1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101,
1102, 1103, 8470, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 167, 1118, 1119,
+ ],
+ 'ISO-8859-6': [
+ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, null, null, null, 164, null, null,
null,
+ null, null, null, null, 1548, 173, null, null,
+ null, null, null, null, null, null, null, null,
+ null, null, null, 1563, null, null, null, 1567, null, 1569, 1570, 1571, 1572, 1573, 1574,
1575, 1576, 1577, 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592,
1593, 1594, null, null, null, null, null,
+ 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614,
1615, 1616, 1617, 1618, null, null, null, null, null,
+ null, null, null, null, null, null, null, null,
+ ],
+ 'ISO-8859-7': [
+ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 8216, 8217, 163, 8364, 8367, 166,
167, 168, 169, 890, 171, 172, 173, null, 8213, 176, 177, 178, 179, 900, 901, 902, 183, 904, 905, 906, 187,
908, 189, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929,
null, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950,
951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972,
973, 974, null,
+ ],
+ 'ISO-8859-8': [
+ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, null, 162, 163, 164, 165, 166,
167, 168, 169, 215, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 247, 187, 188,
189, 190, null,
+ null, null, null, null, null, null, null, null,
+ null, null, null, null, null, null, null, null,
+ null, null, null, null, null, null, null, null,
+ null, null, null, null, null, null, null, 8215, 1488, 1489, 1490, 1491, 1492, 1493, 1494,
1495, 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503, 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512,
1513, 1514, null, null, 8206, 8207, null,
+ ],
+ 'ISO-8859-10': [
+ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 274, 290, 298, 296, 310, 167,
315, 272, 352, 358, 381, 173, 362, 330, 176, 261, 275, 291, 299, 297, 311, 183, 316, 273, 353, 359, 382,
8213, 363, 331, 256, 193, 194, 195, 196, 197, 198, 302, 268, 201, 280, 203, 278, 205, 206, 207, 208, 325,
332, 211, 212, 213, 214, 360, 216, 370, 218, 219, 220, 221, 222, 223, 257, 225, 226, 227, 228, 229, 230, 303,
269, 233, 281, 235, 279, 237, 238, 239, 240, 326, 333, 243, 244, 245, 246, 361, 248, 371, 250, 251, 252, 253,
254, 312,
+ ],
+ 'ISO-8859-13': [
+ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 8221, 162, 163, 164, 8222, 166,
167, 216, 169, 342, 171, 172, 173, 174, 198, 176, 177, 178, 179, 8220, 181, 182, 183, 248, 185, 343, 187,
188, 189, 190, 230, 260, 302, 256, 262, 196, 197, 280, 274, 268, 201, 377, 278, 290, 310, 298, 315, 352, 323,
325, 211, 332, 213, 214, 215, 370, 321, 346, 362, 220, 379, 381, 223, 261, 303, 257, 263, 228, 229, 281, 275,
269, 233, 378, 279, 291, 311, 299, 316, 353, 324, 326, 243, 333, 245, 246, 247, 371, 322, 347, 363, 252, 380,
382, 8217,
+ ],
+ 'ISO-8859-14': [
+ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 7682, 7683, 163, 266, 267, 7690,
167, 7808, 169, 7810, 7691, 7922, 173, 174, 376, 7710, 7711, 288, 289, 7744, 7745, 182, 7766, 7809, 7767,
7811, 7776, 7923, 7812, 7813, 7777, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205,
206, 207, 372, 209, 210, 211, 212, 213, 214, 7786, 216, 217, 218, 219, 220, 221, 374, 223, 224, 225, 226,
227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 373, 241, 242, 243, 244, 245, 246, 7787,
248, 249, 250, 251, 252, 253, 375, 255,
+ ],
+ 'ISO-8859-15': [
+ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 8364, 165, 352,
167, 353, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 381, 181, 182, 183, 382, 185, 186, 187, 338,
339, 376, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210,
211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232,
233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
255,
+ ],
+ 'ISO-8859-16': [
+ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 261, 321, 8364, 8222, 352,
167, 353, 169, 536, 171, 377, 173, 378, 379, 176, 177, 268, 322, 381, 8221, 182, 183, 382, 269, 537, 187,
338, 339, 376, 380, 192, 193, 194, 258, 196, 262, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 272, 323,
210, 211, 212, 336, 214, 346, 368, 217, 218, 219, 220, 280, 538, 223, 224, 225, 226, 259, 228, 263, 230, 231,
232, 233, 234, 235, 236, 237, 238, 239, 273, 324, 242, 243, 244, 337, 246, 347, 369, 249, 250, 251, 252, 281,
539, 255,
+ ],
+ 'KOI8-R': [
+ 9472, 9474, 9484, 9488, 9492, 9496, 9500, 9508, 9516, 9524, 9532, 9600, 9604, 9608, 9612,
9616, 9617, 9618, 9619, 8992, 9632, 8729, 8730, 8776, 8804, 8805, 160, 8993, 176, 178, 183, 247, 9552, 9553,
9554, 1105, 9555, 9556, 9557, 9558, 9559, 9560, 9561, 9562, 9563, 9564, 9565, 9566, 9567, 9568, 9569, 1025,
9570, 9571, 9572, 9573, 9574, 9575, 9576, 9577, 9578, 9579, 9580, 169, 1102, 1072, 1073, 1094, 1076, 1077,
1092, 1075, 1093, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1103, 1088, 1089, 1090, 1091, 1078, 1074,
1100, 1099, 1079, 1096, 1101, 1097, 1095, 1098, 1070, 1040, 1041, 1062, 1044, 1045, 1060, 1043, 1061, 1048,
1049, 1050, 1051, 1052, 1053, 1054, 1055, 1071, 1056, 1057, 1058, 1059, 1046, 1042, 1068, 1067, 1047, 1064,
1069, 1065, 1063, 1066,
+ ],
+ 'KOI8-U': [
+ 9472, 9474, 9484, 9488, 9492, 9496, 9500, 9508, 9516, 9524, 9532, 9600, 9604, 9608, 9612,
9616, 9617, 9618, 9619, 8992, 9632, 8729, 8730, 8776, 8804, 8805, 160, 8993, 176, 178, 183, 247, 9552, 9553,
9554, 1105, 1108, 9556, 1110, 1111, 9559, 9560, 9561, 9562, 9563, 1169, 1118, 9566, 9567, 9568, 9569, 1025,
1028, 9571, 1030, 1031, 9574, 9575, 9576, 9577, 9578, 1168, 1038, 169, 1102, 1072, 1073, 1094, 1076, 1077,
1092, 1075, 1093, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1103, 1088, 1089, 1090, 1091, 1078, 1074,
1100, 1099, 1079, 1096, 1101, 1097, 1095, 1098, 1070, 1040, 1041, 1062, 1044, 1045, 1060, 1043, 1061, 1048,
1049, 1050, 1051, 1052, 1053, 1054, 1055, 1071, 1056, 1057, 1058, 1059, 1046, 1042, 1068, 1067, 1047, 1064,
1069, 1065, 1063, 1066,
+ ],
+ 'macintosh': [
+ 196, 197, 199, 201, 209, 214, 220, 225, 224, 226, 228, 227, 229, 231, 233, 232, 234, 235,
237, 236, 238, 239, 241, 243, 242, 244, 246, 245, 250, 249, 251, 252, 8224, 176, 162, 163, 167, 8226, 182,
223, 174, 169, 8482, 180, 168, 8800, 198, 216, 8734, 177, 8804, 8805, 165, 181, 8706, 8721, 8719, 960, 8747,
170, 186, 937, 230, 248, 191, 161, 172, 8730, 402, 8776, 8710, 171, 187, 8230, 160, 192, 195, 213, 338, 339,
8211, 8212, 8220, 8221, 8216, 8217, 247, 9674, 255, 376, 8260, 8364, 8249, 8250, 64257, 64258, 8225, 183,
8218, 8222, 8240, 194, 202, 193, 203, 200, 205, 206, 207, 204, 211, 212, 63743, 210, 218, 219, 217, 305, 710,
732, 175, 728, 729, 730, 184, 733, 731, 711,
+ ],
+ 'windows-874': [
+ 8364, 129, 130, 131, 132, 8230, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 8216,
8217, 8220, 8221, 8226, 8211, 8212, 152, 153, 154, 155, 156, 157, 158, 159, 160, 3585, 3586, 3587, 3588,
3589, 3590, 3591, 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599, 3600, 3601, 3602, 3603, 3604, 3605, 3606,
3607, 3608, 3609, 3610, 3611, 3612, 3613, 3614, 3615, 3616, 3617, 3618, 3619, 3620, 3621, 3622, 3623, 3624,
3625, 3626, 3627, 3628, 3629, 3630, 3631, 3632, 3633, 3634, 3635, 3636, 3637, 3638, 3639, 3640, 3641, 3642,
null, null, null, null, 3647, 3648, 3649, 3650, 3651, 3652, 3653, 3654, 3655, 3656, 3657, 3658, 3659, 3660,
3661, 3662, 3663, 3664, 3665, 3666, 3667, 3668, 3669, 3670, 3671, 3672, 3673, 3674, 3675, null, null, null,
null,
+ ],
+ 'windows-1250': [
+ 8364, 129, 8218, 131, 8222, 8230, 8224, 8225, 136, 8240, 352, 8249, 346, 356, 381, 377, 144,
8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 8482, 353, 8250, 347, 357, 382, 378, 160, 711, 728, 321, 164,
260, 166, 167, 168, 169, 350, 171, 172, 173, 174, 379, 176, 177, 731, 322, 180, 181, 182, 183, 184, 261, 351,
187, 317, 733, 318, 380, 340, 193, 194, 258, 196, 313, 262, 199, 268, 201, 280, 203, 282, 205, 206, 270, 272,
323, 327, 211, 212, 336, 214, 215, 344, 366, 218, 368, 220, 221, 354, 223, 341, 225, 226, 259, 228, 314, 263,
231, 269, 233, 281, 235, 283, 237, 238, 271, 273, 324, 328, 243, 244, 337, 246, 247, 345, 367, 250, 369, 252,
253, 355, 729,
+ ],
+ 'windows-1251': [
+ 1026, 1027, 8218, 1107, 8222, 8230, 8224, 8225, 8364, 8240, 1033, 8249, 1034, 1036, 1035,
1039, 1106, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 8482, 1113, 8250, 1114, 1116, 1115, 1119, 160,
1038, 1118, 1032, 164, 1168, 166, 167, 1025, 169, 1028, 171, 172, 173, 174, 1031, 176, 177, 1030, 1110, 1169,
181, 182, 183, 1105, 8470, 1108, 187, 1112, 1029, 1109, 1111, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065,
1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083,
1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101,
1102, 1103,
+ ],
+ 'windows-1252': [
+ 8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338, 141, 381, 143, 144,
8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 353, 8250, 339, 157, 382, 376, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186,
187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208,
209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230,
231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252,
253, 254, 255,
+ ],
+ 'windows-1253': [
+ 8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 136, 8240, 138, 8249, 140, 141, 142, 143, 144,
8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 8482, 154, 8250, 156, 157, 158, 159, 160, 901, 902, 163, 164,
165, 166, 167, 168, 169, null, 171, 172, 173, 174, 8213, 176, 177, 178, 179, 900, 181, 182, 183, 904, 905,
906, 187, 908, 189, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927,
928, 929, null, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948,
949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970,
971, 972, 973, 974, null,
+ ],
+ 'windows-1254': [
+ 8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338, 141, 142, 143, 144,
8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 353, 8250, 339, 157, 158, 376, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186,
187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 286,
209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 304, 350, 223, 224, 225, 226, 227, 228, 229, 230,
231, 232, 233, 234, 235, 236, 237, 238, 239, 287, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252,
305, 351, 255,
+ ],
+ 'windows-1255': [
+ 8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 138, 8249, 140, 141, 142, 143, 144,
8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 154, 8250, 156, 157, 158, 159, 160, 161, 162, 163, 8362,
165, 166, 167, 168, 169, 215, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 247,
187, 188, 189, 190, 191, 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, 1469,
1470, 1471, 1472, 1473, 1474, 1475, 1520, 1521, 1522, 1523, 1524, null, null, null, null, null, null, null,
+ 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, 1501, 1502,
1503, 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, null, null, 8206, 8207, null,
+ ],
+ 'windows-1256': [
+ 8364, 1662, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 1657, 8249, 338, 1670, 1688, 1672,
1711, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 1705, 8482, 1681, 8250, 339, 8204, 8205, 1722, 160, 1548,
162, 163, 164, 165, 166, 167, 168, 169, 1726, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182,
183, 184, 185, 1563, 187, 188, 189, 190, 1567, 1729, 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577,
1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 215, 1591, 1592, 1593, 1594,
1600, 1601, 1602, 1603, 224, 1604, 226, 1605, 1606, 1607, 1608, 231, 232, 233, 234, 235, 1609, 1610, 238,
239, 1611, 1612, 1613, 1614, 244, 1615, 1616, 247, 1617, 249, 1618, 251, 252, 8206, 8207, 1746,
+ ],
+ 'windows-1257': [
+ 8364, 129, 8218, 131, 8222, 8230, 8224, 8225, 136, 8240, 138, 8249, 140, 168, 711, 184, 144,
8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 8482, 154, 8250, 156, 175, 731, 159, 160, null, 162, 163, 164,
null, 166, 167, 216, 169, 342, 171, 172, 173, 174, 198, 176, 177, 178, 179, 180, 181, 182, 183, 248, 185,
343, 187, 188, 189, 190, 230, 260, 302, 256, 262, 196, 197, 280, 274, 268, 201, 377, 278, 290, 310, 298, 315,
352, 323, 325, 211, 332, 213, 214, 215, 370, 321, 346, 362, 220, 379, 381, 223, 261, 303, 257, 263, 228, 229,
281, 275, 269, 233, 378, 279, 291, 311, 299, 316, 353, 324, 326, 243, 333, 245, 246, 247, 371, 322, 347, 363,
252, 380, 382, 729,
+ ],
+ 'windows-1258': [
+ 8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 138, 8249, 338, 141, 142, 143, 144,
8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 154, 8250, 339, 157, 158, 376, 160, 161, 162, 163, 164,
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186,
187, 188, 189, 190, 191, 192, 193, 194, 258, 196, 197, 198, 199, 200, 201, 202, 203, 768, 205, 206, 207, 272,
209, 777, 211, 212, 416, 214, 215, 216, 217, 218, 219, 220, 431, 771, 223, 224, 225, 226, 259, 228, 229, 230,
231, 232, 233, 234, 235, 769, 237, 238, 239, 273, 241, 803, 243, 244, 417, 246, 247, 248, 249, 250, 251, 252,
432, 8363, 255,
+ ],
+ 'x-mac-cyrillic': [
+ 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054,
1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 8224,
176, 1168, 163, 167, 8226, 182, 1030, 174, 169, 8482, 1026, 1106, 8800, 1027, 1107, 8734, 177, 8804, 8805,
1110, 181, 1169, 1032, 1028, 1108, 1031, 1111, 1033, 1113, 1034, 1114, 1112, 1029, 172, 8730, 402, 8776,
8710, 171, 187, 8230, 160, 1035, 1115, 1036, 1116, 1109, 8211, 8212, 8220, 8221, 8216, 8217, 247, 8222, 1038,
1118, 1039, 1119, 8470, 1025, 1105, 1103, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082,
1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100,
1101, 1102, 8364,
+ ],
+ };
+
+ function assertDecode(data, encoding) {
+ if (encoding === 'ISO-8859-8-I')
+ encoding = 'ISO-8859-8';
+
+ for (let i = 0, l = data.length; i < l; i++) {
+ const cp = data.charCodeAt(i);
+ let expectedCp = i < 0x80 ? i : singleByteIndexes[encoding][i - 0x80];
+ if (typeof expectedCp === 'undefined' || expectedCp === null)
+ expectedCp = 0xfffd;
+
+ expect(cp).toBe(expectedCp);
+ }
+ }
+ const buffer = new ArrayBuffer(255);
+ const view = new Uint8Array(buffer);
+
+ for (let i = 0, l = view.byteLength; i < l; i++)
+ view[i] = i;
+
+
+ for (let i = 0, l = singleByteEncodings.length; i < l; i++) {
+ const encoding = singleByteEncodings[i];
+
+ it(`singleByteEncoding ${encoding.name}`, function () {
+ for (let i2 = 0, l2 = encoding.labels.length; i2 < l2; i2++) {
+ const label = encoding.labels[i2];
+ const decoder = new TextDecoder(label);
+ const data = decoder.decode(view);
+
+ expect(decoder.encoding).toBe(encoding.name.toLowerCase());
+ assertDecode(data, encoding.name);
+ }
+ });
+ }
+ });
+});
diff --git a/js.gresource.xml b/js.gresource.xml
index fc55e597..fc64a8f6 100644
--- a/js.gresource.xml
+++ b/js.gresource.xml
@@ -42,8 +42,11 @@
<file>modules/core/_cairo.js</file>
<file>modules/core/_common.js</file>
+ <file>modules/core/_encodings.js</file>
+ <file>modules/core/_singleByteEncodings.js</file>
<file>modules/core/_format.js</file>
<file>modules/core/_gettext.js</file>
<file>modules/core/_signals.js</file>
+ <file>modules/core/_text.js</file>
</gresource>
</gresources>
diff --git a/modules/core/_encodings.js b/modules/core/_encodings.js
new file mode 100644
index 00000000..cfbfc2a5
--- /dev/null
+++ b/modules/core/_encodings.js
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: Node.js contributors. All rights reserved.
+
+// Modified from https://github.com/nodejs/node/blob/78680c1cbc8b0c435963bc512e826b2a6227c315/lib/internal/encoding.js
+// Data from https://encoding.spec.whatwg.org/encodings.json
+
+/* exported getEncodingFromLabel */
+
+const encodings = new Map([
+ ['unicode-1-1-utf-8', 'utf-8'],
+ ['unicode11utf8', 'utf-8'],
+ ['unicode20utf8', 'utf-8'],
+ ['utf-8', 'utf-8'],
+ ['utf8', 'utf-8'],
+ ['x-unicode20utf8', 'utf-8'],
+ ['866', 'ibm866'],
+ ['cp866', 'ibm866'],
+ ['csibm866', 'ibm866'],
+ ['ibm866', 'ibm866'],
+ ['csisolatin2', 'iso-8859-2'],
+ ['iso-8859-2', 'iso-8859-2'],
+ ['iso-ir-101', 'iso-8859-2'],
+ ['iso8859-2', 'iso-8859-2'],
+ ['iso88592', 'iso-8859-2'],
+ ['iso_8859-2', 'iso-8859-2'],
+ ['iso_8859-2:1987', 'iso-8859-2'],
+ ['l2', 'iso-8859-2'],
+ ['latin2', 'iso-8859-2'],
+ ['csisolatin3', 'iso-8859-3'],
+ ['iso-8859-3', 'iso-8859-3'],
+ ['iso-ir-109', 'iso-8859-3'],
+ ['iso8859-3', 'iso-8859-3'],
+ ['iso88593', 'iso-8859-3'],
+ ['iso_8859-3', 'iso-8859-3'],
+ ['iso_8859-3:1988', 'iso-8859-3'],
+ ['l3', 'iso-8859-3'],
+ ['latin3', 'iso-8859-3'],
+ ['csisolatin4', 'iso-8859-4'],
+ ['iso-8859-4', 'iso-8859-4'],
+ ['iso-ir-110', 'iso-8859-4'],
+ ['iso8859-4', 'iso-8859-4'],
+ ['iso88594', 'iso-8859-4'],
+ ['iso_8859-4', 'iso-8859-4'],
+ ['iso_8859-4:1988', 'iso-8859-4'],
+ ['l4', 'iso-8859-4'],
+ ['latin4', 'iso-8859-4'],
+ ['csisolatincyrillic', 'iso-8859-5'],
+ ['cyrillic', 'iso-8859-5'],
+ ['iso-8859-5', 'iso-8859-5'],
+ ['iso-ir-144', 'iso-8859-5'],
+ ['iso8859-5', 'iso-8859-5'],
+ ['iso88595', 'iso-8859-5'],
+ ['iso_8859-5', 'iso-8859-5'],
+ ['iso_8859-5:1988', 'iso-8859-5'],
+ ['arabic', 'iso-8859-6'],
+ ['asmo-708', 'iso-8859-6'],
+ ['csiso88596e', 'iso-8859-6'],
+ ['csiso88596i', 'iso-8859-6'],
+ ['csisolatinarabic', 'iso-8859-6'],
+ ['ecma-114', 'iso-8859-6'],
+ ['iso-8859-6', 'iso-8859-6'],
+ ['iso-8859-6-e', 'iso-8859-6'],
+ ['iso-8859-6-i', 'iso-8859-6'],
+ ['iso-ir-127', 'iso-8859-6'],
+ ['iso8859-6', 'iso-8859-6'],
+ ['iso88596', 'iso-8859-6'],
+ ['iso_8859-6', 'iso-8859-6'],
+ ['iso_8859-6:1987', 'iso-8859-6'],
+ ['csisolatingreek', 'iso-8859-7'],
+ ['ecma-118', 'iso-8859-7'],
+ ['elot_928', 'iso-8859-7'],
+ ['greek', 'iso-8859-7'],
+ ['greek8', 'iso-8859-7'],
+ ['iso-8859-7', 'iso-8859-7'],
+ ['iso-ir-126', 'iso-8859-7'],
+ ['iso8859-7', 'iso-8859-7'],
+ ['iso88597', 'iso-8859-7'],
+ ['iso_8859-7', 'iso-8859-7'],
+ ['iso_8859-7:1987', 'iso-8859-7'],
+ ['sun_eu_greek', 'iso-8859-7'],
+ ['csiso88598e', 'iso-8859-8'],
+ ['csisolatinhebrew', 'iso-8859-8'],
+ ['hebrew', 'iso-8859-8'],
+ ['iso-8859-8', 'iso-8859-8'],
+ ['iso-8859-8-e', 'iso-8859-8'],
+ ['iso-ir-138', 'iso-8859-8'],
+ ['iso8859-8', 'iso-8859-8'],
+ ['iso88598', 'iso-8859-8'],
+ ['iso_8859-8', 'iso-8859-8'],
+ ['iso_8859-8:1988', 'iso-8859-8'],
+ ['visual', 'iso-8859-8'],
+ ['csiso88598i', 'iso-8859-8-i'],
+ ['iso-8859-8-i', 'iso-8859-8-i'],
+ ['logical', 'iso-8859-8-i'],
+ ['csisolatin6', 'iso-8859-10'],
+ ['iso-8859-10', 'iso-8859-10'],
+ ['iso-ir-157', 'iso-8859-10'],
+ ['iso8859-10', 'iso-8859-10'],
+ ['iso885910', 'iso-8859-10'],
+ ['l6', 'iso-8859-10'],
+ ['latin6', 'iso-8859-10'],
+ ['iso-8859-13', 'iso-8859-13'],
+ ['iso8859-13', 'iso-8859-13'],
+ ['iso885913', 'iso-8859-13'],
+ ['iso-8859-14', 'iso-8859-14'],
+ ['iso8859-14', 'iso-8859-14'],
+ ['iso885914', 'iso-8859-14'],
+ ['csisolatin9', 'iso-8859-15'],
+ ['iso-8859-15', 'iso-8859-15'],
+ ['iso8859-15', 'iso-8859-15'],
+ ['iso885915', 'iso-8859-15'],
+ ['iso_8859-15', 'iso-8859-15'],
+ ['l9', 'iso-8859-15'],
+ ['iso-8859-16', 'iso-8859-16'],
+ ['cskoi8r', 'koi8-r'],
+ ['koi', 'koi8-r'],
+ ['koi8', 'koi8-r'],
+ ['koi8-r', 'koi8-r'],
+ ['koi8_r', 'koi8-r'],
+ ['koi8-ru', 'koi8-u'],
+ ['koi8-u', 'koi8-u'],
+ ['csmacintosh', 'macintosh'],
+ ['mac', 'macintosh'],
+ ['macintosh', 'macintosh'],
+ ['x-mac-roman', 'macintosh'],
+ ['dos-874', 'windows-874'],
+ ['iso-8859-11', 'windows-874'],
+ ['iso8859-11', 'windows-874'],
+ ['iso885911', 'windows-874'],
+ ['tis-620', 'windows-874'],
+ ['windows-874', 'windows-874'],
+ ['cp1250', 'windows-1250'],
+ ['windows-1250', 'windows-1250'],
+ ['x-cp1250', 'windows-1250'],
+ ['cp1251', 'windows-1251'],
+ ['windows-1251', 'windows-1251'],
+ ['x-cp1251', 'windows-1251'],
+ ['ansi_x3.4-1968', 'windows-1252'],
+ ['ascii', 'windows-1252'],
+ ['cp1252', 'windows-1252'],
+ ['cp819', 'windows-1252'],
+ ['csisolatin1', 'windows-1252'],
+ ['ibm819', 'windows-1252'],
+ ['iso-8859-1', 'windows-1252'],
+ ['iso-ir-100', 'windows-1252'],
+ ['iso8859-1', 'windows-1252'],
+ ['iso88591', 'windows-1252'],
+ ['iso_8859-1', 'windows-1252'],
+ ['iso_8859-1:1987', 'windows-1252'],
+ ['l1', 'windows-1252'],
+ ['latin1', 'windows-1252'],
+ ['us-ascii', 'windows-1252'],
+ ['windows-1252', 'windows-1252'],
+ ['x-cp1252', 'windows-1252'],
+ ['cp1253', 'windows-1253'],
+ ['windows-1253', 'windows-1253'],
+ ['x-cp1253', 'windows-1253'],
+ ['cp1254', 'windows-1254'],
+ ['csisolatin5', 'windows-1254'],
+ ['iso-8859-9', 'windows-1254'],
+ ['iso-ir-148', 'windows-1254'],
+ ['iso8859-9', 'windows-1254'],
+ ['iso88599', 'windows-1254'],
+ ['iso_8859-9', 'windows-1254'],
+ ['iso_8859-9:1989', 'windows-1254'],
+ ['l5', 'windows-1254'],
+ ['latin5', 'windows-1254'],
+ ['windows-1254', 'windows-1254'],
+ ['x-cp1254', 'windows-1254'],
+ ['cp1255', 'windows-1255'],
+ ['windows-1255', 'windows-1255'],
+ ['x-cp1255', 'windows-1255'],
+ ['cp1256', 'windows-1256'],
+ ['windows-1256', 'windows-1256'],
+ ['x-cp1256', 'windows-1256'],
+ ['cp1257', 'windows-1257'],
+ ['windows-1257', 'windows-1257'],
+ ['x-cp1257', 'windows-1257'],
+ ['cp1258', 'windows-1258'],
+ ['windows-1258', 'windows-1258'],
+ ['x-cp1258', 'windows-1258'],
+ ['x-mac-cyrillic', 'x-mac-cyrillic'],
+ ['x-mac-ukrainian', 'x-mac-cyrillic'],
+ ['chinese', 'gbk'],
+ ['csgb2312', 'gbk'],
+ ['csiso58gb231280', 'gbk'],
+ ['gb2312', 'gbk'],
+ ['gb_2312', 'gbk'],
+ ['gb_2312-80', 'gbk'],
+ ['gbk', 'gbk'],
+ ['iso-ir-58', 'gbk'],
+ ['x-gbk', 'gbk'],
+ ['gb18030', 'gb18030'],
+ ['big5', 'big5'],
+ ['big5-hkscs', 'big5'],
+ ['cn-big5', 'big5'],
+ ['csbig5', 'big5'],
+ ['x-x-big5', 'big5'],
+ ['cseucpkdfmtjapanese', 'euc-jp'],
+ ['euc-jp', 'euc-jp'],
+ ['x-euc-jp', 'euc-jp'],
+ ['csiso2022jp', 'iso-2022-jp'],
+ ['iso-2022-jp', 'iso-2022-jp'],
+ ['csshiftjis', 'shift_jis'],
+ ['ms932', 'shift_jis'],
+ ['ms_kanji', 'shift_jis'],
+ ['shift-jis', 'shift_jis'],
+ ['shift_jis', 'shift_jis'],
+ ['sjis', 'shift_jis'],
+ ['windows-31j', 'shift_jis'],
+ ['x-sjis', 'shift_jis'],
+ ['cseuckr', 'euc-kr'],
+ ['csksc56011987', 'euc-kr'],
+ ['euc-kr', 'euc-kr'],
+ ['iso-ir-149', 'euc-kr'],
+ ['korean', 'euc-kr'],
+ ['ks_c_5601-1987', 'euc-kr'],
+ ['ks_c_5601-1989', 'euc-kr'],
+ ['ksc5601', 'euc-kr'],
+ ['ksc_5601', 'euc-kr'],
+ ['windows-949', 'euc-kr'],
+ ['csiso2022kr', 'replacement'],
+ ['hz-gb-2312', 'replacement'],
+ ['iso-2022-cn', 'replacement'],
+ ['iso-2022-cn-ext', 'replacement'],
+ ['iso-2022-kr', 'replacement'],
+ ['replacement', 'replacement'],
+ ['unicodefffe', 'utf-16be'],
+ ['utf-16be', 'utf-16be'],
+ ['csunicode', 'utf-16le'],
+ ['iso-10646-ucs-2', 'utf-16le'],
+ ['ucs-2', 'utf-16le'],
+ ['unicode', 'utf-16le'],
+ ['unicodefeff', 'utf-16le'],
+ ['utf-16', 'utf-16le'],
+ ['utf-16le', 'utf-16le'],
+ ['x-user-defined', 'x-user-defined'],
+]);
+
+
+// Maps a canonical encoding name to the name handed to the decoder when the
+// two differ. Per the original note, some web-specified names are aliases
+// that iconv does not support.
+const internalEncodings = new Map();
+internalEncodings.set('x-mac-cyrillic', 'MacCyrillic');
+// iso-8859-8-i is handled with the iso-8859-8 name.
+internalEncodings.set('iso-8859-8-i', 'iso-8859-8');
+
+/**
+ * Strips leading and trailing ASCII whitespace (TAB, LF, FF, CR, SPACE).
+ *
+ * `String.prototype.trim` is not used because it would also strip
+ * non-ASCII whitespace, which has to be kept.
+ *
+ * @param {string} label the label to trim
+ * @returns {string} the label without surrounding ASCII whitespace
+ */
+const trimAsciiWhitespace = label => {
+    const isAsciiWhitespace = ch => '\u0009\u000a\u000c\u000d\u0020'.includes(ch);
+
+    let start = 0;
+    while (start < label.length && isAsciiWhitespace(label[start]))
+        start++;
+
+    let end = label.length;
+    while (end > start && isAsciiWhitespace(label[end - 1]))
+        end--;
+
+    return label.slice(start, end);
+};
+
+/**
+ * @typedef Encoding
+ * @property {string} internalLabel
+ * @property {string} label
+ */
+
+/**
+ * Resolves an encoding label to its canonical name plus the name that is
+ * used internally when decoding.
+ *
+ * The label is first looked up verbatim; if that fails it is ASCII
+ * lower-cased, stripped of surrounding ASCII whitespace and looked up again.
+ *
+ * @param {string} label the encoding label
+ * @returns {Encoding | null} the resolved encoding, or null when the label
+ *   is not a known encoding label
+ */
+function getEncodingFromLabel(label) {
+    let encoding = encodings.get(label);
+
+    if (encoding === undefined) {
+        // Labels match ASCII case-insensitively. String.prototype.toLowerCase
+        // applies Unicode case mapping (U+212A KELVIN SIGN becomes 'k'),
+        // which would accept labels that are not valid, so only A-Z are
+        // folded here.
+        const asciiLowerCased = label.replace(/[A-Z]/g, upper => upper.toLowerCase());
+        encoding = encodings.get(trimAsciiWhitespace(asciiLowerCased));
+    }
+
+    if (!encoding)
+        return null;
+
+    return {
+        label: encoding,
+        // Fall back to the canonical name when no internal alias exists.
+        internalLabel: internalEncodings.get(encoding) ?? encoding,
+    };
+}
diff --git a/modules/core/_singleByteEncodings.js b/modules/core/_singleByteEncodings.js
new file mode 100644
index 00000000..eb0cfb94
--- /dev/null
+++ b/modules/core/_singleByteEncodings.js
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: Evan Welsh
+
+/* exported singleByteEncodings */
+
+// These single byte encodings are considered "legacy" and have inconsistent implementations
+// by platform. The WHATWG Encoding specification standardizes a set of encodings and
+// their character mapping to UTF-8.
+
+// Taken from https://encoding.spec.whatwg.org/#legacy-single-byte-encodings
+
+var singleByteEncodings = {
+ 'ibm866': [1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054,
1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072,
1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 9617, 9618, 9619,
9474, 9508, 9569, 9570, 9558, 9557, 9571, 9553, 9559, 9565, 9564, 9563, 9488, 9492, 9524, 9516, 9500, 9472,
9532, 9566, 9567, 9562, 9556, 9577, 9574, 9568, 9552, 9580, 9575, 9576, 9572, 9573, 9561, 9560, 9554, 9555,
9579, 9578, 9496, 9484, 9608, 9604, 9612, 9616, 9600, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096,
1097, 1098, 1099, 1100, 1101, 1102, 1103, 1025, 1105, 1028, 1108, 1031, 1111, 1038, 1118, 176, 8729, 183,
8730, 8470, 164, 9632, 160],
+ 'iso-8859-2': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 728, 321, 164, 317, 346, 167,
168, 352, 350, 356, 377, 173, 381, 379, 176, 261, 731, 322, 180, 318, 347, 711, 184, 353, 351, 357, 378, 733,
382, 380, 340, 193, 194, 258, 196, 313, 262, 199, 268, 201, 280, 203, 282, 205, 206, 270, 272, 323, 327, 211,
212, 336, 214, 215, 344, 366, 218, 368, 220, 221, 354, 223, 341, 225, 226, 259, 228, 314, 263, 231, 269, 233,
281, 235, 283, 237, 238, 271, 273, 324, 328, 243, 244, 337, 246, 247, 345, 367, 250, 369, 252, 253, 355, 729],
+ 'iso-8859-3': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 294, 728, 163, 164, null, 292,
167, 168, 304, 350, 286, 308, 173, null, 379, 176, 295, 178, 179, 180, 181, 293, 183, 184, 305, 351, 287,
309, 189, null, 380, 192, 193, 194, null, 196, 266, 264, 199, 200, 201, 202, 203, 204, 205, 206, 207, null,
209, 210, 211, 212, 288, 214, 215, 284, 217, 218, 219, 220, 364, 348, 223, 224, 225, 226, null, 228, 267,
265, 231, 232, 233, 234, 235, 236, 237, 238, 239, null, 241, 242, 243, 244, 289, 246, 247, 285, 249, 250,
251, 252, 365, 349, 729],
+ 'iso-8859-4': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 312, 342, 164, 296, 315, 167,
168, 352, 274, 290, 358, 173, 381, 175, 176, 261, 731, 343, 180, 297, 316, 711, 184, 353, 275, 291, 359, 330,
382, 331, 256, 193, 194, 195, 196, 197, 198, 302, 268, 201, 280, 203, 278, 205, 206, 298, 272, 325, 332, 310,
212, 213, 214, 215, 216, 370, 218, 219, 220, 360, 362, 223, 257, 225, 226, 227, 228, 229, 230, 303, 269, 233,
281, 235, 279, 237, 238, 299, 273, 326, 333, 311, 244, 245, 246, 247, 248, 371, 250, 251, 252, 361, 363, 729],
+ 'iso-8859-5': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 1025, 1026, 1027, 1028, 1029,
1030, 1031, 1032, 1033, 1034, 1035, 1036, 173, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065,
1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083,
1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101,
1102, 1103, 8470, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 167, 1118, 1119],
+ 'iso-8859-6': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, null, null, null, 164, null, null,
null, null, null, null, null, 1548, 173, null, null, null, null, null, null, null, null, null, null, null,
null, null, 1563, null, null, null, 1567, null, 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, 1578,
1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, 1594, null, null,
null, null, null, 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614,
1615, 1616, 1617, 1618, null, null, null, null, null, null, null, null, null, null, null, null, null],
+ 'iso-8859-7': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 8216, 8217, 163, 8364, 8367, 166,
167, 168, 169, 890, 171, 172, 173, null, 8213, 176, 177, 178, 179, 900, 901, 902, 183, 904, 905, 906, 187,
908, 189, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929,
null, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950,
951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972,
973, 974, null],
+ 'iso-8859-8': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, null, 162, 163, 164, 165, 166,
167, 168, 169, 215, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 247, 187, 188,
189, 190, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null,
null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, 8215, 1488,
1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503, 1504, 1505, 1506,
1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, null, null, 8206, 8207, null],
+ 'iso-8859-10': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 274, 290, 298, 296, 310,
167, 315, 272, 352, 358, 381, 173, 362, 330, 176, 261, 275, 291, 299, 297, 311, 183, 316, 273, 353, 359, 382,
8213, 363, 331, 256, 193, 194, 195, 196, 197, 198, 302, 268, 201, 280, 203, 278, 205, 206, 207, 208, 325,
332, 211, 212, 213, 214, 360, 216, 370, 218, 219, 220, 221, 222, 223, 257, 225, 226, 227, 228, 229, 230, 303,
269, 233, 281, 235, 279, 237, 238, 239, 240, 326, 333, 243, 244, 245, 246, 361, 248, 371, 250, 251, 252, 253,
254, 312],
+ 'iso-8859-13': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 8221, 162, 163, 164, 8222,
166, 167, 216, 169, 342, 171, 172, 173, 174, 198, 176, 177, 178, 179, 8220, 181, 182, 183, 248, 185, 343,
187, 188, 189, 190, 230, 260, 302, 256, 262, 196, 197, 280, 274, 268, 201, 377, 278, 290, 310, 298, 315, 352,
323, 325, 211, 332, 213, 214, 215, 370, 321, 346, 362, 220, 379, 381, 223, 261, 303, 257, 263, 228, 229, 281,
275, 269, 233, 378, 279, 291, 311, 299, 316, 353, 324, 326, 243, 333, 245, 246, 247, 371, 322, 347, 363, 252,
380, 382, 8217],
+ 'iso-8859-14': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 7682, 7683, 163, 266, 267,
7690, 167, 7808, 169, 7810, 7691, 7922, 173, 174, 376, 7710, 7711, 288, 289, 7744, 7745, 182, 7766, 7809,
7767, 7811, 7776, 7923, 7812, 7813, 7777, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204,
205, 206, 207, 372, 209, 210, 211, 212, 213, 214, 7786, 216, 217, 218, 219, 220, 221, 374, 223, 224, 225,
226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 373, 241, 242, 243, 244, 245, 246,
7787, 248, 249, 250, 251, 252, 253, 375, 255],
+ 'iso-8859-15': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 8364, 165,
352, 167, 353, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 381, 181, 182, 183, 382, 185, 186, 187,
338, 339, 376, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231,
232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253,
254, 255],
+ 'iso-8859-16': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 261, 321, 8364, 8222,
352, 167, 353, 169, 536, 171, 377, 173, 378, 379, 176, 177, 268, 322, 381, 8221, 182, 183, 382, 269, 537,
187, 338, 339, 376, 380, 192, 193, 194, 258, 196, 262, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 272,
323, 210, 211, 212, 336, 214, 346, 368, 217, 218, 219, 220, 280, 538, 223, 224, 225, 226, 259, 228, 263, 230,
231, 232, 233, 234, 235, 236, 237, 238, 239, 273, 324, 242, 243, 244, 337, 246, 347, 369, 249, 250, 251, 252,
281, 539, 255],
+ 'koi8-r': [9472, 9474, 9484, 9488, 9492, 9496, 9500, 9508, 9516, 9524, 9532, 9600, 9604, 9608, 9612,
9616, 9617, 9618, 9619, 8992, 9632, 8729, 8730, 8776, 8804, 8805, 160, 8993, 176, 178, 183, 247, 9552, 9553,
9554, 1105, 9555, 9556, 9557, 9558, 9559, 9560, 9561, 9562, 9563, 9564, 9565, 9566, 9567, 9568, 9569, 1025,
9570, 9571, 9572, 9573, 9574, 9575, 9576, 9577, 9578, 9579, 9580, 169, 1102, 1072, 1073, 1094, 1076, 1077,
1092, 1075, 1093, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1103, 1088, 1089, 1090, 1091, 1078, 1074,
1100, 1099, 1079, 1096, 1101, 1097, 1095, 1098, 1070, 1040, 1041, 1062, 1044, 1045, 1060, 1043, 1061, 1048,
1049, 1050, 1051, 1052, 1053, 1054, 1055, 1071, 1056, 1057, 1058, 1059, 1046, 1042, 1068, 1067, 1047, 1064,
1069, 1065, 1063, 1066],
+ 'koi8-u': [9472, 9474, 9484, 9488, 9492, 9496, 9500, 9508, 9516, 9524, 9532, 9600, 9604, 9608, 9612,
9616, 9617, 9618, 9619, 8992, 9632, 8729, 8730, 8776, 8804, 8805, 160, 8993, 176, 178, 183, 247, 9552, 9553,
9554, 1105, 1108, 9556, 1110, 1111, 9559, 9560, 9561, 9562, 9563, 1169, 1118, 9566, 9567, 9568, 9569, 1025,
1028, 9571, 1030, 1031, 9574, 9575, 9576, 9577, 9578, 1168, 1038, 169, 1102, 1072, 1073, 1094, 1076, 1077,
1092, 1075, 1093, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1103, 1088, 1089, 1090, 1091, 1078, 1074,
1100, 1099, 1079, 1096, 1101, 1097, 1095, 1098, 1070, 1040, 1041, 1062, 1044, 1045, 1060, 1043, 1061, 1048,
1049, 1050, 1051, 1052, 1053, 1054, 1055, 1071, 1056, 1057, 1058, 1059, 1046, 1042, 1068, 1067, 1047, 1064,
1069, 1065, 1063, 1066],
+ 'macintosh': [196, 197, 199, 201, 209, 214, 220, 225, 224, 226, 228, 227, 229, 231, 233, 232, 234, 235,
237, 236, 238, 239, 241, 243, 242, 244, 246, 245, 250, 249, 251, 252, 8224, 176, 162, 163, 167, 8226, 182,
223, 174, 169, 8482, 180, 168, 8800, 198, 216, 8734, 177, 8804, 8805, 165, 181, 8706, 8721, 8719, 960, 8747,
170, 186, 937, 230, 248, 191, 161, 172, 8730, 402, 8776, 8710, 171, 187, 8230, 160, 192, 195, 213, 338, 339,
8211, 8212, 8220, 8221, 8216, 8217, 247, 9674, 255, 376, 8260, 8364, 8249, 8250, 64257, 64258, 8225, 183,
8218, 8222, 8240, 194, 202, 193, 203, 200, 205, 206, 207, 204, 211, 212, 63743, 210, 218, 219, 217, 305, 710,
732, 175, 728, 729, 730, 184, 733, 731, 711],
+ 'windows-874': [8364, 129, 130, 131, 132, 8230, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 153, 154, 155, 156, 157, 158, 159, 160, 3585, 3586, 3587,
3588, 3589, 3590, 3591, 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599, 3600, 3601, 3602, 3603, 3604, 3605,
3606, 3607, 3608, 3609, 3610, 3611, 3612, 3613, 3614, 3615, 3616, 3617, 3618, 3619, 3620, 3621, 3622, 3623,
3624, 3625, 3626, 3627, 3628, 3629, 3630, 3631, 3632, 3633, 3634, 3635, 3636, 3637, 3638, 3639, 3640, 3641,
3642, null, null, null, null, 3647, 3648, 3649, 3650, 3651, 3652, 3653, 3654, 3655, 3656, 3657, 3658, 3659,
3660, 3661, 3662, 3663, 3664, 3665, 3666, 3667, 3668, 3669, 3670, 3671, 3672, 3673, 3674, 3675, null, null,
null, null],
+ 'windows-1250': [8364, 129, 8218, 131, 8222, 8230, 8224, 8225, 136, 8240, 352, 8249, 346, 356, 381, 377,
144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 8482, 353, 8250, 347, 357, 382, 378, 160, 711, 728, 321,
164, 260, 166, 167, 168, 169, 350, 171, 172, 173, 174, 379, 176, 177, 731, 322, 180, 181, 182, 183, 184, 261,
351, 187, 317, 733, 318, 380, 340, 193, 194, 258, 196, 313, 262, 199, 268, 201, 280, 203, 282, 205, 206, 270,
272, 323, 327, 211, 212, 336, 214, 215, 344, 366, 218, 368, 220, 221, 354, 223, 341, 225, 226, 259, 228, 314,
263, 231, 269, 233, 281, 235, 283, 237, 238, 271, 273, 324, 328, 243, 244, 337, 246, 247, 345, 367, 250, 369,
252, 253, 355, 729],
+ 'windows-1251': [1026, 1027, 8218, 1107, 8222, 8230, 8224, 8225, 8364, 8240, 1033, 8249, 1034, 1036,
1035, 1039, 1106, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 8482, 1113, 8250, 1114, 1116, 1115, 1119,
160, 1038, 1118, 1032, 164, 1168, 166, 167, 1025, 169, 1028, 171, 172, 173, 174, 1031, 176, 177, 1030, 1110,
1169, 181, 182, 183, 1105, 8470, 1108, 187, 1112, 1029, 1109, 1111, 1040, 1041, 1042, 1043, 1044, 1045, 1046,
1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064,
1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082,
1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100,
1101, 1102, 1103],
+ 'windows-1252': [8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338, 141, 381, 143,
144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 353, 8250, 339, 157, 382, 376, 160, 161, 162, 163,
164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185,
186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
252, 253, 254, 255],
+ 'windows-1253': [8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 136, 8240, 138, 8249, 140, 141, 142, 143,
144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 8482, 154, 8250, 156, 157, 158, 159, 160, 901, 902, 163,
164, 165, 166, 167, 168, 169, null, 171, 172, 173, 174, 8213, 176, 177, 178, 179, 900, 181, 182, 183, 904,
905, 906, 187, 908, 189, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926,
927, 928, 929, null, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947,
948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969,
970, 971, 972, 973, 974, null],
+ 'windows-1254': [8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338, 141, 142, 143,
144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 353, 8250, 339, 157, 158, 376, 160, 161, 162, 163,
164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185,
186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
286, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 304, 350, 223, 224, 225, 226, 227, 228, 229,
230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 287, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
252, 305, 351, 255],
+ 'windows-1255': [8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 138, 8249, 140, 141, 142, 143,
144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 154, 8250, 156, 157, 158, 159, 160, 161, 162, 163,
8362, 165, 166, 167, 168, 169, 215, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184,
185, 247, 187, 188, 189, 190, 191, 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463, 1464, 1465, 1466, 1467,
1468, 1469, 1470, 1471, 1472, 1473, 1474, 1475, 1520, 1521, 1522, 1523, 1524, null, null, null, null, null,
null, null, 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503,
1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, null, null, 8206, 8207, null],
+ 'windows-1256': [8364, 1662, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 1657, 8249, 338, 1670, 1688,
1672, 1711, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 1705, 8482, 1681, 8250, 339, 8204, 8205, 1722, 160,
1548, 162, 163, 164, 165, 166, 167, 168, 169, 1726, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181,
182, 183, 184, 185, 1563, 187, 188, 189, 190, 1567, 1729, 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576,
1577, 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 215, 1591, 1592, 1593,
1594, 1600, 1601, 1602, 1603, 224, 1604, 226, 1605, 1606, 1607, 1608, 231, 232, 233, 234, 235, 1609, 1610,
238, 239, 1611, 1612, 1613, 1614, 244, 1615, 1616, 247, 1617, 249, 1618, 251, 252, 8206, 8207, 1746],
+ 'windows-1257': [8364, 129, 8218, 131, 8222, 8230, 8224, 8225, 136, 8240, 138, 8249, 140, 168, 711, 184,
144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 8482, 154, 8250, 156, 175, 731, 159, 160, null, 162, 163,
164, null, 166, 167, 216, 169, 342, 171, 172, 173, 174, 198, 176, 177, 178, 179, 180, 181, 182, 183, 248,
185, 343, 187, 188, 189, 190, 230, 260, 302, 256, 262, 196, 197, 280, 274, 268, 201, 377, 278, 290, 310, 298,
315, 352, 323, 325, 211, 332, 213, 214, 215, 370, 321, 346, 362, 220, 379, 381, 223, 261, 303, 257, 263, 228,
229, 281, 275, 269, 233, 378, 279, 291, 311, 299, 316, 353, 324, 326, 243, 333, 245, 246, 247, 371, 322, 347,
363, 252, 380, 382, 729],
+ 'windows-1258': [8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 138, 8249, 338, 141, 142, 143,
144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 154, 8250, 339, 157, 158, 376, 160, 161, 162, 163,
164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185,
186, 187, 188, 189, 190, 191, 192, 193, 194, 258, 196, 197, 198, 199, 200, 201, 202, 203, 768, 205, 206, 207,
272, 209, 777, 211, 212, 416, 214, 215, 216, 217, 218, 219, 220, 431, 771, 223, 224, 225, 226, 259, 228, 229,
230, 231, 232, 233, 234, 235, 769, 237, 238, 239, 273, 241, 803, 243, 244, 417, 246, 247, 248, 249, 250, 251,
252, 432, 8363, 255],
+ 'x-mac-cyrillic': [1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053,
1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
8224, 176, 1168, 163, 167, 8226, 182, 1030, 174, 169, 8482, 1026, 1106, 8800, 1027, 1107, 8734, 177, 8804,
8805, 1110, 181, 1169, 1032, 1028, 1108, 1031, 1111, 1033, 1113, 1034, 1114, 1112, 1029, 172, 8730, 402,
8776, 8710, 171, 187, 8230, 160, 1035, 1115, 1036, 1116, 1109, 8211, 8212, 8220, 8221, 8216, 8217, 247, 8222,
1038, 1118, 1039, 1119, 8470, 1025, 1105, 1103, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081,
1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099,
1100, 1101, 1102, 8364],
+};
diff --git a/modules/core/_text.js b/modules/core/_text.js
new file mode 100644
index 00000000..a59b4df7
--- /dev/null
+++ b/modules/core/_text.js
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: Evan Welsh
+
+const Encoding = imports._encodingNative;
+
+const { getEncodingFromLabel } = imports._encodings;
+const { singleByteEncodings } = imports._singleByteEncodings;
+
+/**
+ * Maps one byte of a single-byte encoding to a code point.
+ *
+ * @param {number[]} encoding code point table; entry N belongs to byte
+ *   0x80 + N and may be null/missing for unmapped bytes
+ * @param {number} byte the byte to decode
+ * @param {boolean} fatal when truthy, an unmapped byte throws instead of
+ *   yielding U+FFFD
+ * @returns {number} the byte itself below 0x80, otherwise the table entry,
+ *   or 0xFFFD for an unmapped byte
+ * @throws {TypeError} for an unmapped byte when `fatal` is truthy
+ */
+const decodeSingleByteEncodingCharacter = (encoding, byte, fatal) => {
+    if (byte < 0x80)
+        return byte;
+
+    const codePoint = encoding[byte - 0x80];
+    if (codePoint !== null && codePoint !== undefined)
+        return codePoint;
+
+    if (fatal)
+        throw new TypeError(`Invalid character in decode.`);
+
+    return 0xFFFD;
+};
+
+/**
+ * Decodes a byte sequence with a single-byte encoding table.
+ *
+ * @param {number[]} encoding code point table for bytes 0x80 and above
+ * @param {Uint8Array} bytes the bytes to decode
+ * @param {boolean} fatal when truthy, an unmapped byte throws a TypeError
+ *   instead of decoding to U+FFFD
+ * @returns {string} the decoded text ('' for empty input)
+ */
+const decodeSingleByteEncoding = (encoding, bytes, fatal) => {
+    // Spreading one argument per byte into a single String.fromCodePoint
+    // call exceeds the engine's argument-count limit on large inputs and
+    // throws a RangeError, so the input is decoded in bounded chunks.
+    const CHUNK_SIZE = 0x4000;
+    let decoded = '';
+
+    for (let start = 0; start < bytes.length; start += CHUNK_SIZE) {
+        const codePoints = Array.from(
+            bytes.subarray(start, start + CHUNK_SIZE),
+            byte => decodeSingleByteEncodingCharacter(encoding, byte, fatal)
+        );
+
+        decoded += String.fromCodePoint(...codePoints);
+    }
+
+    return decoded;
+};
+
+// Whether `label` is a key of the singleByteEncodings table.
+const isSingleByteEncoding = label => Reflect.has(singleByteEncodings, label);
+
+// Returns the code point table stored under `label` (undefined if absent).
+const getSingleByteEncoding = label => {
+    return singleByteEncodings[label];
+};
+
+/**
+ * Decodes bytes into strings, modelled on the WHATWG Encoding `TextDecoder`
+ * interface. Streaming decode is not implemented.
+ */
+var TextDecoder = class TextDecoder {
+    /**
+     * Canonical name of the encoding resolved from the constructor label.
+     *
+     * @type {string}
+     */
+    encoding;
+
+    /**
+     * The `ignoreBOM` option given to the constructor.
+     *
+     * @type {boolean}
+     */
+    ignoreBOM;
+
+    /**
+     * The `fatal` option given to the constructor; forwarded to the
+     * decoders on every `decode()` call.
+     *
+     * @type {boolean}
+     */
+    fatal;
+
+    get [Symbol.toStringTag]() {
+        return 'TextDecoder';
+    }
+
+    /**
+     * @param {string} encoding an encoding label; converted to a string
+     *   before lookup
+     * @param {object} [options]
+     * @param {boolean=} options.fatal
+     * @param {boolean=} options.ignoreBOM
+     * @throws {RangeError} when the label is unknown or resolves to the
+     *   'replacement' encoding
+     */
+    constructor(encoding = 'utf-8', options = {}) {
+        // A null options bag is treated like an empty one rather than
+        // failing inside the destructuring.
+        const { fatal = false, ignoreBOM = false } = options ?? {};
+
+        const encodingDefinition = getEncodingFromLabel(`${encoding}`);
+
+        if (!encodingDefinition) {
+            throw new RangeError(`Invalid encoding label: '${encoding}'`);
+        }
+
+        if (encodingDefinition.label === 'replacement') {
+            throw new RangeError(`Unsupported replacement encoding: '${encoding}'`);
+        }
+
+        // All four properties are fixed at construction time: read-only and
+        // non-configurable.
+        Object.defineProperty(this, '_internalEncoding', {
+            value: encodingDefinition.internalLabel,
+            enumerable: true,
+            writable: false,
+            configurable: false,
+        });
+
+        Object.defineProperty(this, 'encoding', {
+            value: encodingDefinition.label,
+            enumerable: true,
+            writable: false,
+            configurable: false,
+        });
+
+        // The flags are documented as booleans, so truthy/falsy option
+        // values are normalized before being stored.
+        Object.defineProperty(this, 'ignoreBOM', {
+            value: Boolean(ignoreBOM),
+            enumerable: true,
+            writable: false,
+            configurable: false,
+        });
+
+        Object.defineProperty(this, 'fatal', {
+            value: Boolean(fatal),
+            enumerable: true,
+            writable: false,
+            configurable: false,
+        });
+    }
+
+    /**
+     * Decodes a buffer or buffer view into a string.
+     *
+     * @param {unknown} bytes an ArrayBuffer, a typed array, any object
+     *   exposing an ArrayBuffer as `buffer` (e.g. a DataView), or undefined
+     *   for empty input
+     * @param {object} [options]
+     * @param {boolean=} options.stream not supported; a truthy value throws
+     * @returns {string} the decoded text (the native decode path is assumed
+     *   to return a string as well)
+     * @throws {Error} when `stream` is requested or `bytes` is not one of
+     *   the accepted input kinds
+     */
+    decode(bytes, options = {}) {
+        // As in the constructor, tolerate an explicit null options bag.
+        const { stream = false } = options ?? {};
+
+        if (stream) {
+            throw new Error(`TextDecoder does not implement the 'stream' option.`);
+        }
+
+        /** @type {Uint8Array} */
+        let input;
+
+        // Normalize every accepted input kind to a Uint8Array over the same
+        // underlying bytes (no copy is made).
+        if (bytes instanceof ArrayBuffer) {
+            input = new Uint8Array(bytes);
+        } else if (bytes instanceof Uint8Array) {
+            input = bytes;
+        } else if (bytes instanceof Object.getPrototypeOf(Uint8Array)) {
+            // Any other typed array: re-view exactly the byte range it covers.
+            let { buffer, byteLength, byteOffset } = /** @type {Uint32Array} */ (bytes);
+            input = new Uint8Array(buffer, byteOffset, byteLength);
+        } else if (
+            typeof bytes === 'object' &&
+            bytes !== null &&
+            'buffer' in bytes &&
+            bytes.buffer instanceof ArrayBuffer
+        ) {
+            // View-like objects that are not typed arrays.
+            let { buffer, byteLength, byteOffset } = bytes;
+            input = new Uint8Array(buffer, byteOffset, byteLength);
+        } else if (bytes === undefined) {
+            input = new Uint8Array(0);
+        } else {
+            throw new Error(`Provided input cannot be converted to ArrayBufferView or ArrayBuffer`);
+        }
+
+        // Drop a leading UTF-8 byte order mark (EF BB BF).
+        // NOTE(review): the WHATWG spec strips the BOM when ignoreBOM is
+        // false and keeps it when true, which looks like the opposite of
+        // this condition - confirm how Encoding.decode treats a BOM before
+        // changing it.
+        const startsWithUtf8Bom =
+            input.length > 2 && input[0] === 0xEF && input[1] === 0xBB && input[2] === 0xBF;
+
+        if (this.ignoreBOM && startsWithUtf8Bom) {
+            if (this.encoding !== 'utf-8') {
+                throw new Error(`Cannot ignore BOM for non-UTF8 encoding.`);
+            }
+
+            let { buffer, byteLength, byteOffset } = input;
+            input = new Uint8Array(buffer, byteOffset + 3, byteLength - 3);
+        }
+
+        // Encodings with a code point table are decoded in JavaScript;
+        // everything else goes to the native decoder.
+        if (isSingleByteEncoding(this._internalEncoding)) {
+            const encoding = getSingleByteEncoding(this._internalEncoding);
+
+            return decodeSingleByteEncoding(encoding, input, this.fatal);
+        }
+
+        return Encoding.decode(input, this._internalEncoding, this.fatal);
+    }
+};
+
+/**
+ * Encodes strings through the native encoding module. `encoding` always
+ * reports 'utf-8'.
+ */
+var TextEncoder = class TextEncoder {
+    get [Symbol.toStringTag]() {
+        return 'TextEncoder';
+    }
+
+    get encoding() {
+        return 'utf-8';
+    }
+
+    /**
+     * @param {unknown} [input] value to encode; converted to a string first
+     * @returns the result of the native `encode` call
+     */
+    encode(input = '') {
+        const text = `${input}`;
+
+        // UTF-8 is the only encoding the TextEncoder specification allows.
+        return Encoding.encode(text, 'UTF-8');
+    }
+
+    /**
+     * @param {unknown} [input] value to encode; converted to a string first
+     * @param {Uint8Array} [output] destination passed to the native call
+     * @returns the result of the native `encodeInto` call
+     */
+    encodeInto(input = '', output = new Uint8Array()) {
+        const text = `${input}`;
+
+        // UTF-8 is the only encoding the TextEncoder specification allows.
+        return Encoding.encodeInto(text, output);
+    }
+};
\ No newline at end of file
diff --git a/modules/script/_bootstrap/default.js b/modules/script/_bootstrap/default.js
index 952d7fe3..fe354a02 100644
--- a/modules/script/_bootstrap/default.js
+++ b/modules/script/_bootstrap/default.js
@@ -6,6 +6,7 @@
'use strict';
const {print, printerr, log, logError} = imports._print;
+ const {TextEncoder, TextDecoder} = imports._text;
Object.defineProperties(exports, {
ARGV: {
@@ -16,6 +17,18 @@
return imports.system.programArgs;
},
},
+ TextEncoder: {
+ configurable: false,
+ enumerable: true,
+ writable: false,
+ value: TextEncoder,
+ },
+ TextDecoder: {
+ configurable: false,
+ enumerable: true,
+ writable: false,
+ value: TextDecoder,
+ },
print: {
configurable: false,
enumerable: true,
diff --git a/modules/script/byteArray.js b/modules/script/byteArray.js
index e0b650ac..6669e348 100644
--- a/modules/script/byteArray.js
+++ b/modules/script/byteArray.js
@@ -2,9 +2,8 @@
// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
// SPDX-FileCopyrightText: 2017 Philip Chimento <philip chimento gmail com>
-var {fromGBytes, defineToString} = imports._byteArrayNative;
+var {fromGBytes, fromString, toString} = imports._byteArrayNative;
-const Encoding = imports._encodingNative;
const {GLib} = imports.gi;
// For backwards compatibility
@@ -32,31 +31,6 @@ function toGBytes(array) {
/* eslint no-redeclare: ["error", { "builtinGlobals": false }] */
-/**
- * @param {Uint8Array} array the byte array to decode into a string
- * @param {string} [encoding] a text encoding tag
- * @returns {string}
- */
-function toString(array, encoding = 'utf-8') {
- if (!(array instanceof Uint8Array))
- throw new Error('Argument to ByteArray.toString() must be a Uint8Array');
-
- return Encoding.toString(array, encoding);
-}
-
-/**
- * @param {string} str the string to encode into bytes
- * @param {string} [encoding] a text encoding tag
- * @returns {Uint8Array}
- */
-function fromString(str, encoding = 'utf-8') {
- const array = Encoding.fromString(str, encoding);
-
- defineToString(array);
-
- return array;
-}
-
var ByteArray = class ByteArray {
constructor(arg = 0) {
if (arg instanceof Uint8Array)
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]