[gjs/ewlsh/text-encoding] modules: Implement WHATWG Encoding specification




commit 1ed3db31e9eccc5a1242ba38e6e4fd8ec298234d
Author: Evan Welsh <contact evanwelsh com>
Date:   Tue Jun 1 22:44:57 2021 -0400

    modules: Implement WHATWG Encoding specification

 .eslintignore                        |   2 +
 .eslintrc.yml                        |   2 +
 gjs/byteArray.cpp                    |  81 ++++-
 gjs/jsapi-util-string.cpp            |  48 ++-
 gjs/jsapi-util.h                     |   5 +
 gjs/text-encoding.cpp                | 558 +++++++++++++++++++++--------
 gjs/text-encoding.h                  |  17 +-
 installed-tests/js/meson.build       |   1 +
 installed-tests/js/testEncoding.js   | 661 +++++++++++++++++++++++++++++++++++
 js.gresource.xml                     |   3 +
 modules/core/_encodings.js           | 305 ++++++++++++++++
 modules/core/_singleByteEncodings.js |  40 +++
 modules/core/_text.js                | 189 ++++++++++
 modules/script/_bootstrap/default.js |  13 +
 modules/script/byteArray.js          |  28 +-
 15 files changed, 1767 insertions(+), 186 deletions(-)
---
diff --git a/.eslintignore b/.eslintignore
index 9ee950d3..8f8f93ff 100644
--- a/.eslintignore
+++ b/.eslintignore
@@ -3,4 +3,6 @@
 
 installed-tests/js/jasmine.js
 installed-tests/js/modules/badOverrides/WarnLib.js
+# Until ESLint merges class fields.
+modules/core/_text.js
 modules/script/jsUnit.js
diff --git a/.eslintrc.yml b/.eslintrc.yml
index 733db371..6887f1cb 100644
--- a/.eslintrc.yml
+++ b/.eslintrc.yml
@@ -253,5 +253,7 @@ globals:
   print: readonly
   printerr: readonly
   window: readonly
+  TextEncoder: readonly
+  TextDecoder: readonly
 parserOptions:
   ecmaVersion: 2020
diff --git a/gjs/byteArray.cpp b/gjs/byteArray.cpp
index 341df69e..5bef1585 100644
--- a/gjs/byteArray.cpp
+++ b/gjs/byteArray.cpp
@@ -37,6 +37,26 @@ static void bytes_unref_arraybuffer(void* contents [[maybe_unused]],
     g_bytes_unref(gbytes);
 }
 
+GJS_JSAPI_RETURN_CONVENTION
+static bool to_string_func(JSContext* cx, unsigned argc, JS::Value* vp) {
+    JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
+    JS::UniqueChars encoding;
+    JS::RootedObject byte_array(cx);
+
+    if (!gjs_parse_call_args(cx, "toString", args, "o|s", "byteArray",
+                             &byte_array, "encoding", &encoding))
+        return false;
+
+    const char* actual_encoding = encoding ? encoding.get() : "utf-8";
+    JS::RootedString str(cx, gjs_decode_from_uint8array(
+                                 cx, byte_array, actual_encoding,
+                                 GjsStringTermination::ZERO_TERMINATED, true));
+    if (!str)
+        return false;
+
+    args.rval().setString(str);
+    return true;
+}
 
 /* Workaround to keep existing code compatible. This function is tacked onto
  * any Uint8Array instances created in situations where previously a ByteArray
@@ -53,24 +73,61 @@ static bool instance_to_string_func(JSContext* cx, unsigned argc,
     if (!gjs_parse_call_args(cx, "toString", args, "|s", "encoding", &encoding))
         return false;
 
-    return to_string_impl(cx, this_obj, encoding.get(), args.rval());
+    const char* actual_encoding = encoding ? encoding.get() : "utf-8";
+    JS::RootedString str(cx, gjs_decode_from_uint8array(
+                                 cx, this_obj, actual_encoding,
+                                 GjsStringTermination::ZERO_TERMINATED, true));
+    if (!str)
+        return false;
+
+    args.rval().setString(str);
+    return true;
 }
 
+/* fromString() function implementation */
 GJS_JSAPI_RETURN_CONVENTION
-static bool define_to_string_func(JSContext* context, unsigned argc, JS::Value* vp) {
-    JS::CallArgs argv = JS::CallArgsFromVp (argc, vp);
-
+static bool from_string_func(JSContext* cx, unsigned argc, JS::Value* vp) {
     JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
-    JS::RootedObject obj(context);
-    if (!gjs_parse_call_args(context, "defineToString", args, "o", "obj", &obj))
+
+    if (!args.requireAtLeast(cx, "fromString", 1))
         return false;
 
-    const GjsAtoms& atoms = GjsContextPrivate::atoms(context);
-    if (!JS_DefineFunctionById(context, obj, atoms.to_string(),
+    if (!args[0].isString()) {
+        gjs_throw(cx, "First argument for fromString() should be a string.");
+        return false;
+    }
+
+    // Only check the argument type if there is more than one argument.
+    if (args.length() > 1 && !args[1].isString() && !args[1].isUndefined()) {
+        gjs_throw(cx,
+                  "Second argument for fromString() should be a string or "
+                  "undefined.");
+        return false;
+    }
+
+    GjsAutoChar encoding = nullptr;
+    // If more than one argument is passed and it is a string,
+    // convert it to an encoding label.
+    if (args.length() > 1 && args[1].isString()) {
+        JS::RootedString encoding_str(cx, args[1].toString());
+        JS::UniqueChars chars(JS_EncodeStringToUTF8(cx, encoding_str));
+        encoding = g_strdup(chars.get());
+    }
+
+    JS::RootedString str(cx, args[0].toString());
+    JS::RootedObject uint8array(
+        cx,
+        gjs_encode_to_uint8array(cx, str, encoding ? encoding.get() : "utf-8",
+                                 GjsStringTermination::ZERO_TERMINATED));
+    if (!uint8array)
+        return false;
+
+    const GjsAtoms& atoms = GjsContextPrivate::atoms(cx);
+    if (!JS_DefineFunctionById(cx, uint8array, atoms.to_string(),
                                instance_to_string_func, 1, 0))
         return false;
 
-    argv.rval().setUndefined();
+    args.rval().setObject(*uint8array);
     return true;
 }
 
@@ -80,7 +137,7 @@ from_gbytes_func(JSContext *context,
                  unsigned   argc,
                  JS::Value *vp)
 {
-    JS::CallArgs argv = JS::CallArgsFromVp (argc, vp);
+    JS::CallArgs argv = JS::CallArgsFromVp(argc, vp);
     JS::RootedObject bytes_obj(context);
     GBytes *gbytes;
 
@@ -160,9 +217,9 @@ GByteArray* gjs_byte_array_get_byte_array(JSObject* obj) {
 }
 
 static JSFunctionSpec gjs_byte_array_module_funcs[] = {
+    JS_FN("fromString", from_string_func, 2, 0),
     JS_FN("fromGBytes", from_gbytes_func, 1, 0),
-    JS_FN("defineToString", define_to_string_func, 1, 0),
-    JS_FS_END};
+    JS_FN("toString", to_string_func, 2, 0), JS_FS_END};
 
 bool
 gjs_define_byte_array_stuff(JSContext              *cx,
diff --git a/gjs/jsapi-util-string.cpp b/gjs/jsapi-util-string.cpp
index 5fc1164a..21a643ea 100644
--- a/gjs/jsapi-util-string.cpp
+++ b/gjs/jsapi-util-string.cpp
@@ -98,6 +98,47 @@ JS::UniqueChars gjs_string_to_utf8(JSContext* cx, const JS::Value value) {
     return JS_EncodeStringToUTF8(cx, str);
 }
 
+/**
+ * gjs_lossy_string_from_utf8:
+ *
+ * @brief Converts an array of UTF-8 characters to a JS string.
+ * Instead of throwing, any invalid characters will be converted
+ * to the UTF-8 invalid character fallback.
+ *
+ * @param cx the current #JSContext
+ * @param utf8_string an array of UTF-8 characters
+ * @returns the resulting JS string, or nullptr if the conversion failed
+ */
+JSString* gjs_lossy_string_from_utf8(JSContext* cx, const char* utf8_string) {
+    JS::ConstUTF8CharsZ chars(utf8_string, strlen(utf8_string));
+    size_t len;
+    JS::TwoByteCharsZ twobyte_chars(
+        JS::LossyUTF8CharsToNewTwoByteCharsZ(cx, chars, &len, js::MallocArena));
+
+    if (!twobyte_chars)
+        return nullptr;
+
+    return JS_NewUCStringCopyN(cx, twobyte_chars.get(), len);
+}
+
+/**
+ * gjs_lossy_string_from_utf8_n:
+ *
+ * @brief Provides the same conversion behavior as gjs_lossy_string_from_utf8
+ * with a fixed length. See gjs_lossy_string_from_utf8()
+ */
+JSString* gjs_lossy_string_from_utf8_n(JSContext* cx, const char* utf8_string,
+                                       size_t len) {
+    JS::UTF8Chars chars(utf8_string, len);
+    size_t outlen;
+    JS::TwoByteCharsZ twobyte_chars(JS::LossyUTF8CharsToNewTwoByteCharsZ(
+        cx, chars, &outlen, js::MallocArena));
+    if (!twobyte_chars)
+        return nullptr;
+
+    return JS_NewUCStringCopyN(cx, twobyte_chars.get(), outlen);
+}
+
 bool
 gjs_string_from_utf8(JSContext             *context,
                      const char            *utf8_string,
@@ -105,10 +146,11 @@ gjs_string_from_utf8(JSContext             *context,
 {
     JS::ConstUTF8CharsZ chars(utf8_string, strlen(utf8_string));
     JS::RootedString str(context, JS_NewStringCopyUTF8Z(context, chars));
-    if (str)
-        value_p.setString(str);
+    if (!str)
+        return false;
 
-    return str != nullptr;
+    value_p.setString(str);
+    return true;
 }
 
 bool
diff --git a/gjs/jsapi-util.h b/gjs/jsapi-util.h
index 4e399f25..0728f404 100644
--- a/gjs/jsapi-util.h
+++ b/gjs/jsapi-util.h
@@ -429,6 +429,11 @@ void gjs_warning_reporter(JSContext*, JSErrorReport* report);
 GJS_JSAPI_RETURN_CONVENTION
 JS::UniqueChars gjs_string_to_utf8(JSContext* cx, const JS::Value string_val);
 GJS_JSAPI_RETURN_CONVENTION
+JSString* gjs_lossy_string_from_utf8(JSContext* cx, const char* utf8_string);
+GJS_JSAPI_RETURN_CONVENTION
+JSString* gjs_lossy_string_from_utf8_n(JSContext* cx, const char* utf8_string,
+                                       size_t len);
+GJS_JSAPI_RETURN_CONVENTION
 bool gjs_string_from_utf8(JSContext             *context,
                           const char            *utf8_string,
                           JS::MutableHandleValue value_p);
diff --git a/gjs/text-encoding.cpp b/gjs/text-encoding.cpp
index 5e706a83..5b2d22a6 100644
--- a/gjs/text-encoding.cpp
+++ b/gjs/text-encoding.cpp
@@ -41,86 +41,240 @@ static void gfree_arraybuffer_contents(void* contents, void*) {
     g_free(contents);
 }
 
-GJS_JSAPI_RETURN_CONVENTION
-bool to_string_impl_slow(JSContext* cx, uint8_t* data, uint32_t len,
-                         const char* encoding, JS::MutableHandleValue rval) {
-    size_t bytes_written;
-    GError* error = nullptr;
-    GjsAutoChar u16_str = g_convert(reinterpret_cast<char*>(data), len,
-    // Make sure the bytes of the UTF-16 string are laid out in memory
-    // such that we can simply reinterpret_cast<char16_t> them.
+static void gjs_throw_type_error_from_gerror(JSContext* cx, GError* error) {
+    g_return_if_fail(error);
+    gjs_throw_custom(cx, JSProto_TypeError, nullptr, "%s", error->message);
+    g_error_free(error);
+}
+
+// UTF16_CODESET is used to encode and decode UTF-16 buffers with
+// iconv. To ensure the output of iconv is laid out in memory correctly
+// we have to use UTF-16LE on little endian systems and UTF-16BE on big
+// endian systems.
+//
+// This ensures we can simply reinterpret_cast<char16_t> iconv's output.
 #if G_BYTE_ORDER == G_LITTLE_ENDIAN
-                                    "UTF-16LE",
+static const char* UTF16_CODESET = "UTF-16LE";
 #else
-                                    "UTF-16BE",
+static const char* UTF16_CODESET = "UTF-16BE";
 #endif
-                                    encoding, nullptr, /* bytes read */
-                                    &bytes_written, &error);
-    if (!u16_str)
-        return gjs_throw_gerror_message(cx, error);  // frees GError
 
-    // bytes_written should be bytes in a UTF-16 string so should be a multiple
-    // of 2
-    g_assert((bytes_written % 2) == 0);
+static JSString* gjs_lossy_decode_from_uint8array_slow(
+    JSContext* cx, uint8_t* bytes, size_t bytes_len, const char* from_codeset) {
+    GError* error = nullptr;
+    GjsAutoUnref<GCharsetConverter> converter(
+        g_charset_converter_new(UTF16_CODESET, from_codeset, &error));
 
-    // g_convert 0-terminates the string, although the 0 isn't included in
-    // bytes_written
-    JSString* s =
-        JS_NewUCStringCopyZ(cx, reinterpret_cast<char16_t*>(u16_str.get()));
-    if (!s)
-        return false;
+    // This should only throw if an encoding is not available.
+    if (error) {
+        gjs_throw_type_error_from_gerror(cx, error);
+        return nullptr;
+    }
 
-    rval.setString(s);
-    return true;
+    // TODO: We can likely be more intelligent about our initial
+    // allocation and allocate based on bytes_len
+    int buffer_size = 1024;
+
+    // Cast data to correct input types
+    const char* input = reinterpret_cast<const char*>(bytes);
+    size_t input_len = bytes_len;
+
+    // The base string that we'll append to.
+    std::u16string output_str = u"";
+
+    do {
+        // Create a buffer to convert into.
+        char buffer[buffer_size];
+        size_t bytes_written = 0, bytes_read = 0;
+
+        g_converter_convert(G_CONVERTER(converter.get()), input, input_len,
+                            buffer, buffer_size, G_CONVERTER_INPUT_AT_END,
+                            &bytes_read, &bytes_written, &error);
+
+        // If bytes were read, adjust input.
+        if (bytes_read > 0) {
+            input += bytes_read;
+            input_len -= bytes_read;
+        }
+
+        // If bytes were written append the buffer contents to our string
+        // accumulator
+        if (bytes_written > 0) {
+            char16_t* utf16_buffer = reinterpret_cast<char16_t*>(buffer);
+            // UTF-16 uses exactly 2 bytes for every character.
+            output_str.append(utf16_buffer, bytes_written / 2);
+        } else if (error) {
+            // A PARTIAL_INPUT error can only occur if the user does not provide
+            // the full sequence for a multi-byte character. We skip over the
+            // next byte and insert the Unicode replacement character (U+FFFD).
+
+            // An INVALID_DATA error occurs when there is no way to decode a
+            // given byte into UTF-16 or the given byte does not exist in the
+            // source encoding.
+            if (g_error_matches(error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA) ||
+                g_error_matches(error, G_IO_ERROR, G_IO_ERROR_PARTIAL_INPUT)) {
+                // If we're already at the end of the string, don't insert a
+                // fallback.
+                if (input_len > 0) {
+                    // Skip the next byte and reduce length by one.
+                    input += 1;
+                    input_len -= 1;
+
+                    // Append the unicode fallback character to the output
+                    output_str.append(u"\ufffd", 1);
+                }
+
+                // Clear the error.
+                g_clear_error(&error);
+            } else if (g_error_matches(error, G_IO_ERROR,
+                                       G_IO_ERROR_NO_SPACE)) {
+                // If the buffer was full increase the buffer
+                // size and re-try the conversion.
+                buffer_size += 512;
+
+                // Clear the error.
+                g_clear_error(&error);
+            }
+        }
+
+        // Stop decoding if an unknown error occurs.
+    } while (input_len > 0 && !error);
+
+    // An unexpected error occurred.
+    if (error) {
+        gjs_throw_type_error_from_gerror(cx, error);
+        return nullptr;
+    }
+
+    // Copy the accumulator's data into a JSString of Unicode (UTF-16) chars.
+    return JS_NewUCStringCopyN(cx, output_str.c_str(), output_str.size());
 }
 
-/* implement toString() with an optional encoding arg */
-GJS_JSAPI_RETURN_CONVENTION
-bool to_string_impl(JSContext* context, JS::HandleObject byte_array,
-                           const char* encoding, JS::MutableHandleValue rval) {
-    if (!JS_IsUint8Array(byte_array)) {
-        gjs_throw(context,
-                  "Argument to ByteArray.toString() must be a Uint8Array");
-        return false;
+[[nodiscard]] JSString* gjs_decode_from_uint8array_slow(JSContext* cx,
+                                                        uint8_t* input,
+                                                        uint32_t input_len,
+                                                        const char* encoding,
+                                                        bool fatal) {
+    // If the decoding is not fatal we use the lossy decoder.
+    if (!fatal)
+        return gjs_lossy_decode_from_uint8array_slow(cx, input, input_len,
+                                                     encoding);
+
+    size_t bytes_written, bytes_read;
+    GError* error = nullptr;
+
+    GjsAutoChar bytes =
+        g_convert(reinterpret_cast<char*>(input), input_len, UTF16_CODESET,
+                  encoding, &bytes_read, &bytes_written, &error);
+
+    if (error) {
+        gjs_throw_type_error_from_gerror(cx, error);
+        return nullptr;
     }
 
-    bool encoding_is_utf8;
-    uint8_t* data;
+    // bytes_written should be bytes in a UTF-16 string so should be a
+    // multiple of 2
+    g_assert((bytes_written % 2) == 0);
 
-    if (encoding) {
-        /* maybe we should be smarter about utf8 synonyms here.
-         * doesn't matter much though. encoding_is_utf8 is
-         * just an optimization anyway.
-         */
-        encoding_is_utf8 = (strcmp(encoding, "UTF-8") == 0);
-    } else {
-        encoding_is_utf8 = true;
+    // Cast g_convert's output to char16_t and copy the data.
+    const char16_t* unicode_bytes = reinterpret_cast<char16_t*>(bytes.get());
+    return JS_NewUCStringCopyN(cx, unicode_bytes, bytes_written / 2);
+}
+
+[[nodiscard]] static bool is_utf8_label(const char* encoding) {
+    // We could be smarter about utf8 synonyms here.
+    // For now, we handle any casing and trailing/leading
+    // whitespace.
+    //
+    // is_utf8_label is only an optimization, so if a label
+    // doesn't match we just use the slower path.
+    if (strcasecmp(encoding, "utf-8") == 0 || strcasecmp(encoding, "utf8") == 0)
+        return true;
+
+    GjsAutoChar stripped(g_strdup(encoding));
+    return strcasecmp(g_strstrip(stripped), "utf-8") == 0 ||
+           strcasecmp(stripped, "utf8") == 0;
+}
+
+// Finds the length of a given data array, stopping at the first 0 byte.
+[[nodiscard]] static uint32_t zero_terminated_length(uint8_t* data,
+                                                     uint32_t len) {
+    uint8_t *start = data, *end = data + (len * sizeof(uint8_t));
+    uint8_t* found = std::find(start, end, 0);
+    if (found != end)
+        return std::distance(data, found) / sizeof(uint8_t);
+
+    return len;
+}
+
+// decode() function implementation
+JSString* gjs_decode_from_uint8array(JSContext* cx, JS::HandleObject byte_array,
+                                     const char* encoding,
+                                     GjsStringTermination string_termination,
+                                     bool fatal) {
+    if (!JS_IsUint8Array(byte_array)) {
+        gjs_throw(cx, "Argument to decode() must be a Uint8Array");
+        return nullptr;
     }
 
+    uint8_t* data;
     uint32_t len;
     bool is_shared_memory;
     js::GetUint8ArrayLengthAndData(byte_array, &len, &is_shared_memory, &data);
 
-    if (len == 0) {
-        rval.setString(JS_GetEmptyString(context));
-        return true;
-    }
-
+    // If the desired behavior is zero-terminated, calculate the
+    // zero-terminated length of the given data. If the original
+    // length, len, is smaller than the zero-terminated length,
+    // use it.
+    if (len && string_termination == GjsStringTermination::ZERO_TERMINATED)
+        len = std::min(len, zero_terminated_length(data, len));
+
+    // If the calculated length is 0 we can just return an empty string.
+    if (len == 0)
+        return JS_GetEmptyString(cx);
+
+    // Optimization, only use glib's iconv-based converters if we're dealing
+    // with a non-UTF8 encoding. SpiderMonkey has highly optimized UTF-8
+    // decoders and encoders.
+    bool encoding_is_utf8 = is_utf8_label(encoding);
     if (!encoding_is_utf8)
-        return to_string_impl_slow(context, data, len, encoding, rval);
+        return gjs_decode_from_uint8array_slow(cx, data, len, encoding, fatal);
 
-    // optimization, avoids iconv overhead and runs libmozjs hardwired
-    // utf8-to-utf16
-
-    // If there are any 0 bytes, including the terminating byte, stop at the
-    // first one
-    if (data[len - 1] == 0 || memchr(data, 0, len)) {
-        if (!gjs_string_from_utf8(context, reinterpret_cast<char*>(data), rval))
-            return false;
+    JS::RootedString decoded(cx);
+    if (!fatal) {
+        decoded.set(gjs_lossy_string_from_utf8_n(
+            cx, reinterpret_cast<char*>(data), len));
     } else {
-        if (!gjs_string_from_utf8_n(context, reinterpret_cast<char*>(data), len,
-                                    rval))
-            return false;
+        JS::UTF8Chars chars(reinterpret_cast<char*>(data), len);
+        JS::RootedString str(cx, JS_NewStringCopyUTF8N(cx, chars));
+        decoded.set(str);
+
+        // If an exception occurred, we need to check if the
+        // exception was an InternalError. Unfortunately,
+        // SpiderMonkey's decoder can throw InternalError for some
+        // invalid UTF-8 sources, so we have to convert this into a
+        // TypeError to match the Encoding specification.
+        if (!str) {
+            if (!JS_IsExceptionPending(cx))
+                return nullptr;
+            JS::RootedValue exc(cx);
+
+            if (!JS_GetPendingException(cx, &exc) || !exc.isObject())
+                return nullptr;
+
+            JS::RootedObject exc_obj(cx, &exc.toObject());
+            const JSClass* internal_error =
+                js::ProtoKeyToClass(JSProto_InternalError);
+            if (JS_InstanceOf(cx, exc_obj, internal_error, nullptr)) {
+                // Clear the existing exception.
+                JS_ClearPendingException(cx);
+                gjs_throw_custom(
+                    cx, JSProto_TypeError, nullptr,
+                    "The provided encoded data was not valid UTF-8");
+            }
+
+            return nullptr;
+        }
     }
 
     uint8_t* current_data;
@@ -128,78 +282,96 @@ bool to_string_impl(JSContext* context, JS::HandleObject byte_array,
     bool ignore_val;
 
     // If a garbage collection occurs between when we call
-    // js::GetUint8ArrayLengthAndData and return from gjs_string_from_utf8, a
-    // use-after-free corruption can occur if the garbage collector shifts the
-    // location of the Uint8Array's private data. To mitigate this we call
-    // js::GetUint8ArrayLengthAndData again and then compare if the length and
-    // pointer are still the same. If the pointers differ, we use the slow path
-    // to ensure no data corruption occurred. The shared-ness of an array cannot
-    // change between calls, so we ignore it.
+    // js::GetUint8ArrayLengthAndData and return from
+    // gjs_decode_from_uint8array, a use-after-free corruption can occur if the
+    // garbage collector shifts the location of the Uint8Array's private data.
+    // To mitigate this we call js::GetUint8ArrayLengthAndData again and then
+    // compare if the length and pointer are still the same. If the pointers
+    // differ, we use the slow path to ensure no data corruption occurred. The
+    // shared-ness of an array cannot change between calls, so we ignore it.
     js::GetUint8ArrayLengthAndData(byte_array, &current_len, &ignore_val,
                                    &current_data);
 
     // Ensure the private data hasn't changed
-    if (current_len == len && current_data == data)
-        return true;
+    if (current_data == data)
+        return decoded;
+
+    // Length shouldn't change across calls but recalculate
+    // based on the moved data to be sure.
+    if (current_len &&
+        string_termination == GjsStringTermination::ZERO_TERMINATED) {
+        current_len = std::min(
+            current_len, zero_terminated_length(current_data, current_len));
+    }
 
     // This was the UTF-8 optimized path, so we explicitly pass the encoding
-    return to_string_impl_slow(context, current_data, current_len, "UTF-8",
-                               rval);
+    return gjs_decode_from_uint8array_slow(cx, current_data, current_len,
+                                           "UTF-8", fatal);
 }
 
 GJS_JSAPI_RETURN_CONVENTION
-static bool to_string_func(JSContext* cx, unsigned argc, JS::Value* vp) {
+static bool gjs_decode(JSContext* cx, unsigned argc, JS::Value* vp) {
     JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
-    JS::UniqueChars encoding;
-    JS::RootedObject byte_array(cx);
 
-    if (!gjs_parse_call_args(cx, "toString", args, "o|s", "byteArray",
-                             &byte_array, "encoding", &encoding))
+    JS::RootedObject byte_array(cx);
+    JS::UniqueChars encoding;
+    bool fatal = false;
+    if (!gjs_parse_call_args(cx, "decode", args, "os|b", "byteArray",
+                             &byte_array, "encoding", &encoding, "fatal",
+                             &fatal))
         return false;
 
-    return to_string_impl(cx, byte_array, encoding.get(), args.rval());
+    JS::RootedString decoded(
+        cx, gjs_decode_from_uint8array(cx, byte_array, encoding.get(),
+                                       GjsStringTermination::EXPLICIT_LENGTH,
+                                       fatal));
+    args.rval().setString(decoded);
+    return true;
 }
 
-
-/* fromString() function implementation */
 GJS_JSAPI_RETURN_CONVENTION
-static bool
-from_string_func(JSContext *context,
-                 unsigned   argc,
-                 JS::Value *vp)
-{
-    JS::CallArgs argv = JS::CallArgsFromVp (argc, vp);
-    JS::UniqueChars encoding;
-    JS::UniqueChars utf8;
-    bool encoding_is_utf8;
-    JS::RootedObject obj(context), array_buffer(context);
+static bool gjs_string_to_utf8_n(JSContext* cx, JS::HandleString str,
+                                 char** output, size_t* output_len) {
+    JSLinearString* linear = JS_EnsureLinearString(cx, str);
+    if (!linear)
+        return false;
 
-    if (!gjs_parse_call_args(context, "fromString", argv, "s|s",
-                             "string", &utf8,
-                             "encoding", &encoding))
+    size_t length = JS::GetDeflatedUTF8StringLength(linear);
+    char* bytes = js_pod_malloc<char>(length);
+    if (!bytes)
         return false;
 
-    if (argc > 1) {
-        /* maybe we should be smarter about utf8 synonyms here.
-         * doesn't matter much though. encoding_is_utf8 is
-         * just an optimization anyway.
-         */
-        encoding_is_utf8 = (strcmp(encoding.get(), "UTF-8") == 0);
-    } else {
-        encoding_is_utf8 = true;
-    }
+    size_t deflated_length =
+        JS::DeflateStringToUTF8Buffer(linear, mozilla::Span(bytes, length));
+    g_assert(deflated_length == length);
+
+    *output_len = length;
+    *output = bytes;
+    return true;
+}
 
+// encode() function implementation
+JSObject* gjs_encode_to_uint8array(JSContext* cx, JS::HandleString str,
+                                   const char* encoding,
+                                   GjsStringTermination string_termination) {
+    JS::RootedObject array_buffer(cx);
+
+    bool encoding_is_utf8 = is_utf8_label(encoding);
     if (encoding_is_utf8) {
-        /* optimization? avoids iconv overhead and runs
-         * libmozjs hardwired utf16-to-utf8.
-         */
-        size_t len = strlen(utf8.get());
-        array_buffer =
-            JS::NewArrayBufferWithContents(context, len, utf8.release());
+        char* utf8;
+        size_t utf8_len;
+
+        if (!gjs_string_to_utf8_n(cx, str, &utf8, &utf8_len))
+            return nullptr;
+
+        if (string_termination == GjsStringTermination::ZERO_TERMINATED) {
+            utf8_len = std::min(utf8_len, strlen(utf8));
+        }
+
+        array_buffer = JS::NewArrayBufferWithContents(cx, utf8_len, utf8);
     } else {
-        JSString *str = argv[0].toString();  /* Rooted by argv */
-        GError *error = NULL;
-        char *encoded = NULL;
+        GError* error = nullptr;
+        char* encoded = nullptr;
         gsize bytes_written;
 
         /* Scope for AutoCheckCannotGC, will crash if a GC is triggered
@@ -209,51 +381,157 @@ from_string_func(JSContext *context,
             size_t len;
 
             if (JS_StringHasLatin1Chars(str)) {
-                const JS::Latin1Char *chars =
-                    JS_GetLatin1StringCharsAndLength(context, nogc, str, &len);
-                if (chars == NULL)
-                    return false;
-
-                encoded = g_convert((char *) chars, len,
-                                    encoding.get(),  // to_encoding
-                                    "LATIN1",  /* from_encoding */
-                                    NULL,  /* bytes read */
-                                    &bytes_written, &error);
+                const JS::Latin1Char* chars =
+                    JS_GetLatin1StringCharsAndLength(cx, nogc, str, &len);
+                if (!chars)
+                    return nullptr;
+
+                encoded =
+                    g_convert(reinterpret_cast<const char*>(chars), len,
+                              /* to_encoding */ encoding,
+                              /* from_encoding */ "LATIN1",
+                              /* bytes read */ nullptr, &bytes_written, &error);
             } else {
-                const char16_t *chars =
-                    JS_GetTwoByteStringCharsAndLength(context, nogc, str, &len);
-                if (chars == NULL)
-                    return false;
-
-                encoded = g_convert((char *) chars, len * 2,
-                                    encoding.get(),  // to_encoding
-                                    "UTF-16",  /* from_encoding */
-                                    NULL,  /* bytes read */
-                                    &bytes_written, &error);
+                const char16_t* chars =
+                    JS_GetTwoByteStringCharsAndLength(cx, nogc, str, &len);
+                if (!chars)
+                    return nullptr;
+
+                encoded =
+                    g_convert(reinterpret_cast<const char*>(chars), len * 2,
+                              encoding,  // to_encoding
+                              "UTF-16",  /* from_encoding */
+                              nullptr,   /* bytes read */
+                              &bytes_written, &error);
             }
         }
 
-        if (!encoded)
-            return gjs_throw_gerror_message(context, error);  // frees GError
+        if (!encoded) {
+            gjs_throw_type_error_from_gerror(cx, error);  // frees GError
+            return nullptr;
+        }
 
-        array_buffer =
-            JS::NewExternalArrayBuffer(context, bytes_written, encoded,
-                                       gfree_arraybuffer_contents, nullptr);
+        array_buffer = JS::NewExternalArrayBuffer(
+            cx, bytes_written, encoded, gfree_arraybuffer_contents, nullptr);
     }
 
     if (!array_buffer)
+        return nullptr;
+
+    return JS_NewUint8ArrayWithBuffer(cx, array_buffer, 0, -1);
+}
+
+GJS_JSAPI_RETURN_CONVENTION
+bool gjs_encode_into_uint8array(JSContext* cx, JS::HandleString str,
+                                JS::HandleObject uint8array,
+                                JS::MutableHandleValue rval) {
+    if (!JS_IsUint8Array(uint8array)) {
+        gjs_throw_custom(cx, JSProto_TypeError, nullptr,
+                         "Argument to encodeInto() must be a Uint8Array");
+        return false;
+    }
+
+    auto len = JS_GetTypedArrayByteLength(uint8array);
+    bool shared = JS_GetTypedArraySharedness(uint8array);
+
+    if (shared) {
+        gjs_throw(cx, "Cannot encode data into shared memory.");
+        return false;
+    }
+
+    mozilla::Maybe<mozilla::Tuple<size_t, size_t>> results;
+
+    {
+        JS::AutoCheckCannotGC nogc(cx);
+        uint8_t* data = JS_GetUint8ArrayData(uint8array, &shared, nogc);
+
+        // We already checked for sharedness with JS_GetTypedArraySharedness
+        g_assert(!shared);
+
+        results = JS_EncodeStringToUTF8BufferPartial(
+            cx, str, mozilla::AsWritableChars(mozilla::Span(data, len)));
+    }
+
+    if (!results) {
+        JS_ReportOutOfMemory(cx);
+        return false;
+    }
+
+    size_t read, written;
+
+    mozilla::Tie(read, written) = *results;
+
+    g_assert(written <= len);
+
+    JS::RootedObject result(cx, JS_NewPlainObject(cx));
+    if (!result)
+        return false;
+
+    JS::RootedValue readv(cx, JS::NumberValue(read)),
+        writtenv(cx, JS::NumberValue(written));
+
+    if (!JS_SetProperty(cx, result, "read", readv) ||
+        !JS_SetProperty(cx, result, "written", writtenv)) {
+        return false;
+    }
+
+    rval.setObject(*result);
+    return true;
+}
+
+GJS_JSAPI_RETURN_CONVENTION static bool gjs_encode(JSContext* cx, unsigned argc,
+                                                   JS::Value* vp) {
+    JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
+    if (!args.requireAtLeast(cx, "encode", 2))
+        return false;
+
+    if (!args[0].isString()) {
+        gjs_throw(cx, "First argument for encode() should be a string.");
+        return false;
+    }
+
+    JS::RootedString str(cx, args[0].toString());
+    JS::UniqueChars encoding = gjs_string_to_utf8(cx, args[1]);
+    if (!encoding)
+        return false;
+
+    JS::RootedObject uint8array(
+        cx, gjs_encode_to_uint8array(cx, str, encoding.get(),
+                                     GjsStringTermination::EXPLICIT_LENGTH));
+    if (!uint8array)
         return false;
-    obj = JS_NewUint8ArrayWithBuffer(context, array_buffer, 0, -1);
 
-    argv.rval().setObject(*obj);
+    args.rval().setObject(*uint8array);
     return true;
 }
 
+GJS_JSAPI_RETURN_CONVENTION static bool gjs_encode_into(JSContext* cx,
+                                                        unsigned argc,
+                                                        JS::Value* vp) {
+    JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
+    if (!args.requireAtLeast(cx, "encodeInto", 2))
+        return false;
+
+    if (!args[0].isString()) {
+        gjs_throw(cx, "First argument for encodeInto() should be a string.");
+        return false;
+    }
+
+    if (!args[1].isObject()) {
+        gjs_throw(cx, "Second argument for encodeInto() should be an object.");
+        return false;
+    }
+
+    JS::RootedString str(cx, args[0].toString());
+    JS::RootedObject uint8array(cx, &args[1].toObject());
+
+    return gjs_encode_into_uint8array(cx, str, uint8array, args.rval());
+}
 
 static JSFunctionSpec gjs_text_encoding_module_funcs[] = {
-    JS_FN("fromString", from_string_func, 2, 0),
-    JS_FN("toString", to_string_func, 2, 0),
-    JS_FS_END};
+    JS_FN("decode", gjs_decode, 3, 0),
+    JS_FN("encodeInto", gjs_encode_into, 2, 0),
+    JS_FN("encode", gjs_encode, 2, 0), JS_FS_END};
 
 bool gjs_define_text_encoding_stuff(JSContext* cx,
                                     JS::MutableHandleObject module) {
diff --git a/gjs/text-encoding.h b/gjs/text-encoding.h
index b389acf2..54a780b2 100644
--- a/gjs/text-encoding.h
+++ b/gjs/text-encoding.h
@@ -15,11 +15,20 @@
 
 #include "gjs/macros.h"
 
-[[nodiscard]] bool to_string_impl(JSContext* cx, JS::HandleObject uint8array,
-                                   const char* encoding,
-                                   JS::MutableHandleValue rval);
+enum class GjsStringTermination {  // passed to the decode/encode helpers below
+    ZERO_TERMINATED,  // NOTE(review): presumably "stop at first NUL" - confirm
+    EXPLICIT_LENGTH,  // value the native encode() in text-encoding.cpp passes
+};
 
-[[nodiscard]] bool gjs_define_text_encoding_stuff(
+[[nodiscard]] JSString* gjs_decode_from_uint8array(  // result must be used
+    JSContext* cx, JS::HandleObject uint8array, const char* encoding,
+    GjsStringTermination string_termination, bool fatal);  // NOTE(review): 'fatal' presumably mirrors TextDecoder's fatal option - confirm
+
+[[nodiscard]] JSObject* gjs_encode_to_uint8array(  // callers treat a null result as failure
+    JSContext* cx, JS::HandleString str, const char* encoding,
+    GjsStringTermination string_termination);  // native encode() passes EXPLICIT_LENGTH
+
+GJS_JSAPI_RETURN_CONVENTION bool gjs_define_text_encoding_stuff(  // defined in text-encoding.cpp
     JSContext* cx, JS::MutableHandleObject module);
 
 #endif  // GJS_TEXT_ENCODING_H_
diff --git a/installed-tests/js/meson.build b/installed-tests/js/meson.build
index e11f1418..cca525c1 100644
--- a/installed-tests/js/meson.build
+++ b/installed-tests/js/meson.build
@@ -94,6 +94,7 @@ subdir('libgjstesttools')
 jasmine_tests = [
     'self',
     'ByteArray',
+    'Encoding',
     'Exceptions',
     'Format',
     'Fundamental',
diff --git a/installed-tests/js/testEncoding.js b/installed-tests/js/testEncoding.js
new file mode 100644
index 00000000..af9ca06a
--- /dev/null
+++ b/installed-tests/js/testEncoding.js
@@ -0,0 +1,661 @@
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: Copyright 2018-2020 the Deno authors. All rights reserved.
+
+// Modified from https://github.com/denoland/deno/blob/923214c53725651792f6d55c5401bf6b475622ea/op_crates/web/08_text_encoding.js
+// Data originally from https://encoding.spec.whatwg.org/encodings.json
+
+describe('Text Encoding', function () {
+    it('textDecoder', function () {  // decoder constructed without a label
+        const fixture = new Uint8Array([  // four 4-byte UTF-8 sequences
+            0xf0, 0x9d, 0x93, 0xbd, 0xf0, 0x9d, 0x93, 0xae, 0xf0, 0x9d, 0x94, 0x81, 0xf0, 0x9d, 0x93, 0xbd,
+        ]);
+        const decoder = new TextDecoder();
+        expect(decoder.decode(fixture)).toBe('𝓽𝓮𝔁𝓽');  // i.e. the bytes are read as UTF-8
+    });
+
+    it('textDecoderIgnoreBOM', function () {
+        const fixture = new Uint8Array([  // starts with EF BB BF, the UTF-8 byte order mark
+            0xef, 0xbb, 0xbf, 0xf0, 0x9d, 0x93, 0xbd, 0xf0, 0x9d, 0x93, 0xae, 0xf0, 0x9d, 0x94, 0x81, 0xf0, 0x9d, 0x93, 0xbd,
+        ]);
+        const decoder = new TextDecoder('utf-8', {ignoreBOM: true});
+        expect(decoder.decode(fixture)).toBe('𝓽𝓮𝔁𝓽');  // NOTE(review): per WHATWG, ignoreBOM: true keeps U+FEFF in the output; confirm this expectation (unless the literal carries an invisible U+FEFF)
+    });
+
+    it('textDecoderNotBOM', function () {
+        const fixture = new Uint8Array([  // EF BB 89 is not the BOM; it is the UTF-8 form of U+FEC9
+            0xef, 0xbb, 0x89, 0xf0, 0x9d, 0x93, 0xbd, 0xf0, 0x9d, 0x93, 0xae, 0xf0, 0x9d, 0x94, 0x81, 0xf0, 0x9d, 0x93, 0xbd,
+        ]);
+        const decoder = new TextDecoder('utf-8', {ignoreBOM: true});
+        expect(decoder.decode(fixture)).toBe('ﻉ𝓽𝓮𝔁𝓽');  // the leading character must survive decoding
+    });
+
+    it('textDecoderASCII', function () {
+        const fixture = new Uint8Array([0x89, 0x95, 0x9f, 0xbf]);  // all bytes are >= 0x80
+        const decoder = new TextDecoder('ascii');  // 'ascii' is listed as a windows-1252 label in encodingsTable
+        expect(decoder.decode(fixture)).toBe('‰•Ÿ¿');
+    });
+
+    it('textDecoderErrorEncoding', function () {  // an unsupported label must throw from the constructor
+        expect(() => new TextDecoder('Foo')).toThrowError("Invalid encoding label: 'Foo'");
+    });
+
+    it('textDecoderHandlesUndefined', function () {
+        const fixture = undefined;  // same as calling decode() with no input
+        const decoder = new TextDecoder();
+        expect(decoder.decode(fixture)).toBe('');  // undefined input yields an empty string, not an error
+    });
+
+    it('textDecoderThrowsOnEmpty', function () {
+        const fixture = '';  // a string, not a buffer or view
+        const decoder = new TextDecoder();
+        // Strings are rejected as decode() input, even when empty.
+        expect(() => decoder.decode(fixture))
+            .toThrowError('Provided input cannot be converted to ArrayBufferView or ArrayBuffer');
+    });
+
+    it('textDecoderThrowsOnNull', function () {
+        const fixture = null;
+        const decoder = new TextDecoder();
+        // Unlike undefined, null is expected to be rejected.
+        expect(() => decoder.decode(fixture))
+            .toThrowError('Provided input cannot be converted to ArrayBufferView or ArrayBuffer');
+    });
+
+    it('textEncoder', function () {
+        const fixture = '𝓽𝓮𝔁𝓽';
+        const encoder = new TextEncoder();
+        // Array.from() turns the encode() result into a plain array for toEqual().
+        expect(Array.from(encoder.encode(fixture))).toEqual([
+            0xf0, 0x9d, 0x93, 0xbd, 0xf0, 0x9d, 0x93, 0xae, 0xf0, 0x9d, 0x94, 0x81, 0xf0, 0x9d, 0x93, 0xbd,
+        ]);
+    });
+
+    it('textEncodeInto', function () {
+        const fixture = 'text';
+        const encoder = new TextEncoder();
+        const bytes = new Uint8Array(5);  // one byte larger than the 4 bytes expected to be written
+        const result = encoder.encodeInto(fixture, bytes);
+        expect(result.read).toBe(4);
+        expect(result.written).toBe(4);
+        // The spare fifth byte must keep its initial 0x00.
+        expect(Array.from(bytes)).toEqual([0x74, 0x65, 0x78, 0x74, 0x00]);
+    });
+
+    it('textEncodeInto2', function () {
+        const fixture = '𝓽𝓮𝔁𝓽';
+        const encoder = new TextEncoder();
+        const bytes = new Uint8Array(17);  // 16 bytes of output plus one spare byte
+        const result = encoder.encodeInto(fixture, bytes);
+        expect(result.read).toBe(8);  // counted in UTF-16 code units (four surrogate pairs)
+        expect(result.written).toBe(16);  // counted in bytes
+        // The spare last byte must keep its initial 0x00.
+        expect(Array.from(bytes)).toEqual([
+            0xf0, 0x9d, 0x93, 0xbd, 0xf0, 0x9d, 0x93, 0xae, 0xf0, 0x9d, 0x94, 0x81, 0xf0, 0x9d, 0x93, 0xbd, 0x00,
+        ]);
+    });
+
+    it('textEncodeInto3', function () {
+        const fixture = '𝓽𝓮𝔁𝓽';
+        const encoder = new TextEncoder();
+        const bytes = new Uint8Array(5);  // room for one 4-byte sequence but not a second
+        const result = encoder.encodeInto(fixture, bytes);
+        expect(result.read).toBe(2);  // one surrogate pair consumed
+        expect(result.written).toBe(4);
+        // No partial sequence is written: the fifth byte stays 0x00.
+        expect(Array.from(bytes)).toEqual([0xf0, 0x9d, 0x93, 0xbd, 0x00]);
+    });
+
+    xit('textDecoderSharedUint8Array', function () {  // xit: Jasmine marks this spec pending, so it does not run
+        const ab = new SharedArrayBuffer(6);
+        const dataView = new DataView(ab);
+        const charCodeA = 'A'.charCodeAt(0);
+        for (let i = 0; i < ab.byteLength; i++)
+            dataView.setUint8(i, charCodeA + i);  // fills the buffer with 'A'..'F'
+        // A Uint8Array view over the shared buffer is the decode() input under test.
+        const ui8 = new Uint8Array(ab);
+        const decoder = new TextDecoder();
+        const actual = decoder.decode(ui8);
+        expect(actual).toBe('ABCDEF');
+    });
+
+    xit('textDecoderSharedInt32Array', function () {  // xit: Jasmine marks this spec pending, so it does not run
+        const ab = new SharedArrayBuffer(8);
+        const dataView = new DataView(ab);
+        const charCodeA = 'A'.charCodeAt(0);
+        for (let i = 0; i < ab.byteLength; i++)
+            dataView.setUint8(i, charCodeA + i);  // fills the buffer with 'A'..'H'
+        // The view is not byte-typed; the expectation is on the underlying bytes.
+        const i32 = new Int32Array(ab);
+        const decoder = new TextDecoder();
+        const actual = decoder.decode(i32);
+        expect(actual).toBe('ABCDEFGH');
+    });
+
+    it('toStringShouldBeWebCompatibility', function () {
+        const encoder = new TextEncoder();
+        // The string tag must name the class.
+        expect(encoder.toString()).toBe('[object TextEncoder]');
+        // Same check for the decoder.
+        const decoder = new TextDecoder();
+        expect(decoder.toString()).toBe('[object TextDecoder]');
+    });
+
+    describe('singleByteEncodings', function () {
+        // Straight from https://encoding.spec.whatwg.org/encodings.json
+        const encodingsTable = [
+            {
+                encodings: [
+                    {
+                        labels: [
+                            'unicode-1-1-utf-8',
+                            'unicode11utf8',
+                            'unicode20utf8',
+                            'utf-8',
+                            'utf8',
+                            'x-unicode20utf8',
+                        ],
+                        name: 'UTF-8',
+                    },
+                ],
+                heading: 'The Encoding',
+            },
+            {
+                encodings: [
+                    {
+                        labels: ['866', 'cp866', 'csibm866', 'ibm866'],
+                        name: 'IBM866',
+                    },
+                    {
+                        labels: [
+                            'csisolatin2',
+                            'iso-8859-2',
+                            'iso-ir-101',
+                            'iso8859-2',
+                            'iso88592',
+                            'iso_8859-2',
+                            'iso_8859-2:1987',
+                            'l2',
+                            'latin2',
+                        ],
+                        name: 'ISO-8859-2',
+                    },
+                    {
+                        labels: [
+                            'csisolatin3',
+                            'iso-8859-3',
+                            'iso-ir-109',
+                            'iso8859-3',
+                            'iso88593',
+                            'iso_8859-3',
+                            'iso_8859-3:1988',
+                            'l3',
+                            'latin3',
+                        ],
+                        name: 'ISO-8859-3',
+                    },
+                    {
+                        labels: [
+                            'csisolatin4',
+                            'iso-8859-4',
+                            'iso-ir-110',
+                            'iso8859-4',
+                            'iso88594',
+                            'iso_8859-4',
+                            'iso_8859-4:1988',
+                            'l4',
+                            'latin4',
+                        ],
+                        name: 'ISO-8859-4',
+                    },
+                    {
+                        labels: [
+                            'csisolatincyrillic',
+                            'cyrillic',
+                            'iso-8859-5',
+                            'iso-ir-144',
+                            'iso8859-5',
+                            'iso88595',
+                            'iso_8859-5',
+                            'iso_8859-5:1988',
+                        ],
+                        name: 'ISO-8859-5',
+                    },
+                    {
+                        labels: [
+                            'arabic',
+                            'asmo-708',
+                            'csiso88596e',
+                            'csiso88596i',
+                            'csisolatinarabic',
+                            'ecma-114',
+                            'iso-8859-6',
+                            'iso-8859-6-e',
+                            'iso-8859-6-i',
+                            'iso-ir-127',
+                            'iso8859-6',
+                            'iso88596',
+                            'iso_8859-6',
+                            'iso_8859-6:1987',
+                        ],
+                        name: 'ISO-8859-6',
+                    },
+                    {
+                        labels: [
+                            'csisolatingreek',
+                            'ecma-118',
+                            'elot_928',
+                            'greek',
+                            'greek8',
+                            'iso-8859-7',
+                            'iso-ir-126',
+                            'iso8859-7',
+                            'iso88597',
+                            'iso_8859-7',
+                            'iso_8859-7:1987',
+                            'sun_eu_greek',
+                        ],
+                        name: 'ISO-8859-7',
+                    },
+                    {
+                        labels: [
+                            'csiso88598e',
+                            'csisolatinhebrew',
+                            'hebrew',
+                            'iso-8859-8',
+                            'iso-8859-8-e',
+                            'iso-ir-138',
+                            'iso8859-8',
+                            'iso88598',
+                            'iso_8859-8',
+                            'iso_8859-8:1988',
+                            'visual',
+                        ],
+                        name: 'ISO-8859-8',
+                    },
+                    {
+                        labels: ['csiso88598i', 'iso-8859-8-i', 'logical'],
+                        name: 'ISO-8859-8-I',
+                    },
+                    {
+                        labels: [
+                            'csisolatin6',
+                            'iso-8859-10',
+                            'iso-ir-157',
+                            'iso8859-10',
+                            'iso885910',
+                            'l6',
+                            'latin6',
+                        ],
+                        name: 'ISO-8859-10',
+                    },
+                    {
+                        labels: ['iso-8859-13', 'iso8859-13', 'iso885913'],
+                        name: 'ISO-8859-13',
+                    },
+                    {
+                        labels: ['iso-8859-14', 'iso8859-14', 'iso885914'],
+                        name: 'ISO-8859-14',
+                    },
+                    {
+                        labels: [
+                            'csisolatin9',
+                            'iso-8859-15',
+                            'iso8859-15',
+                            'iso885915',
+                            'iso_8859-15',
+                            'l9',
+                        ],
+                        name: 'ISO-8859-15',
+                    },
+                    {
+                        labels: ['iso-8859-16'],
+                        name: 'ISO-8859-16',
+                    },
+                    {
+                        labels: ['cskoi8r', 'koi', 'koi8', 'koi8-r', 'koi8_r'],
+                        name: 'KOI8-R',
+                    },
+                    {
+                        labels: ['koi8-ru', 'koi8-u'],
+                        name: 'KOI8-U',
+                    },
+                    {
+                        labels: ['csmacintosh', 'mac', 'macintosh', 'x-mac-roman'],
+                        name: 'macintosh',
+                    },
+                    {
+                        labels: [
+                            'dos-874',
+                            'iso-8859-11',
+                            'iso8859-11',
+                            'iso885911',
+                            'tis-620',
+                            'windows-874',
+                        ],
+                        name: 'windows-874',
+                    },
+                    {
+                        labels: ['cp1250', 'windows-1250', 'x-cp1250'],
+                        name: 'windows-1250',
+                    },
+                    {
+                        labels: ['cp1251', 'windows-1251', 'x-cp1251'],
+                        name: 'windows-1251',
+                    },
+                    {
+                        labels: [
+                            'ansi_x3.4-1968',
+                            'ascii',
+                            'cp1252',
+                            'cp819',
+                            'csisolatin1',
+                            'ibm819',
+                            'iso-8859-1',
+                            'iso-ir-100',
+                            'iso8859-1',
+                            'iso88591',
+                            'iso_8859-1',
+                            'iso_8859-1:1987',
+                            'l1',
+                            'latin1',
+                            'us-ascii',
+                            'windows-1252',
+                            'x-cp1252',
+                        ],
+                        name: 'windows-1252',
+                    },
+                    {
+                        labels: ['cp1253', 'windows-1253', 'x-cp1253'],
+                        name: 'windows-1253',
+                    },
+                    {
+                        labels: [
+                            'cp1254',
+                            'csisolatin5',
+                            'iso-8859-9',
+                            'iso-ir-148',
+                            'iso8859-9',
+                            'iso88599',
+                            'iso_8859-9',
+                            'iso_8859-9:1989',
+                            'l5',
+                            'latin5',
+                            'windows-1254',
+                            'x-cp1254',
+                        ],
+                        name: 'windows-1254',
+                    },
+                    {
+                        labels: ['cp1255', 'windows-1255', 'x-cp1255'],
+                        name: 'windows-1255',
+                    },
+                    {
+                        labels: ['cp1256', 'windows-1256', 'x-cp1256'],
+                        name: 'windows-1256',
+                    },
+                    {
+                        labels: ['cp1257', 'windows-1257', 'x-cp1257'],
+                        name: 'windows-1257',
+                    },
+                    {
+                        labels: ['cp1258', 'windows-1258', 'x-cp1258'],
+                        name: 'windows-1258',
+                    },
+                    {
+                        labels: ['x-mac-cyrillic', 'x-mac-ukrainian'],
+                        name: 'x-mac-cyrillic',
+                    },
+                ],
+                heading: 'Legacy single-byte encodings',
+            },
+            {
+                encodings: [
+                    {
+                        labels: [
+                            'chinese',
+                            'csgb2312',
+                            'csiso58gb231280',
+                            'gb2312',
+                            'gb_2312',
+                            'gb_2312-80',
+                            'gbk',
+                            'iso-ir-58',
+                            'x-gbk',
+                        ],
+                        name: 'GBK',
+                    },
+                    {
+                        labels: ['gb18030'],
+                        name: 'gb18030',
+                    },
+                ],
+                heading: 'Legacy multi-byte Chinese (simplified) encodings',
+            },
+            {
+                encodings: [
+                    {
+                        labels: ['big5', 'big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'],
+                        name: 'Big5',
+                    },
+                ],
+                heading: 'Legacy multi-byte Chinese (traditional) encodings',
+            },
+            {
+                encodings: [
+                    {
+                        labels: ['cseucpkdfmtjapanese', 'euc-jp', 'x-euc-jp'],
+                        name: 'EUC-JP',
+                    },
+                    {
+                        labels: ['csiso2022jp', 'iso-2022-jp'],
+                        name: 'ISO-2022-JP',
+                    },
+                    {
+                        labels: [
+                            'csshiftjis',
+                            'ms932',
+                            'ms_kanji',
+                            'shift-jis',
+                            'shift_jis',
+                            'sjis',
+                            'windows-31j',
+                            'x-sjis',
+                        ],
+                        name: 'Shift_JIS',
+                    },
+                ],
+                heading: 'Legacy multi-byte Japanese encodings',
+            },
+            {
+                encodings: [
+                    {
+                        labels: [
+                            'cseuckr',
+                            'csksc56011987',
+                            'euc-kr',
+                            'iso-ir-149',
+                            'korean',
+                            'ks_c_5601-1987',
+                            'ks_c_5601-1989',
+                            'ksc5601',
+                            'ksc_5601',
+                            'windows-949',
+                        ],
+                        name: 'EUC-KR',
+                    },
+                ],
+                heading: 'Legacy multi-byte Korean encodings',
+            },
+            {
+                encodings: [
+                    {
+                        labels: [
+                            'csiso2022kr',
+                            'hz-gb-2312',
+                            'iso-2022-cn',
+                            'iso-2022-cn-ext',
+                            'iso-2022-kr',
+                            'replacement',
+                        ],
+                        name: 'replacement',
+                    },
+                    {
+                        labels: ['unicodefffe', 'utf-16be'],
+                        name: 'UTF-16BE',
+                    },
+                    {
+                        labels: [
+                            'csunicode',
+                            'iso-10646-ucs-2',
+                            'ucs-2',
+                            'unicode',
+                            'unicodefeff',
+                            'utf-16',
+                            'utf-16le',
+                        ],
+                        name: 'UTF-16LE',
+                    },
+                    {
+                        labels: ['x-user-defined'],
+                        name: 'x-user-defined',
+                    },
+                ],
+                heading: 'Legacy miscellaneous encodings',
+            },
+        ];
+
+        const singleByteEncodings = encodingsTable.filter(group => {  // select the group by its heading
+            return group.heading === 'Legacy single-byte encodings';
+        })[0].encodings;  // first match; its list of {labels, name} entries
+
+        // https://encoding.spec.whatwg.org/indexes.json
+        const singleByteIndexes = {
+            'IBM866': [
+                1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 
1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 
1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 9617, 9618, 9619, 
9474, 9508, 9569, 9570, 9558, 9557, 9571, 9553, 9559, 9565, 9564, 9563, 9488, 9492, 9524, 9516, 9500, 9472, 
9532, 9566, 9567, 9562, 9556, 9577, 9574, 9568, 9552, 9580, 9575, 9576, 9572, 9573, 9561, 9560, 9554, 9555, 
9579, 9578, 9496, 9484, 9608, 9604, 9612, 9616, 9600, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 
1097, 1098, 1099, 1100, 1101, 1102, 1103, 1025, 1105, 1028, 1108, 1031, 1111, 1038, 1118, 176, 8729, 183, 
8730, 8470, 164, 9632, 160,
+            ],
+            'ISO-8859-2': [
+                128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 728, 321, 164, 317, 346, 167, 
168, 352, 350, 356, 377, 173, 381, 379, 176, 261, 731, 322, 180, 318, 347, 711, 184, 353, 351, 357, 378, 733, 
382, 380, 340, 193, 194, 258, 196, 313, 262, 199, 268, 201, 280, 203, 282, 205, 206, 270, 272, 323, 327, 211, 
212, 336, 214, 215, 344, 366, 218, 368, 220, 221, 354, 223, 341, 225, 226, 259, 228, 314, 263, 231, 269, 233, 
281, 235, 283, 237, 238, 271, 273, 324, 328, 243, 244, 337, 246, 247, 345, 367, 250, 369, 252, 253, 355, 729,
+            ],
+            'ISO-8859-3': [
+                128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 294, 728, 163, 164, null, 292, 
167, 168, 304, 350, 286, 308, 173, null, 379, 176, 295, 178, 179, 180, 181, 293, 183, 184, 305, 351, 287, 
309, 189, null, 380, 192, 193, 194, null, 196, 266, 264, 199, 200, 201, 202, 203, 204, 205, 206, 207, null, 
209, 210, 211, 212, 288, 214, 215, 284, 217, 218, 219, 220, 364, 348, 223, 224, 225, 226, null, 228, 267, 
265, 231, 232, 233, 234, 235, 236, 237, 238, 239, null, 241, 242, 243, 244, 289, 246, 247, 285, 249, 250, 
251, 252, 365, 349, 729,
+            ],
+            'ISO-8859-4': [
+                128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 312, 342, 164, 296, 315, 167, 
168, 352, 274, 290, 358, 173, 381, 175, 176, 261, 731, 343, 180, 297, 316, 711, 184, 353, 275, 291, 359, 330, 
382, 331, 256, 193, 194, 195, 196, 197, 198, 302, 268, 201, 280, 203, 278, 205, 206, 298, 272, 325, 332, 310, 
212, 213, 214, 215, 216, 370, 218, 219, 220, 360, 362, 223, 257, 225, 226, 227, 228, 229, 230, 303, 269, 233, 
281, 235, 279, 237, 238, 299, 273, 326, 333, 311, 244, 245, 246, 247, 248, 371, 250, 251, 252, 361, 363, 729,
+            ],
+            'ISO-8859-5': [
+                128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 1025, 1026, 1027, 1028, 1029, 
1030, 1031, 1032, 1033, 1034, 1035, 1036, 173, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 
1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 
1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 
1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 
1102, 1103, 8470, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 167, 1118, 1119,
+            ],
+            'ISO-8859-6': [
+                128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, null, null, null, 164, null, null, 
null,
+                null, null, null, null, 1548, 173, null, null,
+                null, null, null, null, null, null, null, null,
+                null, null, null, 1563, null, null, null, 1567, null, 1569, 1570, 1571, 1572, 1573, 1574, 
1575, 1576, 1577, 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592, 
1593, 1594, null, null, null, null, null,
+                1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, 
1615, 1616, 1617, 1618, null, null, null, null, null,
+                null, null, null, null, null, null, null, null,
+            ],
+            'ISO-8859-7': [
+                128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 8216, 8217, 163, 8364, 8367, 166, 
167, 168, 169, 890, 171, 172, 173, null, 8213, 176, 177, 178, 179, 900, 901, 902, 183, 904, 905, 906, 187, 
908, 189, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 
null, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 
951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 
973, 974, null,
+            ],
+            'ISO-8859-8': [
+                128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, null, 162, 163, 164, 165, 166, 
167, 168, 169, 215, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 247, 187, 188, 
189, 190, null,
+                null, null, null, null, null, null, null, null,
+                null, null, null, null, null, null, null, null,
+                null, null, null, null, null, null, null, null,
+                null, null, null, null, null, null, null, 8215, 1488, 1489, 1490, 1491, 1492, 1493, 1494, 
1495, 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503, 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 
1513, 1514, null, null, 8206, 8207, null,
+            ],
+            'ISO-8859-10': [
+                128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 274, 290, 298, 296, 310, 167, 
315, 272, 352, 358, 381, 173, 362, 330, 176, 261, 275, 291, 299, 297, 311, 183, 316, 273, 353, 359, 382, 
8213, 363, 331, 256, 193, 194, 195, 196, 197, 198, 302, 268, 201, 280, 203, 278, 205, 206, 207, 208, 325, 
332, 211, 212, 213, 214, 360, 216, 370, 218, 219, 220, 221, 222, 223, 257, 225, 226, 227, 228, 229, 230, 303, 
269, 233, 281, 235, 279, 237, 238, 239, 240, 326, 333, 243, 244, 245, 246, 361, 248, 371, 250, 251, 252, 253, 
254, 312,
+            ],
+            'ISO-8859-13': [
+                128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 8221, 162, 163, 164, 8222, 166, 
167, 216, 169, 342, 171, 172, 173, 174, 198, 176, 177, 178, 179, 8220, 181, 182, 183, 248, 185, 343, 187, 
188, 189, 190, 230, 260, 302, 256, 262, 196, 197, 280, 274, 268, 201, 377, 278, 290, 310, 298, 315, 352, 323, 
325, 211, 332, 213, 214, 215, 370, 321, 346, 362, 220, 379, 381, 223, 261, 303, 257, 263, 228, 229, 281, 275, 
269, 233, 378, 279, 291, 311, 299, 316, 353, 324, 326, 243, 333, 245, 246, 247, 371, 322, 347, 363, 252, 380, 
382, 8217,
+            ],
+            'ISO-8859-14': [
+                128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 7682, 7683, 163, 266, 267, 7690, 
167, 7808, 169, 7810, 7691, 7922, 173, 174, 376, 7710, 7711, 288, 289, 7744, 7745, 182, 7766, 7809, 7767, 
7811, 7776, 7923, 7812, 7813, 7777, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 
206, 207, 372, 209, 210, 211, 212, 213, 214, 7786, 216, 217, 218, 219, 220, 221, 374, 223, 224, 225, 226, 
227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 373, 241, 242, 243, 244, 245, 246, 7787, 
248, 249, 250, 251, 252, 253, 375, 255,
+            ],
+            'ISO-8859-15': [
+                128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 8364, 165, 352, 
167, 353, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 381, 181, 182, 183, 382, 185, 186, 187, 338, 
339, 376, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 
211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 
233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 
255,
+            ],
+            'ISO-8859-16': [
+                128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 261, 321, 8364, 8222, 352, 
167, 353, 169, 536, 171, 377, 173, 378, 379, 176, 177, 268, 322, 381, 8221, 182, 183, 382, 269, 537, 187, 
338, 339, 376, 380, 192, 193, 194, 258, 196, 262, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 272, 323, 
210, 211, 212, 336, 214, 346, 368, 217, 218, 219, 220, 280, 538, 223, 224, 225, 226, 259, 228, 263, 230, 231, 
232, 233, 234, 235, 236, 237, 238, 239, 273, 324, 242, 243, 244, 337, 246, 347, 369, 249, 250, 251, 252, 281, 
539, 255,
+            ],
+            'KOI8-R': [
+                9472, 9474, 9484, 9488, 9492, 9496, 9500, 9508, 9516, 9524, 9532, 9600, 9604, 9608, 9612, 
9616, 9617, 9618, 9619, 8992, 9632, 8729, 8730, 8776, 8804, 8805, 160, 8993, 176, 178, 183, 247, 9552, 9553, 
9554, 1105, 9555, 9556, 9557, 9558, 9559, 9560, 9561, 9562, 9563, 9564, 9565, 9566, 9567, 9568, 9569, 1025, 
9570, 9571, 9572, 9573, 9574, 9575, 9576, 9577, 9578, 9579, 9580, 169, 1102, 1072, 1073, 1094, 1076, 1077, 
1092, 1075, 1093, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1103, 1088, 1089, 1090, 1091, 1078, 1074, 
1100, 1099, 1079, 1096, 1101, 1097, 1095, 1098, 1070, 1040, 1041, 1062, 1044, 1045, 1060, 1043, 1061, 1048, 
1049, 1050, 1051, 1052, 1053, 1054, 1055, 1071, 1056, 1057, 1058, 1059, 1046, 1042, 1068, 1067, 1047, 1064, 
1069, 1065, 1063, 1066,
+            ],
+            'KOI8-U': [
+                9472, 9474, 9484, 9488, 9492, 9496, 9500, 9508, 9516, 9524, 9532, 9600, 9604, 9608, 9612, 
9616, 9617, 9618, 9619, 8992, 9632, 8729, 8730, 8776, 8804, 8805, 160, 8993, 176, 178, 183, 247, 9552, 9553, 
9554, 1105, 1108, 9556, 1110, 1111, 9559, 9560, 9561, 9562, 9563, 1169, 1118, 9566, 9567, 9568, 9569, 1025, 
1028, 9571, 1030, 1031, 9574, 9575, 9576, 9577, 9578, 1168, 1038, 169, 1102, 1072, 1073, 1094, 1076, 1077, 
1092, 1075, 1093, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1103, 1088, 1089, 1090, 1091, 1078, 1074, 
1100, 1099, 1079, 1096, 1101, 1097, 1095, 1098, 1070, 1040, 1041, 1062, 1044, 1045, 1060, 1043, 1061, 1048, 
1049, 1050, 1051, 1052, 1053, 1054, 1055, 1071, 1056, 1057, 1058, 1059, 1046, 1042, 1068, 1067, 1047, 1064, 
1069, 1065, 1063, 1066,
+            ],
+            'macintosh': [
+                196, 197, 199, 201, 209, 214, 220, 225, 224, 226, 228, 227, 229, 231, 233, 232, 234, 235, 
237, 236, 238, 239, 241, 243, 242, 244, 246, 245, 250, 249, 251, 252, 8224, 176, 162, 163, 167, 8226, 182, 
223, 174, 169, 8482, 180, 168, 8800, 198, 216, 8734, 177, 8804, 8805, 165, 181, 8706, 8721, 8719, 960, 8747, 
170, 186, 937, 230, 248, 191, 161, 172, 8730, 402, 8776, 8710, 171, 187, 8230, 160, 192, 195, 213, 338, 339, 
8211, 8212, 8220, 8221, 8216, 8217, 247, 9674, 255, 376, 8260, 8364, 8249, 8250, 64257, 64258, 8225, 183, 
8218, 8222, 8240, 194, 202, 193, 203, 200, 205, 206, 207, 204, 211, 212, 63743, 210, 218, 219, 217, 305, 710, 
732, 175, 728, 729, 730, 184, 733, 731, 711,
+            ],
+            'windows-874': [
+                8364, 129, 130, 131, 132, 8230, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 8216, 
8217, 8220, 8221, 8226, 8211, 8212, 152, 153, 154, 155, 156, 157, 158, 159, 160, 3585, 3586, 3587, 3588, 
3589, 3590, 3591, 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599, 3600, 3601, 3602, 3603, 3604, 3605, 3606, 
3607, 3608, 3609, 3610, 3611, 3612, 3613, 3614, 3615, 3616, 3617, 3618, 3619, 3620, 3621, 3622, 3623, 3624, 
3625, 3626, 3627, 3628, 3629, 3630, 3631, 3632, 3633, 3634, 3635, 3636, 3637, 3638, 3639, 3640, 3641, 3642, 
null, null, null, null, 3647, 3648, 3649, 3650, 3651, 3652, 3653, 3654, 3655, 3656, 3657, 3658, 3659, 3660, 
3661, 3662, 3663, 3664, 3665, 3666, 3667, 3668, 3669, 3670, 3671, 3672, 3673, 3674, 3675, null, null, null, 
null,
+            ],
+            'windows-1250': [
+                8364, 129, 8218, 131, 8222, 8230, 8224, 8225, 136, 8240, 352, 8249, 346, 356, 381, 377, 144, 
8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 8482, 353, 8250, 347, 357, 382, 378, 160, 711, 728, 321, 164, 
260, 166, 167, 168, 169, 350, 171, 172, 173, 174, 379, 176, 177, 731, 322, 180, 181, 182, 183, 184, 261, 351, 
187, 317, 733, 318, 380, 340, 193, 194, 258, 196, 313, 262, 199, 268, 201, 280, 203, 282, 205, 206, 270, 272, 
323, 327, 211, 212, 336, 214, 215, 344, 366, 218, 368, 220, 221, 354, 223, 341, 225, 226, 259, 228, 314, 263, 
231, 269, 233, 281, 235, 283, 237, 238, 271, 273, 324, 328, 243, 244, 337, 246, 247, 345, 367, 250, 369, 252, 
253, 355, 729,
+            ],
+            'windows-1251': [
+                1026, 1027, 8218, 1107, 8222, 8230, 8224, 8225, 8364, 8240, 1033, 8249, 1034, 1036, 1035, 
1039, 1106, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 8482, 1113, 8250, 1114, 1116, 1115, 1119, 160, 
1038, 1118, 1032, 164, 1168, 166, 167, 1025, 169, 1028, 171, 172, 173, 174, 1031, 176, 177, 1030, 1110, 1169, 
181, 182, 183, 1105, 8470, 1108, 187, 1112, 1029, 1109, 1111, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 
1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 
1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 
1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 
1102, 1103,
+            ],
+            'windows-1252': [
+                8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338, 141, 381, 143, 144, 
8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 353, 8250, 339, 157, 382, 376, 160, 161, 162, 163, 164, 
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 
187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 
209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 
231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 
253, 254, 255,
+            ],
+            'windows-1253': [
+                8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 136, 8240, 138, 8249, 140, 141, 142, 143, 144, 
8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 8482, 154, 8250, 156, 157, 158, 159, 160, 901, 902, 163, 164, 
165, 166, 167, 168, 169, null, 171, 172, 173, 174, 8213, 176, 177, 178, 179, 900, 181, 182, 183, 904, 905, 
906, 187, 908, 189, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 
928, 929, null, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 
949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 
971, 972, 973, 974, null,
+            ],
+            'windows-1254': [
+                8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338, 141, 142, 143, 144, 
8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 353, 8250, 339, 157, 158, 376, 160, 161, 162, 163, 164, 
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 
187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 286, 
209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 304, 350, 223, 224, 225, 226, 227, 228, 229, 230, 
231, 232, 233, 234, 235, 236, 237, 238, 239, 287, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 
305, 351, 255,
+            ],
+            'windows-1255': [
+                8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 138, 8249, 140, 141, 142, 143, 144, 
8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 154, 8250, 156, 157, 158, 159, 160, 161, 162, 163, 8362, 
165, 166, 167, 168, 169, 215, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 247, 
187, 188, 189, 190, 191, 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, 1469, 
1470, 1471, 1472, 1473, 1474, 1475, 1520, 1521, 1522, 1523, 1524, null, null, null, null, null, null, null,
+                1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, 1501, 1502, 
1503, 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, null, null, 8206, 8207, null,
+            ],
+            'windows-1256': [
+                8364, 1662, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 1657, 8249, 338, 1670, 1688, 1672, 
1711, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 1705, 8482, 1681, 8250, 339, 8204, 8205, 1722, 160, 1548, 
162, 163, 164, 165, 166, 167, 168, 169, 1726, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 
183, 184, 185, 1563, 187, 188, 189, 190, 1567, 1729, 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, 
1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 215, 1591, 1592, 1593, 1594, 
1600, 1601, 1602, 1603, 224, 1604, 226, 1605, 1606, 1607, 1608, 231, 232, 233, 234, 235, 1609, 1610, 238, 
239, 1611, 1612, 1613, 1614, 244, 1615, 1616, 247, 1617, 249, 1618, 251, 252, 8206, 8207, 1746,
+            ],
+            'windows-1257': [
+                8364, 129, 8218, 131, 8222, 8230, 8224, 8225, 136, 8240, 138, 8249, 140, 168, 711, 184, 144, 
8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 8482, 154, 8250, 156, 175, 731, 159, 160, null, 162, 163, 164, 
null, 166, 167, 216, 169, 342, 171, 172, 173, 174, 198, 176, 177, 178, 179, 180, 181, 182, 183, 248, 185, 
343, 187, 188, 189, 190, 230, 260, 302, 256, 262, 196, 197, 280, 274, 268, 201, 377, 278, 290, 310, 298, 315, 
352, 323, 325, 211, 332, 213, 214, 215, 370, 321, 346, 362, 220, 379, 381, 223, 261, 303, 257, 263, 228, 229, 
281, 275, 269, 233, 378, 279, 291, 311, 299, 316, 353, 324, 326, 243, 333, 245, 246, 247, 371, 322, 347, 363, 
252, 380, 382, 729,
+            ],
+            'windows-1258': [
+                8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 138, 8249, 338, 141, 142, 143, 144, 
8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 154, 8250, 339, 157, 158, 376, 160, 161, 162, 163, 164, 
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 
187, 188, 189, 190, 191, 192, 193, 194, 258, 196, 197, 198, 199, 200, 201, 202, 203, 768, 205, 206, 207, 272, 
209, 777, 211, 212, 416, 214, 215, 216, 217, 218, 219, 220, 431, 771, 223, 224, 225, 226, 259, 228, 229, 230, 
231, 232, 233, 234, 235, 769, 237, 238, 239, 273, 241, 803, 243, 244, 417, 246, 247, 248, 249, 250, 251, 252, 
432, 8363, 255,
+            ],
+            'x-mac-cyrillic': [
+                1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 
1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 8224, 
176, 1168, 163, 167, 8226, 182, 1030, 174, 169, 8482, 1026, 1106, 8800, 1027, 1107, 8734, 177, 8804, 8805, 
1110, 181, 1169, 1032, 1028, 1108, 1031, 1111, 1033, 1113, 1034, 1114, 1112, 1029, 172, 8730, 402, 8776, 
8710, 171, 187, 8230, 160, 1035, 1115, 1036, 1116, 1109, 8211, 8212, 8220, 8221, 8216, 8217, 247, 8222, 1038, 
1118, 1039, 1119, 8470, 1025, 1105, 1103, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 
1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 
1101, 1102, 8364,
+            ],
+        };
+
+        // Asserts that each code unit of `data` matches the index of `encoding`.
+        function assertDecode(data, encoding) {
+            // ISO-8859-8-I is checked against the ISO-8859-8 index.
+            const key = encoding === 'ISO-8859-8-I' ? 'ISO-8859-8' : encoding;
+            for (let pos = 0; pos < data.length; pos++) {
+                // Positions below 0x80 must equal their own offset; a missing
+                // or null index entry means U+FFFD is expected.
+                const mapped = pos < 0x80 ? pos : singleByteIndexes[key][pos - 0x80];
+                const expected = mapped === undefined || mapped === null ? 0xfffd : mapped;
+
+                expect(data.charCodeAt(pos)).toBe(expected);
+            }
+        }
+        // Input bytes 0x00-0xFE: element i holds the value i.
+        // NOTE(review): 255 bytes leaves 0xFF unexercised - confirm intended.
+        const buffer = new ArrayBuffer(255);
+        const view = new Uint8Array(buffer);
+        for (let offset = 0; offset < view.byteLength; offset++)
+            view[offset] = offset;
+
+        // Registers one spec per encoding that decodes `view` through each label.
+        for (let idx = 0; idx < singleByteEncodings.length; idx++) {
+            const entry = singleByteEncodings[idx];
+
+            it(`singleByteEncoding ${entry.name}`, function () {
+                for (let j = 0; j < entry.labels.length; j++) {
+                    const decoder = new TextDecoder(entry.labels[j]);
+                    const decoded = decoder.decode(view);
+
+                    expect(decoder.encoding).toBe(entry.name.toLowerCase());
+                    assertDecode(decoded, entry.name);
+                }
+            });
+        }
+    });
+});
diff --git a/js.gresource.xml b/js.gresource.xml
index fc55e597..fc64a8f6 100644
--- a/js.gresource.xml
+++ b/js.gresource.xml
@@ -42,8 +42,11 @@
 
     <file>modules/core/_cairo.js</file>
     <file>modules/core/_common.js</file>
+    <file>modules/core/_encodings.js</file>
+    <file>modules/core/_singleByteEncodings.js</file>
     <file>modules/core/_format.js</file>
     <file>modules/core/_gettext.js</file>
     <file>modules/core/_signals.js</file>
+    <file>modules/core/_text.js</file>
   </gresource>
 </gresources>
diff --git a/modules/core/_encodings.js b/modules/core/_encodings.js
new file mode 100644
index 00000000..cfbfc2a5
--- /dev/null
+++ b/modules/core/_encodings.js
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: Node.js contributors. All rights reserved.
+
+// Modified from https://github.com/nodejs/node/blob/78680c1cbc8b0c435963bc512e826b2a6227c315/lib/internal/encoding.js
+// Data from https://encoding.spec.whatwg.org/encodings.json
+
+/* exported getEncodingFromLabel */
+
+const encodings = new Map([ // label -> encoding name; every key and value is lowercase
+    ['unicode-1-1-utf-8', 'utf-8'],
+    ['unicode11utf8', 'utf-8'],
+    ['unicode20utf8', 'utf-8'],
+    ['utf-8', 'utf-8'],
+    ['utf8', 'utf-8'],
+    ['x-unicode20utf8', 'utf-8'],
+    ['866', 'ibm866'],
+    ['cp866', 'ibm866'],
+    ['csibm866', 'ibm866'],
+    ['ibm866', 'ibm866'],
+    ['csisolatin2', 'iso-8859-2'],
+    ['iso-8859-2', 'iso-8859-2'],
+    ['iso-ir-101', 'iso-8859-2'],
+    ['iso8859-2', 'iso-8859-2'],
+    ['iso88592', 'iso-8859-2'],
+    ['iso_8859-2', 'iso-8859-2'],
+    ['iso_8859-2:1987', 'iso-8859-2'],
+    ['l2', 'iso-8859-2'],
+    ['latin2', 'iso-8859-2'],
+    ['csisolatin3', 'iso-8859-3'],
+    ['iso-8859-3', 'iso-8859-3'],
+    ['iso-ir-109', 'iso-8859-3'],
+    ['iso8859-3', 'iso-8859-3'],
+    ['iso88593', 'iso-8859-3'],
+    ['iso_8859-3', 'iso-8859-3'],
+    ['iso_8859-3:1988', 'iso-8859-3'],
+    ['l3', 'iso-8859-3'],
+    ['latin3', 'iso-8859-3'],
+    ['csisolatin4', 'iso-8859-4'],
+    ['iso-8859-4', 'iso-8859-4'],
+    ['iso-ir-110', 'iso-8859-4'],
+    ['iso8859-4', 'iso-8859-4'],
+    ['iso88594', 'iso-8859-4'],
+    ['iso_8859-4', 'iso-8859-4'],
+    ['iso_8859-4:1988', 'iso-8859-4'],
+    ['l4', 'iso-8859-4'],
+    ['latin4', 'iso-8859-4'],
+    ['csisolatincyrillic', 'iso-8859-5'],
+    ['cyrillic', 'iso-8859-5'],
+    ['iso-8859-5', 'iso-8859-5'],
+    ['iso-ir-144', 'iso-8859-5'],
+    ['iso8859-5', 'iso-8859-5'],
+    ['iso88595', 'iso-8859-5'],
+    ['iso_8859-5', 'iso-8859-5'],
+    ['iso_8859-5:1988', 'iso-8859-5'],
+    ['arabic', 'iso-8859-6'],
+    ['asmo-708', 'iso-8859-6'],
+    ['csiso88596e', 'iso-8859-6'],
+    ['csiso88596i', 'iso-8859-6'],
+    ['csisolatinarabic', 'iso-8859-6'],
+    ['ecma-114', 'iso-8859-6'],
+    ['iso-8859-6', 'iso-8859-6'],
+    ['iso-8859-6-e', 'iso-8859-6'],
+    ['iso-8859-6-i', 'iso-8859-6'],
+    ['iso-ir-127', 'iso-8859-6'],
+    ['iso8859-6', 'iso-8859-6'],
+    ['iso88596', 'iso-8859-6'],
+    ['iso_8859-6', 'iso-8859-6'],
+    ['iso_8859-6:1987', 'iso-8859-6'],
+    ['csisolatingreek', 'iso-8859-7'],
+    ['ecma-118', 'iso-8859-7'],
+    ['elot_928', 'iso-8859-7'],
+    ['greek', 'iso-8859-7'],
+    ['greek8', 'iso-8859-7'],
+    ['iso-8859-7', 'iso-8859-7'],
+    ['iso-ir-126', 'iso-8859-7'],
+    ['iso8859-7', 'iso-8859-7'],
+    ['iso88597', 'iso-8859-7'],
+    ['iso_8859-7', 'iso-8859-7'],
+    ['iso_8859-7:1987', 'iso-8859-7'],
+    ['sun_eu_greek', 'iso-8859-7'],
+    ['csiso88598e', 'iso-8859-8'],
+    ['csisolatinhebrew', 'iso-8859-8'],
+    ['hebrew', 'iso-8859-8'],
+    ['iso-8859-8', 'iso-8859-8'],
+    ['iso-8859-8-e', 'iso-8859-8'],
+    ['iso-ir-138', 'iso-8859-8'],
+    ['iso8859-8', 'iso-8859-8'],
+    ['iso88598', 'iso-8859-8'],
+    ['iso_8859-8', 'iso-8859-8'],
+    ['iso_8859-8:1988', 'iso-8859-8'],
+    ['visual', 'iso-8859-8'],
+    ['csiso88598i', 'iso-8859-8-i'],
+    ['iso-8859-8-i', 'iso-8859-8-i'],
+    ['logical', 'iso-8859-8-i'],
+    ['csisolatin6', 'iso-8859-10'],
+    ['iso-8859-10', 'iso-8859-10'],
+    ['iso-ir-157', 'iso-8859-10'],
+    ['iso8859-10', 'iso-8859-10'],
+    ['iso885910', 'iso-8859-10'],
+    ['l6', 'iso-8859-10'],
+    ['latin6', 'iso-8859-10'],
+    ['iso-8859-13', 'iso-8859-13'],
+    ['iso8859-13', 'iso-8859-13'],
+    ['iso885913', 'iso-8859-13'],
+    ['iso-8859-14', 'iso-8859-14'],
+    ['iso8859-14', 'iso-8859-14'],
+    ['iso885914', 'iso-8859-14'],
+    ['csisolatin9', 'iso-8859-15'],
+    ['iso-8859-15', 'iso-8859-15'],
+    ['iso8859-15', 'iso-8859-15'],
+    ['iso885915', 'iso-8859-15'],
+    ['iso_8859-15', 'iso-8859-15'],
+    ['l9', 'iso-8859-15'],
+    ['iso-8859-16', 'iso-8859-16'],
+    ['cskoi8r', 'koi8-r'],
+    ['koi', 'koi8-r'],
+    ['koi8', 'koi8-r'],
+    ['koi8-r', 'koi8-r'],
+    ['koi8_r', 'koi8-r'],
+    ['koi8-ru', 'koi8-u'],
+    ['koi8-u', 'koi8-u'],
+    ['csmacintosh', 'macintosh'],
+    ['mac', 'macintosh'],
+    ['macintosh', 'macintosh'],
+    ['x-mac-roman', 'macintosh'],
+    ['dos-874', 'windows-874'],
+    ['iso-8859-11', 'windows-874'], // the ISO-8859-11 labels resolve to windows-874
+    ['iso8859-11', 'windows-874'],
+    ['iso885911', 'windows-874'],
+    ['tis-620', 'windows-874'],
+    ['windows-874', 'windows-874'],
+    ['cp1250', 'windows-1250'],
+    ['windows-1250', 'windows-1250'],
+    ['x-cp1250', 'windows-1250'],
+    ['cp1251', 'windows-1251'],
+    ['windows-1251', 'windows-1251'],
+    ['x-cp1251', 'windows-1251'],
+    ['ansi_x3.4-1968', 'windows-1252'], // the ASCII and ISO-8859-1 labels below resolve to windows-1252
+    ['ascii', 'windows-1252'],
+    ['cp1252', 'windows-1252'],
+    ['cp819', 'windows-1252'],
+    ['csisolatin1', 'windows-1252'],
+    ['ibm819', 'windows-1252'],
+    ['iso-8859-1', 'windows-1252'],
+    ['iso-ir-100', 'windows-1252'],
+    ['iso8859-1', 'windows-1252'],
+    ['iso88591', 'windows-1252'],
+    ['iso_8859-1', 'windows-1252'],
+    ['iso_8859-1:1987', 'windows-1252'],
+    ['l1', 'windows-1252'],
+    ['latin1', 'windows-1252'],
+    ['us-ascii', 'windows-1252'],
+    ['windows-1252', 'windows-1252'],
+    ['x-cp1252', 'windows-1252'],
+    ['cp1253', 'windows-1253'],
+    ['windows-1253', 'windows-1253'],
+    ['x-cp1253', 'windows-1253'],
+    ['cp1254', 'windows-1254'],
+    ['csisolatin5', 'windows-1254'],
+    ['iso-8859-9', 'windows-1254'], // the ISO-8859-9 labels resolve to windows-1254
+    ['iso-ir-148', 'windows-1254'],
+    ['iso8859-9', 'windows-1254'],
+    ['iso88599', 'windows-1254'],
+    ['iso_8859-9', 'windows-1254'],
+    ['iso_8859-9:1989', 'windows-1254'],
+    ['l5', 'windows-1254'],
+    ['latin5', 'windows-1254'],
+    ['windows-1254', 'windows-1254'],
+    ['x-cp1254', 'windows-1254'],
+    ['cp1255', 'windows-1255'],
+    ['windows-1255', 'windows-1255'],
+    ['x-cp1255', 'windows-1255'],
+    ['cp1256', 'windows-1256'],
+    ['windows-1256', 'windows-1256'],
+    ['x-cp1256', 'windows-1256'],
+    ['cp1257', 'windows-1257'],
+    ['windows-1257', 'windows-1257'],
+    ['x-cp1257', 'windows-1257'],
+    ['cp1258', 'windows-1258'],
+    ['windows-1258', 'windows-1258'],
+    ['x-cp1258', 'windows-1258'],
+    ['x-mac-cyrillic', 'x-mac-cyrillic'],
+    ['x-mac-ukrainian', 'x-mac-cyrillic'],
+    ['chinese', 'gbk'],
+    ['csgb2312', 'gbk'],
+    ['csiso58gb231280', 'gbk'],
+    ['gb2312', 'gbk'],
+    ['gb_2312', 'gbk'],
+    ['gb_2312-80', 'gbk'],
+    ['gbk', 'gbk'],
+    ['iso-ir-58', 'gbk'],
+    ['x-gbk', 'gbk'],
+    ['gb18030', 'gb18030'],
+    ['big5', 'big5'],
+    ['big5-hkscs', 'big5'],
+    ['cn-big5', 'big5'],
+    ['csbig5', 'big5'],
+    ['x-x-big5', 'big5'],
+    ['cseucpkdfmtjapanese', 'euc-jp'],
+    ['euc-jp', 'euc-jp'],
+    ['x-euc-jp', 'euc-jp'],
+    ['csiso2022jp', 'iso-2022-jp'],
+    ['iso-2022-jp', 'iso-2022-jp'],
+    ['csshiftjis', 'shift_jis'],
+    ['ms932', 'shift_jis'],
+    ['ms_kanji', 'shift_jis'],
+    ['shift-jis', 'shift_jis'],
+    ['shift_jis', 'shift_jis'],
+    ['sjis', 'shift_jis'],
+    ['windows-31j', 'shift_jis'],
+    ['x-sjis', 'shift_jis'],
+    ['cseuckr', 'euc-kr'],
+    ['csksc56011987', 'euc-kr'],
+    ['euc-kr', 'euc-kr'],
+    ['iso-ir-149', 'euc-kr'],
+    ['korean', 'euc-kr'],
+    ['ks_c_5601-1987', 'euc-kr'],
+    ['ks_c_5601-1989', 'euc-kr'],
+    ['ksc5601', 'euc-kr'],
+    ['ksc_5601', 'euc-kr'],
+    ['windows-949', 'euc-kr'],
+    ['csiso2022kr', 'replacement'],
+    ['hz-gb-2312', 'replacement'],
+    ['iso-2022-cn', 'replacement'],
+    ['iso-2022-cn-ext', 'replacement'],
+    ['iso-2022-kr', 'replacement'],
+    ['replacement', 'replacement'],
+    ['unicodefffe', 'utf-16be'],
+    ['utf-16be', 'utf-16be'],
+    ['csunicode', 'utf-16le'],
+    ['iso-10646-ucs-2', 'utf-16le'],
+    ['ucs-2', 'utf-16le'],
+    ['unicode', 'utf-16le'],
+    ['unicodefeff', 'utf-16le'],
+    ['utf-16', 'utf-16le'], // plain 'utf-16' resolves to the little-endian variant
+    ['utf-16le', 'utf-16le'],
+    ['x-user-defined', 'x-user-defined'],
+]);
+
+
+// Some of the web-specified encodings use names that iconv does not
+// support; this map gives the name to use internally instead.
+// Encodings absent from the map keep their web-specified name.
+const internalEncodings = new Map();
+internalEncodings.set('x-mac-cyrillic', 'MacCyrillic');
+// For our purposes we can encode 8-i as 8
+internalEncodings.set('iso-8859-8-i', 'iso-8859-8');
+
+/**
+ * Strips leading and trailing ASCII whitespace (tab, LF, FF, CR, space)
+ * from a string.
+ *
+ * `String.prototype.trim` is not used because it also strips non-ASCII
+ * whitespace.
+ *
+ * @param {string} label the label to trim
+ * @returns {string} the label without surrounding ASCII whitespace
+ */
+const trimAsciiWhitespace = label => {
+    const isAsciiWhitespace = ch =>
+        ch === '\u0009' || ch === '\u000a' || ch === '\u000c' ||
+        ch === '\u000d' || ch === '\u0020';
+
+    let start = 0;
+    let end = label.length;
+
+    // Advance past leading whitespace.
+    while (start < end && isAsciiWhitespace(label[start]))
+        start++;
+
+    // Back up over trailing whitespace.
+    while (end > start && isAsciiWhitespace(label[end - 1]))
+        end--;
+
+    return label.slice(start, end);
+};
+
+/**
+ * @typedef Encoding
+ * @property {string} internalLabel
+ * @property {string} label
+ */
+
+/**
+ * Resolves a label per the WHATWG "get an encoding" steps: surrounding ASCII
+ * whitespace is ignored and matching is ASCII case-insensitive.
+ *
+ * @param {string} label the encoding label
+ * @returns {Encoding | null} the encoding, or null for an unknown label
+ */
+function getEncodingFromLabel(label) {
+    let encoding = encodings.get(label);
+
+    if (encoding === undefined) {
+        // Fold A-Z only: String.prototype.toLowerCase() also maps non-ASCII
+        // characters to ASCII (U+212A KELVIN SIGN becomes 'k').
+        const asciiLower = label.replace(/[A-Z]/g, c => c.toLowerCase());
+        encoding = encodings.get(trimAsciiWhitespace(asciiLower));
+    }
+
+    if (!encoding)
+        return null;
+
+    return {label: encoding, internalLabel: internalEncodings.get(encoding) ?? encoding};
+}
diff --git a/modules/core/_singleByteEncodings.js b/modules/core/_singleByteEncodings.js
new file mode 100644
index 00000000..eb0cfb94
--- /dev/null
+++ b/modules/core/_singleByteEncodings.js
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: Evan Welsh
+
+/* exported singleByteEncodings */
+
+// These single byte encodings are considered "legacy" and have inconsistent implementations
+// across platforms. The WHATWG Encoding specification standardizes a set of encodings and
+// their mapping from bytes to Unicode code points.
+
+// Taken from https://encoding.spec.whatwg.org/#legacy-single-byte-encodings
+
+var singleByteEncodings = {
+    'ibm866': [1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 
1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 
1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 9617, 9618, 9619, 
9474, 9508, 9569, 9570, 9558, 9557, 9571, 9553, 9559, 9565, 9564, 9563, 9488, 9492, 9524, 9516, 9500, 9472, 
9532, 9566, 9567, 9562, 9556, 9577, 9574, 9568, 9552, 9580, 9575, 9576, 9572, 9573, 9561, 9560, 9554, 9555, 
9579, 9578, 9496, 9484, 9608, 9604, 9612, 9616, 9600, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 
1097, 1098, 1099, 1100, 1101, 1102, 1103, 1025, 1105, 1028, 1108, 1031, 1111, 1038, 1118, 176, 8729, 183, 
8730, 8470, 164, 9632, 160],
+    'iso-8859-2': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 728, 321, 164, 317, 346, 167, 
168, 352, 350, 356, 377, 173, 381, 379, 176, 261, 731, 322, 180, 318, 347, 711, 184, 353, 351, 357, 378, 733, 
382, 380, 340, 193, 194, 258, 196, 313, 262, 199, 268, 201, 280, 203, 282, 205, 206, 270, 272, 323, 327, 211, 
212, 336, 214, 215, 344, 366, 218, 368, 220, 221, 354, 223, 341, 225, 226, 259, 228, 314, 263, 231, 269, 233, 
281, 235, 283, 237, 238, 271, 273, 324, 328, 243, 244, 337, 246, 247, 345, 367, 250, 369, 252, 253, 355, 729],
+    'iso-8859-3': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 294, 728, 163, 164, null, 292, 
167, 168, 304, 350, 286, 308, 173, null, 379, 176, 295, 178, 179, 180, 181, 293, 183, 184, 305, 351, 287, 
309, 189, null, 380, 192, 193, 194, null, 196, 266, 264, 199, 200, 201, 202, 203, 204, 205, 206, 207, null, 
209, 210, 211, 212, 288, 214, 215, 284, 217, 218, 219, 220, 364, 348, 223, 224, 225, 226, null, 228, 267, 
265, 231, 232, 233, 234, 235, 236, 237, 238, 239, null, 241, 242, 243, 244, 289, 246, 247, 285, 249, 250, 
251, 252, 365, 349, 729],
+    'iso-8859-4': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 312, 342, 164, 296, 315, 167, 
168, 352, 274, 290, 358, 173, 381, 175, 176, 261, 731, 343, 180, 297, 316, 711, 184, 353, 275, 291, 359, 330, 
382, 331, 256, 193, 194, 195, 196, 197, 198, 302, 268, 201, 280, 203, 278, 205, 206, 298, 272, 325, 332, 310, 
212, 213, 214, 215, 216, 370, 218, 219, 220, 360, 362, 223, 257, 225, 226, 227, 228, 229, 230, 303, 269, 233, 
281, 235, 279, 237, 238, 299, 273, 326, 333, 311, 244, 245, 246, 247, 248, 371, 250, 251, 252, 361, 363, 729],
+    'iso-8859-5': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 1025, 1026, 1027, 1028, 1029, 
1030, 1031, 1032, 1033, 1034, 1035, 1036, 173, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 
1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 
1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 
1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 
1102, 1103, 8470, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 167, 1118, 1119],
+    'iso-8859-6': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, null, null, null, 164, null, null, 
null, null, null, null, null, 1548, 173, null, null, null, null, null, null, null, null, null, null, null, 
null, null, 1563, null, null, null, 1567, null, 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, 1578, 
1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, 1594, null, null, 
null, null, null, 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, 
1615, 1616, 1617, 1618, null, null, null, null, null, null, null, null, null, null, null, null, null],
+    'iso-8859-7': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 8216, 8217, 163, 8364, 8367, 166, 
167, 168, 169, 890, 171, 172, 173, null, 8213, 176, 177, 178, 179, 900, 901, 902, 183, 904, 905, 906, 187, 
908, 189, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 
null, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 
951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 
973, 974, null],
+    'iso-8859-8': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, null, 162, 163, 164, 165, 166, 
167, 168, 169, 215, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 247, 187, 188, 
189, 190, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, 
null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, 8215, 1488, 
1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503, 1504, 1505, 1506, 
1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, null, null, 8206, 8207, null],
+    'iso-8859-10': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 
145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 274, 290, 298, 296, 310, 
167, 315, 272, 352, 358, 381, 173, 362, 330, 176, 261, 275, 291, 299, 297, 311, 183, 316, 273, 353, 359, 382, 
8213, 363, 331, 256, 193, 194, 195, 196, 197, 198, 302, 268, 201, 280, 203, 278, 205, 206, 207, 208, 325, 
332, 211, 212, 213, 214, 360, 216, 370, 218, 219, 220, 221, 222, 223, 257, 225, 226, 227, 228, 229, 230, 303, 
269, 233, 281, 235, 279, 237, 238, 239, 240, 326, 333, 243, 244, 245, 246, 361, 248, 371, 250, 251, 252, 253, 
254, 312],
+    'iso-8859-13': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 
145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 8221, 162, 163, 164, 8222, 
166, 167, 216, 169, 342, 171, 172, 173, 174, 198, 176, 177, 178, 179, 8220, 181, 182, 183, 248, 185, 343, 
187, 188, 189, 190, 230, 260, 302, 256, 262, 196, 197, 280, 274, 268, 201, 377, 278, 290, 310, 298, 315, 352, 
323, 325, 211, 332, 213, 214, 215, 370, 321, 346, 362, 220, 379, 381, 223, 261, 303, 257, 263, 228, 229, 281, 
275, 269, 233, 378, 279, 291, 311, 299, 316, 353, 324, 326, 243, 333, 245, 246, 247, 371, 322, 347, 363, 252, 
380, 382, 8217],
+    'iso-8859-14': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 
145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 7682, 7683, 163, 266, 267, 
7690, 167, 7808, 169, 7810, 7691, 7922, 173, 174, 376, 7710, 7711, 288, 289, 7744, 7745, 182, 7766, 7809, 
7767, 7811, 7776, 7923, 7812, 7813, 7777, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 
205, 206, 207, 372, 209, 210, 211, 212, 213, 214, 7786, 216, 217, 218, 219, 220, 221, 374, 223, 224, 225, 
226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 373, 241, 242, 243, 244, 245, 246, 
7787, 248, 249, 250, 251, 252, 253, 375, 255],
+    'iso-8859-15': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 
145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 8364, 165, 
352, 167, 353, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 381, 181, 182, 183, 382, 185, 186, 187, 
338, 339, 376, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 
210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 
232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 
254, 255],
+    'iso-8859-16': [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 
145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 260, 261, 321, 8364, 8222, 
352, 167, 353, 169, 536, 171, 377, 173, 378, 379, 176, 177, 268, 322, 381, 8221, 182, 183, 382, 269, 537, 
187, 338, 339, 376, 380, 192, 193, 194, 258, 196, 262, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 272, 
323, 210, 211, 212, 336, 214, 346, 368, 217, 218, 219, 220, 280, 538, 223, 224, 225, 226, 259, 228, 263, 230, 
231, 232, 233, 234, 235, 236, 237, 238, 239, 273, 324, 242, 243, 244, 337, 246, 347, 369, 249, 250, 251, 252, 
281, 539, 255],
+    'koi8-r': [9472, 9474, 9484, 9488, 9492, 9496, 9500, 9508, 9516, 9524, 9532, 9600, 9604, 9608, 9612, 
9616, 9617, 9618, 9619, 8992, 9632, 8729, 8730, 8776, 8804, 8805, 160, 8993, 176, 178, 183, 247, 9552, 9553, 
9554, 1105, 9555, 9556, 9557, 9558, 9559, 9560, 9561, 9562, 9563, 9564, 9565, 9566, 9567, 9568, 9569, 1025, 
9570, 9571, 9572, 9573, 9574, 9575, 9576, 9577, 9578, 9579, 9580, 169, 1102, 1072, 1073, 1094, 1076, 1077, 
1092, 1075, 1093, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1103, 1088, 1089, 1090, 1091, 1078, 1074, 
1100, 1099, 1079, 1096, 1101, 1097, 1095, 1098, 1070, 1040, 1041, 1062, 1044, 1045, 1060, 1043, 1061, 1048, 
1049, 1050, 1051, 1052, 1053, 1054, 1055, 1071, 1056, 1057, 1058, 1059, 1046, 1042, 1068, 1067, 1047, 1064, 
1069, 1065, 1063, 1066],
+    'koi8-u': [9472, 9474, 9484, 9488, 9492, 9496, 9500, 9508, 9516, 9524, 9532, 9600, 9604, 9608, 9612, 
9616, 9617, 9618, 9619, 8992, 9632, 8729, 8730, 8776, 8804, 8805, 160, 8993, 176, 178, 183, 247, 9552, 9553, 
9554, 1105, 1108, 9556, 1110, 1111, 9559, 9560, 9561, 9562, 9563, 1169, 1118, 9566, 9567, 9568, 9569, 1025, 
1028, 9571, 1030, 1031, 9574, 9575, 9576, 9577, 9578, 1168, 1038, 169, 1102, 1072, 1073, 1094, 1076, 1077, 
1092, 1075, 1093, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1103, 1088, 1089, 1090, 1091, 1078, 1074, 
1100, 1099, 1079, 1096, 1101, 1097, 1095, 1098, 1070, 1040, 1041, 1062, 1044, 1045, 1060, 1043, 1061, 1048, 
1049, 1050, 1051, 1052, 1053, 1054, 1055, 1071, 1056, 1057, 1058, 1059, 1046, 1042, 1068, 1067, 1047, 1064, 
1069, 1065, 1063, 1066],
+    'macintosh': [196, 197, 199, 201, 209, 214, 220, 225, 224, 226, 228, 227, 229, 231, 233, 232, 234, 235, 
237, 236, 238, 239, 241, 243, 242, 244, 246, 245, 250, 249, 251, 252, 8224, 176, 162, 163, 167, 8226, 182, 
223, 174, 169, 8482, 180, 168, 8800, 198, 216, 8734, 177, 8804, 8805, 165, 181, 8706, 8721, 8719, 960, 8747, 
170, 186, 937, 230, 248, 191, 161, 172, 8730, 402, 8776, 8710, 171, 187, 8230, 160, 192, 195, 213, 338, 339, 
8211, 8212, 8220, 8221, 8216, 8217, 247, 9674, 255, 376, 8260, 8364, 8249, 8250, 64257, 64258, 8225, 183, 
8218, 8222, 8240, 194, 202, 193, 203, 200, 205, 206, 207, 204, 211, 212, 63743, 210, 218, 219, 217, 305, 710, 
732, 175, 728, 729, 730, 184, 733, 731, 711],
+    'windows-874': [8364, 129, 130, 131, 132, 8230, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 
8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 153, 154, 155, 156, 157, 158, 159, 160, 3585, 3586, 3587, 
3588, 3589, 3590, 3591, 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599, 3600, 3601, 3602, 3603, 3604, 3605, 
3606, 3607, 3608, 3609, 3610, 3611, 3612, 3613, 3614, 3615, 3616, 3617, 3618, 3619, 3620, 3621, 3622, 3623, 
3624, 3625, 3626, 3627, 3628, 3629, 3630, 3631, 3632, 3633, 3634, 3635, 3636, 3637, 3638, 3639, 3640, 3641, 
3642, null, null, null, null, 3647, 3648, 3649, 3650, 3651, 3652, 3653, 3654, 3655, 3656, 3657, 3658, 3659, 
3660, 3661, 3662, 3663, 3664, 3665, 3666, 3667, 3668, 3669, 3670, 3671, 3672, 3673, 3674, 3675, null, null, 
null, null],
+    'windows-1250': [8364, 129, 8218, 131, 8222, 8230, 8224, 8225, 136, 8240, 352, 8249, 346, 356, 381, 377, 
144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 8482, 353, 8250, 347, 357, 382, 378, 160, 711, 728, 321, 
164, 260, 166, 167, 168, 169, 350, 171, 172, 173, 174, 379, 176, 177, 731, 322, 180, 181, 182, 183, 184, 261, 
351, 187, 317, 733, 318, 380, 340, 193, 194, 258, 196, 313, 262, 199, 268, 201, 280, 203, 282, 205, 206, 270, 
272, 323, 327, 211, 212, 336, 214, 215, 344, 366, 218, 368, 220, 221, 354, 223, 341, 225, 226, 259, 228, 314, 
263, 231, 269, 233, 281, 235, 283, 237, 238, 271, 273, 324, 328, 243, 244, 337, 246, 247, 345, 367, 250, 369, 
252, 253, 355, 729],
+    'windows-1251': [1026, 1027, 8218, 1107, 8222, 8230, 8224, 8225, 8364, 8240, 1033, 8249, 1034, 1036, 
1035, 1039, 1106, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 8482, 1113, 8250, 1114, 1116, 1115, 1119, 
160, 1038, 1118, 1032, 164, 1168, 166, 167, 1025, 169, 1028, 171, 172, 173, 174, 1031, 176, 177, 1030, 1110, 
1169, 181, 182, 183, 1105, 8470, 1108, 187, 1112, 1029, 1109, 1111, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 
1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 
1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 
1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 
1101, 1102, 1103],
+    'windows-1252': [8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338, 141, 381, 143, 
144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 353, 8250, 339, 157, 382, 376, 160, 161, 162, 163, 
164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 
186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 
208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 
230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 
252, 253, 254, 255],
+    'windows-1253': [8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 136, 8240, 138, 8249, 140, 141, 142, 143, 
144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 8482, 154, 8250, 156, 157, 158, 159, 160, 901, 902, 163, 
164, 165, 166, 167, 168, 169, null, 171, 172, 173, 174, 8213, 176, 177, 178, 179, 900, 181, 182, 183, 904, 
905, 906, 187, 908, 189, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 
927, 928, 929, null, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 
948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 
970, 971, 972, 973, 974, null],
+    'windows-1254': [8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 352, 8249, 338, 141, 142, 143, 
144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 353, 8250, 339, 157, 158, 376, 160, 161, 162, 163, 
164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 
186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 
286, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 304, 350, 223, 224, 225, 226, 227, 228, 229, 
230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 287, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 
252, 305, 351, 255],
+    'windows-1255': [8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 138, 8249, 140, 141, 142, 143, 
144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 154, 8250, 156, 157, 158, 159, 160, 161, 162, 163, 
8362, 165, 166, 167, 168, 169, 215, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 
185, 247, 187, 188, 189, 190, 191, 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463, 1464, 1465, 1466, 1467, 
1468, 1469, 1470, 1471, 1472, 1473, 1474, 1475, 1520, 1521, 1522, 1523, 1524, null, null, null, null, null, 
null, null, 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503, 
1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, null, null, 8206, 8207, null],
+    'windows-1256': [8364, 1662, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 1657, 8249, 338, 1670, 1688, 
1672, 1711, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 1705, 8482, 1681, 8250, 339, 8204, 8205, 1722, 160, 
1548, 162, 163, 164, 165, 166, 167, 168, 169, 1726, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 
182, 183, 184, 185, 1563, 187, 188, 189, 190, 1567, 1729, 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, 
1577, 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 215, 1591, 1592, 1593, 
1594, 1600, 1601, 1602, 1603, 224, 1604, 226, 1605, 1606, 1607, 1608, 231, 232, 233, 234, 235, 1609, 1610, 
238, 239, 1611, 1612, 1613, 1614, 244, 1615, 1616, 247, 1617, 249, 1618, 251, 252, 8206, 8207, 1746],
+    'windows-1257': [8364, 129, 8218, 131, 8222, 8230, 8224, 8225, 136, 8240, 138, 8249, 140, 168, 711, 184, 
144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 152, 8482, 154, 8250, 156, 175, 731, 159, 160, null, 162, 163, 
164, null, 166, 167, 216, 169, 342, 171, 172, 173, 174, 198, 176, 177, 178, 179, 180, 181, 182, 183, 248, 
185, 343, 187, 188, 189, 190, 230, 260, 302, 256, 262, 196, 197, 280, 274, 268, 201, 377, 278, 290, 310, 298, 
315, 352, 323, 325, 211, 332, 213, 214, 215, 370, 321, 346, 362, 220, 379, 381, 223, 261, 303, 257, 263, 228, 
229, 281, 275, 269, 233, 378, 279, 291, 311, 299, 316, 353, 324, 326, 243, 333, 245, 246, 247, 371, 322, 347, 
363, 252, 380, 382, 729],
+    'windows-1258': [8364, 129, 8218, 402, 8222, 8230, 8224, 8225, 710, 8240, 138, 8249, 338, 141, 142, 143, 
144, 8216, 8217, 8220, 8221, 8226, 8211, 8212, 732, 8482, 154, 8250, 339, 157, 158, 376, 160, 161, 162, 163, 
164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 
186, 187, 188, 189, 190, 191, 192, 193, 194, 258, 196, 197, 198, 199, 200, 201, 202, 203, 768, 205, 206, 207, 
272, 209, 777, 211, 212, 416, 214, 215, 216, 217, 218, 219, 220, 431, 771, 223, 224, 225, 226, 259, 228, 229, 
230, 231, 232, 233, 234, 235, 769, 237, 238, 239, 273, 241, 803, 243, 244, 417, 246, 247, 248, 249, 250, 251, 
252, 432, 8363, 255],
+    'x-mac-cyrillic': [1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 
1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 
8224, 176, 1168, 163, 167, 8226, 182, 1030, 174, 169, 8482, 1026, 1106, 8800, 1027, 1107, 8734, 177, 8804, 
8805, 1110, 181, 1169, 1032, 1028, 1108, 1031, 1111, 1033, 1113, 1034, 1114, 1112, 1029, 172, 8730, 402, 
8776, 8710, 171, 187, 8230, 160, 1035, 1115, 1036, 1116, 1109, 8211, 8212, 8220, 8221, 8216, 8217, 247, 8222, 
1038, 1118, 1039, 1119, 8470, 1025, 1105, 1103, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 
1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 
1100, 1101, 1102, 8364],
+};
diff --git a/modules/core/_text.js b/modules/core/_text.js
new file mode 100644
index 00000000..a59b4df7
--- /dev/null
+++ b/modules/core/_text.js
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: Evan Welsh
+
+const Encoding = imports._encodingNative;
+
+const { getEncodingFromLabel } = imports._encodings;
+const { singleByteEncodings } = imports._singleByteEncodings;
+
+/**
+ * @param {number[]} encoding 
+ * @param {number} byte
+ * @param {boolean} fatal
+ * @returns {number}
+ */
+const decodeSingleByteEncodingCharacter = (encoding, byte, fatal) => {
+    if (byte < 0x80) {
+        return byte;
+    } else {
+        if (encoding[byte - 0x80] == null && fatal) {
+            throw new TypeError(`Invalid character in decode.`);
+        }
+        return encoding[byte - 0x80] ?? 0xFFFD;
+    }
+}
+
+/**
+ * @param {number[]} encoding
+ * @param {Uint8Array} bytes 
+ * @param {boolean} fatal
+ * @returns {string}
+ */
+const decodeSingleByteEncoding = (encoding, bytes, fatal) => {
+    const decoded = [...bytes.values()].map(byte => decodeSingleByteEncodingCharacter(encoding, byte, 
fatal));
+
+    return String.fromCodePoint(...decoded);
+};
+
+const isSingleByteEncoding = (label) => label in singleByteEncodings;
+const getSingleByteEncoding = (label) => singleByteEncodings[label];
+
+var TextDecoder = class TextDecoder {
+    /**  
+     * @type {string}
+     */
+    encoding;
+
+    /**  
+     * @type {boolean}
+     */
+    ignoreBOM;
+
+    /**  
+     * @type {boolean}
+     */
+    fatal;
+
+    get [Symbol.toStringTag]() {
+        return 'TextDecoder';
+    }
+
+    /**
+     * @param {string} encoding 
+     * @param {object} [options]
+     * @param {boolean=} options.fatal
+     * @param {boolean=} options.ignoreBOM 
+     */
+    constructor(encoding = 'utf-8', options = {}) {
+        const { fatal = false, ignoreBOM = false } = options;
+
+        const encodingDefinition = getEncodingFromLabel(`${encoding}`);
+
+        if (!encodingDefinition) {
+            throw new RangeError(`Invalid encoding label: '${encoding}'`);
+        }
+
+        if (encodingDefinition.label === 'replacement') {
+            throw new RangeError(`Unsupported replacement encoding: '${encoding}'`);
+        }
+
+        Object.defineProperty(this, '_internalEncoding', {
+            value: encodingDefinition.internalLabel,
+            enumerable: true,
+            writable: false,
+            configurable: false,
+        });
+
+        Object.defineProperty(this, 'encoding', {
+            value: encodingDefinition.label,
+            enumerable: true,
+            writable: false,
+            configurable: false,
+        });
+
+        Object.defineProperty(this, 'ignoreBOM', {
+            value: ignoreBOM,
+            enumerable: true,
+            writable: false,
+            configurable: false,
+        });
+
+        Object.defineProperty(this, 'fatal', {
+            value: fatal,
+            enumerable: true,
+            writable: false,
+            configurable: false,
+        });
+    }
+
+
+
+    /**
+     * @param {unknown} bytes 
+     * @param {object} [options]
+     * @param {boolean=} options.stream
+     * @returns 
+     */
+    decode(bytes, options = {}) {
+        const { stream = false } = options;
+
+        if (stream) {
+            throw new Error(`TextDecoder does not implement the 'stream' option.`);
+        }
+
+        /** @type {Uint8Array} */
+        let input;
+
+        if (bytes instanceof ArrayBuffer) {
+            input = new Uint8Array(bytes);
+        } else if (bytes instanceof Uint8Array) {
+            input = bytes;
+        } else if (bytes instanceof Object.getPrototypeOf(Uint8Array)) {
+            let { buffer, byteLength, byteOffset } = /** @type {Uint32Array} */ (bytes);
+            input = new Uint8Array(buffer, byteOffset, byteLength);
+        } else if (
+            typeof bytes === "object" &&
+            bytes !== null &&
+            "buffer" in bytes &&
+            bytes.buffer instanceof ArrayBuffer
+        ) {
+            let { buffer, byteLength, byteOffset } = bytes;
+            input = new Uint8Array(
+                buffer,
+                byteOffset,
+                byteLength
+            );
+        } else if (bytes === undefined) {
+            input = new Uint8Array(0);
+        } else {
+            throw new Error(`Provided input cannot be converted to ArrayBufferView or ArrayBuffer`);
+        }
+
+        if (this.ignoreBOM && input.length > 2 && input[0] === 0xEF && input[1] === 0xBB && input[2] === 
0xBF) {
+            if (this.encoding !== 'utf-8') {
+                throw new Error(`Cannot ignore BOM for non-UTF8 encoding.`);
+            }
+
+            let { buffer, byteLength, byteOffset } = input;
+            input = new Uint8Array(buffer, byteOffset + 3, byteLength - 3);
+        }
+
+        if (isSingleByteEncoding(this._internalEncoding)) {
+            const encoding = getSingleByteEncoding(this._internalEncoding);
+
+            return decodeSingleByteEncoding(encoding, input, this.fatal);
+        }
+
+        return Encoding.decode(input, this._internalEncoding, this.fatal);
+    }
+}
+
+var TextEncoder = class TextEncoder {
+    get [Symbol.toStringTag]() {
+        return 'TextEncoder';
+    }
+
+    get encoding() {
+        return 'utf-8';
+    }
+
+    encode(input = '') {
+        // The TextEncoder specification only allows for UTF-8 encoding.
+        return Encoding.encode(`${input}`, 'UTF-8');
+    }
+
+    encodeInto(input = '', output = new Uint8Array()) {
+        // The TextEncoder specification only allows for UTF-8 encoding.
+        return Encoding.encodeInto(`${input}`, output);
+    }
+}
\ No newline at end of file
diff --git a/modules/script/_bootstrap/default.js b/modules/script/_bootstrap/default.js
index 952d7fe3..fe354a02 100644
--- a/modules/script/_bootstrap/default.js
+++ b/modules/script/_bootstrap/default.js
@@ -6,6 +6,7 @@
     'use strict';
 
     const {print, printerr, log, logError} = imports._print;
+    const {TextEncoder, TextDecoder} = imports._text;
 
     Object.defineProperties(exports, {
         ARGV: {
@@ -16,6 +17,18 @@
                 return imports.system.programArgs;
             },
         },
+        TextEncoder: {
+            configurable: false,
+            enumerable: true,
+            writable: false,
+            value: TextEncoder,
+        },
+        TextDecoder: {
+            configurable: false,
+            enumerable: true,
+            writable: false,
+            value: TextDecoder,
+        },
         print: {
             configurable: false,
             enumerable: true,
diff --git a/modules/script/byteArray.js b/modules/script/byteArray.js
index e0b650ac..6669e348 100644
--- a/modules/script/byteArray.js
+++ b/modules/script/byteArray.js
@@ -2,9 +2,8 @@
 // SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
 // SPDX-FileCopyrightText: 2017 Philip Chimento <philip chimento gmail com>
 
-var {fromGBytes, defineToString} = imports._byteArrayNative;
+var {fromGBytes, fromString, toString} = imports._byteArrayNative;
 
-const Encoding = imports._encodingNative;
 const {GLib} = imports.gi;
 
 // For backwards compatibility
@@ -32,31 +31,6 @@ function toGBytes(array) {
 
 /* eslint no-redeclare: ["error", { "builtinGlobals": false }] */
 
-/**
- * @param {Uint8Array} array the byte array to decode into a string
- * @param {string} [encoding] a text encoding tag
- * @returns {string}
- */
-function toString(array, encoding = 'utf-8') {
-    if (!(array instanceof Uint8Array))
-        throw new Error('Argument to ByteArray.toString() must be a Uint8Array');
-
-    return Encoding.toString(array, encoding);
-}
-
-/**
- * @param {string} str the string to encode into bytes
- * @param {string} [encoding] a text encoding tag
- * @returns {Uint8Array}
- */
-function fromString(str, encoding = 'utf-8') {
-    const array = Encoding.fromString(str, encoding);
-
-    defineToString(array);
-
-    return array;
-}
-
 var ByteArray = class ByteArray {
     constructor(arg = 0) {
         if (arg instanceof Uint8Array)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]