[vte] lib: Use ICU for legacy charset support

From: Christian Persch <chpe src gnome org>
To: commits-list gnome org
Cc:
Subject: [vte] lib: Use ICU for legacy charset support
Date: Sun, 17 Nov 2019 20:58:35 +0000 (UTC)
commit 9e4fbae2cabcd937ac4d1a984ba844d24b44b83f
Author: Christian Persch <chpe src gnome org>
Date:   Sun Nov 17 21:58:09 2019 +0100

    lib: Use ICU for legacy charset support
    
    Instead of converting the whole chunk of input from the input
    charset to UTF-8 in one go, we need a decoder that consumes the
    input one byte at a time. Since the iconv API is not particularly
    suited to this (or, really, any) task, switch to using ICU for this.
    
    Add functions to get the list of supported legacy charsets, and
    to check whether a particular string is a supported charset.
    
    Fixes https://gitlab.gnome.org/GNOME/vte/issues/40

 doc/reference/vte-sections.txt |   2 +
 meson.build                    |  11 +-
 meson_options.txt              |   4 +-
 src/app/app.cc                 |   4 +-
 src/debug.cc                   |   1 +
 src/debug.h                    |   1 +
 src/decoder-cat.cc             | 550 +++++++++++++++++++++++++++++++
 src/icu-converter.cc           | 139 ++++++++
 src/icu-converter.hh           |  81 +++++
 src/icu-decoder.cc             | 151 +++++++++
 src/icu-decoder.hh             | 102 ++++++
 src/meson.build                |  33 +-
 src/parser-cat.cc              |   6 +-
 src/utf8-test.cc               |   8 +-
 src/utf8.hh                    |   7 +
 src/vte.cc                     | 713 ++++++++++++++++++++++-------------------
 src/vte/vtedeprecated.h        |   8 +
 src/vtegtk.cc                  |  73 ++++-
 src/vteinternal.hh             |  47 ++-
 src/widget.hh                  |   5 +-
 20 files changed, 1580 insertions(+), 366 deletions(-)
---
diff --git a/doc/reference/vte-sections.txt b/doc/reference/vte-sections.txt
index ad3d203d..5ce2093e 100644
--- a/doc/reference/vte-sections.txt
+++ b/doc/reference/vte-sections.txt
@@ -94,6 +94,8 @@ vte_terminal_event_check_regex_simple
 <SUBSECTION>
 vte_get_user_shell
 vte_get_features
+vte_get_encodings
+vte_get_encoding_supported
 
 <SUBSECTION>
 VteTerminalSpawnAsyncCallback
diff --git a/meson.build b/meson.build
index 4e79657a..c3b08772 100644
--- a/meson.build
+++ b/meson.build
@@ -42,6 +42,7 @@ glib_req_version          = '2.44.0'
 glib_min_req_version      = '2.44'
 glib_max_allowed_version  = '2.44'
 gnutls_req_version        = '3.2.7'
+icu_uc_req_version        = '4.8'
 pango_req_version         = '1.22.0'
 pcre2_req_version         = '10.21'
 
@@ -117,7 +118,7 @@ config_h.set('VTE_DEBUG', enable_debug)
 config_h.set('WITH_A11Y', get_option('a11y'))
 config_h.set('WITH_FRIBIDI', get_option('fribidi'))
 config_h.set('WITH_GNUTLS', get_option('gnutls'))
-config_h.set('WITH_ICONV', get_option('iconv'))
+config_h.set('WITH_ICU', get_option('icu'))
 
 ver = glib_min_req_version.split('.')
 config_h.set('GLIB_VERSION_MIN_REQUIRED', '(G_ENCODE_VERSION(' + ver[0] + ',' + ver[1] + '))')
@@ -428,6 +429,12 @@ else
   gtk4_dep = dependency('', required: false)
 endif
 
+if get_option('icu')
+  icu_dep = dependency('icu-uc', version: '>=' + icu_uc_req_version)
+else
+  icu_dep = dependency('', required: false)
+endif
+
 # Write config.h
 
 configure_file(
@@ -486,7 +493,7 @@ output += '  FRIBIDI:      ' + get_option('fribidi').to_string() + '\n'
 output += '  GNUTLS:       ' + get_option('gnutls').to_string() + '\n'
 output += '  GTK+ 3.0:     ' + get_option('gtk3').to_string() + '\n'
 output += '  GTK+ 4.0:     ' + get_option('gtk4').to_string() + '\n'
-output += '  IConv:        ' + get_option('iconv').to_string() + '\n'
+output += '  ICU:          ' + get_option('icu').to_string() + '\n'
 output += '  GIR:          ' + get_option('gir').to_string() + '\n'
 output += '  Vala:         ' + get_option('vapi').to_string() + '\n'
 output += '\n'
diff --git a/meson_options.txt b/meson_options.txt
index bb59bc67..47d54b78 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -79,10 +79,10 @@ option(
 )
 
 option(
-  'iconv',
+  'icu',
   type: 'boolean',
   value: true,
-  description: 'Enable legacy charset support using iconv',
+  description: 'Enable legacy charset support using ICU',
 )
 
 option(
diff --git a/src/app/app.cc b/src/app/app.cc
index 5e9f852f..e554418e 100644
--- a/src/app/app.cc
+++ b/src/app/app.cc
@@ -1911,7 +1911,7 @@ vteapp_window_constructed(GObject *object)
         if (options.encoding != nullptr) {
                 GError* err = nullptr;
                 if (!vte_terminal_set_encoding(window->terminal, options.encoding, &err)) {
-                        verbose_printerr("Failed to set encoding: %s\n", err->message);
+                        g_printerr("Failed to set encoding: %s\n", err->message);
                         g_error_free(err);
                 }
         }
@@ -2209,7 +2209,7 @@ main(int argc,
                 verbose_printerr("VTE_CJK_WIDTH is not supported anymore, use --cjk-width instead\n");
 
        if (options.version) {
-               g_print("VTE Application %s\n", VERSION);
+               g_print("VTE Application %s %s\n", VERSION, vte_get_features());
                return EXIT_SUCCESS;
        }
 
diff --git a/src/debug.cc b/src/debug.cc
index e8dd8d08..5b1b0d8a 100644
--- a/src/debug.cc
+++ b/src/debug.cc
@@ -59,6 +59,7 @@ _vte_debug_init(void)
     { "emulation",    VTE_DEBUG_EMULATION    },
     { "ringview",     VTE_DEBUG_RINGVIEW     },
     { "bidi",         VTE_DEBUG_BIDI         },
+    { "conversion",   VTE_DEBUG_CONVERSION   },
   };
 
   _vte_debug_flags = g_parse_debug_string (g_getenv("VTE_DEBUG"),
diff --git a/src/debug.h b/src/debug.h
index 8601dad0..4fd498a9 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -66,6 +66,7 @@ typedef enum {
         VTE_DEBUG_EMULATION     = 1 << 26,
         VTE_DEBUG_RINGVIEW      = 1 << 27,
         VTE_DEBUG_BIDI          = 1 << 28,
+        VTE_DEBUG_CONVERSION    = 1 << 29,
 } VteDebugFlags;
 
 void _vte_debug_init(void);
diff --git a/src/decoder-cat.cc b/src/decoder-cat.cc
new file mode 100644
index 00000000..5640c851
--- /dev/null
+++ b/src/decoder-cat.cc
@@ -0,0 +1,550 @@
+/*
+ * Copyright © 2017, 2018, 2019 Christian Persch
+ *
+ * This programme is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This programme is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include <glib.h>
+
+#include <fcntl.h>
+#include <locale.h>
+#include <unistd.h>
+
+#include <cassert>
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#include <string>
+
+#include "debug.h"
+#include "icu-decoder.hh"
+#include "icu-glue.hh"
+#include "utf8.hh"
+
+using namespace std::literals;
+
+/* Command line options for the decoder-cat tool.
+ *
+ * parse() fills in the members from argv; afterwards the values are
+ * available through the read-only accessors.
+ */
+class Options {
+private:
+        bool m_benchmark{false};
+        bool m_codepoints{false};
+        bool m_list{false};
+        bool m_quiet{false};
+        bool m_statistics{false};
+        bool m_utf8{false};
+        int m_repeat{1};
+        char* m_charset{nullptr};    /* owned; released with g_free() */
+        char** m_filenames{nullptr}; /* owned; released with g_strfreev() */
+
+        /* Temporary storage for one option value while GOption parses.
+         *
+         * GOption writes into a T2 (reachable through ptr()); when the
+         * OptionArg goes out of scope, that value is copied into the T1
+         * that was passed to the constructor. This lets a bool member
+         * receive a value that GOption stores as a gboolean.
+         */
+        template<typename T1, typename T2 = T1>
+        class OptionArg {
+        private:
+                T1* m_return_ptr;
+                T2 m_value;
+        public:
+                OptionArg(T1* ptr, T2 v) : m_return_ptr{ptr}, m_value{v} { }
+                ~OptionArg() { *m_return_ptr = m_value; }
+
+                inline constexpr T2* ptr() noexcept { return &m_value; }
+        };
+
+        using BoolArg = OptionArg<bool, gboolean>;
+        using IntArg = OptionArg<int>;
+        using StrArg = OptionArg<char*>;
+        using StrvArg = OptionArg<char**>;
+
+public:
+
+        Options() noexcept = default;
+        Options(Options const&) = delete;
+        Options(Options&&) = delete;
+
+        ~Options() {
+                /* Both values are handed over by GOption in parse() and are
+                 * owned by this object; both functions accept nullptr.
+                 */
+                g_free(m_charset);
+                g_strfreev(m_filenames);
+        }
+
+        Options& operator=(Options const&) = delete;
+        Options& operator=(Options&&) = delete;
+
+        inline constexpr bool benchmark()  const noexcept { return m_benchmark;  }
+        inline constexpr bool codepoints() const noexcept { return m_codepoints; }
+        inline constexpr bool list()       const noexcept { return m_list;       }
+        inline constexpr bool statistics() const noexcept { return m_statistics; }
+        inline constexpr bool quiet()      const noexcept { return m_quiet;      }
+        inline constexpr bool utf8()       const noexcept { return m_utf8;       }
+        inline constexpr int  repeat()     const noexcept { return m_repeat;     }
+        inline constexpr char const* charset()          const noexcept { return m_charset;   }
+        inline constexpr char const* const* filenames() const noexcept { return m_filenames; }
+
+        /* Parses the command line.
+         *
+         * @argc, @argv: the arguments as received by main()
+         * @error: return location for the GError set by g_option_context_parse()
+         *
+         * Returns: true if parsing succeeded, false otherwise.
+         */
+        bool parse(int argc,
+                   char* argv[],
+                   GError** error) noexcept
+        {
+                /* The OptionArg objects live in their own scope so that their
+                 * destructors have stored the parsed values into the members
+                 * by the time this function returns.
+                 */
+                {
+                        auto benchmark = BoolArg{&m_benchmark, false};
+                        auto codepoints = BoolArg{&m_codepoints, false};
+                        auto list = BoolArg{&m_list, false};
+                        auto quiet = BoolArg{&m_quiet, false};
+                        auto statistics = BoolArg{&m_statistics, false};
+                        auto utf8 = BoolArg{&m_utf8, false};
+                        auto repeat = IntArg{&m_repeat, 1};
+                        auto charset = StrArg{&m_charset, nullptr};
+                        auto filenames = StrvArg{&m_filenames, nullptr};
+                        GOptionEntry const entries[] = {
+                                { "benchmark", 'b', 0, G_OPTION_ARG_NONE, benchmark.ptr(),
+                                  "Measure time spent parsing each file", nullptr },
+                                { "codepoints", 'u', 0, G_OPTION_ARG_NONE, codepoints.ptr(),
+                                  "Output unicode code points by number", nullptr },
+                                { "charset", 'f', 0, G_OPTION_ARG_STRING, charset.ptr(),
+                                  "Input charset", "CHARSET" },
+                                { "list-charsets", 'l', 0, G_OPTION_ARG_NONE, list.ptr(),
+                                  "List available charsets", nullptr },
+                                { "quiet", 'q', 0, G_OPTION_ARG_NONE, quiet.ptr(),
+                                  "Suppress output except for statistics and benchmark", nullptr },
+                                { "repeat", 'r', 0, G_OPTION_ARG_INT, repeat.ptr(),
+                                  "Repeat each file COUNT times", "COUNT" },
+                                { "statistics", 's', 0, G_OPTION_ARG_NONE, statistics.ptr(),
+                                  "Output statistics", nullptr },
+                                { "utf-8", '8', 0, G_OPTION_ARG_NONE, utf8.ptr(),
+                                  "UTF-8 input (default)", nullptr },
+                                { G_OPTION_REMAINING, 0, 0, G_OPTION_ARG_FILENAME_ARRAY, filenames.ptr(),
+                                  nullptr, nullptr },
+                                { nullptr },
+                        };
+
+                        auto context = g_option_context_new("[FILE…] — decoder cat");
+                        g_option_context_set_help_enabled(context, true);
+                        g_option_context_add_main_entries(context, entries, nullptr);
+
+                        auto rv = bool{g_option_context_parse(context, &argc, &argv, error) != false};
+                        g_option_context_free(context);
+                        if (!rv)
+                                return rv;
+                }
+
+                return true;
+        }
+}; // class Options
+
+/* Functor that prints code points to stdout.
+ *
+ * Output is collected in a string and written out after each line feed
+ * and when the Printer is destroyed. In "codepoints" mode every code point
+ * is printed as its hexadecimal number (plus the character itself when it
+ * is printable) instead of as plain UTF-8.
+ */
+class Printer {
+private:
+        std::string m_str{};
+        bool m_codepoints{false};
+
+        /* Appends @len bytes from @buf to the pending output. */
+        void
+        print(char const* buf,
+              size_t len) noexcept
+        {
+                m_str.append(buf, len);
+        }
+
+        /* Appends printf-style formatted text to the pending output.
+         * Text that does not fit into the local buffer is truncated.
+         */
+        G_GNUC_PRINTF(2, 3)
+        void
+        print_format(char const* format,
+                     ...)
+        {
+                char buf[256];
+                va_list args;
+                va_start(args, format);
+                auto const len = g_vsnprintf(buf, sizeof(buf), format, args);
+                va_end(args);
+
+                if (len <= 0)
+                        return;
+
+                /* g_vsnprintf() returns the length the full output would have
+                 * had, which may exceed what was actually stored in buf.
+                 */
+                auto const stored = size_t(len) < sizeof(buf) ? size_t(len) : sizeof(buf) - 1;
+                m_str.append(buf, stored);
+        }
+
+        /* Appends one code point, formatted according to the output mode. */
+        void
+        print_u32(uint32_t const c) noexcept
+        {
+                char ubuf[7];
+                auto const len = g_unichar_to_utf8(c, ubuf);
+
+                if (m_codepoints) {
+                        ubuf[len] = 0;
+                        if (g_unichar_isprint(c)) {
+                                print_format("[%04X %s]", c, ubuf);
+                        } else {
+                                print_format("[%04X]", c);
+                        }
+                } else {
+                        print(ubuf, len);
+                }
+        }
+
+        /* Writes the pending output to stdout and empties the buffer.
+         * A line feed is appended first in codepoints mode or if @force_lf.
+         */
+        void
+        printout(bool force_lf = false) noexcept
+        {
+                if (m_codepoints || force_lf)
+                        m_str.push_back('\n');
+
+                /* write() may store only part of the data or be interrupted
+                 * by a signal; keep going until everything is written or an
+                 * unrecoverable error occurs.
+                 */
+                auto data = m_str.data();
+                auto remaining = m_str.size();
+                while (remaining > 0) {
+                        auto const written = write(STDOUT_FILENO, data, remaining);
+                        if (written == -1) {
+                                if (errno == EINTR)
+                                        continue;
+                                break;
+                        }
+
+                        data += written;
+                        remaining -= written;
+                }
+
+                m_str.clear();
+        }
+
+        static inline auto const k_LF = uint32_t{0xau};
+
+public:
+
+        Printer(bool codepoints = false) noexcept
+                : m_codepoints{codepoints}
+        {
+        }
+
+        ~Printer() noexcept
+        {
+                /* Flush whatever is still pending, terminated by a line feed. */
+                printout(true);
+        }
+
+        /* Prints code point @c; a line feed flushes the pending output. */
+        void operator()(uint32_t const c) noexcept
+        {
+                print_u32(c);
+                if (c == k_LF)
+                        printout();
+        }
+
+}; // class Printer
+
+/* Functor that accepts a code point and discards it. */
+class Sink {
+public:
+        void operator()(uint32_t) noexcept
+        {
+        }
+
+}; // class Sink
+
+/* Creates an ICU based decoder for the charset selected in @options.
+ *
+ * Opens one converter for options.charset() and one for UTF-32 in platform
+ * byte order, and hands both to a new ICUDecoder. Diagnostics go to stderr
+ * unless options.quiet() is set.
+ *
+ * Returns: the decoder, or an empty pointer if a converter could not be opened.
+ */
+static std::unique_ptr<vte::base::ICUDecoder>
+make_decoder(Options const& options)
+{
+        auto err = icu::ErrorCode{};
+
+        auto converter = std::shared_ptr<UConverter>{ucnv_open(options.charset(), err), &ucnv_close};
+        if (err.isFailure()) {
+                if (!options.quiet())
+                        g_printerr("Failure to open converter for \"%s\": %s\n",
+                                   options.charset(), err.errorName());
+                return {};
+        }
+
+        /* The converter is usable, but tell the user which charset was picked. */
+        if (err.get() == U_AMBIGUOUS_ALIAS_WARNING) {
+                err.reset();
+                auto canonical = ucnv_getName(converter.get(), err);
+                if (err.isSuccess() && !options.quiet())
+                        g_printerr("Warning: charset \"%s\" is ambiguous alias for \"%s\"\n",
+                                   options.charset(), canonical);
+        }
+
+        err.reset();
+        auto u32_converter = std::shared_ptr<UConverter>{ucnv_open("utf32platformendian", err), &ucnv_close};
+        if (err.isFailure()) {
+                if (!options.quiet())
+                        g_printerr("Failure to open converter for \"%s\": %s\n",
+                                   "UTF-32", err.errorName());
+                return {};
+        }
+
+        return std::make_unique<vte::base::ICUDecoder>(converter, u32_converter);
+}
+
+/* Reads the input files, decodes them and passes every decoded code point
+ * to a functor, while counting input bytes, output characters and errors,
+ * and recording the time spent on each pass over a file.
+ */
+class Processor {
+private:
+        gsize m_input_bytes{0};
+        gsize m_output_chars{0};
+        gsize m_errors{0};
+        GArray* m_bench_times{nullptr}; /* int64_t elements, one per pass, in µs */
+
+        /* Decodes everything readable from @fd as UTF-8 and calls @func for
+         * each code point. Reading stops at EOF or on a read error.
+         */
+        template<class Functor>
+        void
+        process_file_utf8(int fd,
+                          Functor& func)
+        {
+                auto decoder = vte::base::UTF8Decoder{};
+
+                auto const buf_size = size_t{16384};
+                auto buf = g_new0(uint8_t, buf_size);
+
+                auto start_time = g_get_monotonic_time();
+
+                for (;;) {
+                        auto len = read(fd, buf, buf_size);
+                        if (!len) /* EOF */
+                                break;
+                        if (len == -1) {
+                                if (errno == EAGAIN || errno == EINTR)
+                                        continue;
+                                break;
+                        }
+
+                        m_input_bytes += len;
+
+                        auto const bufend = buf + len;
+                        auto sptr = buf;
+                        while (sptr < bufend) {
+                                auto const state = decoder.decode(*sptr);
+
+                                /* On REJECT_REWIND the same byte is fed to the
+                                 * decoder again. This never leads to a loop,
+                                 * since in the next round this byte *will* be
+                                 * consumed.
+                                 */
+                                if (state != vte::base::UTF8Decoder::REJECT_REWIND)
+                                        ++sptr;
+
+                                switch (state) {
+                                case vte::base::UTF8Decoder::REJECT_REWIND:
+                                case vte::base::UTF8Decoder::REJECT:
+                                        decoder.reset();
+                                        /* Fall through to insert the U+FFFD replacement character. */
+                                        [[fallthrough]];
+                                case vte::base::UTF8Decoder::ACCEPT:
+                                        func(decoder.codepoint());
+                                        m_output_chars++;
+                                        break;
+
+                                default:
+                                        break;
+                                }
+                        }
+                }
+
+                /* Flush remaining output; at most one character */
+                if (decoder.flush()) {
+                        func(decoder.codepoint());
+                        m_output_chars++;
+                }
+
+                auto const time_spent = int64_t{g_get_monotonic_time() - start_time};
+                g_array_append_val(m_bench_times, time_spent);
+
+                g_free(buf);
+        }
+
+        /* Decodes everything readable from @fd using @decoder and calls @func
+         * for each code point. Reading stops at EOF or on a read error.
+         */
+        template<class Functor>
+        void
+        process_file_icu(int fd,
+                         vte::base::ICUDecoder* decoder,
+                         Functor& func)
+        {
+                decoder->reset();
+
+                auto const buf_size = size_t{16384};
+                auto buf = g_new0(uint8_t, buf_size);
+
+                auto start_time = g_get_monotonic_time();
+
+                while (true) {
+                        auto len = read(fd, buf, buf_size);
+                        if (!len) /* EOF */
+                                break;
+                        if (len == -1) {
+                                if (errno == EAGAIN || errno == EINTR)
+                                        continue;
+                                break;
+                        }
+
+                        m_input_bytes += len;
+
+                        auto sptr = reinterpret_cast<uint8_t const*>(buf);
+                        auto const sptrend = buf + len;
+                        while (sptr < sptrend) {
+                                /* Note that rewinding will never lead to an infinite loop,
+                                 * since when the decoder runs out of output, this input byte
+                                 * *will* be consumed.
+                                 */
+                                switch (decoder->decode(&sptr)) {
+                                case vte::base::ICUDecoder::Result::eSomething:
+                                        func(decoder->codepoint());
+                                        m_output_chars++;
+                                        break;
+
+                                case vte::base::ICUDecoder::Result::eNothing:
+                                        break;
+
+                                case vte::base::ICUDecoder::Result::eError:
+                                        // FIXMEchpe need do ++sptr here?
+                                        m_errors++;
+                                        decoder->reset();
+                                        break;
+                                }
+                        }
+                }
+
+                /* Flush remaining output */
+                auto sptr = reinterpret_cast<uint8_t const*>(buf + buf_size);
+                auto result = vte::base::ICUDecoder::Result{};
+                while ((result = decoder->decode(&sptr, true)) == vte::base::ICUDecoder::Result::eSomething) {
+                        func(decoder->codepoint());
+                        m_output_chars++;
+                }
+
+                auto const time_spent = int64_t{g_get_monotonic_time() - start_time};
+                g_array_append_val(m_bench_times, time_spent);
+
+                g_free(buf);
+        }
+
+        /* Processes @fd options.repeat() times, rewinding it before every
+         * pass but the first. The ICU decoder is used when a charset was
+         * given, the UTF-8 decoder otherwise.
+         *
+         * Returns: false if the decoder could not be created or seeking
+         *   failed, true otherwise.
+         */
+        template<class Functor>
+        bool
+        process_file(int fd,
+                     Options const& options,
+                     Functor& func)
+        {
+                auto decoder = std::unique_ptr<vte::base::ICUDecoder>{};
+                if (options.charset()) {
+                        decoder = make_decoder(options);
+                        if (!decoder)
+                                return false;
+                }
+
+                assert(decoder != nullptr || options.charset() == nullptr);
+
+                for (auto i = 0; i < options.repeat(); ++i) {
+                        if (i > 0 && lseek(fd, 0, SEEK_SET) != 0) {
+                                g_printerr("Failed to seek: %m\n");
+                                return false;
+                        }
+
+                        if (decoder) {
+                                process_file_icu(fd, decoder.get(), func);
+                        } else {
+                                process_file_utf8(fd, func);
+                        }
+                }
+
+                return true;
+        }
+
+public:
+
+        Processor() noexcept
+        {
+                m_bench_times = g_array_new(false, true, sizeof(int64_t));
+        }
+
+        ~Processor() noexcept
+        {
+                g_array_free(m_bench_times, true);
+        }
+
+        /* Processes all files named in @options, or stdin if there are none;
+         * the file name "-" also denotes stdin. A file that cannot be opened
+         * is reported on stderr and skipped.
+         *
+         * Returns: false if stdin was requested together with a repeat count
+         *   other than 1, or if processing a file failed (which also stops
+         *   the iteration); true otherwise.
+         */
+        template<class Functor>
+        bool
+        process_files(Options const& options,
+                      Functor& func)
+        {
+                auto r = bool{true};
+                if (auto filenames = options.filenames(); filenames != nullptr) {
+                        for (auto i = 0; filenames[i] != nullptr; i++) {
+                                auto filename = filenames[i];
+
+                                auto fd = int{-1};
+                                if (g_str_equal(filename, "-")) {
+                                        fd = STDIN_FILENO;
+
+                                        if (options.repeat() != 1) {
+                                                g_printerr("Cannot consume STDIN more than once\n");
+                                                return false;
+                                        }
+                                } else {
+                                        fd = ::open(filename, O_RDONLY);
+                                        if (fd == -1) {
+                                                g_printerr("Error opening file %s: %m\n", filename);
+                                        }
+                                }
+                                if (fd != -1) {
+                                        r = process_file(fd, options, func);
+                                        if (fd != STDIN_FILENO)
+                                                close(fd);
+                                        if (!r)
+                                                break;
+                                }
+                        }
+                } else {
+                        r = process_file(STDIN_FILENO, options, func);
+                }
+
+                return r;
+        }
+
+        /* Prints the byte, character and error counters to stderr. */
+        void print_statistics() const noexcept
+        {
+                g_printerr("%\'16" G_GSIZE_FORMAT " input bytes produced %\'16" G_GSIZE_FORMAT
+                           " unichars and %" G_GSIZE_FORMAT " errors\n",
+                           m_input_bytes, m_output_chars, m_errors);
+        }
+
+        /* Prints best, worst and average pass time, followed by all pass
+         * times in ascending order, to stderr. Sorts the recorded times.
+         */
+        void print_benchmark() const noexcept
+        {
+                /* Nothing was timed (e.g. no file could be opened); there is
+                 * no best or worst entry, and the average would divide by zero.
+                 */
+                if (m_bench_times->len == 0)
+                        return;
+
+                g_array_sort(m_bench_times,
+                             [](void const* p1, void const* p2) -> int {
+                                     int64_t const t1 = *(int64_t const*)p1;
+                                     int64_t const t2 = *(int64_t const*)p2;
+                                     return t1 == t2 ? 0 : (t1 < t2 ? -1 : 1);
+                             });
+
+                auto total_time = int64_t{0};
+                for (unsigned int i = 0; i < m_bench_times->len; ++i)
+                        total_time += g_array_index(m_bench_times, int64_t, i);
+
+                g_printerr("\nTimes: best %\'" G_GINT64_FORMAT "µs "
+                           "worst %\'" G_GINT64_FORMAT "µs "
+                           "average %\'" G_GINT64_FORMAT "µs\n",
+                           g_array_index(m_bench_times, int64_t, 0),
+                           g_array_index(m_bench_times, int64_t, m_bench_times->len - 1),
+                           total_time / (int64_t)m_bench_times->len);
+                for (unsigned int i = 0; i < m_bench_times->len; ++i)
+                        g_printerr("  %\'" G_GINT64_FORMAT "µs\n",
+                                   g_array_index(m_bench_times, int64_t, i));
+        }
+
+}; // class Processor
+
+// main
+
+/* Entry point: parses the options, then either lists the available
+ * charsets or decodes the input, optionally followed by statistics and
+ * benchmark output.
+ */
+int
+main(int argc,
+     char *argv[])
+{
+        setlocale(LC_ALL, "");
+        _vte_debug_init();
+
+        Options options{};
+        GError* error = nullptr;
+        auto const parsed = options.parse(argc, argv, &error);
+        if (!parsed) {
+                g_printerr("Failed to parse arguments: %s\n", error->message);
+                g_error_free(error);
+                return EXIT_FAILURE;
+        }
+
+        /* Listing the charsets takes precedence over processing any input. */
+        if (options.list()) {
+                auto names = vte::base::get_icu_charsets(true);
+                for (auto name = names; *name; ++name)
+                        g_print("%s\n", *name);
+                g_strfreev(names);
+
+                return EXIT_SUCCESS;
+        }
+
+        Processor processor{};
+        bool success = false;
+        if (!options.quiet()) {
+                /* The printer flushes its pending output when this scope ends,
+                 * i.e. before statistics and benchmark results are printed.
+                 */
+                Printer printer{options.codepoints()};
+                success = processor.process_files(options, printer);
+        } else {
+                Sink sink{};
+                success = processor.process_files(options, sink);
+        }
+
+        if (options.statistics())
+                processor.print_statistics();
+        if (options.benchmark())
+                processor.print_benchmark();
+
+        if (!success)
+                return EXIT_FAILURE;
+
+        return EXIT_SUCCESS;
+}
diff --git a/src/icu-converter.cc b/src/icu-converter.cc
new file mode 100644
index 00000000..7ac18a34
--- /dev/null
+++ b/src/icu-converter.cc
@@ -0,0 +1,139 @@
+/*
+ * Copyright © 2019 Christian Persch
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include "icu-converter.hh"
+
+#include <cassert>
+#include <memory>
+
+#include <unicode/errorcode.h>
+
+#include "debug.h"
+#include "icu-glue.hh"
+
+namespace vte::base {
+
+/* Creates a converter for @charset.
+ *
+ * Besides the converter for @charset itself, converters for UTF-32 in
+ * platform byte order and for UTF-8 are opened and passed on to the new
+ * ICUConverter.
+ *
+ * @charset: the charset name
+ * @error: return location for a GError, or nullptr
+ *
+ * Returns: the converter, or an empty pointer if @charset is rejected by
+ *   get_icu_charset_is_ecma35() or one of the ICU converters could not
+ *   be created.
+ */
+std::unique_ptr<ICUConverter>
+ICUConverter::make(char const* charset,
+                   GError** error)
+{
+        if (vte::base::get_icu_charset_is_ecma35(charset)) {
+                /* Report the failure through @error as well, so that callers
+                 * that print error->message on failure have something to print.
+                 */
+                g_set_error(error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
+                            "Unsupported charset \"%s\"", charset);
+                return {};
+        }
+
+        /* NOTE(review): make_icu_converter() is presumed to set @error when it
+         * returns an empty pointer — confirm against icu-glue.
+         */
+        auto charset_converter = vte::base::make_icu_converter(charset, error);
+        if (!charset_converter)
+                return {};
+
+        auto u32_converter = vte::base::make_icu_converter("utf32platformendian", error);
+        if (!u32_converter)
+                return {};
+
+        auto u8_converter = vte::base::make_icu_converter("utf8", error);
+        if (!u8_converter)
+                return {};
+
+        return std::make_unique<ICUConverter>(charset, charset_converter, u32_converter, u8_converter);
+}
+
+/* Converts @length bytes of UTF-8 text at @data to the converter's charset.
+ *
+ * Returns: the converted bytes, or an empty string if @length is 0, the
+ *   input is too large for ICU's 32-bit lengths, or a conversion step
+ *   failed (failures are logged under VTE_DEBUG_CONVERSION).
+ */
+std::string
+ICUConverter::convert(char const* data,
+                      size_t length)
+{
+        /* We can't use ucnv_convertEx since that doesn't support preflighting.
+         * Instead, convert to UTF-16 first, and then to the target, with
+         * preflighting both times. This is slow, but this is the legacy
+         * code path, so we don't care.
+         */
+
+        if (length == 0)
+                return {};
+
+        /* The ICU functions below take the source length as a 32-bit integer. */
+        if (length > (size_t)G_MAXINT32)
+                return {};
+
+        ucnv_resetToUnicode(m_u8_converter.get());
+
+        /* Preflight: with no output buffer, ICU reports the required size
+         * together with U_BUFFER_OVERFLOW_ERROR, which is expected here.
+         */
+        auto err = icu::ErrorCode{};
+        auto u16_size = ucnv_toUChars(m_u8_converter.get(),
+                                      nullptr, 0,
+                                      data, length,
+                                      err);
+        if (err.isFailure() && (err.get() != U_BUFFER_OVERFLOW_ERROR)) {
+                _vte_debug_print(VTE_DEBUG_CONVERSION,
+                                 "Error converting from UTF-8 to UTF-16 in preflight: %s\n",
+                                 err.errorName());
+                return {};
+        }
+
+        auto u16_buffer = std::u16string{};
+        if ((size_t)u16_size > u16_buffer.max_size()) // prevent exceptions
+                return {};
+        u16_buffer.resize(u16_size);
+
+        err.reset();
+        u16_size = ucnv_toUChars(m_u8_converter.get(),
+                                 u16_buffer.data(),
+                                 u16_buffer.size(),
+                                 data,
+                                 length,
+                                 err);
+        if (err.isFailure()) {
+                _vte_debug_print(VTE_DEBUG_CONVERSION,
+                                 "Error converting from UTF-8 to UTF-16: %s\n",
+                                 err.errorName());
+                return {};
+        }
+
+        /* Now convert to target */
+        ucnv_resetFromUnicode(m_charset_converter.get());
+        err.reset();
+        auto target_size = ucnv_fromUChars(m_charset_converter.get(),
+                                           nullptr, 0,
+                                           u16_buffer.data(),
+                                           u16_size,
+                                           err);
+        if (err.isFailure() && (err.get() != U_BUFFER_OVERFLOW_ERROR)) {
+                _vte_debug_print(VTE_DEBUG_CONVERSION,
+                                 "Error converting from UTF-16 to %s in preflight: %s\n",
+                                 m_charset.c_str(),
+                                 err.errorName());
+                return {};
+        }
+
+        auto target_buffer = std::string{};
+        if ((size_t)target_size > target_buffer.max_size()) // prevent exceptions
+                return {};
+        target_buffer.resize(target_size);
+
+        /* NOTE(review): capacity() rather than size() is passed as the
+         * destination capacity; presumably this gives ICU room to
+         * NUL-terminate the output — confirm before changing it.
+         */
+        err.reset();
+        target_size = ucnv_fromUChars(m_charset_converter.get(),
+                                      target_buffer.data(),
+                                      target_buffer.capacity(),
+                                      u16_buffer.data(),
+                                      u16_size,
+                                      err);
+        if (err.isFailure()) {
+                _vte_debug_print(VTE_DEBUG_CONVERSION,
+                                 "Error converting from UTF-16 to %s: %s\n",
+                                 m_charset.c_str(),
+                                 err.errorName());
+                return {};
+        }
+
+        return target_buffer;
+}
+
+} // namespace vte::base
diff --git a/src/icu-converter.hh b/src/icu-converter.hh
new file mode 100644
index 00000000..9f1c3731
--- /dev/null
+++ b/src/icu-converter.hh
@@ -0,0 +1,81 @@
+/*
+ * Copyright © 2019 Christian Persch
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <glib.h>
+
+#include <memory>
+#include <string>
+
+#include <unicode/ucnv.h>
+
+#include "icu-decoder.hh"
+
+namespace vte::base {
+
+class ICUConverter { /* Bundles the ICU converters used for one legacy charset with a streaming decoder built on them. */
+public:
+        using converter_shared_type = std::shared_ptr<UConverter>; /* converters are shared between this class and m_decoder */
+
+        static std::unique_ptr<ICUConverter> make(char const *charset,
+                                                  GError** error = nullptr); /* factory; callers treat a null result as failure. NOTE(review): presumably fills @error in that case -- confirm in icu-converter.cc */
+
+        ICUConverter(char const* charset,
+                     converter_shared_type charset_converter,
+                     converter_shared_type u32_converter,
+                     converter_shared_type u8_converter)
+                : m_charset(charset), /* copies the charset name */
+                  m_charset_converter(charset_converter),
+                  m_u32_converter(u32_converter),
+                  m_u8_converter(u8_converter),
+                  m_decoder(charset_converter, u32_converter) /* the decoder shares these two converters */
+        {
+        }
+
+        ~ICUConverter() = default;
+
+        ICUConverter(ICUConverter const&) = delete; /* neither copyable nor movable */
+        ICUConverter(ICUConverter&&) = delete;
+        ICUConverter& operator= (ICUConverter const&) = delete;
+        ICUConverter& operator= (ICUConverter&&) = delete;
+
+        constexpr auto const& charset() const noexcept { return m_charset; } /* charset name given at construction */
+        constexpr auto& decoder() noexcept { return m_decoder; } /* streaming charset -> UTF-32 decoder */
+
+        auto charset_converter() noexcept { return m_charset_converter.get(); } /* raw, non-owning pointers */
+        auto u32_converter() noexcept     { return m_u32_converter.get();     }
+        auto u8_converter() noexcept      { return m_u8_converter.get();      }
+
+        std::string convert(char const* data,
+                            size_t length); /* converts UTF-8 @data to the charset, pivoting through UTF-16; returns an empty string on error */
+
+private:
+        std::string m_charset; /* name of the legacy charset */
+        converter_shared_type m_charset_converter; /* converter for m_charset */
+        converter_shared_type m_u32_converter; /* UTF-32 converter */
+        converter_shared_type m_u8_converter; /* UTF-8 converter */
+        vte::base::ICUDecoder m_decoder; /* declared last; constructed from the constructor arguments, not from the members above */
+
+        /* Note that m_decoder will share m_charset_converter and only use it in the
+         * toUnicode direction; and m_u32_converter, and will use that only in the
+         * fromUnicode direction.
+         * convert() will only use m_charset_converter in the fromUnicode direction.
+         */
+};
+
+} // namespace vte::base
diff --git a/src/icu-decoder.cc b/src/icu-decoder.cc
new file mode 100644
index 00000000..5589da40
--- /dev/null
+++ b/src/icu-decoder.cc
@@ -0,0 +1,151 @@
+/*
+ * Copyright © 2019 Christian Persch
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include <glib.h>
+
+#include <cassert>
+#include <memory>
+
+#include "icu-decoder.hh"
+
+namespace vte::base {
+
+/*
+ * ICUDecoder::decode:
+ * @sptr: inout pointer to input data
+ * @flush: whether to flush
+ *
+ * Decodes input, and advances *@sptr for input consumed. At most
+ * one byte of input is consumed; if flushing, no input is consumed.
+ *
+ * A single step may produce more than one code point. In that case the
+ * first one is made available immediately, and each following call hands
+ * out the next buffered code point, without consuming any input, before
+ * decoding resumes.
+ *
+ * Returns: %Result::eSomething if an output character is available,
+ *   %Result::eNothing if there is none, or %Result::eError on failure,
+ *   after which reset() must be called to proceed
+ */
+ICUDecoder::Result
+ICUDecoder::decode(uint8_t const** sptr,
+                   bool flush) noexcept
+{
+        switch (m_state) {
+        case State::eOutput:
+                /* An earlier step left more than one code point in the buffer;
+                 * hand out the next one without touching the input.
+                 */
+                if (++m_index < m_available)
+                        return Result::eSomething;
+
+                /* All buffered output has been handed out; decode more input. */
+                m_state = State::eInput;
+                [[fallthrough]];
+        case State::eInput: {
+                /* Convert in two stages from charset to UTF-32, pivoting through UTF-16.
+                 * This is similar to ucnv_convertEx(), but that API does not fit our
+                 * requirements completely.
+                 *
+                 * This function is similar to ucnv_getNextUChar, except that it works
+                 * with streaming (and thus may produce no output in some steps), while
+                 * ucnv_getNextUChar does not.
+                 */
+
+                auto source_ptr = reinterpret_cast<char const**>(sptr);
+                auto source_start = *source_ptr;
+                /* Offer exactly one byte of input, or none at all when flushing. */
+                auto source_limit = source_start + (1 - flush);
+
+                auto target_u16_start = u16_buffer();
+                auto target_u16_limit = u16_buffer_end();
+                auto target_u16 = target_u16_start;
+
+                m_err.reset();
+                ucnv_toUnicode(m_charset_converter.get(),
+                               &target_u16, target_u16_limit,
+                               source_ptr, source_limit,
+                               nullptr /* offsets */,
+                               flush,
+                               m_err);
+
+                /* There should be no error here. We use the default callback
+                 * which replaces invalid input with replacement character (either
+                 * U+FFFD or SUB), and we should never hit U_BUFFER_OVERFLOW_ERROR,
+                 * since we process only one input byte at a time and the output
+                 * buffer holds at most 1 UTF-16 character (a high surrogate), and
+                 * there are no charsets where the state is so deep as to suddenly
+                 * output 32 characters.
+                 */
+                if (m_err.isFailure()) {
+                        m_state = State::eError;
+                        return Result::eError;
+                }
+
+                /* Now convert from UTF-16 to UTF-32. There will be no overflow here
+                 * either, since the output buffer is empty, and for each UTF-16 code
+                 * point of input, the decoder will output at most one UTF-32 code
+                 * point.
+                 */
+
+                auto target_u32_start = reinterpret_cast<char*>(u32_buffer());
+                auto target_u32_limit = reinterpret_cast<char const*>(u32_buffer_end());
+                auto target_u32 = target_u32_start;
+                auto target_u16_u32_start = const_cast<char16_t const*>(target_u16_start);
+                auto target_u16_u32_limit = target_u16;
+
+                ucnv_fromUnicode(m_u32_converter.get(),
+                                 &target_u32, target_u32_limit,
+                                 &target_u16_u32_start, target_u16_u32_limit,
+                                 nullptr /* offsets */,
+                                 flush,
+                                 m_err);
+                if (m_err.isFailure()) {
+                        m_state = State::eError;
+                        return Result::eError;
+                }
+
+                if (target_u32 == target_u32_start) {
+                        if (*source_ptr == source_start && !flush) {
+                                /* The decoder produced neither output nor consumed input, and
+                                 * wasn't flushing? That shouldn't happen; go to error state,
+                                 * requiring an explicit reset() to proceed further.
+                                 */
+                                m_state = State::eError;
+                                return Result::eError;
+                        }
+
+                        return Result::eNothing;
+                }
+
+                /* We have some output! */
+                assert((target_u32 - target_u32_start) % sizeof(m_u32_buffer[0]) == 0);
+                m_available = (target_u32 - target_u32_start) / sizeof(m_u32_buffer[0]);
+                assert(m_available >= 1);
+
+                m_index = 0;
+                /* Remember that the buffer holds output, so that the next call first
+                 * hands out any further buffered code points instead of overwriting
+                 * them with the result of the next conversion step.
+                 */
+                m_state = State::eOutput;
+                return Result::eSomething;
+        }
+
+        case State::eError:
+        default:
+                return Result::eError;
+        }
+}
+
+/* Returns the decoder to its initial state: no buffered output, no pending
+ * error, ready to consume input.
+ */
+void
+ICUDecoder::reset() noexcept
+{
+        /* Forget any buffered output and any previous error. */
+        m_index = 0;
+        m_available = 0;
+        m_state = State::eInput;
+        m_err.reset();
+
+        /* Reset each shared converter only in the direction decode() uses it. */
+        ucnv_resetFromUnicode(m_u32_converter.get());
+        ucnv_resetToUnicode(m_charset_converter.get());
+}
+
+} // namespace vte::base
diff --git a/src/icu-decoder.hh b/src/icu-decoder.hh
new file mode 100644
index 00000000..8c4eada1
--- /dev/null
+++ b/src/icu-decoder.hh
@@ -0,0 +1,102 @@
+/*
+ * Copyright © 2019 Christian Persch
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <memory>
+
+#include <unicode/errorcode.h>
+#include <unicode/ucnv.h>
+
+namespace vte::base {
+
+/*
+ * vte::base::ICUDecoder:
+ *
+ * Converts input from any ICU-supported charset to UTF-32, one input byte at a time.
+ *
+ * The two converters are held through shared pointers; decode() uses the
+ * charset converter with ucnv_toUnicode() and the UTF-32 converter with
+ * ucnv_fromUnicode(), pivoting through a small UTF-16 buffer.
+ */
+class ICUDecoder {
+public:
+
+        using converter_shared_type = std::shared_ptr<UConverter>;
+
+        ICUDecoder(converter_shared_type charset_converter,
+                   converter_shared_type u32_converter)
+                : m_charset_converter{charset_converter},
+                  m_u32_converter{u32_converter}
+        { }
+
+        ~ICUDecoder() noexcept { }
+
+        ICUDecoder(ICUDecoder const&) = delete; /* neither copyable nor movable */
+        ICUDecoder(ICUDecoder&&) = delete;
+
+        ICUDecoder& operator=(ICUDecoder const&) = delete;
+        ICUDecoder& operator=(ICUDecoder&&) = delete;
+
+        /*
+         * eNothing: there is no output character available
+         * eSomething: there is an output character available
+         * eError: an error occurred; you must call reset()
+         */
+        enum class Result {
+                eNothing   = 0,
+                eSomething = 1,
+                eError     = 2,
+        };
+
+        constexpr auto const& error() const noexcept { return m_err; } /* ICU status of the most recent conversion step */
+
+        constexpr auto codepoint() const noexcept { return m_u32_buffer[m_index]; } /* current output character; meaningful only after decode() returned eSomething */
+
+        Result decode(uint8_t const** sptr,
+                      bool flush = false) noexcept; /* consumes at most one byte from *sptr, none when flushing */
+
+        void reset() noexcept; /* resets the converters and clears error and output state */
+
+private:
+        enum class State {
+                eInput  = 0, /* decode() reads input */
+                eOutput = 1, /* decode() first steps through buffered output */
+                eError  = 2, /* decode() fails until reset() is called */
+        };
+
+        State m_state{State::eInput};
+
+        converter_shared_type m_charset_converter; /* used in the toUnicode direction */
+        converter_shared_type m_u32_converter; /* used in the fromUnicode direction */
+
+        icu::ErrorCode m_err{};
+
+        int m_available{0}; /* how many output characters are available */
+        int m_index{0};     /* index of current output character in m_u32_buffer */
+
+        /* 32 is large enough to avoid UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH,
+         * see comment in icu4c/source/common/ucnv.cpp:ucnv_convertEx().
+         */
+        char32_t m_u32_buffer[32]; /* final UTF-32 output */
+        char16_t m_u16_buffer[32]; /* UTF-16 pivot between the two conversion stages */
+
+        constexpr auto u16_buffer() noexcept { return &m_u16_buffer[0]; }
+        constexpr auto u32_buffer() noexcept { return &m_u32_buffer[0]; }
+
+        constexpr auto u16_buffer_end() const noexcept { return &m_u16_buffer[0] + 32; } /* one past the end; 32 must match the array size */
+        constexpr auto u32_buffer_end() const noexcept { return &m_u32_buffer[0] + 32; } /* one past the end; 32 must match the array size */
+
+}; // class ICUDecoder
+
+} // namespace vte::base
diff --git a/src/meson.build b/src/meson.build
index 4b2f19b9..78ee6341 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -28,6 +28,15 @@ debug_sources = files(
   'debug.h',
 )
 
+icu_sources = files(
+  'icu-converter.cc',
+  'icu-converter.hh',
+  'icu-decoder.cc',
+  'icu-decoder.hh',
+  'icu-glue.cc',
+  'icu-glue.hh',
+)
+
 modes_sources = files(
   'modes-ecma.hh',
   'modes-private.hh',
@@ -125,6 +134,10 @@ if get_option('a11y')
   libvte_common_sources += a11y_sources
 endif
 
+if get_option('icu')
+  libvte_common_sources += icu_sources
+endif
+
 libvte_common_doc_sources = files(
   # These file contain gtk-doc comments to be extracted for docs and gir
   'pty.cc',
@@ -167,6 +180,7 @@ libvte_common_public_deps = [
 libvte_common_deps = libvte_common_public_deps + [
   fribidi_dep,
   gnutls_dep,
+  icu_dep,
   pcre2_dep,
   libm_dep,
   pthreads_dep,
@@ -223,7 +237,22 @@ endif
 
 ## Tests
 
-# cat
+# decoder cat
+
+decoder_cat_sources = icu_sources + utf8_sources + debug_sources + files(
+  'decoder-cat.cc'
+)
+
+decoder_cat = executable(
+  'decoder-cat',
+  decoder_cat_sources,
+  dependencies: [glib_dep, icu_dep,],
+  cpp_args: [],
+  include_directories: top_inc,
+  install: false,
+)
+
+# parser cat
 
 parser_cat_sources = parser_sources + utf8_sources + debug_sources + files(
   'parser-cat.cc'
@@ -232,7 +261,7 @@ parser_cat_sources = parser_sources + utf8_sources + debug_sources + files(
 parser_cat = executable(
   'parser-cat',
   parser_cat_sources,
-  dependencies: [glib_dep],
+  dependencies: [glib_dep,],
   cpp_args: ['-DPARSER_INCLUDE_NOP'],
   include_directories: top_inc,
   install: false,
diff --git a/src/parser-cat.cc b/src/parser-cat.cc
index 9ba4e214..dbccedab 100644
--- a/src/parser-cat.cc
+++ b/src/parser-cat.cc
@@ -358,7 +358,8 @@ private:
         void
         printout() noexcept
         {
-                g_print("%s\n", m_str.c_str());
+                m_str.push_back('\n');
+                write(STDOUT_FILENO, m_str.data(), m_str.size());
                 m_str.clear();
         }
 
@@ -747,7 +748,8 @@ public:
                                 }
                                 if (fd != -1) {
                                         r = process_file(fd, repeat, func);
-                                        close(fd);
+                                        if (fd != STDIN_FILENO)
+                                                close(fd);
                                         if (!r)
                                                 break;
                                 }
diff --git a/src/utf8-test.cc b/src/utf8-test.cc
index 975c5b74..4fae7f8e 100644
--- a/src/utf8-test.cc
+++ b/src/utf8-test.cc
@@ -56,9 +56,8 @@ decode(uint8_t const* in,
         decoder.reset();
 
         auto const iend = in + len;
-        uint32_t state = UTF8Decoder::ACCEPT;
         for (auto iptr = in; iptr < iend; ++iptr) {
-                switch ((state = decoder.decode(*iptr))) {
+                switch (decoder.decode(*iptr)) {
                 case vte::base::UTF8Decoder::REJECT_REWIND:
                         /* Note that this will never lead to a loop, since in the
                          * next round this byte *will* be consumed.
@@ -67,7 +66,6 @@ decode(uint8_t const* in,
                         [[fallthrough]];
                 case vte::base::UTF8Decoder::REJECT:
                         decoder.reset();
-                        state = UTF8Decoder::ACCEPT;
                         /* Fall through to insert the U+FFFD replacement character. */
                         [[fallthrough]];
                 case vte::base::UTF8Decoder::ACCEPT:
@@ -82,8 +80,8 @@ decode(uint8_t const* in,
          * we need to insert a replacement character since we're
          * aborting a sequence mid-way.
          */
-        if (state != UTF8Decoder::ACCEPT) {
-                out.push_back(0xfffdu);
+        if (decoder.flush()) {
+                out.push_back(decoder.codepoint());
         }
 }
 
diff --git a/src/utf8.hh b/src/utf8.hh
index 8b90bd75..108ca824 100644
--- a/src/utf8.hh
+++ b/src/utf8.hh
@@ -67,6 +67,13 @@ public:
                 m_codepoint = 0xfffdU;
         }
 
+        /* To be called when no more input follows. If the decoder is not in
+         * the ACCEPT state, it is reset(). Returns whether doing so changed
+         * the decoder state.
+         */
+        inline bool flush() noexcept {
+                auto const state_before = m_state;
+                if (state_before != ACCEPT) {
+                        reset();
+                }
+                return m_state != state_before;
+        }
+
 private:
         uint32_t m_state{ACCEPT};
         uint32_t m_codepoint{0};
diff --git a/src/vte.cc b/src/vte.cc
index 54e8a587..960b041b 100644
--- a/src/vte.cc
+++ b/src/vte.cc
@@ -728,8 +728,11 @@ Terminal::emit_selection_changed()
 /* Emit a "commit" signal. */
 void
 Terminal::emit_commit(char const* text,
-                                gssize length)
+                      gssize length)
 {
+        if (length == 0)
+                return;
+
        char const* result = NULL;
        char *wrapped = NULL;
 
@@ -746,6 +749,23 @@ Terminal::emit_commit(char const* text,
                wrapped[length] = '\0';
        }
 
+        _VTE_DEBUG_IF(VTE_DEBUG_KEYBOARD) {
+                for (gssize i = 0; i < length; i++) {
+                        if ((((guint8) result[i]) < 32) ||
+                            (((guint8) result[i]) > 127)) {
+                                g_printerr(
+                                           "Sending <%02x> "
+                                           "to child.\n",
+                                           result[i]);
+                        } else {
+                                g_printerr(
+                                           "Sending '%c' "
+                                           "to child.\n",
+                                           result[i]);
+                        }
+                }
+        }
+
        g_signal_emit(m_terminal, signals[SIGNAL_COMMIT], 0, result, (guint)length);
 
        if(wrapped)
@@ -1949,97 +1969,53 @@ Terminal::maybe_scroll_to_bottom()
 
 /*
  * Terminal::set_encoding:
- * @codeset: (allow-none): a valid #GIConv target, or %NULL to use UTF-8
+ * @charset: (allow-none): target charset, or %NULL to use UTF-8
  *
  * Changes the encoding the terminal will expect data from the child to
- * be encoded with.  For certain terminal types, applications executing in the
- * terminal can change the encoding. If @codeset is %NULL, it uses "UTF-8".
+ * be encoded with.  If @charset is %NULL, it uses "UTF-8".
  *
  * Returns: %true if the encoding could be changed to the specified one
  */
 bool
-Terminal::set_encoding(char const* codeset)
+Terminal::set_encoding(char const* charset,
+                       GError** error)
 {
-#ifdef WITH_ICONV
-       if (codeset == nullptr) {
-                codeset = "UTF-8";
-       }
+#ifdef WITH_ICU
+        auto const using_utf8 = bool{charset == nullptr || g_ascii_strcasecmp(charset, "UTF-8") == 0};
+        auto const syntax = using_utf8 ? DataSyntax::eECMA48_UTF8 : DataSyntax::eECMA48_PCTERM;
+
+        if (syntax == data_syntax())
+                return true;
+
+        /* Note: we DON'T convert any pending output from the previous charset to
+         * the new charset, since that is in general not possible without loss, and
+         * also the output may include binary data (Terminal::feed_child_binary()).
+         * So we just clear the outgoing queue. (FIXMEchpe: instead, we could flush
+         * the outgoing and only change charsets once it's empty.)
+         * Do not clear the incoming queue.
+         */
 
-        bool const using_utf8 = g_str_equal(codeset, "UTF-8");
+        _vte_byte_array_clear(m_outgoing);
 
         if (using_utf8) {
-                if (m_incoming_conv != ((GIConv)-1))
-                        g_iconv_close(m_incoming_conv);
-                if (m_outgoing_conv != ((GIConv)-1))
-                        g_iconv_close(m_outgoing_conv);
-                m_incoming_conv = (GIConv)-1;
-                m_outgoing_conv = (GIConv)-1;
+                m_converter.reset();
         } else {
-                auto outconv = g_iconv_open(codeset, "UTF-8");
-                if (outconv == ((GIConv)-1))
+                m_converter = vte::base::ICUConverter::make(charset, error);
+                if (!m_converter)
                         return false;
-
-                auto inconv = g_iconv_open("UTF-8", codeset);
-                if (inconv == ((GIConv)-1)) {
-                        g_iconv_close(outconv);
-                        return FALSE;
-                }
-
-                if (m_outgoing_conv != ((GIConv)-1)) {
-                        g_iconv_close(m_outgoing_conv);
-                }
-                m_outgoing_conv = outconv; /* adopted */
-
-                if (m_incoming_conv != ((GIConv)-1)) {
-                        g_iconv_close(m_incoming_conv);
-                }
-                m_incoming_conv = inconv; /* adopted */
-
-                /* Set the terminal's encoding to the new value. */
-                auto old_codeset = m_encoding ? m_encoding : "UTF-8";
-                m_encoding = g_intern_string(codeset);
-
-                /* Convert any buffered output bytes. */
-                if ((_vte_byte_array_length(m_outgoing) > 0) &&
-                    (old_codeset != nullptr)) {
-                        char *obuf1, *obuf2;
-                        gsize bytes_written;
-
-                        /* Convert back to UTF-8. */
-                        obuf1 = g_convert((char *)m_outgoing->data,
-                                          _vte_byte_array_length(m_outgoing),
-                                          "UTF-8",
-                                          old_codeset,
-                                          NULL,
-                                          &bytes_written,
-                                          NULL);
-                        if (obuf1 != NULL) {
-                                /* Convert to the new encoding. */
-                                obuf2 = g_convert(obuf1,
-                                                  bytes_written,
-                                                  codeset,
-                                                  "UTF-8",
-                                                  NULL,
-                                                  &bytes_written,
-                                                  NULL);
-                                if (obuf2 != NULL) {
-                                        _vte_byte_array_clear(m_outgoing);
-                                        _vte_byte_array_append(m_outgoing,
-                                                               obuf2, bytes_written);
-                                        g_free(obuf2);
-                                }
-                                g_free(obuf1);
-                        }
-                }
         }
 
-        m_using_utf8 = using_utf8;
+        m_data_syntax = syntax;
+        reset_decoder();
+
+        if (pty())
+                pty()->set_utf8(using_utf8);
 
        _vte_debug_print(VTE_DEBUG_IO,
-                       "Set terminal encoding to `%s'.\n",
-                       m_encoding);
+                         "Set terminal encoding to `%s'.\n",
+                         encoding());
        _vte_debug_print(VTE_DEBUG_SIGNALS,
-                       "Emitting `encoding-changed'.\n");
+                         "Emitting `encoding-changed'.\n");
 
         GObject *object = G_OBJECT(m_terminal);
        g_signal_emit(object, signals[SIGNAL_ENCODING_CHANGED], 0);
@@ -3195,6 +3171,10 @@ Terminal::connect_pty_write()
 
         g_warn_if_fail(m_input_enabled);
 
+        /* Anything to write? */
+        if (_vte_byte_array_length(m_outgoing) == 0)
+                return;
+
         /* Do one write. FIXMEchpe why? */
         if (!pty_io_write (pty()->fd(), G_IO_OUT))
                 return;
@@ -3331,178 +3311,295 @@ Terminal::im_reset()
         }
 }
 
-#ifdef WITH_ICONV
+/* Dispatches processing of the pending input to the handler that matches
+ * the current data syntax.
+ */
+void
+Terminal::process_incoming()
+{
+        switch (data_syntax()) {
+        case DataSyntax::eECMA48_UTF8:
+                process_incoming_utf8();
+                return;
+#ifdef WITH_ICU
+        case DataSyntax::eECMA48_PCTERM:
+                process_incoming_pcterm();
+                return;
+#endif
+        default:
+                /* No other data syntax is expected here. */
+                g_assert_not_reached();
+                return;
+        }
+}
+
 
-static size_t
-_vte_conv(GIConv conv,
-         char **inbuf, gsize *inbytes_left,
-         gchar **outbuf, gsize *outbytes_left)
+/* Note that this code is mostly copied to process_incoming_pcterm() below; any non-charset-decoding
+ * related changes made here need to be made there, too.
+ * FIXMEchpe: refactor this to share more code with process_incoming_pcterm().
+ */
+void
+Terminal::process_incoming_utf8()
 {
-       size_t ret, tmp;
-       gchar *work_inbuf_start, *work_inbuf_working;
-       gchar *work_outbuf_start, *work_outbuf_working;
-       gsize work_inbytes, work_outbytes;
+       VteVisualPosition saved_cursor;
+       gboolean saved_cursor_visible;
+        VteCursorStyle saved_cursor_style;
+        vte::grid::row_t bbox_top, bbox_bottom;
+       gboolean modified, bottom;
+       gboolean invalidated_text;
+       gboolean in_scroll_region;
+
+       _vte_debug_print(VTE_DEBUG_IO,
+                         "Handler processing %" G_GSIZE_FORMAT " bytes over %" G_GSIZE_FORMAT " chunks.\n",
+                         m_input_bytes,
+                         m_incoming_queue.size());
+       _vte_debug_print (VTE_DEBUG_WORK, "(");
 
-       g_assert(conv != (GIConv) -1);
+        auto previous_screen = m_screen;
 
-       work_inbuf_start = work_inbuf_working = *inbuf;
-       work_outbuf_start = work_outbuf_working = *outbuf;
-       work_inbytes = *inbytes_left;
-       work_outbytes = *outbytes_left;
+        bottom = m_screen->insert_delta == (long)m_screen->scroll_delta;
 
-       /* Call the underlying conversion. */
-       ret = 0;
-       do {
-               tmp = g_iconv(conv,
-                                        &work_inbuf_working,
-                                        &work_inbytes,
-                                        &work_outbuf_working,
-                                        &work_outbytes);
-               if (tmp == (size_t) -1) {
-                       /* Check for zero bytes, which we pass right through. */
-                       if (errno == EILSEQ) {
-                               if ((work_inbytes > 0) &&
-                                   (work_inbuf_working[0] == '\0') &&
-                                   (work_outbytes > 0)) {
-                                       work_outbuf_working[0] = '\0';
-                                       work_outbuf_working++;
-                                       work_inbuf_working++;
-                                       work_outbytes--;
-                                       work_inbytes--;
-                                       ret++;
-                               } else {
-                                       /* No go. */
-                                       ret = -1;
-                                       break;
-                               }
-                       } else {
-                               ret = -1;
-                               break;
-                       }
-               } else {
-                       ret += tmp;
-                       break;
-               }
-       } while (work_inbytes > 0);
+       /* Save the current cursor position. */
+        saved_cursor = m_screen->cursor;
+       saved_cursor_visible = m_modes_private.DEC_TEXT_CURSOR();
+        saved_cursor_style = m_cursor_style;
 
-       /* We can't handle this particular failure, and it should
-        * never happen.  (If it does, our caller needs fixing.)  */
-       g_assert((ret != (size_t)-1) || (errno != E2BIG));
+        in_scroll_region = m_scrolling_restricted
+            && (m_screen->cursor.row >= (m_screen->insert_delta + m_scrolling_region.start))
+            && (m_screen->cursor.row <= (m_screen->insert_delta + m_scrolling_region.end));
 
-        /* Pass on the output results. */
-        *outbuf = work_outbuf_working;
-        *outbytes_left -= (work_outbuf_working - work_outbuf_start);
+       /* We should only be called when there's data to process. */
+       g_assert(!m_incoming_queue.empty());
 
-        /* Pass on the input results. */
-        *inbuf = work_inbuf_working;
-        *inbytes_left -= (work_inbuf_working - work_inbuf_start);
+       modified = FALSE;
+       invalidated_text = FALSE;
 
-       return ret;
-}
+        bbox_bottom = -G_MAXINT;
+        bbox_top = G_MAXINT;
+
+        vte::parser::Sequence seq{m_parser};
+
+        m_line_wrapped = false;
+
+        size_t bytes_processed = 0;
 
-void
-Terminal::convert_incoming() noexcept
-{
-        /* This is for legacy applications, so efficiency is not
-         * of any concern. Flatten the chunks into one big buffer,
-         * process that, and put the resulting UTF-8 back into
-         * chunks.
-         */
-        auto buf = _vte_byte_array_new();
-        _vte_byte_array_append(buf, m_incoming_leftover->data, m_incoming_leftover->len);
-        _vte_byte_array_clear(m_incoming_leftover);
         while (!m_incoming_queue.empty()) {
-                auto chunk = m_incoming_queue.front().get();
-                _vte_byte_array_append(buf, chunk->data, chunk->len);
+                auto chunk = std::move(m_incoming_queue.front());
                 m_incoming_queue.pop();
-        }
 
-        /* Convert the data to UTF-8 */
-        auto inbuf = (char*)buf->data;
-        size_t inbytes = buf->len;
+                g_assert_nonnull(chunk.get());
 
-        _VTE_DEBUG_IF(VTE_DEBUG_IO) {
-                _vte_debug_hexdump("Incoming buffer before conversion to UTF-8",
-                                   (uint8_t const*)inbuf, inbytes);
-        }
+                _VTE_DEBUG_IF(VTE_DEBUG_IO) {
+                        _vte_debug_hexdump("Incoming buffer", chunk->data, chunk->len);
+                }
 
-        auto unibuf = _vte_byte_array_new();
-        _vte_byte_array_set_minimum_size(unibuf, VTE_UTF8_BPC * inbytes);
-        auto outbuf = (char*)unibuf->data;
-        size_t outbytes = unibuf->len;
+                bytes_processed += chunk->len;
 
-        bool stop = false;
-        do {
-                auto converted = _vte_conv(m_incoming_conv,
-                                           &inbuf, &inbytes,
-                                           &outbuf, &outbytes);
-                switch (converted) {
-                case ((gsize)-1):
-                        switch (errno) {
-                        case EILSEQ: {
-                                /* Munge the input. */
-                                inbuf++;
-                                inbytes--;
-                                auto l = g_unichar_to_utf8(0xfffdU, (char*)outbuf);
-                                outbuf += l;
-                                outbytes -= l;
+                auto const* ip = chunk->data;
+                auto const* iend = chunk->data + chunk->len;
+
+                for ( ; ip < iend; ++ip) {
+
+                        switch (m_utf8_decoder.decode(*ip)) {
+                        case vte::base::UTF8Decoder::REJECT_REWIND:
+                                /* Rewind the stream.
+                                 * Note that this will never lead to a loop, since in the
+                                 * next round this byte *will* be consumed.
+                                 */
+                                --ip;
+                                [[fallthrough]];
+                        case vte::base::UTF8Decoder::REJECT:
+                                m_utf8_decoder.reset();
+                                /* Fall through to insert the U+FFFD replacement character. */
+                                [[fallthrough]];
+                        case vte::base::UTF8Decoder::ACCEPT: {
+                                auto rv = m_parser.feed(m_utf8_decoder.codepoint());
+                                if (G_UNLIKELY(rv < 0)) {
+#ifdef DEBUG
+                                        uint32_t c = m_utf8_decoder.codepoint();
+                                        char c_buf[7];
+                                        g_snprintf(c_buf, sizeof(c_buf), "%lc", c);
+                                        char const* wp_str = g_unichar_isprint(c) ? c_buf : _vte_debug_sequence_to_string(c_buf, -1);
+                                        _vte_debug_print(VTE_DEBUG_PARSER, "Parser error on U+%04X [%s]!\n",
+                                                         c, wp_str);
+#endif
+                                        break;
+                                }
+
+#ifdef VTE_DEBUG
+                                if (rv != VTE_SEQ_NONE)
+                                        g_assert((bool)seq);
+#endif
+
+                                _VTE_DEBUG_IF(VTE_DEBUG_PARSER) {
+                                        if (rv != VTE_SEQ_NONE) {
+                                                seq.print();
+                                        }
+                                }
+
+                                // FIXMEchpe this assumes that the only handler inserting
+                                // a character is GRAPHIC, which isn't true (at least ICH, REP, SUB
+                                // also do, and invalidate directly for now)...
+
+                                switch (rv) {
+                                case VTE_SEQ_GRAPHIC: {
+
+                                        bbox_top = std::min(bbox_top,
+                                                            m_screen->cursor.row);
+
+                                        // does insert_char(c, false, false)
+                                        GRAPHIC(seq);
+                                        _vte_debug_print(VTE_DEBUG_PARSER,
+                                                         "Last graphic is now U+%04X %lc\n",
+                                                         m_last_graphic_character,
+                                                         g_unichar_isprint(m_last_graphic_character) ? m_last_graphic_character : 0xfffd);
+
+                                        if (m_line_wrapped) {
+                                                m_line_wrapped = false;
+                                                /* line wrapped, correct bbox */
+                                                if (invalidated_text &&
+                                                    (m_screen->cursor.row > bbox_bottom + VTE_CELL_BBOX_SLACK ||
+                                                     m_screen->cursor.row < bbox_top - VTE_CELL_BBOX_SLACK)) {
+                                                        invalidate_rows_and_context(bbox_top, bbox_bottom);
+                                                        bbox_bottom = -G_MAXINT;
+                                                        bbox_top = G_MAXINT;
+                                                }
+                                                bbox_top = std::min(bbox_top,
+                                                                    m_screen->cursor.row);
+                                        }
+                                        /* Add the cells over which we have moved to the region
+                                         * which we need to refresh for the user. */
+                                        bbox_bottom = std::max(bbox_bottom,
+                                                               m_screen->cursor.row);
+                                        invalidated_text = TRUE;
+
+                                        /* We *don't* emit flush pending signals here. */
+                                        modified = TRUE;
+
+                                        break;
+                                }
+
+                                case VTE_SEQ_NONE:
+                                case VTE_SEQ_IGNORE:
+                                        break;
+
+                                default: {
+                                        switch (seq.command()) {
+#define _VTE_CMD(cmd)   case VTE_CMD_##cmd: cmd(seq); break;
+#define _VTE_NOP(cmd)
+#include "parser-cmd.hh"
+#undef _VTE_CMD
+#undef _VTE_NOP
+                                        default:
+                                                _vte_debug_print(VTE_DEBUG_PARSER,
+                                                                 "Unknown parser command %d\n", seq.command());
+                                                break;
+                                        }
+
+                                        m_last_graphic_character = 0;
+
+                                        modified = TRUE;
+
+                                        // FIXME m_screen may be != previous_screen, check for that!
+
+                                        gboolean new_in_scroll_region = m_scrolling_restricted
+                                                && (m_screen->cursor.row >= (m_screen->insert_delta + m_scrolling_region.start))
+                                                && (m_screen->cursor.row <= (m_screen->insert_delta + m_scrolling_region.end));
+
+                                        /* if we have moved greatly during the sequence handler, or moved
+                                         * into a scroll_region from outside it, restart the bbox.
+                                         */
+                                        if (invalidated_text &&
+                                            ((new_in_scroll_region && !in_scroll_region) ||
+                                             (m_screen->cursor.row > bbox_bottom + VTE_CELL_BBOX_SLACK ||
+                                              m_screen->cursor.row < bbox_top - VTE_CELL_BBOX_SLACK))) {
+                                                invalidate_rows_and_context(bbox_top, bbox_bottom);
+                                                invalidated_text = FALSE;
+                                                bbox_bottom = -G_MAXINT;
+                                                bbox_top = G_MAXINT;
+                                        }
+
+                                        in_scroll_region = new_in_scroll_region;
+
+                                        break;
+                                }
+                                }
                                 break;
                         }
-                        case EINVAL:
-                                /* Incomplete. Save for later. */
-                                stop = true;
-                                break;
-                        case E2BIG:
-                                /* Should never happen. */
-                                g_assert_not_reached();
-                                break;
-                        default:
-                                /* Should never happen. */
-                                g_assert_not_reached();
-                                break;
                         }
-                default:
-                        break;
                 }
-        } while ((inbytes > 0) && !stop);
+        }
 
-        /* FIXMEchpe this code used to skip NUL bytes,
-         * while the _vte_conv call passes NUL bytes through
-         * specifically. What's goint on!?
-         */
+#ifdef VTE_DEBUG
+               /* Some safety checks: ensure the visible parts of the buffer
+                * are all in the buffer. */
+               g_assert_cmpint(m_screen->insert_delta, >=, _vte_ring_delta(m_screen->row_data));
 
-        /* Done. */
-        auto processed = buf->len - inbytes;
-        unibuf->len = unibuf->len - outbytes;
+               /* The cursor shouldn't be above or below the addressable
+                * part of the display buffer. */
+                g_assert_cmpint(m_screen->cursor.row, >=, m_screen->insert_delta);
+#endif
 
-        /* If anything is left unconverted, store it for the next processing round. */
-        if (processed != buf->len) {
-                _vte_byte_array_append(m_incoming_leftover,
-                                       buf->data + processed,
-                                       buf->len - processed);
-        }
+       if (modified) {
+               /* Keep the cursor on-screen if we scroll on output, or if
+                * we're currently at the bottom of the buffer. */
+               update_insert_delta();
+               if (m_scroll_on_output || bottom) {
+                       maybe_scroll_to_bottom();
+               }
+               /* Deselect the current selection if its contents are changed
+                * by this insertion. */
+                if (!m_selection_resolved.empty()) {
+                        //FIXMEchpe: this is atrocious
+                       auto selection = get_selected_text();
+                       if ((selection == nullptr) ||
+                           (m_selection[VTE_SELECTION_PRIMARY] == nullptr) ||
+                           (strcmp(selection->str, m_selection[VTE_SELECTION_PRIMARY]->str) != 0)) {
+                               deselect_all();
+                       }
+                        if (selection)
+                                g_string_free(selection, TRUE);
+               }
+       }
 
-        auto outlen = unibuf->len;
-        while (outlen > 0) {
-                outbuf = (char*)unibuf->data;
-                while (outlen > 0) {
-                        m_incoming_queue.push(vte::base::Chunk::get());
-                        auto chunk = m_incoming_queue.back().get();
-                        auto len = std::min(size_t(outlen), chunk->capacity());
-                        memcpy(chunk->data, outbuf, len);
-                        chunk->len = len;
-                        outbuf += len;
-                        outlen -= len;
-                }
+       if (modified || (m_screen != previous_screen)) {
+                m_ringview.invalidate();
+               /* Signal that the visible contents changed. */
+               queue_contents_changed();
+       }
 
-                g_assert_cmpuint(outlen, ==, 0);
-        }
+       emit_pending_signals();
+
+       if (invalidated_text) {
+                invalidate_rows_and_context(bbox_top, bbox_bottom);
+       }
+
+        if ((saved_cursor.col != m_screen->cursor.col) ||
+            (saved_cursor.row != m_screen->cursor.row)) {
+               /* invalidate the old and new cursor positions */
+               if (saved_cursor_visible)
+                        invalidate_row(saved_cursor.row);
+               invalidate_cursor_once();
+               check_cursor_blink();
+               /* Signal that the cursor moved. */
+               queue_cursor_moved();
+        } else if ((saved_cursor_visible != m_modes_private.DEC_TEXT_CURSOR()) ||
+                   (saved_cursor_style != m_cursor_style)) {
+                invalidate_row(saved_cursor.row);
+               check_cursor_blink();
+       }
+
+       /* Tell the input method where the cursor is. */
+        im_update_cursor();
+
+        /* After processing some data, do a hyperlink GC. The multiplier is totally arbitrary, feel free to fine tune. */
+        _vte_ring_hyperlink_maybe_gc(m_screen->row_data, bytes_processed * 8);
+
+       _vte_debug_print (VTE_DEBUG_WORK, ")");
+       _vte_debug_print (VTE_DEBUG_IO,
+                          "%" G_GSIZE_FORMAT " bytes in %" G_GSIZE_FORMAT " chunks left to process.\n",
+                          m_input_bytes,
+                          m_incoming_queue.size());
 }
 
-#endif /* WITH_ICONV */
+#ifdef WITH_ICU
 
+/* Note that this is mostly a copy of process_incoming_utf8() above; any non-charset-decoding
+ * related changes made here need to be made there, too.
+ * FIXMEchpe: refactor this to share more code with process_incoming_utf8().
+ */
 void
-Terminal::process_incoming()
+Terminal::process_incoming_pcterm()
 {
        VteVisualPosition saved_cursor;
        gboolean saved_cursor_visible;
@@ -3534,14 +3631,6 @@ Terminal::process_incoming()
        /* We should only be called when there's data to process. */
        g_assert(!m_incoming_queue.empty());
 
-#ifdef WITH_ICONV
-        /* If we're using a legacy encoding for I/O, we need to
-         * convert the input to UTF-8 now.
-         */
-        if (G_UNLIKELY(!m_using_utf8))
-                convert_incoming();
-#endif
-
        modified = FALSE;
        invalidated_text = FALSE;
 
@@ -3554,6 +3643,8 @@ Terminal::process_incoming()
 
         size_t bytes_processed = 0;
 
+        auto& decoder = m_converter->decoder();
+
         while (!m_incoming_queue.empty()) {
                 auto chunk = std::move(m_incoming_queue.front());
                 m_incoming_queue.pop();
@@ -3569,25 +3660,15 @@ Terminal::process_incoming()
                 auto const* ip = chunk->data;
                 auto const* iend = chunk->data + chunk->len;
 
-                for ( ; ip < iend; ++ip) {
+                auto flush = bool{false};
+                while (ip < iend || flush) {
 
-                        switch (m_utf8_decoder.decode(*ip)) {
-                        case vte::base::UTF8Decoder::REJECT_REWIND:
-                                /* Rewind the stream.
-                                 * Note that this will never lead to a loop, since in the
-                                 * next round this byte *will* be consumed.
-                                 */
-                                --ip;
-                                [[fallthrough]];
-                        case vte::base::UTF8Decoder::REJECT:
-                                m_utf8_decoder.reset();
-                                /* Fall through to insert the U+FFFD replacement character. */
-                                [[fallthrough]];
-                        case vte::base::UTF8Decoder::ACCEPT: {
-                                auto rv = m_parser.feed(m_utf8_decoder.codepoint());
+                        switch (decoder.decode(&ip, flush)) {
+                        case vte::base::ICUDecoder::Result::eSomething: {
+                                auto rv = m_parser.feed(decoder.codepoint());
                                 if (G_UNLIKELY(rv < 0)) {
-#ifdef DEBUG
-                                        uint32_t c = m_utf8_decoder.codepoint();
+#ifdef VTE_DEBUG
+                                        uint32_t c = decoder.codepoint();
                                         char c_buf[7];
                                         g_snprintf(c_buf, sizeof(c_buf), "%lc", c);
                                         char const* wp_str = g_unichar_isprint(c) ? c_buf : _vte_debug_sequence_to_string(c_buf, -1);
@@ -3697,6 +3778,15 @@ Terminal::process_incoming()
                                 }
                                 break;
                         }
+                        case vte::base::ICUDecoder::Result::eNothing:
+                                flush = false;
+                                break;
+
+                        case vte::base::ICUDecoder::Result::eError:
+                                // FIXMEchpe do we need ++ip here?
+                                decoder.reset();
+                                break;
+
                         }
                 }
         }
@@ -3773,6 +3863,8 @@ Terminal::process_incoming()
                           m_incoming_queue.size());
 }
 
+#endif /* WITH_ICU */
+
 bool
 Terminal::pty_io_read(int const fd,
                       GIOCondition const condition)
@@ -3998,80 +4090,47 @@ Terminal::pty_io_write(int const fd,
         return _vte_byte_array_length(m_outgoing) != 0;
 }
 
-/* Convert some UTF-8 data to send to the child. */
+/* Queues @length bytes of UTF-8 @data (-1: NUL-terminated) for the child; legacy charsets go through m_converter. */
 void
 Terminal::send_child(char const* data,
                      gssize length) noexcept
 {
-       gchar *cooked;
-       long cooked_length, i;
-
         if (!m_input_enabled)
                 return;
 
         if (length == -1)
                 length = strlen(data);
+        if (length == 0)
+                return;
 
-#ifdef WITH_ICONV
-        if (m_using_utf8) {
-#endif /* WITH_ICONV */
-                cooked = (char*)data;
-                cooked_length = length;
-#ifdef WITH_ICONV
-        } else {
-                if (m_outgoing_conv == ((GIConv)-1))
-                        return;
+        /* Without a pty there is nowhere for the data to go; note that
+         * this also means that no commit is emitted in that case. */
+        // FIXMEchpe: shouldn't require pty for this
+        if (!pty())
+                return;
 
-                gsize icount;
-                icount = length;
-                auto ibuf = (char*)data;
-                gsize ocount = ((length + 1) * VTE_UTF8_BPC) + 1;
-                _vte_byte_array_set_minimum_size(m_conv_buffer, ocount);
-                char *obuf, *obufptr;
-                obuf = obufptr = (char*)m_conv_buffer->data;
-
-                if (_vte_conv(m_outgoing_conv, &ibuf, &icount, &obuf, &ocount) == (gsize)-1) {
-                        int errsv = errno;
-                        g_warning(_("Error (%s) converting data for child, dropping."),
-                                  g_strerror(errsv));
-                        return;
-                }
+        switch (data_syntax()) {
+        case DataSyntax::eECMA48_UTF8:
+                emit_commit(data, length);
+                _vte_byte_array_append(m_outgoing, data, length);
+                break;
+#ifdef WITH_ICU /* eECMA48_PCTERM and m_converter are only declared in ICU builds */
+        case DataSyntax::eECMA48_PCTERM: {
+                auto converted = m_converter->convert(data, length);
 
-                cooked = (gchar *)obufptr;
-                cooked_length = obuf - obufptr;
+                emit_commit(converted.data(), converted.size());
+                _vte_byte_array_append(m_outgoing, converted.data(), converted.size());
+                break;
         }
-#endif /* WITH_ICONV */
+#endif /* WITH_ICU */
-
-        /* Tell observers that we're sending this to the child. */
-        if (cooked_length > 0) {
-                emit_commit(cooked, cooked_length);
+        default:
+                g_assert_not_reached();
+                return;
         }
 
-        /* If there's a place for it to go, add the data to the
-         * outgoing buffer. */
-        // FIXMEchpe: shouldn't require pty for this
-        if ((cooked_length > 0) && pty()) {
-                _vte_byte_array_append(m_outgoing, cooked, cooked_length);
-                _VTE_DEBUG_IF(VTE_DEBUG_KEYBOARD) {
-                        for (i = 0; i < cooked_length; i++) {
-                                if ((((guint8) cooked[i]) < 32) ||
-                                    (((guint8) cooked[i]) > 127)) {
-                                        g_printerr(
-                                                   "Sending <%02x> "
-                                                   "to child.\n",
-                                                   cooked[i]);
-                                } else {
-                                        g_printerr(
-                                                   "Sending '%c' "
-                                                   "to child.\n",
-                                                   cooked[i]);
-                                }
-                        }
-                }
-                /* If we need to start waiting for the child pty to
-                 * become available for writing, set that up here. */
-                connect_pty_write();
-       }
+        /* If we need to start waiting for the child pty to
+         * become available for writing, set that up here. */
+        connect_pty_write();
 }
 
 /*
@@ -7818,7 +7877,6 @@ Terminal::Terminal(vte::platform::Widget* w,
                m_palette[i].sources[VTE_COLOR_SOURCE_ESCAPE].is_set = FALSE;
 
        /* Set up I/O encodings. */
-        g_assert_true(m_using_utf8);
         m_utf8_ambiguous_width = VTE_DEFAULT_UTF8_AMBIGUOUS_WIDTH;
        m_max_input_bytes = VTE_MAX_INPUT_READ;
        m_cursor_blink_tag = 0;
@@ -7826,11 +7884,6 @@ Terminal::Terminal(vte::platform::Widget* w,
        m_outgoing = _vte_byte_array_new();
         m_last_graphic_character = 0;
 
-#ifdef WITH_ICONV
-        m_incoming_leftover = _vte_byte_array_new();
-       m_conv_buffer = _vte_byte_array_new();
-#endif
-
        /* Setting the terminal type and size requires the PTY master to
         * be set up properly first. */
         set_size(VTE_COLUMNS, VTE_ROWS);
@@ -8171,19 +8224,6 @@ Terminal::~Terminal()
                }
        }
 
-#ifdef WITH_ICONV
-       /* Free conversion descriptors. */
-       if (m_incoming_conv != ((GIConv)-1)) {
-               g_iconv_close(m_incoming_conv);
-       }
-       if (m_outgoing_conv != ((GIConv)-1)) {
-               g_iconv_close(m_outgoing_conv);
-       }
-
-       _vte_byte_array_free(m_conv_buffer);
-        _vte_byte_array_free(m_incoming_leftover);
-#endif
-
         /* Stop listening for child-exited signals. */
         if (m_reaper) {
                 g_signal_handlers_disconnect_by_func(m_reaper,
@@ -9993,6 +10033,23 @@ Terminal::set_mouse_autohide(bool autohide)
         return true;
 }
 
+/* Resets the input decoder that belongs to the currently active data syntax. */
+void
+Terminal::reset_decoder()
+{
+        switch (data_syntax()) {
+        case DataSyntax::eECMA48_UTF8:
+                m_utf8_decoder.reset();
+                break;
+#ifdef WITH_ICU /* eECMA48_PCTERM and m_converter are only declared in ICU builds */
+        case DataSyntax::eECMA48_PCTERM:
+                m_converter->decoder().reset();
+                break;
+#endif /* WITH_ICU */
+        default: g_assert_not_reached();
+        }
+}
+
 /*
  * Terminal::reset:
  * @clear_tabstops: whether to reset tabstops
@@ -10019,17 +10076,11 @@ Terminal::reset(bool clear_tabstops,
 
        /* Clear the output buffer. */
        _vte_byte_array_clear(m_outgoing);
-       /* Reset charset substitution state. */
 
-        m_utf8_decoder.reset();
+       /* Reset charset substitution state. */
 
-#ifdef WITH_ICONV
-        if (m_incoming_conv != ((GIConv)-1)) {
-                /* Reset the converter state */
-                g_iconv(m_incoming_conv, nullptr, nullptr, nullptr, nullptr);
-        }
-        _vte_byte_array_clear(m_incoming_leftover);
-#endif
+        /* Reset decoder */
+        reset_decoder();
 
         /* Reset parser */
         m_parser.reset();
@@ -10136,7 +10187,7 @@ Terminal::unset_pty(bool notify_widget,
         }
         stop_processing(this);
 
-        m_utf8_decoder.reset(); // FIXMEchpe necessary here?
+        reset_decoder();
 
         /* Clear the outgoing buffer as well. */
         _vte_byte_array_clear(m_outgoing);
@@ -10164,7 +10215,7 @@ Terminal::set_pty(vte::base::Pty *new_pty,
 
         set_size(m_column_count, m_row_count);
 
-        if (!pty()->set_utf8(m_using_utf8))
+        if (!pty()->set_utf8(data_syntax() == DataSyntax::eECMA48_UTF8))
                 g_warning ("Failed to set UTF8 mode: %m\n");
 
         /* Open channels to listen for input on. */
diff --git a/src/vte/vtedeprecated.h b/src/vte/vtedeprecated.h
index 0b4b3c6a..6f6f56c9 100644
--- a/src/vte/vtedeprecated.h
+++ b/src/vte/vtedeprecated.h
@@ -154,6 +154,14 @@ void vte_terminal_feed_child_binary(VteTerminal *terminal,
                                     const guint8 *data,
                                     gsize length) _VTE_GNUC_NONNULL(1);
 
+_VTE_DEPRECATED
+_VTE_PUBLIC
+char **vte_get_encodings(gboolean include_aliases);
+
+_VTE_DEPRECATED
+_VTE_PUBLIC
+gboolean vte_get_encoding_supported(const char *encoding);
+
 G_END_DECLS
 
 #undef _VTE_DEPRECATED
diff --git a/src/vtegtk.cc b/src/vtegtk.cc
index 352e5c54..b51de4d3 100644
--- a/src/vtegtk.cc
+++ b/src/vtegtk.cc
@@ -62,6 +62,10 @@
 #include "vteaccess.h"
 #endif
 
+#ifdef WITH_ICU
+#include "icu-glue.hh"
+#endif
+
 #define I_(string) (g_intern_static_string(string))
 #define _VTE_PARAM_DEPRECATED (_vte_debug_on(VTE_DEBUG_SIGNALS) ? G_PARAM_DEPRECATED : 0)
 
@@ -1833,6 +1837,12 @@ vte_get_features (void)
                 "+GNUTLS"
 #else
                 "-GNUTLS"
+#endif
+                " "
+#ifdef WITH_ICU
+                "+ICU"
+#else
+                "-ICU"
 #endif
                 ;
 }
@@ -1929,6 +1939,61 @@ vte_set_test_flags(guint64 flags)
 #endif
 }
 
+/**
+ * vte_get_encodings:
+ * @include_aliases: whether to include alias names
+ *
+ * Retrieves the names of all supported legacy encodings.
+ *
+ * When ICU support is not compiled in, the returned vector is empty.
+ * UTF-8 is supported regardless of this list; to select it, pass
+ * %NULL to vte_terminal_set_encoding().
+ *
+ * Returns: (transfer full): the list of supported encodings; free with
+ *   g_strfreev()
+ *
+ * Since: 0.60
+ * Deprecated: 0.60
+ */
+char **
+vte_get_encodings(gboolean include_aliases)
+{
+#ifndef WITH_ICU
+        char *no_encodings[] = { nullptr };
+        return g_strdupv(no_encodings);
+#else
+        return vte::base::get_icu_charsets(include_aliases != FALSE);
+#endif
+}
+
+/**
+ * vte_get_encoding_supported:
+ * @encoding: the name of the legacy encoding
+ *
+ * Checks whether @encoding names a supported legacy encoding.
+ *
+ * Always returns %FALSE when ICU support is not compiled in.
+ *
+ * UTF-8 is supported regardless of the result; to select it, pass
+ * %NULL to vte_terminal_set_encoding().
+ *
+ * Returns: %TRUE iff the legacy encoding @encoding is supported
+ *
+ * Since: 0.60
+ * Deprecated: 0.60
+ */
+gboolean
+vte_get_encoding_supported(const char *encoding)
+{
+        g_return_val_if_fail(encoding != nullptr, false);
+
+        gboolean supported = false;
+#ifdef WITH_ICU
+        supported = vte::base::get_icu_charset_supported(encoding);
+#endif
+        return supported;
+}
+
 /* VteTerminal public API */
 
 /**
@@ -3863,7 +3928,7 @@ vte_terminal_get_encoding(VteTerminal *terminal)
 /**
  * vte_terminal_set_encoding:
  * @terminal: a #VteTerminal
- * @codeset: (allow-none): a valid #GIConv target, or %NULL to use UTF-8
+ * @codeset: (allow-none): target charset, or %NULL to use UTF-8
  * @error: (allow-none): return location for a #GError, or %NULL
  *
  * Changes the encoding the terminal will expect data from the child to
@@ -3890,13 +3955,9 @@ vte_terminal_set_encoding(VteTerminal *terminal,
         GObject *object = G_OBJECT(terminal);
         g_object_freeze_notify(object);
 
-        bool rv = IMPL(terminal)->set_encoding(codeset);
+        auto const rv = IMPL(terminal)->set_encoding(codeset, error);
         if (rv)
                 g_object_notify_by_pspec(object, pspecs[PROP_ENCODING]);
-        else
-                g_set_error(error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
-                            _("Unable to convert characters from %s to %s."),
-                            "UTF-8", codeset);
 
         g_object_thaw_notify(object);
         return rv;
diff --git a/src/vteinternal.hh b/src/vteinternal.hh
index f32add77..38e98e2a 100644
--- a/src/vteinternal.hh
+++ b/src/vteinternal.hh
@@ -63,6 +63,10 @@
 #include <variant>
 #include <vector>
 
+#ifdef WITH_ICU
+#include "icu-converter.hh"
+#endif
+
 typedef enum {
         VTE_REGEX_CURSOR_GDKCURSOR,
         VTE_REGEX_CURSOR_GDKCURSORTYPE,
@@ -372,8 +376,19 @@ public:
         std::queue<vte::base::Chunk::unique_type, std::list<vte::base::Chunk::unique_type>> m_incoming_queue;
 
         vte::base::UTF8Decoder m_utf8_decoder;
-        bool m_using_utf8{true};
-        const char *m_encoding;            /* the pty's encoding */
+
+        enum class DataSyntax { /* selects which decoder/converter handles terminal I/O */
+                eECMA48_UTF8, /* input is decoded with m_utf8_decoder */
+                #ifdef WITH_ICU
+                eECMA48_PCTERM, /* input is decoded with m_converter's ICU decoder */
+                #endif
+                /* eECMA48_ECMA35, not supported */
+        };
+
+        DataSyntax m_data_syntax{DataSyntax::eECMA48_UTF8};
+
+        auto data_syntax() const noexcept { return m_data_syntax; }
+
         int m_utf8_ambiguous_width;
         gunichar m_last_graphic_character; /* for REP */
         /* Array of dirty rectangles in view coordinates; need to
@@ -392,15 +407,21 @@ public:
        /* Output data queue. */
         VteByteArray *m_outgoing; /* pending input characters */
 
-#ifdef WITH_ICONV
+#ifdef WITH_ICU
         /* Legacy charset support */
-        GIConv m_incoming_conv{GIConv(-1)};
-        VteByteArray* m_incoming_leftover;
-        GIConv m_outgoing_conv{GIConv(-1)};
-        VteByteArray *m_conv_buffer;
+        std::unique_ptr<vte::base::ICUConverter> m_converter;
+#endif /* WITH_ICU */
 
-        void convert_incoming() noexcept;
-#endif
+        /* Returns the name of the charset selected by the current data syntax. */
+        char const* encoding() const noexcept
+        {
+                if (m_data_syntax == DataSyntax::eECMA48_UTF8) return "UTF-8";
+                #ifdef WITH_ICU
+                if (m_data_syntax == DataSyntax::eECMA48_PCTERM)
+                        return m_converter->charset().c_str();
+                #endif
+                g_assert_not_reached(); return nullptr;
+        }
 
        /* Screen data.  We support the normal screen, and an alternate
         * screen, which seems to be a DEC-specific feature. */
@@ -788,6 +809,10 @@ public:
         bool invalidate_dirty_rects_and_process_updates();
         void time_process_incoming();
         void process_incoming();
+        void process_incoming_utf8();
+        #ifdef WITH_ICU
+        void process_incoming_pcterm();
+        #endif
         bool process(bool emit_adj_changed);
         inline bool is_processing() const { return m_active_terminals_link != nullptr; }
         void start_processing();
@@ -964,6 +989,7 @@ public:
         void reset(bool clear_tabstops,
                    bool clear_history,
                    bool from_api = false);
+        void reset_decoder();
 
         void feed(char const* data,
                   gssize length,
@@ -1253,7 +1279,8 @@ public:
         bool set_delete_binding(VteEraseBinding binding);
         bool set_enable_bidi(bool setting);
         bool set_enable_shaping(bool setting);
-        bool set_encoding(char const* codeset);
+        bool set_encoding(char const* codeset,
+                          GError** error);
         bool set_font_desc(PangoFontDescription const* desc);
         bool set_font_scale(double scale);
         bool set_input_enabled(bool enabled);
diff --git a/src/widget.hh b/src/widget.hh
index e4b71869..5e3b3ac4 100644
--- a/src/widget.hh
+++ b/src/widget.hh
@@ -104,10 +104,7 @@ public:
         int hscroll_policy() const noexcept { return m_terminal->m_hscroll_policy; }
         int vscroll_policy() const noexcept { return m_terminal->m_vscroll_policy; }
 
-        char const* encoding() const noexcept
-        {
-                return m_terminal->m_encoding ? m_terminal->m_encoding : "UTF-8";
-        }
+        char const* encoding() const noexcept { return m_terminal->encoding(); } /* charset name as reported by the Terminal */
 
         void emit_child_exited(int status) noexcept;
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]