[vte] parser: Add some more charsets

From: Christian Persch <chpe src gnome org>
To: commits-list gnome org
Cc:
Subject: [vte] parser: Add some more charsets
Date: Thu, 12 Nov 2020 20:37:26 +0000 (UTC)
commit f044a720d4ba94806d65fade94dd01fbeea6bd5e
Author: Christian Persch <chpe src gnome org>
Date:   Thu Nov 12 21:37:16 2020 +0100

    parser: Add some more charsets

 src/parser-charset-tables.hh | 17 +++++++++++++++++
 src/parser-charset.hh        |  6 ++++++
 src/parser-test.cc           |  8 +++++++-
 src/parser.cc                |  6 ++++++
 4 files changed, 36 insertions(+), 1 deletion(-)
---
diff --git a/src/parser-charset-tables.hh b/src/parser-charset-tables.hh
index 1d079f7e..5e1ac218 100644
--- a/src/parser-charset-tables.hh
+++ b/src/parser-charset-tables.hh
@@ -167,6 +167,23 @@ static uint8_t const charset_graphic_94_n[] = {
         NA, NA, NA, NA, NA, NA, EMPTY,
 };
 
+/* Multibyte graphic character sets, with third intermediate byte 2/1:
+ * G0: ESC 2/4 2/8 2/1 F
+ * G1: ESC 2/4 2/9 2/1 F
+ * G2: ESC 2/4 2/10 2/1 F
+ * G3: ESC 2/4 2/11 2/1 F
+ * C0: -
+ * C1: -
+ *
+ * Note that these are not registered in ISO-IR.
+ *
+ * [Source: ecma35lib/ecma35/data/graphdata.py]
+ */
+static uint8_t const charset_graphic_94_n_with_2_1[] = {
+        /* 3/0..3/15 */
+        NA, VTE_CHARSET_EUCTW_G2, VTE_CHARSET_HKCS_EXT, VTE_CHARSET_MS_950_UTC_EXT
+};
+
 /* C0 control character sets:
  * G0: -
  * G1: -
diff --git a/src/parser-charset.hh b/src/parser-charset.hh
index 740beeee..75653d6c 100644
--- a/src/parser-charset.hh
+++ b/src/parser-charset.hh
@@ -295,6 +295,12 @@ ALIAS(SUPPLEMENTAL_NAME(LATIN_2), IR_NAME(101))
 ALIAS(SUPPLEMENTAL_NAME(LATIN_5), IR_NAME(148))
 ALIAS(SUPPLEMENTAL_NAME(LATIN_CYRILLIC), IR_NAME(144))
 
+/* Multi-byte charsets not registered in ISO IR */
+
+_VTE_CHARSET(EUCTW_G2) /* 4-byte */
+_VTE_CHARSET(HKCS_EXT)
+_VTE_CHARSET(MS_950_UTC_EXT)
+
 /* Other coding systems */
 
 DEC(HPPCL) /* DEC HPPCL emulation mode on DEC LJ250; ESC 2/5 3/8 */
diff --git a/src/parser-test.cc b/src/parser-test.cc
index 2863da0c..59c4ec4c 100644
--- a/src/parser-test.cc
+++ b/src/parser-test.cc
@@ -507,8 +507,14 @@ test_seq_esc_charset_94_n(void)
                 test_seq_esc_charset(i, 3, nullptr, 0, 0,
                                      VTE_CMD_GnDMm, VTE_CHARSET_DRCS, slot);
 
+                i[2] = 0x21;
+                test_seq_esc_charset(i, 3,
+                                     charset_graphic_94_n_with_2_1,
+                                     G_N_ELEMENTS(charset_graphic_94_n_with_2_1),
+                                     0x30, VTE_CMD_GnDMm, VTE_CHARSET_NONE, slot);
+
                 /* There could be one more intermediate byte. */
-                for (i[2] = 0x21; i[2] < 0x28; i[2]++) {
+                for (i[2] = 0x22; i[2] < 0x28; i[2]++) {
                         if (i[2] == 0x24) /* TODO */
                                 continue;
 
diff --git a/src/parser.cc b/src/parser.cc
index c81144b6..0b424d3b 100644
--- a/src/parser.cc
+++ b/src/parser.cc
@@ -237,6 +237,12 @@ vte_parse_charset_94_n(uint32_t raw,
 
         case VTE_SEQ_INTERMEDIATE_SPACE:
                 return VTE_CHARSET_DRCS;
+
+        case VTE_SEQ_INTERMEDIATE_BANG:
+                if (remaining_intermediates == 0 &&
+                    raw < (0x30 + G_N_ELEMENTS(charset_graphic_94_n_with_2_1)))
+                        return charset_graphic_94_n_with_2_1[raw - 0x30];
+                break;
         }
 
         return VTE_CHARSET_NONE;
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]