[evolution-data-server] I#300 - Camel: Conversion between UTF-8 and UTF-7 misbehaves for emoji



commit 1a37ef016eefc71e1094964cb90a5201b57f01b5
Author: Milan Crha <mcrha redhat com>
Date:   Thu Feb 11 19:23:34 2021 +0100

    I#300 - Camel: Conversion between UTF-8 and UTF-7 misbehaves for emoji
    
    Convert to/from UTF-16 and operate on its 16-bit code units, instead of
    on Unicode code points, which can be wider than 16 bits, as emoji are.
    
    Closes https://gitlab.gnome.org/GNOME/evolution-data-server/-/issues/300

 src/camel/camel-utf8.c      | 52 +++++++++++++++++++++++++++++++++------------
 src/camel/tests/misc/utf7.c |  8 +++++++
 2 files changed, 47 insertions(+), 13 deletions(-)
---
diff --git a/src/camel/camel-utf8.c b/src/camel/camel-utf8.c
index bd80f2f1b..2f62554e5 100644
--- a/src/camel/camel-utf8.c
+++ b/src/camel/camel-utf8.c
@@ -203,24 +203,33 @@ camel_utf7_utf8 (const gchar *ptr)
        const guchar *p = (guchar *) ptr;
        guint c;
        guint32 v = 0, x;
-       GString *out;
        gint i = 0;
        gint state = 0;
        gchar *ret;
+       gunichar2 *utf16, *pos;
+       gsize block_size;
+
+       g_return_val_if_fail (ptr != NULL, NULL);
+
+       block_size = sizeof (gunichar2) * (1 + strlen (ptr));
+       utf16 = g_slice_alloc (block_size);
+       pos = utf16;
 
-       out = g_string_new ("");
        do {
                c = *p++;
                switch (state) {
                case 0:
-                       if (c == '&')
+                       if (c == '&') {
                                state = 1;
-                       else
-                               g_string_append_c (out, c);
+                       } else {
+                               *pos = c;
+                               pos++;
+                       }
                        break;
                case 1:
                        if (c == '-') {
-                               g_string_append_c (out, '&');
+                               *pos = '&';
+                               pos++;
                                state = 0;
                        } else if (utf7_rank[c] != 0xff) {
                                v = utf7_rank[c];
@@ -228,7 +237,10 @@ camel_utf7_utf8 (const gchar *ptr)
                                state = 2;
                        } else {
                                /* invalid */
-                               g_string_append (out, "&-");
+                               *pos = '&';
+                               pos++;
+                               *pos = '-';
+                               pos++;
                                state = 0;
                        }
                        break;
@@ -240,19 +252,22 @@ camel_utf7_utf8 (const gchar *ptr)
                                i+=6;
                                if (i >= 16) {
                                        x = (v >> (i - 16)) & 0xffff;
-                                       g_string_append_unichar (out, x);
+                                       *pos = x;
+                                       pos++;
                                        i-=16;
                                }
                        } else {
-                               g_string_append_unichar (out, c);
+                               *pos = c;
+                               pos++;
                                state = 0;
                        }
                        break;
                }
        } while (c);
 
-       ret = g_strdup (out->str);
-       g_string_free (out, TRUE);
+       ret = g_utf16_to_utf8 (utf16, -1, NULL, NULL, NULL);
+
+       g_slice_free1 (block_size, utf16);
 
        return ret;
 }
@@ -282,7 +297,8 @@ static void utf7_closeb64 (GString *out, guint32 v, guint32 i)
 gchar *
 camel_utf8_utf7 (const gchar *ptr)
 {
-       const guchar *p = (guchar *) ptr;
+       gunichar2 *utf16, *up;
+       const guchar *cp = (guchar *) ptr;
        guint c;
        guint32 x, v = 0;
        gint state = 0;
@@ -290,9 +306,17 @@ camel_utf8_utf7 (const gchar *ptr)
        gint i = 0;
        gchar *ret;
 
+       g_return_val_if_fail (ptr != NULL, NULL);
+
+       utf16 = g_utf8_to_utf16 (ptr, -1, NULL, NULL, NULL);
+       up = utf16;
+
        out = g_string_new ("");
 
-       while ((c = camel_utf8_getc (&p))) {
+       while ((c = utf16 ? *up : camel_utf8_getc (&cp))) {
+               if (utf16)
+                       up++;
+
                if (c >= 0x20 && c <= 0x7e) {
                        if (state == 1) {
                                utf7_closeb64 (out, v, i);
@@ -307,6 +331,7 @@ camel_utf8_utf7 (const gchar *ptr)
                        if (state == 0) {
                                g_string_append_c (out, '&');
                                state = 1;
+                               v = 0;
                        }
                        v = (v << 16) | c;
                        i += 16;
@@ -323,6 +348,7 @@ camel_utf8_utf7 (const gchar *ptr)
 
        ret = g_strdup (out->str);
        g_string_free (out, TRUE);
+       g_free (utf16);
 
        return ret;
 }
diff --git a/src/camel/tests/misc/utf7.c b/src/camel/tests/misc/utf7.c
index 567de2f7f..1daf59d35 100644
--- a/src/camel/tests/misc/utf7.c
+++ b/src/camel/tests/misc/utf7.c
@@ -51,6 +51,12 @@ static struct {
        { "\"The sayings of Confucius,\" James R. Ware, trans.  
\xe5\x8f\xb0\xe5\x8c\x97:\xe6\x96\x87\xe8\x87\xb4\xe5\x87\xba\xe7\x89\x88\xe7\xa4\xbe, 1980.  (Chinese text 
with English translation)\xe5\x9b\x9b\xe6\x9b\xb8\xe4\xba\x94\xe7\xb6\x93, 
\xe5\xae\x8b\xe5\x85\x83\xe4\xba\xba\xe6\xb3\xa8, \xe5\x8c\x97\xe4\xba\xac:  
\xe4\xb8\xad\xe5\x9c\x8b\xe6\x9b\xb8\xe5\xba\x97, 1990.",
          "\"The sayings of Confucius,\" James R. Ware, trans.  &U,BTFw-:&ZYeB9FH6ckh5Pg-, 1980.  (Chinese 
text with English translation)&Vttm+E6UfZM-, &W4tRQ066bOg-, &UxdOrA-:  &Ti1XC2b4Xpc-, 1990.",
          {  0x0022, 0x0054, 0x0068, 0x0065, 0x0020, 0x0073, 0x0061, 0x0079, 0x0069, 0x006e, 0x0067, 0x0073, 
0x0020, 0x006f, 0x0066, 0x0020, 0x0043, 0x006f, 0x006e, 0x0066, 0x0075, 0x0063, 0x0069, 0x0075, 0x0073, 
0x002c, 0x0022, 0x0020, 0x004a, 0x0061, 0x006d, 0x0065, 0x0073, 0x0020, 0x0052, 0x002e, 0x0020, 0x0057, 
0x0061, 0x0072, 0x0065, 0x002c, 0x0020, 0x0074, 0x0072, 0x0061, 0x006e, 0x0073, 0x002e, 0x0020, 0x0020, 
0x53f0, 0x5317, 0x003a, 0x6587, 0x81f4, 0x51fa, 0x7248, 0x793e, 0x002c, 0x0020, 0x0031, 0x0039, 0x0038, 
0x0030, 0x002e, 0x0020, 0x0020, 0x0028, 0x0043, 0x0068, 0x0069, 0x006e, 0x0065, 0x0073, 0x0065, 0x0020, 
0x0074, 0x0065, 0x0078, 0x0074, 0x0020, 0x0077, 0x0069, 0x0074, 0x0068, 0x0020, 0x0045, 0x006e, 0x0067, 
0x006c, 0x0069, 0x0073, 0x0068, 0x0020, 0x0074, 0x0072, 0x0061, 0x006e, 0x0073, 0x006c, 0x0061, 0x0074, 
0x0069, 0x006f, 0x006e, 0x0029, 0x56db, 0x66f8, 0x4e94, 0x7d93, 0x002c, 0x0020, 0x5b8b, 0x5143, 0x4eba, 
0x6ce8, 0x002c, 0x0020, 0x5317, 0x4eac, 0x003a, 0x0020, 0x0020,
  0x4e2d, 0x570b, 0x66f8, 0x5e97, 0x002c, 0x0020, 0x0031, 0x0039, 0x0039, 0x0030, 0x002e, } },
+       { "a\xf0\x9f\x98\x8b" "o", "a&2D3eCw-o",
+         {  0x0061, 0x1f60b, 0x006f, } },
+       { "R\xc3\xa4" "s\xc3\xb6" "r\xc3\xa5" "s", "R&AOQ-s&APY-r&AOU-s",
+         {  0x0052, 0x00e4, 0x0073, 0x00f6, 0x0072, 0x00e5, 0x0073, } },
+       { "\xf0\x9f\x93\xb0\xf0\x9f\x98\x8e\xef\xb8\x8f\xf0\x9f\x98\x8b\xef\xb8\x8f", 
"&2D3c8Ng93g7+D9g93gv+Dw-",
+         {  0x1f4f0, 0x1f60e, 0xfe0f, 0x1f60b, 0xfe0f, } }
 };
 
 gint
@@ -114,5 +120,7 @@ main (gint argc,
 
        camel_test_end ();
 
+       g_string_free (out, TRUE);
+
        return 0;
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]