[evolution-data-server] I#300 - Camel: Conversion between UTF-8 and UTF-7 misbehaves for emoji
- From: Milan Crha <mcrha src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [evolution-data-server] I#300 - Camel: Conversion between UTF-8 and UTF-7 misbehaves for emoji
- Date: Thu, 11 Feb 2021 18:25:27 +0000 (UTC)
commit 1a37ef016eefc71e1094964cb90a5201b57f01b5
Author: Milan Crha <mcrha redhat com>
Date: Thu Feb 11 19:23:34 2021 +0100
I#300 - Camel: Conversion between UTF-8 and UTF-7 misbehaves for emoji
Convert to/from UTF-16 and operate on its 16-bit code units instead of on
Unicode code points, which can be wider than 16 bits, as emoji are.
Closes https://gitlab.gnome.org/GNOME/evolution-data-server/-/issues/300
src/camel/camel-utf8.c | 52 +++++++++++++++++++++++++++++++++------------
src/camel/tests/misc/utf7.c | 8 +++++++
2 files changed, 47 insertions(+), 13 deletions(-)
---
diff --git a/src/camel/camel-utf8.c b/src/camel/camel-utf8.c
index bd80f2f1b..2f62554e5 100644
--- a/src/camel/camel-utf8.c
+++ b/src/camel/camel-utf8.c
@@ -203,24 +203,33 @@ camel_utf7_utf8 (const gchar *ptr)
const guchar *p = (guchar *) ptr;
guint c;
guint32 v = 0, x;
- GString *out;
gint i = 0;
gint state = 0;
gchar *ret;
+ gunichar2 *utf16, *pos;
+ gsize block_size;
+
+ g_return_val_if_fail (ptr != NULL, NULL);
+
+ block_size = sizeof (gunichar2) * (1 + strlen (ptr));
+ utf16 = g_slice_alloc (block_size);
+ pos = utf16;
- out = g_string_new ("");
do {
c = *p++;
switch (state) {
case 0:
- if (c == '&')
+ if (c == '&') {
state = 1;
- else
- g_string_append_c (out, c);
+ } else {
+ *pos = c;
+ pos++;
+ }
break;
case 1:
if (c == '-') {
- g_string_append_c (out, '&');
+ *pos = '&';
+ pos++;
state = 0;
} else if (utf7_rank[c] != 0xff) {
v = utf7_rank[c];
@@ -228,7 +237,10 @@ camel_utf7_utf8 (const gchar *ptr)
state = 2;
} else {
/* invalid */
- g_string_append (out, "&-");
+ *pos = '&';
+ pos++;
+ *pos = '-';
+ pos++;
state = 0;
}
break;
@@ -240,19 +252,22 @@ camel_utf7_utf8 (const gchar *ptr)
i+=6;
if (i >= 16) {
x = (v >> (i - 16)) & 0xffff;
- g_string_append_unichar (out, x);
+ *pos = x;
+ pos++;
i-=16;
}
} else {
- g_string_append_unichar (out, c);
+ *pos = c;
+ pos++;
state = 0;
}
break;
}
} while (c);
- ret = g_strdup (out->str);
- g_string_free (out, TRUE);
+ ret = g_utf16_to_utf8 (utf16, -1, NULL, NULL, NULL);
+
+ g_slice_free1 (block_size, utf16);
return ret;
}
@@ -282,7 +297,8 @@ static void utf7_closeb64 (GString *out, guint32 v, guint32 i)
gchar *
camel_utf8_utf7 (const gchar *ptr)
{
- const guchar *p = (guchar *) ptr;
+ gunichar2 *utf16, *up;
+ const guchar *cp = (guchar *) ptr;
guint c;
guint32 x, v = 0;
gint state = 0;
@@ -290,9 +306,17 @@ camel_utf8_utf7 (const gchar *ptr)
gint i = 0;
gchar *ret;
+ g_return_val_if_fail (ptr != NULL, NULL);
+
+ utf16 = g_utf8_to_utf16 (ptr, -1, NULL, NULL, NULL);
+ up = utf16;
+
out = g_string_new ("");
- while ((c = camel_utf8_getc (&p))) {
+ while ((c = utf16 ? *up : camel_utf8_getc (&cp))) {
+ if (utf16)
+ up++;
+
if (c >= 0x20 && c <= 0x7e) {
if (state == 1) {
utf7_closeb64 (out, v, i);
@@ -307,6 +331,7 @@ camel_utf8_utf7 (const gchar *ptr)
if (state == 0) {
g_string_append_c (out, '&');
state = 1;
+ v = 0;
}
v = (v << 16) | c;
i += 16;
@@ -323,6 +348,7 @@ camel_utf8_utf7 (const gchar *ptr)
ret = g_strdup (out->str);
g_string_free (out, TRUE);
+ g_free (utf16);
return ret;
}
diff --git a/src/camel/tests/misc/utf7.c b/src/camel/tests/misc/utf7.c
index 567de2f7f..1daf59d35 100644
--- a/src/camel/tests/misc/utf7.c
+++ b/src/camel/tests/misc/utf7.c
@@ -51,6 +51,12 @@ static struct {
{ "\"The sayings of Confucius,\" James R. Ware, trans.
\xe5\x8f\xb0\xe5\x8c\x97:\xe6\x96\x87\xe8\x87\xb4\xe5\x87\xba\xe7\x89\x88\xe7\xa4\xbe, 1980. (Chinese text
with English translation)\xe5\x9b\x9b\xe6\x9b\xb8\xe4\xba\x94\xe7\xb6\x93,
\xe5\xae\x8b\xe5\x85\x83\xe4\xba\xba\xe6\xb3\xa8, \xe5\x8c\x97\xe4\xba\xac:
\xe4\xb8\xad\xe5\x9c\x8b\xe6\x9b\xb8\xe5\xba\x97, 1990.",
"\"The sayings of Confucius,\" James R. Ware, trans. &U,BTFw-:&ZYeB9FH6ckh5Pg-, 1980. (Chinese
text with English translation)&Vttm+E6UfZM-, &W4tRQ066bOg-, &UxdOrA-: &Ti1XC2b4Xpc-, 1990.",
{ 0x0022, 0x0054, 0x0068, 0x0065, 0x0020, 0x0073, 0x0061, 0x0079, 0x0069, 0x006e, 0x0067, 0x0073,
0x0020, 0x006f, 0x0066, 0x0020, 0x0043, 0x006f, 0x006e, 0x0066, 0x0075, 0x0063, 0x0069, 0x0075, 0x0073,
0x002c, 0x0022, 0x0020, 0x004a, 0x0061, 0x006d, 0x0065, 0x0073, 0x0020, 0x0052, 0x002e, 0x0020, 0x0057,
0x0061, 0x0072, 0x0065, 0x002c, 0x0020, 0x0074, 0x0072, 0x0061, 0x006e, 0x0073, 0x002e, 0x0020, 0x0020,
0x53f0, 0x5317, 0x003a, 0x6587, 0x81f4, 0x51fa, 0x7248, 0x793e, 0x002c, 0x0020, 0x0031, 0x0039, 0x0038,
0x0030, 0x002e, 0x0020, 0x0020, 0x0028, 0x0043, 0x0068, 0x0069, 0x006e, 0x0065, 0x0073, 0x0065, 0x0020,
0x0074, 0x0065, 0x0078, 0x0074, 0x0020, 0x0077, 0x0069, 0x0074, 0x0068, 0x0020, 0x0045, 0x006e, 0x0067,
0x006c, 0x0069, 0x0073, 0x0068, 0x0020, 0x0074, 0x0072, 0x0061, 0x006e, 0x0073, 0x006c, 0x0061, 0x0074,
0x0069, 0x006f, 0x006e, 0x0029, 0x56db, 0x66f8, 0x4e94, 0x7d93, 0x002c, 0x0020, 0x5b8b, 0x5143, 0x4eba,
0x6ce8, 0x002c, 0x0020, 0x5317, 0x4eac, 0x003a, 0x0020, 0x0020,
0x4e2d, 0x570b, 0x66f8, 0x5e97, 0x002c, 0x0020, 0x0031, 0x0039, 0x0039, 0x0030, 0x002e, } },
+ { "a\xf0\x9f\x98\x8b" "o", "a&2D3eCw-o",
+ { 0x0061, 0x1f60b, 0x006f, } },
+ { "R\xc3\xa4" "s\xc3\xb6" "r\xc3\xa5" "s", "R&AOQ-s&APY-r&AOU-s",
+ { 0x0052, 0x00e4, 0x0073, 0x00f6, 0x0072, 0x00e5, 0x0073, } },
+ { "\xf0\x9f\x93\xb0\xf0\x9f\x98\x8e\xef\xb8\x8f\xf0\x9f\x98\x8b\xef\xb8\x8f",
"&2D3c8Ng93g7+D9g93gv+Dw-",
+ { 0x1f4f0, 0x1f60e, 0xfe0f, 0x1f60b, 0xfe0f, } }
};
gint
@@ -114,5 +120,7 @@ main (gint argc,
camel_test_end ();
+ g_string_free (out, TRUE);
+
return 0;
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]