[evolution-data-server/gnome-3-32] I#105 - Invalid UTF-8 letters in a mail body get lost
- From: Milan Crha <mcrha src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [evolution-data-server/gnome-3-32] I#105 - Invalid UTF-8 letters in a mail body get lost
- Date: Tue, 23 Apr 2019 13:14:10 +0000 (UTC)
commit 974fda135e834dd2d35a7864fb236e16ea14ac2a
Author: Milan Crha <mcrha redhat com>
Date: Tue Apr 23 15:15:46 2019 +0200
I#105 - Invalid UTF-8 letters in a mail body get lost
Closes https://gitlab.gnome.org/GNOME/evolution-data-server/issues/105
src/camel/camel-mime-filter-tohtml.c | 59 ++++++++++++++++++------------
src/camel/camel-utf8.c | 65 +++++++++++++++++++++++++++++----
src/camel/camel-utf8.h | 2 +
src/libedataserver/e-data-server-util.c | 3 +-
4 files changed, 97 insertions(+), 32 deletions(-)
---
diff --git a/src/camel/camel-mime-filter-tohtml.c b/src/camel/camel-mime-filter-tohtml.c
index b9fdfd12a..cbdfbb6ad 100644
--- a/src/camel/camel-mime-filter-tohtml.c
+++ b/src/camel/camel-mime-filter-tohtml.c
@@ -164,16 +164,31 @@ exit:
static gchar *
writeln (CamelMimeFilter *mime_filter,
- const guchar *in,
- const guchar *inend,
+ const gchar *in_anycharset,
+ const gchar *inend_char,
gchar *outptr,
gchar **outend)
{
CamelMimeFilterToHTMLPrivate *priv;
- const guchar *inptr = in;
+ const guchar *inptr, *inend, *inbegin;
+ gchar *in_utf8 = NULL;
priv = CAMEL_MIME_FILTER_TOHTML_GET_PRIVATE (mime_filter);
+ if (!g_utf8_validate (in_anycharset, inend_char - in_anycharset, NULL)) {
+ in_utf8 = camel_utf8_make_valid_len (in_anycharset, inend_char - in_anycharset);
+
+ if (!in_utf8)
+ return outptr;
+
+ inptr = (const guchar *) in_utf8;
+ inend = inptr + strlen (in_utf8);
+ } else {
+ inptr = (const guchar *) in_anycharset;
+ inend = (const guchar *) inend_char;
+ }
+
+ inbegin = inptr;
while (inptr < inend) {
guint32 u;
@@ -216,7 +231,7 @@ writeln (CamelMimeFilter *mime_filter,
/* falls through */
case ' ':
if (priv->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES
- && ((inptr == (in + 1) || (inptr < inend && (*inptr == ' ' || *inptr == '\t'))))) {
+ && ((inptr == (inbegin + 1) || (inptr < inend && (*inptr == ' ' || *inptr == '\t'))))) {
outptr = g_stpcpy (outptr, "&nbsp;");
priv->column++;
break;
@@ -228,7 +243,7 @@ writeln (CamelMimeFilter *mime_filter,
only if not converting the new-line breaks */
if (!(priv->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_NL))
*outptr++ = u;
- } else if (u >= 20 && u <0x80) {
+ } else if (u >= 0x20 && u < 0x80) {
*outptr++ = u;
} else {
if (priv->flags & CAMEL_MIME_FILTER_TOHTML_ESCAPE_8BIT)
@@ -241,6 +256,8 @@ writeln (CamelMimeFilter *mime_filter,
}
}
+ g_free (in_utf8);
+
return outptr;
}
@@ -375,9 +392,8 @@ html_convert (CamelMimeFilter *mime_filter,
/* write out anything before the first regex match */
outptr = writeln (
mime_filter,
- (const guchar *) start,
- (const guchar *) start +
- match.um_so,
+ start,
+ start + match.um_so,
outptr, &outend);
start += match.um_so;
@@ -390,15 +406,13 @@ html_convert (CamelMimeFilter *mime_filter,
/* prefix shouldn't need escaping, but let's be safe */
outptr = writeln (
mime_filter,
- (const guchar *) match.prefix,
- (const guchar *) match.prefix +
- strlen (match.prefix),
+ match.prefix,
+ match.prefix + strlen (match.prefix),
outptr, &outend);
outptr = writeln (
mime_filter,
- (const guchar *) start,
- (const guchar *) start +
- matchlen,
+ start,
+ start + matchlen,
outptr, &outend);
outptr = append_string_verbatim (
mime_filter, "\">",
@@ -407,9 +421,8 @@ html_convert (CamelMimeFilter *mime_filter,
/* now write the matched string */
outptr = writeln (
mime_filter,
- (const guchar *) start,
- (const guchar *) start +
- matchlen,
+ start,
+ start + matchlen,
outptr, &outend);
priv->column += matchlen;
start += matchlen;
@@ -423,8 +436,8 @@ html_convert (CamelMimeFilter *mime_filter,
/* nothing matched so write out the remainder of this line buffer */
outptr = writeln (
mime_filter,
- (const guchar *) start,
- (const guchar *) start + len,
+ start,
+ start + len,
outptr, &outend);
break;
}
@@ -432,8 +445,8 @@ html_convert (CamelMimeFilter *mime_filter,
} else {
outptr = writeln (
mime_filter,
- (const guchar *) start,
- (const guchar *) inptr,
+ start,
+ inptr,
outptr, &outend);
}
@@ -459,8 +472,8 @@ html_convert (CamelMimeFilter *mime_filter,
if (start < inend)
outptr = writeln (
mime_filter,
- (const guchar *) start,
- (const guchar *) inend,
+ start,
+ inend,
outptr, &outend);
while (priv->blockquote_depth > 0) {
diff --git a/src/camel/camel-utf8.c b/src/camel/camel-utf8.c
index fd1ba91f2..1d17fe96e 100644
--- a/src/camel/camel-utf8.c
+++ b/src/camel/camel-utf8.c
@@ -403,16 +403,65 @@ camel_ucs2_utf8 (const gchar *ptr)
gchar *
camel_utf8_make_valid (const gchar *text)
{
- gchar *res = g_strdup (text), *p;
+ return camel_utf8_make_valid_len (text, -1);
+}
+
+/**
+ * camel_utf8_make_valid_len:
+ * @text: a text to make valid
+ * @text_len: length of the @text, or -1 if NUL-terminated
+ *
+ * Ensures the returned text will be valid UTF-8 string, with incorrect letters
+ * changed to question marks.
+ *
+ * Returns: (transfer full): Valid UTF-8 string, with replaced incorrect letters.
+ * Free it with g_free(), when no longer needed.
+ *
+ * Since: 3.32.2
+ **/
+gchar *
+camel_utf8_make_valid_len (const gchar *text,
+ gssize text_len)
+{
+ /* almost identical copy of glib's _g_utf8_make_valid() */
+ GString *string;
+ const gchar *remainder, *invalid;
+ gint remaining_bytes, valid_bytes;
+
+ if (text && text_len < 0)
+ text_len = strlen (text);
+
+ if (!text || text_len <= 0 || !*text)
+ return g_strdup (text);
+
+ string = NULL;
+ remainder = (gchar *) text,
+ remaining_bytes = text_len;
+
+ while (remaining_bytes != 0) {
+ if (g_utf8_validate (remainder, remaining_bytes, &invalid))
+ break;
+
+ valid_bytes = invalid - remainder;
+
+ if (!string)
+ string = g_string_sized_new (remaining_bytes);
- if (!res)
- return res;
+ g_string_append_len (string, remainder, valid_bytes);
+ /* append U+FFFD REPLACEMENT CHARACTER */
+ g_string_append (string, "\357\277\275");
- p = res;
- while (!g_utf8_validate (p, -1, (const gchar **) &p)) {
- /* make all invalid characters appear as question marks */
- *p = '?';
+ remaining_bytes -= valid_bytes + 1;
+ remainder = invalid + 1;
}
- return res;
+ if (!string)
+ return g_strndup (text, text_len);
+
+ if (remaining_bytes > 0)
+ g_string_append_len (string, remainder, remaining_bytes);
+
+ g_warn_if_fail (g_utf8_validate (string->str, -1, NULL));
+
+ return g_string_free (string, FALSE);
}
diff --git a/src/camel/camel-utf8.h b/src/camel/camel-utf8.h
index 48c5e86f5..e600f7292 100644
--- a/src/camel/camel-utf8.h
+++ b/src/camel/camel-utf8.h
@@ -42,6 +42,8 @@ gchar *camel_ucs2_utf8 (const gchar *ptr);
/* make valid utf8 string */
gchar *camel_utf8_make_valid (const gchar *text);
+gchar *camel_utf8_make_valid_len (const gchar *text,
+ gssize text_len);
G_END_DECLS
diff --git a/src/libedataserver/e-data-server-util.c b/src/libedataserver/e-data-server-util.c
index 4d85234c6..88aa696d4 100644
--- a/src/libedataserver/e-data-server-util.c
+++ b/src/libedataserver/e-data-server-util.c
@@ -650,7 +650,8 @@ e_util_utf8_data_make_valid (const gchar *data,
if (string == NULL)
return g_strndup ((gchar *) data, data_bytes);
- g_string_append (string, remainder);
+ if (remaining_bytes > 0)
+ g_string_append_len (string, remainder, remaining_bytes);
g_warn_if_fail (g_utf8_validate (string->str, -1, NULL));
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]