[evolution/gnome-3-36] I#884 - Composer: Text split for wrapping can produce invalid UTF-8 text
- From: Milan Crha <mcrha src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [evolution/gnome-3-36] I#884 - Composer: Text split for wrapping can produce invalid UTF-8 text
- Date: Thu, 30 Apr 2020 15:33:20 +0000 (UTC)
commit 147065763109aebb5c0067e6756310401edd20ce
Author: Milan Crha <mcrha redhat com>
Date: Thu Apr 30 17:36:13 2020 +0200
I#884 - Composer: Text split for wrapping can produce invalid UTF-8 text
Closes https://gitlab.gnome.org/GNOME/evolution/-/issues/884
src/e-util/test-html-editor-units-bugs.c | 40 +++++
.../web-extension/e-editor-dom-functions.c | 175 ++++++++++++++++-----
.../web-extension/e-editor-web-extension.c | 9 ++
3 files changed, 184 insertions(+), 40 deletions(-)
---
diff --git a/src/e-util/test-html-editor-units-bugs.c b/src/e-util/test-html-editor-units-bugs.c
index ae6901e20b..0b34653ab2 100644
--- a/src/e-util/test-html-editor-units-bugs.c
+++ b/src/e-util/test-html-editor-units-bugs.c
@@ -1480,6 +1480,45 @@ test_issue_107 (TestFixture *fixture)
}
}
+static void
+test_issue_884 (TestFixture *fixture)
+{
+ if (!test_utils_process_commands (fixture,
+ "mode:plain\n")) {
+ g_test_fail ();
+ return;
+ }
+
+ test_utils_insert_content (fixture,
+ "<div>Xxxxx'x \"Xxxx 🡒 Xxxxxxxx 🡒 Xxxx Xxxxxxxxxx 🡒 Xxxxxxxx xxx xxxxxxxxxx xxxxxxxx\" xxxxxxx xxxxx xxxx? Xx xxx, xxxx xx xxxxxxx?</div>"
+ "<div><br></div>"
+ "<div>123456789 123456789 123456789 123456789 123456789 123456789 123456789 123</div>"
+ "<div><br></div>"
+ "<div>🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈</div>",
+ E_CONTENT_EDITOR_INSERT_REPLACE_ALL | E_CONTENT_EDITOR_INSERT_TEXT_HTML);
+
+ if (!test_utils_run_simple_test (fixture,
+ "",
+ HTML_PREFIX
+ "<div style=\"width: 71ch;\">Xxxxx'x \"Xxxx 🡒 Xxxxxxxx 🡒 Xxxx Xxxxxxxxxx 🡒 Xxxxxxxx xxx xxxxxxxxxx<br>xxxxxxxx\" xxxxxxx xxxxx xxxx? Xx xxx, xxxx xx xxxxxxx?</div>"
+ "<div style=\"width: 71ch;\"><br></div>"
+ "<div style=\"width: 71ch;\">123456789 123456789 123456789 123456789 123456789 123456789 123456789<br>123</div>"
+ "<div style=\"width: 71ch;\"><br></div>"
+ "<div style=\"width: 71ch;\">🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈<br>🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈<br>🐈🐈🐈🐈</div>"
+ HTML_SUFFIX,
+ "Xxxxx'x \"Xxxx 🡒 Xxxxxxxx 🡒 Xxxx Xxxxxxxxxx 🡒 Xxxxxxxx xxx xxxxxxxxxx\n"
+ "xxxxxxxx\" xxxxxxx xxxxx xxxx? Xx xxx, xxxx xx xxxxxxx?\n"
+ "\n"
+ "123456789 123456789 123456789 123456789 123456789 123456789 123456789\n"
+ "123\n"
+ "\n"
+ "🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈\n"
+ "🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈🐈\n"
+ "🐈🐈🐈🐈")) {
+ g_test_fail ();
+ }
+}
+
void
test_add_html_editor_bug_tests (void)
{
@@ -1513,4 +1552,5 @@ test_add_html_editor_bug_tests (void)
test_utils_add_test ("/issue/103", test_issue_103);
test_utils_add_test ("/issue/104", test_issue_104);
test_utils_add_test ("/issue/107", test_issue_107);
+ test_utils_add_test ("/issue/884", test_issue_884);
}
diff --git a/src/modules/webkit-editor/web-extension/e-editor-dom-functions.c b/src/modules/webkit-editor/web-extension/e-editor-dom-functions.c
index 35cab8cc10..48b4fe9be0 100644
--- a/src/modules/webkit-editor/web-extension/e-editor-dom-functions.c
+++ b/src/modules/webkit-editor/web-extension/e-editor-dom-functions.c
@@ -1474,6 +1474,59 @@ remove_thunderbird_signature (WebKitDOMDocument *document)
remove_node (WEBKIT_DOM_NODE (signature));
}
+static WebKitDOMText *
+safe_node_split_text (WebKitDOMDocument *document,
+ WebKitDOMText *node,
+ gulong offset)
+{
+ WebKitDOMNode *tmp_node;
+ WebKitDOMText *next_node;
+ gchar *original_text, *new_text;
+
+ original_text = webkit_dom_text_get_whole_text (node);
+
+ next_node = webkit_dom_text_split_text (node, offset, NULL);
+
+ if (!offset || !next_node) {
+ g_free (original_text);
+ return next_node;
+ }
+
+ do {
+ /* Need to insert an element between the text nodes, otherwise the get_whole_text()
+ returns concatenated text from surrounding text nodes as well. */
+ tmp_node = WEBKIT_DOM_NODE (webkit_dom_document_create_element (document, "SPAN", NULL));
+
+ webkit_dom_node_insert_before (
+ webkit_dom_node_get_parent_node (WEBKIT_DOM_NODE (node)),
+ tmp_node,
+ WEBKIT_DOM_NODE (next_node),
+ NULL);
+
+ new_text = webkit_dom_text_get_whole_text (node);
+
+ webkit_dom_node_remove_child (webkit_dom_node_get_parent_node (WEBKIT_DOM_NODE (node)), tmp_node, NULL);
+
+ if (!new_text || !*new_text || g_utf8_validate (new_text, -1, NULL))
+ break;
+
+ webkit_dom_text_replace_whole_text (node, original_text, NULL);
+ offset--;
+
+ next_node = webkit_dom_text_split_text (node, offset, NULL);
+
+ if (!next_node)
+ break;
+
+ g_clear_pointer (&new_text, g_free);
+ } while (offset);
+
+ g_free (original_text);
+ g_free (new_text);
+
+ return next_node;
+}
+
void
e_editor_dom_check_magic_links (EEditorPage *editor_page,
gboolean include_space_by_user)
@@ -1624,13 +1677,12 @@ e_editor_dom_check_magic_links (EEditorPage *editor_page,
url_start = url_end - url_length;
- webkit_dom_text_split_text (
+ safe_node_split_text (document,
WEBKIT_DOM_TEXT (node),
- include_space ? url_end - 1 : url_end,
- NULL);
+ include_space ? url_end - 1 : url_end);
- webkit_dom_text_split_text (
- WEBKIT_DOM_TEXT (node), url_start, NULL);
+ safe_node_split_text (document,
+ WEBKIT_DOM_TEXT (node), url_start);
url_text_node = webkit_dom_node_get_next_sibling (node);
if (url_text_node)
url_text = webkit_dom_character_data_get_data (WEBKIT_DOM_CHARACTER_DATA (url_text_node));
@@ -1695,10 +1747,9 @@ e_editor_dom_check_magic_links (EEditorPage *editor_page,
* split it into two nodes and select the right one. */
if (g_str_has_suffix (text_to_append, UNICODE_NBSP) ||
g_str_has_suffix (text_to_append, UNICODE_ZERO_WIDTH_SPACE)) {
- webkit_dom_text_split_text (
+ safe_node_split_text (document,
WEBKIT_DOM_TEXT (node),
- g_utf8_strlen (text_to_append, -1) - 1,
- NULL);
+ g_utf8_strlen (text_to_append, -1) - 1);
g_free (text_to_append);
text_to_append = webkit_dom_node_get_text_content (node);
}
@@ -13432,9 +13483,10 @@ e_editor_dom_selection_unindent (EEditorPage *editor_page)
}
static void
-dom_insert_selection_point (WebKitDOMNode *container,
- glong offset,
- WebKitDOMElement *selection_point)
+dom_insert_selection_point (WebKitDOMDocument *document,
+ WebKitDOMNode *container,
+ glong offset,
+ WebKitDOMElement *selection_point)
{
WebKitDOMNode *parent;
@@ -13446,8 +13498,7 @@ dom_insert_selection_point (WebKitDOMNode *container,
if (offset != 0) {
WebKitDOMText *split_text;
- split_text = webkit_dom_text_split_text (
- WEBKIT_DOM_TEXT (container), offset, NULL);
+ split_text = safe_node_split_text (document, WEBKIT_DOM_TEXT (container), offset);
parent = webkit_dom_node_get_parent_node (WEBKIT_DOM_NODE (split_text));
webkit_dom_node_insert_before (
@@ -13568,7 +13619,7 @@ e_editor_dom_selection_save (EEditorPage *editor_page)
if (webkit_dom_node_is_same_node (anchor, container) && offset == anchor_offset)
webkit_dom_element_set_attribute (start_marker, "data-anchor", "", NULL);
- dom_insert_selection_point (container, offset, start_marker);
+ dom_insert_selection_point (document, container, offset, start_marker);
end_marker = dom_create_selection_marker (document, FALSE);
@@ -13587,7 +13638,7 @@ e_editor_dom_selection_save (EEditorPage *editor_page)
if (webkit_dom_node_is_same_node (anchor, container) && offset == anchor_offset)
webkit_dom_element_set_attribute (end_marker, "data-anchor", "", NULL);
- dom_insert_selection_point (container, offset, end_marker);
+ dom_insert_selection_point (document, container, offset, end_marker);
if (!collapsed) {
if (start_marker && end_marker) {
@@ -13791,6 +13842,19 @@ e_editor_dom_selection_restore (EEditorPage *editor_page)
g_clear_object (&range);
}
+static gboolean
+can_wrap_at_unichar (gunichar uc)
+{
+ GUnicodeBreakType break_type;
+
+ break_type = g_unichar_break_type (uc);
+
+ return break_type != G_UNICODE_BREAK_COMBINING_MARK &&
+ break_type != G_UNICODE_BREAK_SURROGATE && /* Emoji */
+ break_type != G_UNICODE_BREAK_INSEPARABLE &&
+ break_type != G_UNICODE_BREAK_NON_BREAKING_GLUE;
+}
+
static gint
find_where_to_break_line (WebKitDOMCharacterData *node,
gint max_length)
@@ -13798,31 +13862,56 @@ find_where_to_break_line (WebKitDOMCharacterData *node,
gboolean last_break_position_is_dash = FALSE;
gchar *str, *text_start;
gunichar uc;
- gint pos = 1, last_break_position = 0, ret_val = 0;
+ gunichar2 *utf16_txt = NULL;
+ gint pos = 1, last_break_position = 0, ret_val = 0, ii;
text_start = webkit_dom_character_data_get_data (node);
+ if (!text_start)
+ return 0;
+
+ for (ii = 0; text_start[ii]; ii++) {
+ if (text_start[ii] < 0)
+ break;
+ }
+
+ /* The text contains non-ASCII letters; convert it to UTF-16, which is used by WebKit internally.
+ The UTF-16 representation is not the same as g_utf8_get_char() for Emoji and possibly others. */
+ if (text_start[ii]) {
+ utf16_txt = g_utf8_to_utf16 (text_start, -1, NULL, NULL, NULL);
+ }
+
+ ii = 0;
str = text_start;
do {
- uc = g_utf8_get_char (str);
+ if (utf16_txt)
+ uc = utf16_txt[ii];
+ else
+ uc = g_utf8_get_char (str);
if (!uc) {
+ ret_val = pos <= max_length ? pos : last_break_position > 0 ? last_break_position - 1 : 0;
goto out;
}
- if ((g_unichar_isspace (uc) && !(g_unichar_break_type (uc) == G_UNICODE_BREAK_NON_BREAKING_GLUE)) ||
- *str == '-') {
- if ((last_break_position_is_dash = *str == '-')) {
+ if ((g_unichar_isspace (uc) && can_wrap_at_unichar (uc)) || uc == L'-') {
+ if ((last_break_position_is_dash = uc == L'-')) {
/* There was no space before the dash */
if (pos - 1 != last_break_position) {
- gchar *rest;
+ gunichar next_char = 0;
+
+ if (utf16_txt) {
+ next_char = utf16_txt[ii + 1];
+ } else {
+ gchar *rest;
+
+ rest = g_utf8_next_char (str);
- rest = g_utf8_next_char (str);
- if (rest && *rest) {
- gunichar next_char;
+ if (rest && *rest)
+ next_char = g_utf8_get_char (rest);
+ }
+ if (next_char) {
/* There is no space after the dash */
- next_char = g_utf8_get_char (rest);
if (g_unichar_isspace (next_char))
last_break_position_is_dash = FALSE;
else
@@ -13839,25 +13928,34 @@ find_where_to_break_line (WebKitDOMCharacterData *node,
/* Look one character after the limit to check if there
* is a space (skip dash) that we are allowed to break at, if so
* break it there. */
- if (*str) {
+ if (utf16_txt) {
+ uc = utf16_txt[ii + 1];
+ } else {
str = g_utf8_next_char (str);
uc = g_utf8_get_char (str);
-
- if (g_unichar_isspace (uc) &&
- !(g_unichar_break_type (uc) == G_UNICODE_BREAK_NON_BREAKING_GLUE))
- last_break_position = ++pos;
}
+
+ if (g_unichar_isspace (uc) && can_wrap_at_unichar (uc))
+ last_break_position = ++pos;
break;
}
pos++;
- str = g_utf8_next_char (str);
- } while (*str);
+ if (utf16_txt)
+ ii++;
+ else
+ str = g_utf8_next_char (str);
+ } while (utf16_txt ? utf16_txt[ii] : *str);
if (last_break_position != 0)
ret_val = last_break_position - 1;
out:
+ /* This 0xd83d code is before an Emoji */
+ if (utf16_txt && ret_val > 0 && utf16_txt[ret_val] == 0xd83d)
+ ret_val--;
+
g_free (text_start);
+ g_free (utf16_txt);
/* Always break after the dash character. */
if (last_break_position_is_dash)
@@ -14163,10 +14261,9 @@ wrap_lines (EEditorPage *editor_page,
next_sibling = node;
while (newline) {
- next_sibling = WEBKIT_DOM_NODE (webkit_dom_text_split_text (
+ next_sibling = WEBKIT_DOM_NODE (safe_node_split_text (document,
WEBKIT_DOM_TEXT (next_sibling),
- g_utf8_pointer_to_offset (text_content, newline),
- NULL));
+ g_utf8_pointer_to_offset (text_content, newline)));
if (!next_sibling)
break;
@@ -14451,10 +14548,9 @@ wrap_lines (EEditorPage *editor_page,
if (text_length != length) {
WebKitDOMNode *nd;
- webkit_dom_text_split_text (
+ safe_node_split_text (document,
WEBKIT_DOM_TEXT (prev_sibling),
- text_length - length,
- NULL);
+ text_length - length);
if ((nd = webkit_dom_node_get_next_sibling (prev_sibling))) {
gchar *nd_content;
@@ -14519,8 +14615,7 @@ wrap_lines (EEditorPage *editor_page,
WebKitDOMNode *nd;
if (offset != length_left && offset != 0) {
- webkit_dom_text_split_text (
- WEBKIT_DOM_TEXT (node), offset, NULL);
+ safe_node_split_text (document, WEBKIT_DOM_TEXT (node), offset);
nd = webkit_dom_node_get_next_sibling (node);
} else
diff --git a/src/modules/webkit-editor/web-extension/e-editor-web-extension.c b/src/modules/webkit-editor/web-extension/e-editor-web-extension.c
index 05a1d4ac8d..41dd17f72c 100644
--- a/src/modules/webkit-editor/web-extension/e-editor-web-extension.c
+++ b/src/modules/webkit-editor/web-extension/e-editor-web-extension.c
@@ -1842,6 +1842,15 @@ handle_method_call (GDBusConnection *connection,
if ((flags & E_CONTENT_EDITOR_GET_INLINE_IMAGES) && from_domain && *from_domain && inline_images)
e_editor_dom_restore_images (editor_page, inline_images);
+ /* Ensure the text is UTF-8 valid (like in case text splitting broke it).
+ It may break user's text, but it's better than losing it all.
+ This is only a workaround for such cases. */
+ if (value && !g_utf8_validate (value, -1, NULL)) {
+ gchar *tmp = e_util_utf8_make_valid (value);
+ g_free (value);
+ value = tmp;
+ }
+
/* If no inline images are requested we still have to return
* something even it won't be used at all. */
g_dbus_method_invocation_return_value (
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]