[geary/cherry-pick-647ef90c] Merge branch 'mjog/589-attachment-keyword-check' into 'mainline'
- From: Michael Gratton <mjog src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [geary/cherry-pick-647ef90c] Merge branch 'mjog/589-attachment-keyword-check' into 'mainline'
- Date: Tue, 8 Oct 2019 22:33:39 +0000 (UTC)
commit f0b3d6791f609d749aa7bc804b16b7c35c678e7d
Author: Michael Gratton <mike vee net>
Date: Tue Oct 8 22:32:52 2019 +0000
Merge branch 'mjog/589-attachment-keyword-check' into 'mainline'
Attachment keyword fixes
Closes #589
See merge request GNOME/geary!334
(cherry picked from commit 647ef90cfce11342f8174e6aa7578f1e4be2b7d8)
5732d23e ui/composer-web-view.js: Add element blacklist to htmlToText
a8ef91f3 ui/composer-web-view.js: Improve keyword detection
69e3cdf9 Fix ComposerPageState::containsAttachmentKeyword
17a23bc5 ComposerWidget: Send both en and localised attachment keywords
src/client/composer/composer-widget.vala | 19 ++++-
test/js/composer-page-state-test.vala | 137 +++++++++++++++++++++----------
ui/composer-web-view.js | 123 ++++++++++++++-------------
3 files changed, 174 insertions(+), 105 deletions(-)
---
diff --git a/src/client/composer/composer-widget.vala b/src/client/composer/composer-widget.vala
index 31a0dff8..879bacc7 100644
--- a/src/client/composer/composer-widget.vala
+++ b/src/client/composer/composer-widget.vala
@@ -161,11 +161,17 @@ public class ComposerWidget : Gtk.EventBox, Geary.BaseInterface {
private const string URI_LIST_MIME_TYPE = "text/uri-list";
private const string FILE_URI_PREFIX = "file://";
+ // Keep these in sync with the next const below.
+ private const string ATTACHMENT_KEYWORDS =
+
"attach|attaching|attaches|attachment|attachments|attached|enclose|enclosed|enclosing|encloses|enclosure|enclosures";
// Translators: This is list of keywords, separated by pipe ("|")
// characters, that suggest an attachment; since this is full-word
- // checking, include all variants of each word. No spaces are
- // allowed.
- private const string ATTACHMENT_KEYWORDS_LOCALIZED =
_("attach|attaching|attaches|attachment|attachments|attached|enclose|enclosed|enclosing|encloses|enclosure|enclosures");
+ // checking, include all variants of each word. No spaces are
+ // allowed. The words will be converted to lower case based on
+ // locale and English versions included automatically.
+ private const string ATTACHMENT_KEYWORDS_LOCALISED =
+
_("attach|attaching|attaches|attachment|attachments|attached|enclose|enclosed|enclosing|encloses|enclosure|enclosures");
+
public Geary.Account account { get; private set; }
private Gee.Map<string, Geary.AccountInformation> accounts;
@@ -1334,7 +1340,12 @@ public class ComposerWidget : Gtk.EventBox, Geary.BaseInterface {
confirmation = _("Send message with an empty body?");
} else if (!has_attachment &&
yield this.editor.contains_attachment_keywords(
- ATTACHMENT_KEYWORDS_LOCALIZED, this.subject)) {
+ string.join(
+ "|",
+ ATTACHMENT_KEYWORDS,
+ ATTACHMENT_KEYWORDS_LOCALISED
+ ),
+ this.subject)) {
confirmation = _("Send message without an attachment?");
}
if (confirmation != null) {
diff --git a/test/js/composer-page-state-test.vala b/test/js/composer-page-state-test.vala
index 8c1fcf08..6a7781b2 100644
--- a/test/js/composer-page-state-test.vala
+++ b/test/js/composer-page-state-test.vala
@@ -13,17 +13,19 @@ class ComposerPageStateTest : ClientWebViewTestCase<ComposerWebView> {
public ComposerPageStateTest() {
base("ComposerPageStateTest");
+ add_test("html_to_text", html_to_text);
+ add_test("html_to_text_with_quote", html_to_text_with_quote);
+ add_test("html_to_text_with_nested_quote", html_to_text_with_nested_quote);
+ add_test("html_to_text_with_blacklist", html_to_text_with_blacklist);
add_test("edit_context_font", edit_context_font);
add_test("edit_context_link", edit_context_link);
add_test("indent_line", indent_line);
- add_test("contains_attachment_keywords", contains_attachment_keywords);
add_test("clean_content", clean_content);
add_test("get_html", get_html);
add_test("get_text", get_text);
- add_test("get_text_with_quote", get_text_with_quote);
- add_test("get_text_with_nested_quote", get_text_with_nested_quote);
-
add_test("contains_keywords", contains_keywords);
+ // Depends contains_keywords and html_to_text_with_blacklist
+ add_test("contains_attachment_keywords", contains_attachment_keywords);
add_test("replace_non_breaking_space", replace_non_breaking_space);
try {
@@ -33,6 +35,84 @@ class ComposerPageStateTest : ClientWebViewTestCase<ComposerWebView> {
}
}
+ public void html_to_text() throws Error {
+ load_body_fixture("<p>para</p>");
+ try {
+ assert(
+ Util.JS.to_string(
+ run_javascript(
+ @"ComposerPageState.htmlToText(window.document.body);"
+ ).get_js_value()
+ ) == "para\n\n\n\n"
+ );
+ } catch (Util.JS.Error err) {
+ print("Util.JS.Error: %s\n", err.message);
+ assert_not_reached();
+ } catch (Error err) {
+ print("WKError: %s\n", err.message);
+ assert_not_reached();
+ }
+ }
+
+ public void html_to_text_with_quote() throws Error {
+ unichar q_marker = Geary.RFC822.Utils.QUOTE_MARKER;
+ load_body_fixture("<p>pre</p> <blockquote><p>quote</p></blockquote> <p>post</p>");
+ try {
+ assert(
+ Util.JS.to_string(
+ run_javascript(
+ "ComposerPageState.htmlToText(window.document.body);"
+ ).get_js_value()
+ ) == @"pre\n\n$(q_marker)quote\n$(q_marker)\npost\n\n\n\n"
+ );
+ } catch (Util.JS.Error err) {
+ print("Util.JS.Error: %s", err.message);
+ assert_not_reached();
+ } catch (Error err) {
+ print("WKError: %s", err.message);
+ assert_not_reached();
+ }
+ }
+
+ public void html_to_text_with_nested_quote() throws Error {
+ unichar q_marker = Geary.RFC822.Utils.QUOTE_MARKER;
+ load_body_fixture("<p>pre</p> <blockquote><p>quote1</p>
<blockquote><p>quote2</p></blockquote></blockquote> <p>post</p>");
+ try {
+ assert(
+ Util.JS.to_string(
+ run_javascript(
+ "ComposerPageState.htmlToText(window.document.body)"
+ ).get_js_value()
+ ) ==
@"pre\n\n$(q_marker)quote1\n$(q_marker)\n$(q_marker)$(q_marker)quote2\n$(q_marker)$(q_marker)\npost\n\n\n\n"
+ );
+ } catch (Util.JS.Error err) {
+ print("Util.JS.Error: %s\n", err.message);
+ assert_not_reached();
+ } catch (Error err) {
+ print("WKError: %s\n", err.message);
+ assert_not_reached();
+ }
+ }
+
+ public void html_to_text_with_blacklist() throws Error {
+ load_body_fixture("<p>pre</p> <blockquote><p>quote1</p>
<blockquote><p>quote2</p></blockquote></blockquote> <p>post</p>");
+ try {
+ assert(
+ Util.JS.to_string(
+ run_javascript(
+ "ComposerPageState.htmlToText(window.document.body, [\"blockquote\"])"
+ ).get_js_value()
+ ) == @"pre\n\npost\n\n\n\n"
+ );
+ } catch (Util.JS.Error err) {
+ print("Util.JS.Error: %s\n", err.message);
+ assert_not_reached();
+ } catch (Error err) {
+ print("WKError: %s\n", err.message);
+ assert_not_reached();
+ }
+ }
+
public void edit_context_link() throws Error {
string html = "<a id=\"test\" href=\"url\">para</a>";
load_body_fixture(html);
@@ -222,44 +302,6 @@ unknown://example6.com
}
}
- public void get_text_with_quote() throws Error {
- unichar q_marker = Geary.RFC822.Utils.QUOTE_MARKER;
- load_body_fixture("<p>pre</p> <blockquote><p>quote</p></blockquote> <p>post</p>");
- try {
- assert(
- Util.JS.to_string(
- run_javascript(@"window.geary.getText();")
- .get_js_value()
- ) == @"pre\n\n$(q_marker)quote\n$(q_marker)\npost\n\n\n\n"
- );
- } catch (Util.JS.Error err) {
- print("Util.JS.Error: %s", err.message);
- assert_not_reached();
- } catch (Error err) {
- print("WKError: %s", err.message);
- assert_not_reached();
- }
- }
-
- public void get_text_with_nested_quote() throws Error {
- unichar q_marker = Geary.RFC822.Utils.QUOTE_MARKER;
- load_body_fixture("<p>pre</p> <blockquote><p>quote1</p>
<blockquote><p>quote2</p></blockquote></blockquote> <p>post</p>");
- try {
- assert(
- Util.JS.to_string(
- run_javascript(@"window.geary.getText();")
- .get_js_value()
- ) ==
@"pre\n\n$(q_marker)quote1\n$(q_marker)\n$(q_marker)$(q_marker)quote2\n$(q_marker)$(q_marker)\npost\n\n\n\n"
- );
- } catch (Util.JS.Error err) {
- print("Util.JS.Error: %s\n", err.message);
- assert_not_reached();
- } catch (Error err) {
- print("WKError: %s\n", err.message);
- assert_not_reached();
- }
- }
-
public void contains_keywords() throws Error {
load_body_fixture();
string complete_keys = """new Set(["keyword1", "keyword2"])""";
@@ -317,6 +359,11 @@ unknown://example6.com
).get_js_value()
));
+ assert(Util.JS.to_bool(run_javascript(
+ @"ComposerPageState.containsKeywords('keyword1.', $complete_keys, $suffix_keys);"
+ ).get_js_value()
+ ));
+
assert(Util.JS.to_bool(run_javascript(
@"ComposerPageState.containsKeywords('something.sf1', $complete_keys, $suffix_keys);"
).get_js_value()
@@ -326,6 +373,12 @@ unknown://example6.com
@"ComposerPageState.containsKeywords('something.something.sf2', $complete_keys,
$suffix_keys);"
).get_js_value()
));
+
+ assert(!Util.JS.to_bool(run_javascript(
+ @"ComposerPageState.containsKeywords('http://something/esle.sf2', $complete_keys,
$suffix_keys);"
+ ).get_js_value()
+ ));
+
} catch (Util.JS.Error err) {
print("Util.JS.Error: %s\n", err.message);
assert_not_reached();
diff --git a/ui/composer-web-view.js b/ui/composer-web-view.js
index a848bb1a..3abda414 100644
--- a/ui/composer-web-view.js
+++ b/ui/composer-web-view.js
@@ -12,7 +12,8 @@
let ComposerPageState = function() {
this.init.apply(this, arguments);
};
-ComposerPageState.KEYWORD_SPLIT_REGEX = /[\s]+/g;
+ComposerPageState.SPACE_CHAR_REGEX = /[\s]/i;
+ComposerPageState.WORD_CHAR_REGEX = /[\s\\'!"#$%&()*+,\-.\/:;<=>?@\[\]^_`{|}~\u2000-\u206F\u2E00-\u2E7F]/i;
ComposerPageState.QUOTE_MARKER = "\x7f"; // delete
ComposerPageState.PROTOCOL_REGEX =
/^(aim|apt|bitcoin|cvs|ed2k|ftp|file|finger|git|gtalk|http|https|irc|ircs|irc6|lastfm|ldap|ldaps|magnet|news|nntp|rsync|sftp|skype|smb|sms|svn|telnet|tftp|ssh|webcal|xmpp):/i;
// Taken from Geary.HTML.URL_REGEX, without the inline modifier (?x)
@@ -238,30 +239,12 @@ ComposerPageState.prototype = {
return true;
}
- // Check interesting body text
- let node = this.bodyPart.firstChild;
- let content = [];
- let breakingElements = new Set([
- "BR", "P", "DIV", "BLOCKQUOTE", "TABLE", "OL", "UL", "HR"
- ]);
- while (node != null) {
- if (node.nodeType == Node.TEXT_NODE) {
- content.push(node.textContent);
- } else if (content.nodeType == Node.ELEMENT_NODE) {
- let isBreaking = breakingElements.has(node.nodeName);
- if (isBreaking) {
- content.push("\n");
- }
-
- // Only include non-quoted text
- if (content.nodeName != "BLOCKQUOTE") {
- content.push(content.textContent);
- }
- }
- node = node.nextSibling;
- }
+ // Check the body text
+ let content = ComposerPageState.htmlToText(
+ this.bodyPart, ["blockquote"]
+ );
return ComposerPageState.containsKeywords(
- content.join(""), completeKeys, suffixKeys
+ content, completeKeys, suffixKeys
);
},
tabOut: function() {
@@ -403,33 +386,50 @@ ComposerPageState.prototype = {
/**
* Determines if any keywords are present in a string.
*/
-ComposerPageState.containsKeywords = function(line, completeKeys, suffixKeys) {
- let tokens = new Set(
- line.toLocaleLowerCase().split(ComposerPageState.KEYWORD_SPLIT_REGEX)
- );
-
- for (let key of completeKeys) {
- if (tokens.has(key)) {
- return true;
- }
- }
-
+ComposerPageState.containsKeywords = function(line, wordKeys, suffixKeys) {
let urlRegex = ComposerPageState.URL_REGEX;
- // XXX assuming all suffixes have length = 3 here.
- let extLen = 3;
- for (let token of tokens) {
- let extDelim = token.length - (extLen + 1);
- // We do care about "a.pdf", but not ".pdf"
- if (token.length >= extLen + 2 && token.charAt(extDelim) == ".") {
- let suffix = token.substring(extDelim + 1);
- if (suffixKeys.has(suffix)) {
- if (token.match(urlRegex) == null) {
- return true;
+ let lastToken = -1;
+ let lastSpace = -1;
+ for (var i = 0; i <= line.length; i++) {
+ let char = (i < line.length) ? line[i] : " ";
+
+ if (char.match(ComposerPageState.WORD_CHAR_REGEX)) {
+ if (lastToken + 1 < i) {
+ let wordToken = line.substring(lastToken + 1, i).toLocaleLowerCase();
+ let isWordMatch = wordKeys.has(wordToken);
+ let isSuffixMatch = suffixKeys.has(wordToken);
+ if (isWordMatch || isSuffixMatch) {
+ let spaceToken = line.substring(lastSpace + 1, i);
+ let isUrl = (spaceToken.match(ComposerPageState.URL_REGEX) != null);
+
+ // Matches a token if it is a word that isn't in a
+ // URL. I.e. this gets "some attachment." but not
+ // "http://attachment.com"
+ if (isWordMatch && !isUrl) {
+ return true;
+ }
+
+ // Matches a token if it is a suffix that isn't a
+ // URL and such that the space-delimited token
+ // ends with ".SUFFIX". I.e. this matches "see
+ // attachment.pdf." but not
+ // "http://example.com/attachment.pdf" or "see the
+ // pdf."
+ if (isSuffixMatch &&
+ !isUrl &&
+ spaceToken.length != (1 + wordToken.length) &&
+ spaceToken.endsWith("." + wordToken)) {
+ return true;
+ }
}
}
+ lastToken = i;
+
+ if (char.match(ComposerPageState.SPACE_CHAR_REGEX)) {
+ lastSpace = i;
+ }
}
}
-
return false;
};
@@ -460,11 +460,16 @@ ComposerPageState.cleanPart = function(part, removeIfEmpty) {
* `ComposerPageState.QUOTE_MARKER`, where the number of markers indicates
* the depth of nesting of the quote.
*/
-ComposerPageState.htmlToText = function(root) {
+ComposerPageState.htmlToText = function(root, blacklist = []) {
let parentStyle = window.getComputedStyle(root);
let text = "";
for (let node of (root.childNodes || [])) {
+ let nodeName = node.nodeName.toLowerCase();
+ if (blacklist.includes(nodeName)) {
+ continue;
+ }
+
let isBlock = (
node instanceof Element
&& window.getComputedStyle(node).display == "block"
@@ -476,7 +481,7 @@ ComposerPageState.htmlToText = function(root) {
text += "\n";
}
}
- switch (node.nodeName.toLowerCase()) {
+ switch (nodeName) {
case "#text":
let nodeText = node.nodeValue;
switch (parentStyle.whiteSpace) {
@@ -500,24 +505,24 @@ ComposerPageState.htmlToText = function(root) {
break;
case "a":
if (node.closest("body.plain")) {
- text += ComposerPageState.htmlToText(node);
+ text += ComposerPageState.htmlToText(node, blacklist);
} else if (node.textContent == node.href) {
text += "<" + node.href + ">";
} else {
- text += ComposerPageState.htmlToText(node);
+ text += ComposerPageState.htmlToText(node, blacklist);
text += " <" + node.href + ">";
}
break;
case "b":
case "strong":
if (node.closest("body.plain")) {
- text += ComposerPageState.htmlToText(node);
+ text += ComposerPageState.htmlToText(node, blacklist);
} else {
- text += "*" + ComposerPageState.htmlToText(node) + "*";
+ text += "*" + ComposerPageState.htmlToText(node, blacklist) + "*";
}
break;
case "blockquote":
- let bqText = ComposerPageState.htmlToText(node);
+ let bqText = ComposerPageState.htmlToText(node, blacklist);
// If there is a newline at the end of the quote, remove it
// After this switch we ensure that there is a newline after the quote
bqText = bqText.replace(/\n$/, "");
@@ -532,23 +537,23 @@ ComposerPageState.htmlToText = function(root) {
case "i":
case "em":
if (node.closest("body.plain")) {
- text += ComposerPageState.htmlToText(node);
+ text += ComposerPageState.htmlToText(node, blacklist);
} else {
- text += "/" + ComposerPageState.htmlToText(node) + "/";
+ text += "/" + ComposerPageState.htmlToText(node, blacklist) + "/";
}
break;
case "u":
if (node.closest("body.plain")) {
- text += ComposerPageState.htmlToText(node);
+ text += ComposerPageState.htmlToText(node, blacklist);
} else {
- text += "_" + ComposerPageState.htmlToText(node) + "_";
+ text += "_" + ComposerPageState.htmlToText(node, blacklist) + "_";
}
break;
case "#comment":
- case "style":
+ case "style":
break;
default:
- text += ComposerPageState.htmlToText(node);
+ text += ComposerPageState.htmlToText(node, blacklist);
break;
}
if (isBlock) {
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]