[geary/wip/765516-gtk-widget-conversation-viewer: 128/207] Substantially rework email HTML sanitising and styling.



commit 7581f163209c217895eb4443c18e34250e7c4da3
Author: Michael James Gratton <mike vee net>
Date:   Wed Aug 10 01:16:37 2016 +1000

    Substantially rework email HTML sanitising and styling.
    
    * src/client/conversation-viewer/conversation-message.vala
      (ConversationMessage::clean_html_markup): Ensure all displayed message
      bodies have at least a HTML element, so that the style in
      conversation-web-view.css has something to work on. Load application
      style when sanitising the HTML rather than when the web view has
      loaded.
    
    * src/client/conversation-viewer/conversation-web-view.vala
      (ConversationWebView): Just set user style using WebKit.WebSettings,
      don't try to set application CSS, and remove all associated code.
    
    * ui/conversation-web-view.css: Only trigger CSS 2.1 § 10.6.7 on the HTML
      element, so BODY can be styled as normal by email CSS.

 .../conversation-viewer/conversation-message.vala  |   81 +++++++++++----
 .../conversation-viewer/conversation-web-view.vala |  105 +++++---------------
 ui/conversation-web-view.css                       |   37 +++----
 3 files changed, 100 insertions(+), 123 deletions(-)
---
diff --git a/src/client/conversation-viewer/conversation-message.vala 
b/src/client/conversation-viewer/conversation-message.vala
index 3a014f6..bab0d29 100644
--- a/src/client/conversation-viewer/conversation-message.vala
+++ b/src/client/conversation-viewer/conversation-message.vala
@@ -752,25 +752,64 @@ public class ConversationMessage : Gtk.Box {
     private string clean_html_markup(string text, Geary.RFC822.Message message, out bool remote_images) {
         remote_images = false;
         try {
-            string inner_text = text;
-            
-            // If email HTML has a BODY, use only that
-            GLib.Regex body_regex = new GLib.Regex("<body([^>]*)>(.*)</body>",
+            WebKit.DOM.HTMLElement html = (WebKit.DOM.HTMLElement)
+                this.web_view.get_dom_document().document_element;
+
+            // If the message has a HTML element, get its inner
+            // markup. We can't just set this on a temp container div
+            // (the old approach) using set_inner_html() will refuse
+            // to parse any HTML, HEAD and BODY elements that are out
+            // of place in the structure. We can't use
+            // set_outer_html() on the document element since it
+            // throws an error.
+            GLib.Regex html_regex = new GLib.Regex("<html([^>]*)>(.*)</html>",
                 GLib.RegexCompileFlags.DOTALL);
             GLib.MatchInfo matches;
-            if (body_regex.match(text, 0, out matches)) {
-                inner_text = matches.fetch(2);
+            if (html_regex.match(text, 0, out matches)) {
+                // Set the existing HTML element's content. Here, HEAD
+                // and BODY elements will be parsed fine.
+                html.set_inner_html(matches.fetch(2));
+                // Copy email HTML element attrs across to the
+                // existing HTML element
                 string attrs = matches.fetch(1);
-                if (attrs != "")
-                    inner_text = @"<div$attrs>$inner_text</div>";
+                if (attrs != "") {
+                    WebKit.DOM.HTMLElement container =
+                        this.web_view.create("div");
+                    container.set_inner_html(@"<div$attrs></div>");
+                    WebKit.DOM.HTMLElement? attr_element =
+                        Util.DOM.select(container, "div");
+                    WebKit.DOM.NamedNodeMap html_attrs =
+                        attr_element.get_attributes();
+                    for (int i = 0; i < html_attrs.get_length(); i++) {
+                        WebKit.DOM.Node attr = html_attrs.item(i);
+                        html.set_attribute(attr.node_name, attr.text_content);
+                    }
+                }
+            } else {
+                html.set_inner_html(text);
             }
-            
-            // Create a workspace for manipulating the HTML.
-            WebKit.DOM.HTMLElement container = web_view.create_div();
-            container.set_inner_html(inner_text);
-            
+
+            // Set dir="auto" if not already set possibly get a
+            // slightly better RTL experience.
+            string? dir = html.get_dir();
+            if (dir == null || dir.length == 0) {
+                html.set_dir("auto");
+            }
+
+            // Add application CSS to the document
+            WebKit.DOM.HTMLElement? head = Util.DOM.select(html, "head");
+            if (head == null) {
+                head = this.web_view.create("head");
+                html.insert_before(head, html.get_first_child());
+            }
+            WebKit.DOM.HTMLElement style_element = this.web_view.create("style");
+            string css_text = GearyApplication.instance.read_resource("conversation-web-view.css");
+            WebKit.DOM.Text text_node = this.web_view.get_dom_document().create_text_node(css_text);
+            style_element.append_child(text_node);
+            head.insert_before(style_element, head.get_first_child());
+
             // Get all the top level block quotes and stick them into a hide/show controller.
-            WebKit.DOM.NodeList blockquote_list = container.query_selector_all("blockquote");
+            WebKit.DOM.NodeList blockquote_list = html.query_selector_all("blockquote");
             for (int i = 0; i < blockquote_list.length; ++i) {
                 // Get the nodes we need.
                 WebKit.DOM.Node blockquote_node = blockquote_list.item(i);
@@ -796,11 +835,11 @@ public class ConversationMessage : Gtk.Box {
             }
 
             // Now look for the signature.
-            wrap_html_signature(ref container);
+            wrap_html_signature(ref html);
 
             // Then look for all <img> tags. Inline images are replaced with
             // data URLs.
-            WebKit.DOM.NodeList inline_list = container.query_selector_all("img");
+            WebKit.DOM.NodeList inline_list = html.query_selector_all("img");
             Gee.HashSet<string> inlined_content_ids = new Gee.HashSet<string>();
             for (ulong i = 0; i < inline_list.length; ++i) {
                 // Get the MIME content for the image.
@@ -857,7 +896,7 @@ public class ConversationMessage : Gtk.Box {
             foreach (string cid in inlined_content_ids) {
                 try {
                     string escaped_cid = Geary.HTML.escape_markup(cid);
-                    WebKit.DOM.Element? img = container.query_selector(@"[cid='$escaped_cid']");
+                    WebKit.DOM.Element? img = html.query_selector(@"[cid='$escaped_cid']");
                     if (img != null)
                         img.parent_element.remove_child(img);
                 } catch (Error error) {
@@ -866,15 +905,15 @@ public class ConversationMessage : Gtk.Box {
             }
             
             // Now return the whole message.
-            return container.get_inner_html();
+            return html.get_outer_html();
         } catch (Error e) {
             debug("Error modifying HTML message: %s", e.message);
             return text;
         }
     }
     
-    private WebKit.DOM.HTMLDivElement create_quote_container() throws Error {
-        WebKit.DOM.HTMLDivElement quote_container = web_view.create_div();
+    private WebKit.DOM.HTMLElement create_quote_container() throws Error {
+        WebKit.DOM.HTMLElement quote_container = web_view.create("div");
         quote_container.set_attribute(
             "class", "quote_container controllable hide"
         );
@@ -913,7 +952,7 @@ public class ConversationMessage : Gtk.Box {
         }
         WebKit.DOM.Node elem = div_list.item(i) as WebKit.DOM.Node;
         WebKit.DOM.Element parent = elem.get_parent_element();
-        WebKit.DOM.HTMLElement signature_container = web_view.create_div();
+        WebKit.DOM.HTMLElement signature_container = web_view.create("div");
         signature_container.set_attribute("class", "signature");
         do {
             // Get its sibling _before_ we move it into the signature div.
diff --git a/src/client/conversation-viewer/conversation-web-view.vala 
b/src/client/conversation-viewer/conversation-web-view.vala
index 5886547..88ab2b9 100644
--- a/src/client/conversation-viewer/conversation-web-view.vala
+++ b/src/client/conversation-viewer/conversation-web-view.vala
@@ -7,14 +7,12 @@
  */
 
 public class ConversationWebView : StylishWebView {
+
     private const string[] always_loaded_prefixes = {
         "https://secure.gravatar.com/avatar/";,
         "data:"
     };
-    
     private const string USER_CSS = "user-message.css";
-    private const string STYLE_NAME = "STYLE";
-    private const string PREVENT_HIDE_STYLE = "nohide";
 
     public string allow_prefix { get; private set; default = ""; }
 
@@ -27,24 +25,39 @@ public class ConversationWebView : StylishWebView {
 
     public bool is_height_valid = false;
 
-    private FileMonitor? user_style_monitor = null;
-
     public signal void link_selected(string link);
 
     public ConversationWebView() {
         // Set defaults.
         set_border_width(0);
         allow_prefix = random_string(10) + ":";
-        
+
+        File user_css = GearyApplication.instance.get_user_config_directory().get_child(USER_CSS);
+        // Print out a debug line here if the user CSS file exists, so
+        // we get warning about it when debugging visual issues.
+        user_css.query_info_async.begin(
+            FileAttribute.STANDARD_TYPE,
+            FileQueryInfoFlags.NONE,
+            Priority.DEFAULT_IDLE,
+            null,
+            (obj, res) => {
+                try {
+                    user_css.query_info_async.end(res);
+                    debug("User CSS file exists: %s", USER_CSS);
+                } catch (Error e) {
+                    // No problem, file does not exist
+                }
+            });
+
         WebKit.WebSettings config = settings;
         config.enable_scripts = false;
         config.enable_java_applet = false;
         config.enable_plugins = false;
         config.enable_developer_extras = Args.inspector;
+        config.user_stylesheet_uri = user_css.get_uri();
         settings = config;
-        
+
         // Hook up signals.
-        load_finished.connect(on_load_finished);
         resource_request_starting.connect(on_resource_request_starting);
         navigation_policy_decision_requested.connect(on_navigation_policy_decision_requested);
         new_window_policy_decision_requested.connect(on_navigation_policy_decision_requested);
@@ -81,9 +94,9 @@ public class ConversationWebView : StylishWebView {
 
         int preferred_height = 0;
         if (load_status == WebKit.LoadStatus.FINISHED) {
-            // XXX We need this 12px padding since WK doesn't seem to
-            // report the bottom margin?
-            preferred_height = (int) get_dom_document().get_body().scroll_height + 12;
+            WebKit.DOM.Element html =
+                get_dom_document().get_document_element();
+            preferred_height = (int) html.offset_height;
         }
 
         // XXX Currently, for some messages the WebView will report
@@ -131,25 +144,6 @@ public class ConversationWebView : StylishWebView {
                 request.set_uri("about:blank");
         }
     }
-    
-    private void on_load_finished(WebKit.WebFrame frame) {
-        // Load the style.
-        try {
-            WebKit.DOM.Document document = get_dom_document();
-            WebKit.DOM.Element style_element = document.create_element(STYLE_NAME);
-            
-            string css_text = GearyApplication.instance.read_resource("conversation-web-view.css");
-            WebKit.DOM.Text text_node = document.create_text_node(css_text);
-            style_element.append_child(text_node);
-            
-            WebKit.DOM.HTMLHeadElement head_element = document.get_head();
-            head_element.append_child(style_element);
-        } catch (Error error) {
-            debug("Error loading conversation-web-view.css: %s", error.message);
-        }
-
-        load_user_style();
-    }
 
     private bool on_scroll_event(Gdk.EventScroll event) {
         if ((event.state & Gdk.ModifierType.CONTROL_MASK) != 0) {
@@ -171,56 +165,7 @@ public class ConversationWebView : StylishWebView {
         }
         return false;
     }
-    
-    private void load_user_style() {
-        try {
-            WebKit.DOM.Document document = get_dom_document();
-            WebKit.DOM.Element style_element = document.create_element(STYLE_NAME);
-            style_element.set_attribute("id", "user_style");
-            WebKit.DOM.HTMLHeadElement head_element = document.get_head();
-            head_element.append_child(style_element);
-            
-            File user_style = GearyApplication.instance.get_user_config_directory().get_child(USER_CSS);
-            user_style_monitor = user_style.monitor_file(FileMonitorFlags.NONE, null);
-            user_style_monitor.changed.connect(on_user_style_changed);
-            
-            // And call it once to load the initial user style
-            on_user_style_changed(user_style, null, FileMonitorEvent.CREATED);
-        } catch (Error error) {
-            debug("Error setting up user style: %s", error.message);
-        }
-    }
-    
-    private void on_user_style_changed(File user_style, File? other_file, FileMonitorEvent event_type) {
-        // Changing a file produces 1 created signal, 3 changes done hints, and 0 changed
-        if (event_type != FileMonitorEvent.CHANGED && event_type != FileMonitorEvent.CREATED
-            && event_type != FileMonitorEvent.DELETED) {
-            return;
-        }
-        
-        debug("Loading new message viewer style from %s...", user_style.get_path());
-        
-        WebKit.DOM.Document document = get_dom_document();
-        WebKit.DOM.Element style_element = document.get_element_by_id("user_style");
-        ulong n = style_element.child_nodes.length;
-        try {
-            for (int i = 0; i < n; i++)
-                style_element.remove_child(style_element.first_child);
-        } catch (Error error) {
-            debug("Error removing old user style: %s", error.message);
-        }
-        
-        try {
-            DataInputStream data_input_stream = new DataInputStream(user_style.read());
-            size_t length;
-            string user_css = data_input_stream.read_upto("\0", 1, out length);
-            WebKit.DOM.Text text_node = document.create_text_node(user_css);
-            style_element.append_child(text_node);
-        } catch (Error error) {
-            // Expected if file was deleted.
-        }
-    }
-    
+
     private bool on_navigation_policy_decision_requested(WebKit.WebFrame frame,
         WebKit.NetworkRequest request, WebKit.WebNavigationAction navigation_action,
         WebKit.WebPolicyDecision policy_decision) {
diff --git a/ui/conversation-web-view.css b/ui/conversation-web-view.css
index ba2bc60..dd3e37d 100644
--- a/ui/conversation-web-view.css
+++ b/ui/conversation-web-view.css
@@ -7,37 +7,30 @@
  */
 
 html {
-  /* Disallow changes to HTML style using !important since this it is
-  needed for sizing the ConversationWebView. */
-  left: 0 !important;
-  width: 100% !important;
-  margin: 0 !important;
-  border: 0 !important;
-  padding: 0 !important;
-}
-
-html, body {
-  /* Trigger CSS 2.1 § 10.6.7 to get a shrink-wrapped height.
-     See also ConversationWebView.get_preferred_height */
+  /* Trigger CSS 2.1 § 10.6.7 to get a shrink-wrapped height. */
   position: absolute !important;
   top: 0 !important;
+  left: 0 !important;
   bottom: auto !important;
+  width: 100% !important;
   height: auto !important;
 
-  /* Disable viewport scrollbbars */
-  overflow: hidden !important;
+  /* Lock down the box enough so we don't get an incrementally
+     expanding web view */
+  box-sizing: border-box !important;
+  margin: 0 !important;
+  border: 0 !important;
+  padding: 0;
+
+  /* Never show scroll bars */
+  overflow: hidden;
 }
 
 body {
-  /* Allow email style to change the body however. */
-  left: 0;
-  right: 0;
-  margin: 12px;
+  margin: 0;
   border: 0;
-  padding: 0;
-  color: black;
-  background-color: white;
-  overflow-wrap: break-word;
+  padding: 12px;
+  overflow-wrap: break-word !important;
 }
 
 /* By default, tables reset the font properties to "normal" */


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]