[balsa/html-filter] improved Webkit HTML privacy filter
- From: Albrecht Dreß <albrecht src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [balsa/html-filter] improved Webkit HTML privacy filter
- Date: Sat, 23 Oct 2021 16:18:53 +0000 (UTC)
commit e34fa3a2a05614de69997890a16a8eb83ee2654c
Author: Albrecht Dreß <albrecht dress netcologne de>
Date: Sat Oct 23 18:19:33 2021 +0200
improved Webkit HTML privacy filter
Provide improved HTML privacy filtering using a Webkit extension. See
issue #62 for further details.
changed files:
* libbalsa/html-filter.c: Webkit filter extension (new file)
* libbalsa/html.c: use the filter extension if available
* configure.ac, meson.build, libbalsa/Makefile.am, libbalsa/meson.build:
configure web extension folder, build the extension
Signed-off-by: Albrecht Dreß <albrecht dress netcologne de>
configure.ac | 3 +
libbalsa/Makefile.am | 8 +++
libbalsa/html-filter.c | 135 +++++++++++++++++++++++++++++++++++++++++++
libbalsa/html.c | 154 +++++++++++++++++++++++++++++++++++++------------
libbalsa/meson.build | 7 +++
meson.build | 3 +
6 files changed, 272 insertions(+), 38 deletions(-)
---
diff --git a/configure.ac b/configure.ac
index e59b34026..ba4cf446f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -263,6 +263,9 @@ case "$use_html_widget" in
# note: sqlite3 is needed to manage html vs. plain and image download preferences
PKG_CHECK_MODULES(HTML, [ webkit2gtk-4.0 >= 2.28.0
sqlite3 >= 3.24.0])
+ BALSA_WEB_EXTENSIONS="${libdir}/${PACKAGE}"
+ AC_SUBST(BALSA_WEB_EXTENSIONS)
+ CPPFLAGS="$CPPFLAGS -DBALSA_WEB_EXTENSIONS=\\\"$BALSA_WEB_EXTENSIONS\\\""
AC_PATH_PROGS(HTML2TEXT,
[python-html2text \
html2markdown \
diff --git a/libbalsa/Makefile.am b/libbalsa/Makefile.am
index b272d4269..28ae1d2e3 100644
--- a/libbalsa/Makefile.am
+++ b/libbalsa/Makefile.am
@@ -139,6 +139,12 @@ libbalsa_a_SOURCES = \
x509-cert-widget.h
+pkglib_LTLIBRARIES = libhtmlfilter.la
+libhtmlfilter_la_SOURCES = html-filter.c
+libhtmlfilter_la_CFLAGS = $(BALSA_CFLAGS)
+libhtmlfilter_la_LIBADD = $(BALSA_LIBS)
+libhtmlfilter_la_LDFLAGS = -module -avoid-version -no-undefined
+
EXTRA_DIST = \
meson.build \
padlock-keyhole.xpm
@@ -149,3 +155,5 @@ AM_CPPFLAGS = -I${top_builddir} -I${top_srcdir} -I${top_srcdir}/libbalsa \
$(BALSA_DEFS)
AM_CFLAGS = $(BALSA_CFLAGS)
+
+html.o: AM_CPPFLAGS += -DBALSA_WEB_EXT_DEVEL=\"${abs_builddir}/.libs\"
diff --git a/libbalsa/html-filter.c b/libbalsa/html-filter.c
new file mode 100644
index 000000000..258ece1a2
--- /dev/null
+++ b/libbalsa/html-filter.c
@@ -0,0 +1,135 @@
+/* -*-mode:c; c-style:k&r; c-basic-offset:4; -*- */
+/* Balsa E-Mail Client
+ *
+ * Copyright (C) 2021 Albrecht Dreß <albrecht dress arcor de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Filtering of external resources referenced in HTML messages
+ *
+ * The Webkit extension expects a user message "load_ext" from the main process indicating if external resources shall be loaded.
+ * If not, all images are redirected to a non-existing image, whilst other resources (e.g. fonts etc.) are simply ignored.
+ */
+
+#if defined(HAVE_CONFIG_H) && HAVE_CONFIG_H
+# include "config.h"
+#endif /* HAVE_CONFIG_H */
+
+#ifdef HAVE_HTML_WIDGET
+
+#if defined(GTK_DISABLE_DEPRECATED)
+#define GtkAction GAction
+#include <webkit2/webkit-web-extension.h>
+#undef GtkAction
+#else /* defined(GTK_DISABLE_DEPRECATED) */
+#include <webkit2/webkit-web-extension.h>
+#endif /* defined(GTK_DISABLE_DEPRECATED) */
+#include <string.h>
+
+#ifdef G_LOG_DOMAIN
+# undef G_LOG_DOMAIN
+#endif
+#define G_LOG_DOMAIN "html"
+
+
+#define LOAD_EXT_KEY "load-ext"
+
+
+G_MODULE_EXPORT void webkit_web_extension_initialize(WebKitWebExtension *extension);
+
+
+/*
+ * lbhf_chk_send_request
+ *
+ * Handler connected to the "send-request" signal of a WebKitWebPage.  Decides
+ * whether the resource addressed by the request may be fetched:
+ * - "about:blank" and "cid:" URIs are always processed;
+ * - for any other URI the per-page policy stored under LOAD_EXT_KEY is checked:
+ *   - policy TRUE: the request is processed unchanged;
+ *   - policy FALSE: the request is redirected to "about:blank" and processed;
+ *   - no policy stored: a warning is logged and the request is skipped.
+ *
+ * web_page: page issuing the request, carries the policy as object data
+ * request: the request to check, its URI may be replaced
+ * redirected_response, user_data: unused
+ *
+ * Returns TRUE to skip the request, FALSE to process it.
+ */
+static gboolean
+lbhf_chk_send_request(WebKitWebPage     *web_page,
+                      WebKitURIRequest  *request,
+                      WebKitURIResponse G_GNUC_UNUSED *redirected_response,
+                      gpointer          G_GNUC_UNUSED user_data)
+{
+    const gchar *uri = webkit_uri_request_get_uri(request);
+    gboolean result;        /* note: TRUE to skip this request, FALSE to process it */
+
+    /* defensive: never hand a NULL pointer to strcmp(), skip such a request */
+    if (uri == NULL) {
+        g_warning("[HTML filter] %s: request without uri", __func__);
+        return TRUE;
+    }
+
+    result = (strcmp(uri, "about:blank") != 0) && (strncmp(uri, "cid:", 4UL) != 0);
+    if (result) {
+        gboolean *load_val;
+
+        load_val = g_object_get_data(G_OBJECT(web_page), LOAD_EXT_KEY);
+        if (load_val == NULL) {
+            /* fail closed: result stays TRUE, i.e. the request is skipped */
+            g_warning("[HTML filter] %s: no policy for loading external resource %s", __func__, uri);
+        } else if (*load_val) {
+            g_debug("[HTML filter] %s: accept %s", __func__, uri);
+            result = FALSE;
+        } else {
+            /* log before redirecting: uri is owned by the request and must not be relied
+             * upon any more once the request's uri has been replaced */
+            g_debug("[HTML filter] %s: request for uri %s blocked", __func__, uri);
+            webkit_uri_request_set_uri(request, "about:blank");
+            result = FALSE;
+        }
+    }
+    return result;
+}
+
+
+/*
+ * lbhf_page_user_message_cb
+ *
+ * Handler connected to the "user-message-received" signal of a WebKitWebPage.
+ * A message named "load_ext" with a boolean parameter sets the page's policy
+ * for loading external resources; the value is stored as object data under
+ * LOAD_EXT_KEY and released with g_free() when replaced or when the page is
+ * finalised.
+ *
+ * web_page: page receiving the message
+ * message: the received user message
+ * user_data: unused
+ *
+ * Returns TRUE if the message has been handled, FALSE for an unknown message
+ * or a "load_ext" message lacking a boolean parameter (the stored policy is
+ * left untouched in this case).
+ */
+static gboolean
+lbhf_page_user_message_cb(WebKitWebPage     *web_page,
+                          WebKitUserMessage *message,
+                          gpointer          G_GNUC_UNUSED user_data)
+{
+    const gchar *name = webkit_user_message_get_name(message);
+    GVariant *data;
+    gboolean *load_val;
+
+    if (strcmp(name, "load_ext") != 0) {
+        g_debug("[HTML filter] %s: unexpected message '%s'", __func__, name);
+        return FALSE;
+    }
+
+    /* the parameters may be missing or of a wrong type: never read them unchecked */
+    data = webkit_user_message_get_parameters(message);
+    if ((data == NULL) || !g_variant_is_of_type(data, G_VARIANT_TYPE_BOOLEAN)) {
+        g_warning("[HTML filter] %s: message '%s' without boolean parameter ignored", __func__, name);
+        return FALSE;
+    }
+
+    load_val = g_new(gboolean, 1U);
+    *load_val = g_variant_get_boolean(data);
+    g_debug("[HTML filter] %s: page %p: load externals = %d", __func__, (void *) web_page, *load_val);
+    g_object_set_data_full(G_OBJECT(web_page), LOAD_EXT_KEY, load_val, g_free);
+    return TRUE;
+}
+
+
+/*
+ * lbhf_page_created_callback
+ *
+ * Handler for the "page-created" signal of the web extension: hook the filter
+ * callbacks into every newly created page, i.e. the request check
+ * (lbhf_chk_send_request) and the policy message receiver
+ * (lbhf_page_user_message_cb).
+ *
+ * extension, user_data: unused
+ * web_page: the page which has just been created
+ */
+static void
+lbhf_page_created_callback(WebKitWebExtension G_GNUC_UNUSED *extension,
+                           WebKitWebPage      *web_page,
+                           gpointer           G_GNUC_UNUSED user_data)
+{
+    g_debug("[HTML filter] %s: page %p created", __func__, web_page);
+
+    /* the two handlers serve different signals, so the connection order does not matter */
+    g_signal_connect(web_page, "send-request", G_CALLBACK(lbhf_chk_send_request), NULL);
+    g_signal_connect(web_page, "user-message-received", G_CALLBACK(lbhf_page_user_message_cb), NULL);
+}
+
+
+/*
+ * webkit_web_extension_initialize
+ *
+ * Exported entry point of the web extension.  Registers the "page-created"
+ * handler on the extension and, on the first call only (guarded by an atomic
+ * flag), sends the user message "balsa-html-filter" to the web context so the
+ * main Balsa process learns that the HTML filter extension has been found.
+ */
+G_MODULE_EXPORT void
+webkit_web_extension_initialize(WebKitWebExtension *extension)
+{
+    static guint main_notified = 0U;
+    WebKitUserMessage *hello;
+
+    g_debug("[HTML filter] %s", __func__);
+    g_signal_connect(extension, "page-created", G_CALLBACK(lbhf_page_created_callback), NULL);
+
+    /* the flag was already set: the main process has been notified before */
+    if (g_atomic_int_or(&main_notified, 1U) != 0U) {
+        return;
+    }
+
+    /* tell the main Balsa process that the HTML filter extension is present */
+    hello = webkit_user_message_new("balsa-html-filter", NULL);
+    webkit_web_extension_send_message_to_context(extension, hello, NULL, NULL, NULL);
+}
+
+#endif
diff --git a/libbalsa/html.c b/libbalsa/html.c
index 59c3e770c..42532561e 100644
--- a/libbalsa/html.c
+++ b/libbalsa/html.c
@@ -33,7 +33,9 @@
#include <stdio.h>
#include <string.h>
+#include <unistd.h>
#include <glib/gi18n.h>
+#include <glib/gstdio.h>
#ifdef HAVE_HTML_WIDGET
@@ -43,6 +45,15 @@
#define G_LOG_DOMAIN "html"
+/* Note:
+ * If the following macro is defined, look for the Webkit extension in the
+ * build folder first, and fall back to the install folder if the extension
+ * is not readable there.  Thus, this macro should be defined for testing and
+ * development, and undefined for release builds to avoid leaking the build folder name.
+ */
+#define WEB_EXT_CHECK_BUILDDIR 1
+
+
#define CID_REGEX "<[^>]*src\\s*=\\s*['\"]?\\s*cid:"
/* use a negative lookahead assertion to match "src=" *not* followed by "cid:" */
#define SRC_REGEX "<[^>]*src\\s*=\\s*(?!['\"]?\\s*cid:)"
@@ -59,6 +70,10 @@
#define LBH_NATURAL_SIZE (-1)
+/* indicates if Balsa's HTML filter webkit extension is available */
+static guint html_filter_found = 0U;
+
+
/*
* lbh_get_body_content
*
@@ -361,6 +376,20 @@ lbh_decide_policy_cb(WebKitWebView * web_view,
return TRUE;
}
+
+/*
+ * lbh_load_external_resources
+ *
+ * Send the user message "load_ext" with a boolean parameter to the page shown
+ * in the passed web view.
+ *
+ * web_view: view whose page shall receive the message
+ * load_resources: value of the boolean message parameter
+ */
+static void
+lbh_load_external_resources(WebKitWebView *web_view, gboolean load_resources)
+{
+    WebKitUserMessage *policy_msg;
+
+    policy_msg = webkit_user_message_new("load_ext", g_variant_new_boolean(load_resources));
+    /* no reply callback is passed, i.e. delivery of the message is not confirmed */
+    webkit_web_view_send_message_to_page(web_view, policy_msg, NULL, NULL, NULL);
+    /* NOTE(review): presumably this short pause gives the web process time to
+     * handle the message before the caller continues - confirm */
+    g_usleep(1000);
+}
+
+
/*
* Show the GtkInfoBar for asking about downloading images
*
@@ -369,25 +398,29 @@ lbh_decide_policy_cb(WebKitWebView * web_view,
static void
lbh_info_bar_response_cb(GtkInfoBar * info_bar,
- gint response_id, gpointer data)
+ gint response_id, gpointer data)
{
- LibBalsaWebKitInfo *info = data;
-
- if (response_id == GTK_RESPONSE_OK) {
- gchar *text;
-
- if (lbh_get_body_content_utf8(info->body, &text) >= 0) {
- WebKitSettings *settings;
-
- settings = webkit_web_view_get_settings(info->web_view);
- webkit_settings_set_auto_load_images(settings, TRUE);
- webkit_web_view_load_html(info->web_view, text, NULL);
- g_free(text);
- }
- }
+ LibBalsaWebKitInfo *info = data;
+
+ if (response_id == GTK_RESPONSE_OK) {
+ gchar *text;
+
+ if (lbh_get_body_content_utf8(info->body, &text) >= 0) {
+ if (g_atomic_int_get(&html_filter_found) != 0) {
+ lbh_load_external_resources(info->web_view, TRUE);
+ } else {
+ WebKitSettings *settings;
+
+ settings = webkit_web_view_get_settings(info->web_view);
+ webkit_settings_set_auto_load_images(settings, TRUE);
+ }
+ webkit_web_view_load_html(info->web_view, text, NULL);
+ g_free(text);
+ }
+ }
- gtk_widget_destroy(info->info_bar);
- info->info_bar = NULL;
+ gtk_widget_destroy(info->info_bar);
+ info->info_bar = NULL;
}
static void
@@ -403,26 +436,13 @@ lbh_info_bar(LibBalsaWebKitInfo * info)
GtkInfoBar *info_bar;
GtkWidget *label;
GtkWidget *content_area;
-#ifdef GTK_INFO_BAR_WRAPPING_IS_BROKEN
static const gchar text[] =
- N_("This message part contains images "
- "from a remote server.\n"
- "To protect your privacy, "
- "Balsa has not downloaded them.\n"
- "You may choose to download them "
- "if you trust the server.");
-#else /* GTK_INFO_BAR_WRAPPING_IS_BROKEN */
- static const gchar text[] =
- N_("This message part contains images "
- "from a remote server. "
- "To protect your privacy, "
- "Balsa has not downloaded them. "
- "You may choose to download them "
- "if you trust the server.");
-#endif /* GTK_INFO_BAR_WRAPPING_IS_BROKEN */
+ N_("This message part references contents on one or more external servers. "
+ "To protect your privacy, Balsa has not downloaded them. You may choose "
+ "to download them if you trust the sender of the message.");
info_bar_widget =
- gtk_info_bar_new_with_buttons(_("_Download images"),
+ gtk_info_bar_new_with_buttons(_("_Download external contents"),
GTK_RESPONSE_OK,
_("_Close"), GTK_RESPONSE_CLOSE,
NULL);
@@ -454,14 +474,15 @@ lbh_resource_notify_response_cb(WebKitWebResource * resource,
GParamSpec * pspec,
gpointer data)
{
- LibBalsaWebKitInfo *info = data;
+ LibBalsaWebKitInfo *info = (LibBalsaWebKitInfo *) data;
const gchar *mime_type;
WebKitURIResponse *response;
response = webkit_web_resource_get_response(resource);
mime_type = webkit_uri_response_get_mime_type(response);
g_debug("%s mime-type %s", __func__, mime_type);
- if (g_ascii_strncasecmp(mime_type, "image/", 6) != 0)
+ if ((g_atomic_int_get(&html_filter_found) != 0) ||
+ (g_ascii_strncasecmp(mime_type, "image/", 6) != 0))
return;
if (info->info_bar) {
@@ -589,6 +610,25 @@ lbh_context_menu_cb(WebKitWebView * web_view,
return retval;
}
+/*
+ * lbh_web_extension_cb
+ *
+ * Callback for user messages received by the web context.  The message
+ * "balsa-html-filter" sets the flag html_filter_found; any other message
+ * is logged only.
+ *
+ * context, user_data: unused
+ * message: the received user message
+ *
+ * Returns TRUE if the message has been handled, FALSE if it is unknown.
+ */
+static gboolean
+lbh_web_extension_cb(WebKitWebContext  G_GNUC_UNUSED *context,
+                     WebKitUserMessage *message,
+                     gpointer          G_GNUC_UNUSED user_data)
+{
+    if (strcmp(webkit_user_message_get_name(message), "balsa-html-filter") != 0) {
+        g_info("%s: unknown webkit extension message '%s'", __func__, webkit_user_message_get_name(message));
+        return FALSE;
+    }
+
+    g_atomic_int_or(&html_filter_found, 1U);
+    g_debug("%s: Balsa HTML filter WebKit extension found", __func__);
+    return TRUE;
+}
+
+
static WebKitWebContext *
lbh_get_web_view_context(void)
{
@@ -596,16 +636,52 @@ lbh_get_web_view_context(void)
if (g_once_init_enter(&context)) {
WebKitWebsiteDataManager *data_manager;
+ WebKitCookieManager *cookie_manager;
WebKitWebContext *tmp;
+ WebKitWebView *view;
gchar *cache_dir;
cache_dir = g_build_filename(g_get_home_dir(), ".balsa", CACHE_DIR, NULL);
data_manager = webkit_website_data_manager_new("base-cache-directory", cache_dir, NULL);
g_free(cache_dir);
+ webkit_website_data_manager_set_tls_errors_policy(data_manager,
WEBKIT_TLS_ERRORS_POLICY_FAIL);
+ cookie_manager = webkit_website_data_manager_get_cookie_manager(data_manager);
+ webkit_cookie_manager_set_accept_policy(cookie_manager, WEBKIT_COOKIE_POLICY_ACCEPT_NEVER);
tmp = webkit_web_context_new_with_website_data_manager(data_manager);
- webkit_web_context_set_cache_model(tmp, WEBKIT_CACHE_MODEL_DOCUMENT_BROWSER);
+#ifdef WEB_EXT_CHECK_BUILDDIR
+ g_debug("%s: WEB_EXT_CHECK_BUILDDIR is defined, check for '%s'", __func__,
BALSA_WEB_EXT_DEVEL "/libhtmlfilter.so");
+ if (g_access(BALSA_WEB_EXT_DEVEL "/libhtmlfilter.so", R_OK) == 0) {
+ g_debug("%s: set extensions folder '%s'", __func__, BALSA_WEB_EXT_DEVEL);
+ webkit_web_context_set_web_extensions_directory(tmp, BALSA_WEB_EXT_DEVEL);
+ } else {
+ g_debug("%s: set extensions folder '%s'", __func__, BALSA_WEB_EXTENSIONS);
+ webkit_web_context_set_web_extensions_directory(tmp, BALSA_WEB_EXTENSIONS);
+ }
+#else
+ g_debug("%s: set extensions folder '%s'", __func__, BALSA_WEB_EXTENSIONS);
+ webkit_web_context_set_web_extensions_directory(tmp, BALSA_WEB_EXTENSIONS);
+#endif
+ g_signal_connect(tmp, "user-message-received", G_CALLBACK(lbh_web_extension_cb), NULL);
+ webkit_web_context_set_cache_model(tmp, WEBKIT_CACHE_MODEL_WEB_BROWSER);
webkit_web_context_register_uri_scheme(tmp, "cid", lbh_cid_cb, NULL, NULL);
g_debug("%s: registered “cid:” scheme", __func__);
+
+ /* create a dummy view to trigger loading the html filter extension */
+ view = WEBKIT_WEB_VIEW(webkit_web_view_new_with_context(tmp));
+ webkit_web_view_load_uri(view, "about:blank");
+ while (webkit_web_view_is_loading(view)) {
+ gtk_main_iteration_do(FALSE);
+ g_usleep(100);
+ }
+ g_object_ref_sink(view);
+ g_object_unref(view);
+ if (g_atomic_int_get(&html_filter_found) != 0) {
+ g_debug("%s: Balsa HTML filter available", __func__);
+ } else {
+ libbalsa_information(LIBBALSA_INFORMATION_WARNING,
+ _("Balsa's external HTML resources filter web extension is not available in
the folder “%s”, "
+ "falling back to simplified image filtering. Please check your
installation. "), BALSA_WEB_EXTENSIONS);
+ }
g_once_init_leave(&context, tmp);
}
return context;
@@ -633,7 +709,9 @@ lbh_web_view_new(LibBalsaWebKitInfo *info,
webkit_settings_set_enable_javascript(settings, FALSE);
webkit_settings_set_enable_java(settings, FALSE);
webkit_settings_set_enable_hyperlink_auditing(settings, TRUE);
- webkit_settings_set_auto_load_images(settings, auto_load_images);
+ webkit_settings_set_auto_load_images(settings,
+ auto_load_images || (g_atomic_int_get(&html_filter_found) != 0));
+ lbh_load_external_resources(view, auto_load_images);
g_signal_connect(view, "web-process-terminated", G_CALLBACK(lbh_web_process_terminated_cb), info);
g_signal_connect(view, "decide-policy", G_CALLBACK(lbh_decide_policy_cb), info);
diff --git a/libbalsa/meson.build b/libbalsa/meson.build
index 8abc1824b..c4945695e 100644
--- a/libbalsa/meson.build
+++ b/libbalsa/meson.build
@@ -145,4 +145,11 @@ libbalsa_a = static_library('balsa', libbalsa_a_sources,
libimap_include],
install : false)
+libhtmlfilter_la = shared_library('htmlfilter',
+ 'html-filter.c',
+ dependencies : balsa_deps,
+ include_directories : [top_include],
+ install : true,
+ install_dir : balsa_web_extensions)
+
subdir('imap')
diff --git a/meson.build b/meson.build
index a84c775a7..601650cbd 100644
--- a/meson.build
+++ b/meson.build
@@ -168,6 +168,9 @@ libnetclient_deps = [glib_dep,
if html_widget == 'webkit2'
html_dep = dependency('webkit2gtk-4.0', version : '>= 2.28.0')
htmlpref_dep = dependency('sqlite3', version : '>= 3.24.0')
+ balsa_web_extensions = join_paths(get_option('prefix'), get_option('libdir'), 'balsa')
+ add_project_arguments('-DBALSA_WEB_EXTENSIONS="' + balsa_web_extensions + '"', language : 'c')
+ add_project_arguments('-DBALSA_WEB_EXT_DEVEL="' + join_paths(meson.current_build_dir(), 'libbalsa') + '"',
language : 'c')
html2text = find_program('python-html2text',
'html2markdown',
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]