[epiphany/gnome-3-18] EphyUriTester: Handle '@@' whitelisting exception rules
- From: Emanuele Aina <emaaa src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [epiphany/gnome-3-18] EphyUriTester: Handle '@@' whitelisting exception rules
- Date: Wed, 11 Nov 2015 12:44:53 +0000 (UTC)
commit af586fd3240162698325d42f8d3e294c9f0c8e8c
Author: Emanuele Aina <emanuele aina collabora com>
Date: Wed Nov 11 09:47:03 2015 +0000
EphyUriTester: Handle '@@' whitelisting exception rules
The AdBlockPlus filter language has some special rules that start with
'@@' and are meant to override some overly matching blocking rules by
whitelisting those which should succeed.
https://bugzilla.gnome.org/show_bug.cgi?id=754954
embed/web-extension/uri-tester.c | 154 ++++++++++++++++++++++++++++----------
1 files changed, 114 insertions(+), 40 deletions(-)
---
diff --git a/embed/web-extension/uri-tester.c b/embed/web-extension/uri-tester.c
index c62bedb..8758719 100644
--- a/embed/web-extension/uri-tester.c
+++ b/embed/web-extension/uri-tester.c
@@ -48,6 +48,11 @@ struct _UriTesterPrivate
GHashTable *optslist;
GHashTable *urlcache;
+ GHashTable *whitelisted_pattern;
+ GHashTable *whitelisted_keys;
+ GHashTable *whitelisted_optslist;
+ GHashTable *whitelisted_urlcache;
+
GString *blockcss;
GString *blockcssprivate;
@@ -300,36 +305,48 @@ uri_tester_check_rule (UriTester *tester,
GRegex *regex,
const char *patt,
const char *req_uri,
- const char *page_uri)
+ const char *page_uri,
+ gboolean whitelist)
{
char *opts;
+ GHashTable *optslist = tester->priv->optslist;
+ if (whitelist)
+ optslist = tester->priv->whitelisted_optslist;
if (!g_regex_match_full (regex, req_uri, -1, 0, 0, NULL, NULL))
return FALSE;
- opts = g_hash_table_lookup (tester->priv->optslist, patt);
+ opts = g_hash_table_lookup (optslist, patt);
if (opts && g_regex_match (tester->priv->regex_third_party, opts, 0, NULL))
{
if (page_uri && g_regex_match_full (regex, page_uri, -1, 0, 0, NULL, NULL))
return FALSE;
}
- /* TODO: Domain opt check */
- LOG ("blocked by pattern regexp=%s -- %s", g_regex_get_pattern (regex), req_uri);
+ /* TODO: Domain and document opt check */
+ if (whitelist)
+ LOG ("whitelisted by pattern regexp=%s -- %s", g_regex_get_pattern (regex), req_uri);
+ else
+ LOG ("blocked by pattern regexp=%s -- %s", g_regex_get_pattern (regex), req_uri);
return TRUE;
}
static inline gboolean
uri_tester_is_matched_by_pattern (UriTester *tester,
const char *req_uri,
- const char *page_uri)
+ const char *page_uri,
+ gboolean whitelist)
{
GHashTableIter iter;
gpointer patt, regex;
- g_hash_table_iter_init (&iter, tester->priv->pattern);
+ GHashTable *pattern = tester->priv->pattern;
+ if (whitelist)
+ pattern = tester->priv->whitelisted_pattern;
+
+ g_hash_table_iter_init (&iter, pattern);
while (g_hash_table_iter_next (&iter, &patt, &regex))
{
- if (uri_tester_check_rule(tester, regex, patt, req_uri, page_uri))
+ if (uri_tester_check_rule(tester, regex, patt, req_uri, page_uri, whitelist))
return TRUE;
}
return FALSE;
@@ -339,7 +356,8 @@ static inline gboolean
uri_tester_is_matched_by_key (UriTester *tester,
const char *opts,
const char *req_uri,
- const char *page_uri)
+ const char *page_uri,
+ gboolean whitelist)
{
UriTesterPrivate *priv = NULL;
char *uri;
@@ -349,9 +367,14 @@ uri_tester_is_matched_by_key (UriTester *tester,
GString *guri;
gboolean ret = FALSE;
char sig[SIGNATURE_SIZE + 1];
+ GHashTable *keys;
priv = tester->priv;
+ keys = priv->keys;
+ if (whitelist)
+ keys = priv->whitelisted_keys;
+
memset (&sig[0], 0, sizeof (sig));
/* Signatures are made on pattern, so we need to convert url to a pattern as well */
guri = uri_tester_fixup_regexp ("", (char*)req_uri);
@@ -362,12 +385,12 @@ uri_tester_is_matched_by_key (UriTester *tester,
{
GRegex *regex;
strncpy (sig, uri + pos, SIGNATURE_SIZE);
- regex = g_hash_table_lookup (priv->keys, sig);
+ regex = g_hash_table_lookup (keys, sig);
/* Dont check if regex is already blacklisted */
if (!regex || g_list_find (regex_bl, regex))
continue;
- ret = uri_tester_check_rule (tester, regex, sig, req_uri, page_uri);
+ ret = uri_tester_check_rule (tester, regex, sig, req_uri, page_uri, whitelist);
if (ret)
break;
regex_bl = g_list_prepend (regex_bl, regex);
@@ -381,32 +404,38 @@ static gboolean
uri_tester_is_matched (UriTester *tester,
const char *opts,
const char *req_uri,
- const char *page_uri)
+ const char *page_uri,
+ gboolean whitelist)
{
UriTesterPrivate *priv = NULL;
char *value;
+ GHashTable *urlcache;
priv = tester->priv;
+ urlcache = priv->urlcache;
+ if (whitelist)
+ urlcache = priv->whitelisted_urlcache;
+
/* Check cached URLs first. */
- if ((value = g_hash_table_lookup (priv->urlcache, req_uri)))
+ if ((value = g_hash_table_lookup (urlcache, req_uri)))
return (value[0] != '0') ? TRUE : FALSE;
/* Look for a match either by key or by pattern. */
- if (uri_tester_is_matched_by_key (tester, opts, req_uri, page_uri))
+ if (uri_tester_is_matched_by_key (tester, opts, req_uri, page_uri, whitelist))
{
- g_hash_table_insert (priv->urlcache, g_strdup (req_uri), g_strdup("1"));
+ g_hash_table_insert (urlcache, g_strdup (req_uri), g_strdup("1"));
return TRUE;
}
/* Matching by pattern is pretty expensive, so do it if needed only. */
- if (uri_tester_is_matched_by_pattern (tester, req_uri, page_uri))
+ if (uri_tester_is_matched_by_pattern (tester, req_uri, page_uri, whitelist))
{
- g_hash_table_insert (priv->urlcache, g_strdup (req_uri), g_strdup("1"));
+ g_hash_table_insert (urlcache, g_strdup (req_uri), g_strdup("1"));
return TRUE;
}
- g_hash_table_insert (priv->urlcache, g_strdup (req_uri), g_strdup("0"));
+ g_hash_table_insert (urlcache, g_strdup (req_uri), g_strdup("0"));
return FALSE;
}
@@ -468,8 +497,12 @@ uri_tester_fixup_regexp (const char *prefix, char *src)
static void
uri_tester_compile_regexp (UriTester *tester,
GString *gpatt,
- char *opts)
+ char *opts,
+ gboolean whitelist)
{
+ GHashTable *pattern;
+ GHashTable *keys;
+ GHashTable *optslist;
GRegex *regex;
GError *error = NULL;
char *patt;
@@ -491,6 +524,16 @@ uri_tester_compile_regexp (UriTester *tester,
return;
}
+ pattern = tester->priv->pattern;
+ keys = tester->priv->keys;
+ optslist = tester->priv->optslist;
+ if (whitelist)
+ {
+ pattern = tester->priv->whitelisted_pattern;
+ keys = tester->priv->whitelisted_keys;
+ optslist = tester->priv->whitelisted_optslist;
+ }
+
if (!g_regex_match (tester->priv->regex_pattern, patt, 0, NULL))
{
int signature_count = 0;
@@ -500,36 +543,36 @@ uri_tester_compile_regexp (UriTester *tester,
for (pos = len - SIGNATURE_SIZE; pos >= 0; pos--) {
sig = g_strndup (patt + pos, SIGNATURE_SIZE);
if (!strchr (sig, '*') &&
- !g_hash_table_lookup (tester->priv->keys, sig))
+ !g_hash_table_lookup (keys, sig))
{
LOG ("sig: %s %s", sig, patt);
- g_hash_table_insert (tester->priv->keys, g_strdup (sig), g_regex_ref (regex));
- g_hash_table_insert (tester->priv->optslist, g_strdup (sig), g_strdup (opts));
+ g_hash_table_insert (keys, g_strdup (sig), g_regex_ref (regex));
+ g_hash_table_insert (optslist, g_strdup (sig), g_strdup (opts));
signature_count++;
}
else
{
if (sig[0] == '*' &&
- !g_hash_table_lookup (tester->priv->pattern, patt))
+ !g_hash_table_lookup (pattern, patt))
{
LOG ("patt2: %s %s", sig, patt);
- g_hash_table_insert (tester->priv->pattern, g_strdup (patt), g_regex_ref (regex));
- g_hash_table_insert (tester->priv->optslist, g_strdup (patt), g_strdup (opts));
+ g_hash_table_insert (pattern, g_strdup (patt), g_regex_ref (regex));
+ g_hash_table_insert (optslist, g_strdup (patt), g_strdup (opts));
}
}
g_free (sig);
}
g_regex_unref (regex);
- if (signature_count > 1 && g_hash_table_lookup (tester->priv->pattern, patt))
- g_hash_table_remove (tester->priv->pattern, patt);
+ if (signature_count > 1 && g_hash_table_lookup (pattern, patt))
+ g_hash_table_steal (pattern, patt);
}
else
{
LOG ("patt: %s%s", patt, "");
/* Pattern is a regexp chars */
- g_hash_table_insert (tester->priv->pattern, g_strdup (patt), regex);
- g_hash_table_insert (tester->priv->optslist, g_strdup (patt), g_strdup (opts));
+ g_hash_table_insert (pattern, g_strdup (patt), regex);
+ g_hash_table_insert (optslist, g_strdup (patt), g_strdup (opts));
}
}
@@ -537,7 +580,8 @@ static void
uri_tester_add_url_pattern (UriTester *tester,
char *prefix,
char *type,
- char *line)
+ char *line,
+ gboolean whitelist)
{
char **data;
char *patt;
@@ -579,8 +623,12 @@ uri_tester_add_url_pattern (UriTester *tester,
format_patt = uri_tester_fixup_regexp (prefix, patt);
- LOG ("got: %s opts %s", format_patt->str, opts);
- uri_tester_compile_regexp (tester, format_patt, opts);
+ if (whitelist)
+ LOG ("whitelist: %s opts %s", format_patt->str, opts);
+ else
+ LOG ("blacklist: %s opts %s", format_patt->str, opts);
+
+ uri_tester_compile_regexp (tester, format_patt, opts, whitelist);
if (data[1] && data[2])
g_free (patt);
@@ -647,7 +695,7 @@ uri_tester_frame_add_private (UriTester *tester,
}
static void
-uri_tester_parse_line (UriTester *tester, char *line)
+uri_tester_parse_line (UriTester *tester, char *line, gboolean whitelist)
{
if (!line)
return;
@@ -656,12 +704,17 @@ uri_tester_parse_line (UriTester *tester, char *line)
/* Ignore comments and new lines */
if (line[0] == '!')
return;
- /* FIXME: No support for whitelisting */
- if (line[0] == '@' && line[1] == '@')
- return;
/* FIXME: No support for [include] and [exclude] tags */
if (line[0] == '[')
return;
+
+ /* Whitelisted exception rules */
+ if (g_str_has_prefix (line, "@@"))
+ {
+ uri_tester_parse_line (tester, line+2, TRUE);
+ return;
+ }
+
/* FIXME: No support for domain= */
if (strstr (line, "domain="))
return;
@@ -701,16 +754,16 @@ uri_tester_parse_line (UriTester *tester, char *line)
/* set a regex prefix to ensure that '||' patterns are anchored at the
* start and that any characters (if any) preceding the domain specified
* by the rule is separated from it by a dot '.' */
- uri_tester_add_url_pattern (tester, "^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?", "fulluri", line);
+ uri_tester_add_url_pattern (tester, "^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?", "fulluri", line, whitelist);
return;
}
if (line[0] == '|')
{
(void)*line++;
- uri_tester_add_url_pattern (tester, "^", "fulluri", line);
+ uri_tester_add_url_pattern (tester, "^", "fulluri", line, whitelist);
return;
}
- uri_tester_add_url_pattern (tester, "", "uri", line);
+ uri_tester_add_url_pattern (tester, "", "uri", line, whitelist);
}
static void
@@ -729,7 +782,7 @@ file_parse_cb (GDataInputStream *stream, GAsyncResult *result, UriTester *tester
return;
}
- uri_tester_parse_line (tester, line);
+ uri_tester_parse_line (tester, line, FALSE);
g_free (line);
g_data_input_stream_read_line_async (stream, G_PRIORITY_DEFAULT_IDLE, NULL,
@@ -798,6 +851,19 @@ uri_tester_init (UriTester *tester)
(GDestroyNotify)g_free,
(GDestroyNotify)g_free);
+ priv->whitelisted_pattern = g_hash_table_new_full (g_str_hash, g_str_equal,
+ (GDestroyNotify)g_free,
+ (GDestroyNotify)g_regex_unref);
+ priv->whitelisted_keys = g_hash_table_new_full (g_str_hash, g_str_equal,
+ (GDestroyNotify)g_free,
+ (GDestroyNotify)g_regex_unref);
+ priv->whitelisted_optslist = g_hash_table_new_full (g_str_hash, g_str_equal,
+ (GDestroyNotify)g_free,
+ (GDestroyNotify)g_free);
+ priv->whitelisted_urlcache = g_hash_table_new_full (g_str_hash, g_str_equal,
+ (GDestroyNotify)g_free,
+ (GDestroyNotify)g_free);
+
priv->blockcss = g_string_new ("z-non-exist");
priv->blockcssprivate = g_string_new ("");
@@ -868,6 +934,11 @@ uri_tester_finalize (GObject *object)
g_hash_table_destroy (priv->optslist);
g_hash_table_destroy (priv->urlcache);
+ g_hash_table_destroy (priv->whitelisted_pattern);
+ g_hash_table_destroy (priv->whitelisted_keys);
+ g_hash_table_destroy (priv->whitelisted_optslist);
+ g_hash_table_destroy (priv->whitelisted_urlcache);
+
g_string_free (priv->blockcss, TRUE);
g_string_free (priv->blockcssprivate, TRUE);
@@ -925,7 +996,10 @@ uri_tester_test_uri (UriTester *tester,
if (type == AD_URI_CHECK_TYPE_DOCUMENT)
return FALSE;
- return uri_tester_is_matched (tester, NULL, req_uri, page_uri);
+ /* check whitelisting rules before the normal ones */
+ if (uri_tester_is_matched (tester, NULL, req_uri, page_uri, TRUE))
+ return FALSE;
+ return uri_tester_is_matched (tester, NULL, req_uri, page_uri, FALSE);
}
void
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]