[evolution-data-server] Bug 793504 - Links containing Cyrillic characters are not highlighted properly
- From: Milan Crha <mcrha src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [evolution-data-server] Bug 793504 - Links containing Cyrillic characters are not highlighted properly
- Date: Tue, 20 Feb 2018 14:52:19 +0000 (UTC)
commit 101b9888f1dccec6aaf8f6a172b27996f9e68eb5
Author: Milan Crha <mcrha redhat com>
Date: Tue Feb 20 15:53:06 2018 +0100
Bug 793504 - Links containing Cyrillic characters are not highlighted properly
src/camel/camel-url-scanner.c | 201 +++++++++--------------------------------
1 files changed, 43 insertions(+), 158 deletions(-)
---
diff --git a/src/camel/camel-url-scanner.c b/src/camel/camel-url-scanner.c
index d86a9bc..e1bb62c 100644
--- a/src/camel/camel-url-scanner.c
+++ b/src/camel/camel-url-scanner.c
@@ -136,6 +136,12 @@ camel_url_scanner_scan (CamelUrlScanner *scanner,
return FALSE;
}
+/* stephenhay from https://mathiasbynens.be/demo/url-regex */
+#define URL_PROTOCOLS "news|telnet|nntp|file|https?|s?ftp|webcal|localhost|ssh"
+#define URL_PATTERN "((?:(?:(?:" URL_PROTOCOLS ")\\:\\/\\/)|(?:www\\.|ftp\\.))[^\\s\\/\\$\\.\\?#].[^\\s]*+)"
+
+#define FILE_PATTERN "((?:(?:(?:file)\\:\\/\\/)|(?:(?:file)\\:\\/\\/\\/))[^\\s\\/\\$\\.\\?#].[^\\s]*+)"
+
static guchar url_scanner_table[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 1, 1, 9, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -199,27 +205,48 @@ is_open_brace (gchar c)
return FALSE;
}
-static char
-url_stop_at_brace (const gchar *in,
- gsize so,
- gchar *open_brace)
+static gboolean
+camel_url_pattern_end (const gchar *in,
+ const gchar *pos,
+ const gchar *inend,
+ CamelUrlMatch *match,
+ const gchar *pattern,
+ gboolean remove_trailing_bad_chars)
{
- gint i;
+ GRegex *regex;
+ GMatchInfo *match_info = NULL;
+ gboolean success = FALSE;
+
+ regex = g_regex_new (pattern, 0, 0, NULL);
+ if (!regex)
+ return FALSE;
+
+ if (g_regex_match_all_full (regex, pos, inend - pos, 0, G_REGEX_MATCH_NOTEMPTY, &match_info, NULL) &&
match_info &&
+ g_match_info_matches (match_info)) {
+ gint start_pos, end_pos;
+
+ if (g_match_info_fetch_pos (match_info, 0, &start_pos, &end_pos) && start_pos == 0 && end_pos
0 && end_pos <= inend - pos) {
+ const gchar *inptr = pos + end_pos;
- if (open_brace != NULL)
- *open_brace = '\0';
+ success = TRUE;
- if (so > 0) {
- for (i = 0; i < G_N_ELEMENTS (url_braces); i++) {
- if (in[so - 1] == url_braces[i].open) {
- if (open_brace != NULL)
- *open_brace = url_braces[i].open;
- return url_braces[i].close;
+ if (remove_trailing_bad_chars) {
+ /* urls are extremely unlikely to end with any
+ * punctuation, so strip any trailing
+ * punctuation off. Also strip off any closing
+ * double-quotes. */
+ while (inptr > pos && strchr (",.:;?!-|}])\"", inptr[-1]))
+ inptr--;
}
+
+ match->um_eo = (inptr - in);
}
}
- return '\0';
+ g_match_info_free (match_info);
+ g_regex_unref (regex);
+
+ return success;
}
gboolean
@@ -340,25 +367,7 @@ camel_url_file_end (const gchar *in,
const gchar *inend,
CamelUrlMatch *match)
{
- register const gchar *inptr = pos;
- gchar close_brace;
-
- inptr += strlen (match->pattern);
-
- if (*inptr == '/')
- inptr++;
-
- close_brace = url_stop_at_brace (in, match->um_so, NULL);
-
- while (inptr < inend && is_urlsafe (*inptr) && *inptr != close_brace)
- inptr++;
-
- if (inptr == pos)
- return FALSE;
-
- match->um_eo = (inptr - in);
-
- return TRUE;
+ return camel_url_pattern_end (in, pos, inend, match, FILE_PATTERN, FALSE);
}
gboolean
@@ -384,131 +393,7 @@ camel_url_web_end (const gchar *in,
const gchar *inend,
CamelUrlMatch *match)
{
- register const gchar *inptr = pos;
- gboolean passwd = FALSE;
- const gchar *save;
- gchar close_brace, open_brace;
- gint brace_stack = 0;
- gint port;
-
- inptr += strlen (match->pattern);
-
- close_brace = url_stop_at_brace (in, match->um_so, &open_brace);
-
- /* find the end of the domain */
- if (is_atom (*inptr)) {
- /* might be a domain or user@domain */
- save = inptr;
- while (inptr < inend) {
- if (!is_atom (*inptr))
- break;
-
- inptr++;
-
- while (inptr < inend && is_atom (*inptr))
- inptr++;
-
- if ((inptr + 1) < inend && *inptr == '.' && (is_atom (inptr[1]) || inptr[1] == '/'))
- inptr++;
- }
-
- if (*inptr != '@')
- inptr = save;
- else
- inptr++;
-
- goto domain;
- } else if (is_domain (*inptr)) {
- domain:
- while (inptr < inend) {
- if (!is_domain (*inptr))
- break;
-
- inptr++;
-
- while (inptr < inend && is_domain (*inptr))
- inptr++;
-
- if ((inptr + 1) < inend && *inptr == '.' && (is_domain (inptr[1]) || inptr[1] == '/'))
- inptr++;
- }
- } else {
- return FALSE;
- }
-
- if (inptr < inend) {
- switch (*inptr) {
- case ':': /* we either have a port or a password */
- inptr++;
-
- if (is_digit (*inptr) || passwd) {
- port = (*inptr++ - '0');
-
- while (inptr < inend && is_digit (*inptr) && port < 65536)
- port = (port * 10) + (*inptr++ - '0');
-
- if (!passwd && (port >= 65536 || *inptr == '@')) {
- if (inptr < inend) {
- /* this must be a password? */
- goto passwd;
- }
-
- inptr--;
- }
- } else {
- passwd:
- passwd = TRUE;
- save = inptr;
-
- while (inptr < inend && is_atom (*inptr))
- inptr++;
-
- if ((inptr + 2) < inend) {
- if (*inptr == '@') {
- inptr++;
- if (is_domain (*inptr))
- goto domain;
- }
-
- return FALSE;
- }
- }
-
- if (inptr >= inend || *inptr != '/')
- break;
-
- /* we have a '/' so there could be a path - fall through */
- case '/': /* we've detected a path component to our url */
- inptr++;
- /* coverity[fallthrough] */
- case '?':
- while (inptr < inend && is_urlsafe (*inptr)) {
- if (*inptr == open_brace) {
- brace_stack++;
- } else if (*inptr == close_brace) {
- brace_stack--;
- if (brace_stack == -1)
- break;
- }
- inptr++;
- }
-
- break;
- default:
- break;
- }
- }
-
- /* urls are extremely unlikely to end with any
- * punctuation, so strip any trailing
- * punctuation off. Also strip off any closing
- * double-quotes. */
- while (inptr > pos && strchr (",.:;?!-|}])\"", inptr[-1]))
- inptr--;
-
- match->um_eo = (inptr - in);
-
- return TRUE;
+ return camel_url_pattern_end (in, pos, inend, match, URL_PATTERN, TRUE);
}
#ifdef BUILD_TABLE
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]