[evolution-data-server/gnome-3-38] evo-I#1070 - Improve URL scanner (detect angle brackets) ][

commit b15bab8ac0226667b1b1864af097fc8fcd0ff4bb
Author: Milan Crha <mcrha redhat com>
Date:   Wed Sep 16 13:42:05 2020 +0200

    evo-I#1070 - Improve URL scanner (detect angle brackets) ][
    The previous commit missed the buffer tagger, which uses a different
    method of URL scanning/detection.
    Related to https://gitlab.gnome.org/GNOME/evolution/-/issues/1070

 src/libedataserverui/e-buffer-tagger.c | 51 +++++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)
diff --git a/src/libedataserverui/e-buffer-tagger.c b/src/libedataserverui/e-buffer-tagger.c
index 6af23d004..c52dc78dd 100644
--- a/src/libedataserverui/e-buffer-tagger.c
+++ b/src/libedataserverui/e-buffer-tagger.c
@@ -155,7 +155,56 @@ markup_text (GtkTextBuffer *buffer)
                any = FALSE;
                for (i = 0; i < G_N_ELEMENTS (mim); i++) {
                        if (mim[i].preg && !regexec (mim[i].preg, str, 2, pmatch, 0)) {
-                               gint char_so, char_eo;
+                               gint char_so, char_eo, rm_eo;
+                               /* Stop on the angle brackets, which cannot be part of the URL (see RFC 3986 Appendix C) */
+                               for (rm_eo = pmatch[0].rm_eo - 1; rm_eo > pmatch[0].rm_so; rm_eo--) {
+                                       if (str[rm_eo] == '<' || str[rm_eo] == '>') {
+                                               pmatch[0].rm_eo = rm_eo;
+                                               break;
+                                       }
+                               }
+                               rm_eo = pmatch[0].rm_eo;
+                               /* URLs are extremely unlikely to end with any
+                                * punctuation, so strip any trailing
+                                * punctuation off. Also strip off any closing
+                                * double-quotes. */
+                               while (rm_eo > pmatch[0].rm_so && strchr (",.:;?!-|}])\">", str[rm_eo - 1])) {
+                                       gchar open_bracket = 0, close_bracket = str[rm_eo - 1];
+                                       if (close_bracket == ')')
+                                               open_bracket = '(';
+                                       else if (close_bracket == '}')
+                                               open_bracket = '{';
+                                       else if (close_bracket == ']')
+                                               open_bracket = '[';
+                                       else if (close_bracket == '>')
+                                               open_bracket = '<';
+                                       if (open_bracket != 0) {
+                                               const gchar *ptr, *endptr;
+                                               gint n_opened = 0, n_closed = 0;
+                                               endptr = str + rm_eo;
+                                               for (ptr = str + pmatch[0].rm_so; ptr < endptr; ptr++) {
+                                                       if (*ptr == open_bracket)
+                                                               n_opened++;
+                                                       else if (*ptr == close_bracket)
+                                                               n_closed++;
+                                               }
+                                               /* The closing bracket can match one inside the URL,
+                                                  thus keep it there. */
+                                               if (n_opened > 0 && n_opened - n_closed >= 0)
+                                                       break;
+                                       }
+                                       rm_eo--;
+                                       pmatch[0].rm_eo--;
+                               }
                                char_so = g_utf8_pointer_to_offset (str, str + pmatch[0].rm_so);
                                char_eo = g_utf8_pointer_to_offset (str, str + pmatch[0].rm_eo);

[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]