[gnome-shell] findUrl: be pickier about what can precede a URL



commit e2898bea5c3f79d4ee8cf058936d2e6c8507144b
Author: Dan Winship <danw gnome org>
Date:   Wed Apr 13 09:40:28 2011 -0400

    findUrl: be pickier about what can precede a URL
    
    findUrl() was seeing strings like "You have 1 new message in
    foo@example.com/Inbox" and finding the URL
    "[http://]example.com/Inbox". Require that URLs either start at the
    start of the string, or are preceded by whitespace or an open
    paren/quote/etc.
    
    (Since JS doesn't have look-behind assertions like perl does, we have
    to actually match the URL-preceding character in the regex, and then
    adjust the result findUrl returns accordingly.)
    
    https://bugzilla.gnome.org/show_bug.cgi?id=636252

 js/misc/util.js |    6 ++++--
 1 files changed, 4 insertions(+), 2 deletions(-)
---
diff --git a/js/misc/util.js b/js/misc/util.js
index 3ba2b7d..ee3c27f 100644
--- a/js/misc/util.js
+++ b/js/misc/util.js
@@ -9,10 +9,12 @@ const Main = imports.ui.main;
 
 // http://daringfireball.net/2010/07/improved_regex_for_matching_urls
 const _balancedParens = '\\((?:[^\\s()<>]+|(?:\\(?:[^\\s()<>]+\\)))*\\)';
+const _leadingJunk = '[\\s`(\\[{\'\\"<\u00AB\u201C\u2018]';
 const _notTrailingJunk = '[^\\s`!()\\[\\]{};:\'\\".,<>?\u00AB\u00BB\u201C\u201D\u2018\u2019]';
 
 const _urlRegexp = new RegExp(
-    '\\b(' +
+    '(^|' + _leadingJunk + ')' +
+    '(' +
         '(?:' +
             '[a-z][\\w-]+://' +                   // scheme://
             '|' +
@@ -43,7 +45,7 @@ const _urlRegexp = new RegExp(
 function findUrls(str) {
     let res = [], match;
     while ((match = _urlRegexp.exec(str)))
-        res.push({ url: match[0], pos: match.index });
+        res.push({ url: match[2], pos: match.index + match[1].length });
     return res;
 }
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]