[gedit] Refactored linkparsing and added support for more tools

From: Paolo Borelli <pborelli src gnome org>
To: svn-commits-list gnome org
Cc:
Subject: [gedit] Refactored linkparsing and added support for more tools
Date: Mon, 4 Jan 2010 11:35:09 +0000 (UTC)
commit 434e693ff10e017492e3eb4059dbf576ebfecf14
Author: Per Arneng <per arneng anyplanet com>
Date:   Mon Jan 4 00:21:43 2010 +0100

    Refactored linkparsing and added support for more tools
    
    * Made it simpler to add regular expression based parsers
    * Regexp is now verbose and expects named capturing groups lnk, pth and ln
    * Created a standard regexp that covers ~ "\n/tmp/my_file.ext:23: ..."
    * Created regex for valac
    * Changed python parser from a class to regexp
    * Refactored the linkparsing_test.py to make it easier to write tests
    * Verified that scalac works on REGEXP_STANDARD with unit test
    * Verified that 6g (go) works on REGEXP_STANDARD with unit test
    * Added regexp for perl output
    * Added regexp for mcs (c#) output
    * tools supported (verified) to date:
        gcc, python, javac, valac, ruby, scalac, 6g (go), perl, mcs (c#)

 plugins/externaltools/tools/linkparsing.py      |  163 +++++++++++++++++-----
 plugins/externaltools/tools/linkparsing_test.py |  136 ++++++++++++++-----
 2 files changed, 226 insertions(+), 73 deletions(-)
---
diff --git a/plugins/externaltools/tools/linkparsing.py b/plugins/externaltools/tools/linkparsing.py
index b0f60df..46a38d6 100644
--- a/plugins/externaltools/tools/linkparsing.py
+++ b/plugins/externaltools/tools/linkparsing.py
@@ -21,7 +21,9 @@ import re
 class Link:
     """
     This class represents a file link from within a string given by the
-    output of some software tool.
+    output of some software tool. A link contains a reference to a file, the
+    line number within the file and the boundaries within the given output
+    string that should be marked as a link.
     """
 
     def __init__(self, path, line_nr, start, end):
@@ -37,28 +39,47 @@ class Link:
         self.end     = end
 
     def __repr__(self):
-        return "%s[%s](%s:%s)" % (self.path, self.line_nr, 
+        return "%s[%s](%s:%s)" % (self.path, self.line_nr,
                                   self.start, self.end)
 
 class LinkParser:
     """
     Parses a text using different parsing providers with the goal of finding one
-    or more file links within the text. A typicak example could be the output
+    or more file links within the text. A typical example could be the output
     from a compiler that specifies an error in a specific file. The path of the
     file, the line nr and some more info is then returned so that it can be used
     to be able to navigate from the error output in to the specific file.
 
     The actual work of parsing the text is done by instances of classes that
-    inherits from LinkParserProvider. To add a new parser just create a class
-    that inherits from LinkParserProvider and override the parse method. Then
-    you need to register the class in the _provider list of this class wich is
-    done in the class constructor.
+    inherits from AbstractLinkParser or by regular expressions. To add a new
+    parser just create a class that inherits from AbstractLinkParser and then
+    register in this class constructor using the method add_parser. If you want
+    to add a regular expression then just call add_regexp in this class
+    constructor and provide your regexp string as argument.
     """
 
     def __init__(self):
         self._providers = []
-        self._providers.append(GccLinkParserProvider())
-        self._providers.append(PythonLinkParserProvider())
+        self.add_regexp(REGEXP_STANDARD)
+        self.add_regexp(REGEXP_PYTHON)
+        self.add_regexp(REGEXP_VALAC)
+        self.add_regexp(REGEXP_RUBY)
+        self.add_regexp(REGEXP_PERL)
+        self.add_regexp(REGEXP_MCS)
+
+    def add_parser(self, parser):
+        self._providers.append(parser)
+
+    def add_regexp(self, regexp):
+        """
+        Adds a regular expression string that should match a link using
+        re.MULTILINE and re.VERBOSE regexp. The area marked as a link should
+        be captured by a group named lnk. The path of the link should be
+        captured by a group named pth. The line number should be captured by
+        a group named ln. To read more about this look at the documentation
+        for the RegexpLinkParser constructor.
+        """
+        self.add_parser(RegexpLinkParser(regexp))
 
     def parse(self, text):
         """
@@ -79,7 +100,7 @@ class LinkParser:
 
         return links
 
-class LinkParserProvider:
+class AbstractLinkParser(object):
     """The "abstract" base class for link parses"""
 
     def parse(self, text):
@@ -88,46 +109,114 @@ class LinkParserProvider:
         argument (never None) and then returns a list of Link objects. If no
         links are found then an empty list is expected. The Link class is
         defined in this module. If you do not override this method then a
-        NotImplementedError will be thrown. 
+        NotImplementedError will be thrown.
 
         text -- the text to parse. This argument is never None.
         """
         raise NotImplementedError("need to implement a parse method")
 
-class GccLinkParserProvider(LinkParserProvider):
+class RegexpLinkParser(AbstractLinkParser):
+    """
+    A class that represents parsers that only use one single regular expression.
+    It can be used by subclasses or by itself. See the constructor documentation
+    for details about the rules surrounding the regexp.
+    """
 
-    def __init__(self):
-        self.fm = re.compile("^(.*)\:(\d+)\:", re.MULTILINE)
+    def __init__(self, regex):
+        """
+        Creates a new RegexpLinkParser based on the given regular expression.
+        The regular expression is multiline and verbose (see Python docs on
+        compilation flags). The regular expression should contain three named
+        capturing groups 'lnk', 'pth' and 'ln'. 'lnk' represents the area which
+        should be marked as a link in the text. 'pth' is the path that should
+        be looked for and 'ln' is the line number in that file.
+        """
+        self.re = re.compile(regex, re.MULTILINE | re.VERBOSE)
 
     def parse(self, text):
         links = []
-        for m in re.finditer(self.fm, text):
-            path = m.group(1)
-            line_nr = m.group(2)
-            start = m.start(1)
-            end = m.end(2)
+        for m in re.finditer(self.re, text):
+            path = m.group("pth")
+            line_nr = m.group("ln")
+            start = m.start("lnk")
+            end = m.end("lnk")
             link = Link(path, line_nr, start, end)
             links.append(link)
 
         return links
 
-class PythonLinkParserProvider(LinkParserProvider):
-
-    def __init__(self):
-        # example:
-        #  File "test.py", line 10, in <module>
-        self.fm = re.compile("^  File \"([^\"]+)\", line (\d+),", re.MULTILINE)
-
-    def parse(self, text):
-        links = []
-        for m in re.finditer(self.fm, text):
-            path = m.group(1)
-            line_nr = m.group(2)
-            start = m.start(1) - 1
-            end = m.end(2)
-            link = Link(path, line_nr, start, end)
-            links.append(link)
-
-        return links
+# gcc 'test.c:13: warning: ...'
+# javac 'Test.java:13: ...'
+# ruby 'test.rb:5: ...'
+# scalac 'Test.scala:5: ...'
+# 6g (go) 'test.go:9: ...'
+REGEXP_STANDARD = r"""
+^
+(?P<lnk>
+    (?P<pth> .*[a-z0-9] )
+    \:
+    (?P<ln> \d+)
+)
+\:\s"""
+
+# python '  File "test.py", line 13'
+REGEXP_PYTHON = r"""
+^\s\sFile\s
+(?P<lnk>
+    \"
+    (?P<pth> [^\"]+ )
+    \",\sline\s
+    (?P<ln> \d+ )
+),"""
+
+# valac 'Test.vala:13.1-13.3: ...'
+REGEXP_VALAC = r"""
+^(?P<lnk>
+    (?P<pth>
+        .*vala
+    )
+    \:
+    (?P<ln>
+        \d+
+    )
+    \.\d+-\d+\.\d+
+ )\: """
+
+#ruby
+#test.rb:5: ...
+#	from test.rb:3:in `each'
+# first line parsed by REGEXP_STANDARD
+REGEXP_RUBY = r"""
+^\s+from\s
+(?P<lnk>
+    (?P<pth>
+        .*
+    )
+    \:
+    (?P<ln>
+        \d+
+    )
+ )"""
+
+# perl 'syntax error at test.pl line 88, near "$fake_var'
+REGEXP_PERL = r"""
+\sat\s
+(?P<lnk>
+    (?P<pth> .* )
+    \sline\s
+    (?P<ln> \d+ )
+)"""
+
+# mcs (C#) 'Test.cs(12,7): error CS0103: The name `fakeMethod'
+REGEXP_MCS = r"""
+^
+(?P<lnk>
+    (?P<pth> .*\.[cC][sS] )
+    \(
+    (?P<ln> \d+ )
+    ,\d+\)
+)
+\:\s
+"""
 
 # ex:ts=4:et:
diff --git a/plugins/externaltools/tools/linkparsing_test.py b/plugins/externaltools/tools/linkparsing_test.py
index 734229b..4ab2a96 100644
--- a/plugins/externaltools/tools/linkparsing_test.py
+++ b/plugins/externaltools/tools/linkparsing_test.py
@@ -18,14 +18,23 @@
 
 import unittest
 from linkparsing import LinkParser
-from linkparsing import GccLinkParserProvider
-from linkparsing import PythonLinkParserProvider
 
 class LinkParserTest(unittest.TestCase):
 
     def setUp(self):
         self.p = LinkParser()
 
+    def assert_link_count(self, links, expected_count):
+        self.assertEquals(len(links), expected_count, 'incorrect nr of links')
+
+    def assert_link(self, actual, path, line_nr):
+        self.assertEquals(actual.path, path, "incorrect path")
+        self.assertEquals(actual.line_nr, line_nr, "incorrect line nr")
+
+    def assert_link_text(self, text, link, link_text):
+        self.assertEquals(text[link.start:link.end], link_text,
+           "the expected link text does not match the text within the string")
+
     def test_parse_gcc_simple_test_with_real_output(self):
         gcc_output = """
 test.c: In function 'f':
@@ -38,35 +47,32 @@ test.c:13: error: too few arguments to function 'f'
 test.c:14: error: expected ';' before 'return'
 """
         links = self.p.parse(gcc_output)
-        self.assertEquals(len(links), 6, 'incorrect nr of links')
+        self.assert_link_count(links, 6)
         lnk = links[2]
-        self.assertEquals(lnk.path, 'test.c', 'incorrect path')
-        self.assertEquals(lnk.line_nr, 11, 'incorrect line nr')
-        self.assertEquals(gcc_output[lnk.start:lnk.end], 'test.c:11',
-                            'the link positions are incorrect')
+        self.assert_link(lnk, "test.c", 11)
+        self.assert_link_text(gcc_output, lnk, "test.c:11")
 
     def test_parse_gcc_one_line(self):
-        links = self.p.parse("/tmp/myfile.c:1212: error: ...")
-        self.assertEquals(len(links), 1, 'incorrect nr of links')
+        line = "/tmp/myfile.c:1212: error: ..."
+        links = self.p.parse(line)
+        self.assert_link_count(links, 1)
         lnk = links[0]
-        self.assertEquals(lnk.path, '/tmp/myfile.c', 'incorrect path')
-        self.assertEquals(lnk.line_nr, 1212, 'incorrect line nr')
-        self.assertEquals(lnk.start, 0, 'incorrect start point')
-        self.assertEquals(lnk.end, 18, 'incorrect end point')
+        self.assert_link(lnk, "/tmp/myfile.c", 1212)
+        self.assert_link_text(line, lnk, "/tmp/myfile.c:1212")
 
     def test_parse_gcc_empty_string(self):
         links = self.p.parse("")
-        self.assertEquals(len(links), 0, 'incorrect nr of links')
+        self.assert_link_count(links, 0)
 
     def test_parse_gcc_no_files_in_text(self):
         links = self.p.parse("no file links in this string")
-        self.assertEquals(len(links), 0, 'incorrect nr of links')
+        self.assert_link_count(links, 0)
 
     def test_parse_gcc_none_as_argument(self):
         self.assertRaises(ValueError, self.p.parse, None)
 
     def test_parse_python_simple_test_with_real_output(self):
-        python_output = """
+        output = """
 Traceback (most recent call last):
   File "test.py", line 10, in <module>
     err()
@@ -76,30 +82,88 @@ Traceback (most recent call last):
     int('xxx')
 ValueError: invalid literal for int() with base 10: 'xxx'
 """
-        links = self.p.parse(python_output)
-        self.assertEquals(len(links), 3, 'incorrect nr of links')
+        links = self.p.parse(output)
+        self.assert_link_count(links, 3)
         lnk = links[2]
-        self.assertEquals(lnk.path, 'test.py', 'incorrect path')
-        self.assertEquals(lnk.line_nr, 4, 'incorrect line nr')
-        link_string = python_output[lnk.start:lnk.end]
-        self.assertEquals(link_string, '"test.py", line 4',
-                            'the link positions are incorrect')
-        lnk = links[1]
-        self.assertEquals(lnk.path, 'test.py', 'incorrect path')
-        self.assertEquals(lnk.line_nr, 7, 'incorrect line nr')
-        link_string = python_output[lnk.start:lnk.end]
-        self.assertEquals(link_string, '"test.py", line 7',
-                            'the link positions are incorrect')
+        self.assert_link(lnk, "test.py", 4)
+        self.assert_link_text(output, lnk, '"test.py", line 4')
 
     def test_parse_python_one_line(self):
-        links = self.p.parse("  File \"test.py\", line 10, in <module>")
-        self.assertEquals(len(links), 1, 'incorrect nr of links')
+        line = "  File \"test.py\", line 10, in <module>"
+        links = self.p.parse(line)
+        self.assert_link_count(links, 1)
+        lnk = links[0]
+        self.assert_link(lnk, "test.py", 10)
+        self.assert_link_text(line, lnk, '"test.py", line 10')
+
+    def test_parse_javac_one_line(self):
+        line = "/tmp/Test.java:10: incompatible types"
+        links = self.p.parse(line)
+        self.assert_link_count(links, 1)
+        lnk = links[0]
+        self.assert_link(lnk, "/tmp/Test.java", 10)
+        self.assert_link_text(line, lnk, '/tmp/Test.java:10')
+
+    def test_parse_valac_simple_test_with_real_output(self):
+        output = """
+Test.vala:14.13-14.21: error: Assignment: Cannot convert from `string' to `int'
+        int a = "xxx";
+            ^^^^^^^^^
+"""
+        links = self.p.parse(output)
+        self.assert_link_count(links, 1)
+        lnk = links[0]
+        self.assert_link(lnk, "Test.vala", 14)
+        self.assert_link_text(output, lnk, 'Test.vala:14.13-14.21')
+
+    def test_parse_ruby_simple_test_with_real_output(self):
+        output = """
+test.rb:5: undefined method `fake_method' for main:Object (NoMethodError)
+	from test.rb:3:in `each'
+	from test.rb:3
+"""
+        links = self.p.parse(output)
+        self.assert_link_count(links, 3)
         lnk = links[0]
-        self.assertEquals(lnk.path, 'test.py', 'incorrect path')
-        self.assertEquals(lnk.line_nr, 10, 'incorrect line nr')
-        self.assertEquals(lnk.start, 7, 'incorrect start point')
-        self.assertEquals(lnk.end, 25, 'incorrect end point')
-        
+        self.assert_link(lnk, "test.rb", 5)
+        self.assert_link_text(output, lnk, 'test.rb:5')
+        lnk = links[1]
+        self.assert_link(lnk, "test.rb", 3)
+        self.assert_link_text(output, lnk, 'test.rb:3')
+
+
+    def test_parse_scalac_one_line(self):
+        line = "Test.scala:7: error: not found: value fakeMethod"
+        links = self.p.parse(line)
+        self.assert_link_count(links, 1)
+        lnk = links[0]
+        self.assert_link(lnk, "Test.scala", 7)
+        self.assert_link_text(line, lnk, 'Test.scala:7')
+
+    def test_parse_go_6g_one_line(self):
+        line = "test.go:9: undefined: FakeMethod"
+        links = self.p.parse(line)
+        self.assert_link_count(links, 1)
+        lnk = links[0]
+        self.assert_link(lnk, "test.go", 9)
+        self.assert_link_text(line, lnk, 'test.go:9')
+
+    def test_parse_perl_one_line(self):
+        line = 'syntax error at test.pl line 889, near "$fake_var'
+        links = self.p.parse(line)
+        self.assert_link_count(links, 1)
+        lnk = links[0]
+        self.assert_link(lnk, "test.pl", 889)
+        self.assert_link_text(line, lnk, 'test.pl line 889')
+
+    def test_parse_mcs_one_line(self):
+        line = 'Test.cs(12,7): error CS0103: The name `fakeMethod'
+        links = self.p.parse(line)
+        self.assert_link_count(links, 1)
+        lnk = links[0]
+        self.assert_link(lnk, "Test.cs", 12)
+        self.assert_link_text(line, lnk, 'Test.cs(12,7)')
+
 if __name__ == '__main__':
     unittest.main()
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]