Replaced the entire-tree xml.etree.ElementTree parser with a streaming xml.parsers.expat parser (~4x speedup and ~4x lower memory usage). Also, to allow svn and the parser to run in parallel, this change removes the logic that checks for EAGAIN, which was added in e139a0edb9803911e713220da57a924e62b612b2 as "Possible fix for bug 151339". Removing it looks safe because that change looked wrong: the trace in the bug references EINTERRUPTED, not EAGAIN, and EAGAIN should never appear on a synchronous pipe returned by subprocess. --- meld/vc/svn.py | 56 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/meld/vc/svn.py b/meld/vc/svn.py index 68fb3d5..6534b9b 100644 --- a/meld/vc/svn.py +++ b/meld/vc/svn.py @@ -28,7 +28,7 @@ import glob import os import shutil import tempfile -import xml.etree.ElementTree as ElementTree +import xml.parsers.expat import subprocess from meld.conf import _ @@ -186,32 +186,32 @@ class Vc(_vc.Vc): return cls._repo_version_support(repo_version) def _update_tree_state_cache(self, path): - while 1: - try: - proc = _vc.popen( - [self.CMD, "status", "-v", "--xml", path], - cwd=self.location) - tree = ElementTree.parse(proc) - break - except OSError as e: - if e.errno != errno.EAGAIN: - raise - - for target in tree.findall("target") + tree.findall("changelist"): - for entry in (t for t in target.getchildren() if t.tag == "entry"): - path = entry.attrib["path"] - if not path: - continue + xml_stream = _vc.popen( + [self.CMD, "status", "-v", "--xml", path], + cwd=self.location) + + rev_label_pattern = _("Rev %s") + parser = xml.parsers.expat.ParserCreate() + parser.returns_unicode = False + last_entry_attribs_in_state = 0 + state = [{}] + + def handle_tag(tag, attribs): + if tag == "entry": + state[last_entry_attribs_in_state] = attribs + elif tag == "wc-status": + try: + path = state[last_entry_attribs_in_state].pop("path") + except KeyError: + return if not os.path.isabs(path): path = 
os.path.abspath(os.path.join(self.location, path)) - for status in (e for e in entry.getchildren() - if e.tag == "wc-status"): - item = status.attrib["item"] - if item == "": - continue - state = self.state_map.get(item, _vc.STATE_NONE) - self._tree_cache[path] = state - - rev = status.attrib.get("revision") - rev_label = _("Rev %s") % rev if rev is not None else '' - self._tree_meta_cache[path] = rev_label + + self._tree_cache[path] = self.state_map.get(attribs.get("item"), _vc.STATE_NONE) + try: + self._tree_meta_cache[path] = rev_label_pattern % attribs['revision'] + except KeyError: #revision does not present for entries with status set as item="external" + self._tree_meta_cache[path] = '' + + parser.StartElementHandler = handle_tag + parser.ParseFile(xml_stream) -- 2.7.0.rc0.173.g4a846af