[orca] Move and tweak language identification code



commit 20982973d950c06955890a8b96c0200f958f040e
Author: Joanmarie Diggs <jdiggs igalia com>
Date:   Fri Jan 7 11:59:26 2022 +0100

    Move and tweak language identification code

 src/orca/script_utilities.py             | 58 +++++++++++++++++++++++++-------
 src/orca/scripts/web/script_utilities.py | 41 ++++------------------
 2 files changed, 52 insertions(+), 47 deletions(-)
---
diff --git a/src/orca/script_utilities.py b/src/orca/script_utilities.py
index 42f9a1d88..c9dec3e8b 100644
--- a/src/orca/script_utilities.py
+++ b/src/orca/script_utilities.py
@@ -3236,32 +3236,64 @@ class Utilities:
 
         return "%s: %s" % (localizedKey, localizedValue)
 
+    def getLanguageAndDialectForObject(self, obj):
+        """Returns a (language, dialect) tuple for obj."""
+
+        locale, encoding = obj.objectLocale.split(".")
+        if not locale:
+            locale, encoding = local.getdefaultlocale()
+
+        language, dialect = locale.split("_")
+        return language, dialect
+
+    def splitSubstringByLanguage(self, obj, start, end):
+        """Returns a list of (start, end, string, language, dialect) tuples."""
+
+        rv = []
+        allSubstrings = self.getLanguageAndDialectFromTextAttributes(obj)
+        for startOffset, endOffset, language, dialect in allSubstrings:
+            if start >= endOffset:
+                continue
+            if end <= startOffset:
+                break
+            startOffset = max(start, startOffset)
+            endOffset = min(end, endOffset)
+            string = self.substring(obj, startOffset, endOffset)
+            rv.append([startOffset, endOffset, string, language, dialect])
+
+        return rv
+
     def getLanguageAndDialectForSubstring(self, obj, start, end):
         """Returns a (language, dialect) tuple. If multiple languages apply to
         the substring, language and dialect will be empty strings. Callers must
         do any preprocessing to avoid that condition."""
 
-        allSubstrings = self.getLanguageAndDialectForObject(obj)
+        allSubstrings = self.getLanguageAndDialectFromTextAttributes(obj)
         for startOffset, endOffset, language, dialect in allSubstrings:
             if startOffset <= start and endOffset >= end:
                 return language, dialect
 
         return "", ""
 
-    def getLanguageAndDialectForObject(self, obj):
-        """Returns a list of (start, end, language, dialect) tuples for obj.
-        This default implementation assumes there can be exactly one language
-        plus dialect that applies to the entire object. Support for apps in
-        which that assumption is not valid must override this method.
-        """
+    def getLanguageAndDialectFromTextAttributes(self, obj):
+        """Returns a list of (start, end, language, dialect) tuples for obj
+        based on what is exposed via text attributes."""
 
-        locale, encoding = obj.objectLocale.split(".")
-        if not locale:
-            locale, encoding = local.getdefaultlocale()
+        rv = []
+        attributeSet = self.getAllTextAttributesForObject(obj)
+        lastLanguage = lastDialect = ""
+        for (start, end, attrs) in attributeSet:
+            language = attrs.get("language", "")
+            dialect = ""
+            if "-" in language:
+                language, dialect = language.split("-")
+            if rv and lastLanguage == language and lastDialect == dialect:
+                rv[-1] = rv[-1][0], end, language, dialect
+            else:
+                rv.append((start, end, language, dialect))
+            lastLanguage, lastDialect = language, dialect
 
-        language, dialect = locale.split("_")
-        start, end = 0, -1
-        return [(start, end, language, dialect)]
+        return rv
 
     def willEchoCharacter(self, event):
         """Given a keyboard event containing an alphanumeric key,
diff --git a/src/orca/scripts/web/script_utilities.py b/src/orca/scripts/web/script_utilities.py
index c3f0116b9..ad9480863 100644
--- a/src/orca/scripts/web/script_utilities.py
+++ b/src/orca/scripts/web/script_utilities.py
@@ -953,52 +953,25 @@ class Utilities(script_utilities.Utilities):
     def adjustContentsForLanguage(self, contents):
         rv = []
         for content in contents:
-            rv.extend(self.splitSubstringByLanguage(*content[0:3]))
+            split = self.splitSubstringByLanguage(*content[0:3])
+            for start, end, string, language, dialect in split:
+                rv.append([content[0], start, end, string])
 
         return rv
 
-    def splitSubstringByLanguage(self, obj, start, end):
-        rv = []
-        allSubstrings = self.getLanguageAndDialectForObject(obj)
-        for startOffset, endOffset, language, dialect in allSubstrings:
-            if start >= endOffset:
-                continue
-            if end <= startOffset:
-                break
-            startOffset = max(start, startOffset)
-            endOffset = min(end, endOffset)
-            string = self.substring(obj, startOffset, endOffset)
-            rv.append([obj, startOffset, endOffset, string])
-
-        return rv
-
-    def getLanguageAndDialectForObject(self, obj):
-        """Returns a list of (start, end, language, dialect) tuples for obj."""
-
+    def getLanguageAndDialectFromTextAttributes(self, obj):
         if not self.inDocumentContent(obj):
-            return super().getLanguageAndDialectForObject(obj)
+            return super().getLanguageAndDialectFromTextAttributes(obj)
 
         rv = self._languageAndDialects.get(hash(obj))
         if rv is not None:
             return rv
 
-        rv = []
-        attributeSet = self.getAllTextAttributesForObject(obj)
-        lastLanguage = lastDialect = ""
-        for (start, end, attrs) in attributeSet:
-            language = attrs.get("language", "")
-            dialect = ""
-            if "-" in language:
-                language, dialect = language.split("-")
-            if rv and lastLanguage == language and lastDialect == dialect:
-                rv[-1] = rv[-1][0], end, language, dialect
-            else:
-                rv.append((start, end, language, dialect))
-            lastLanguage, lastDialect = language, dialect
+        rv = super().getLanguageAndDialectFromTextAttributes(obj)
 
         # Embedded objects such as images and certain widgets won't implement the text interface
         # and thus won't expose text attributes. Therefore try to get the info from the parent.
-        if not attributeSet:
+        if not rv:
             start, end = self.getHyperlinkRange(obj)
             language, dialect = self.getLanguageAndDialectForSubstring(obj.parent, start, end)
             rv.append((0, 1, language, dialect))


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]