[orca] Move and tweak language identification code
- From: Joanmarie Diggs <joanied src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [orca] Move and tweak language identification code
- Date: Fri, 7 Jan 2022 12:05:57 +0000 (UTC)
commit 20982973d950c06955890a8b96c0200f958f040e
Author: Joanmarie Diggs <jdiggs igalia com>
Date: Fri Jan 7 11:59:26 2022 +0100
Move and tweak language identification code
src/orca/script_utilities.py | 58 +++++++++++++++++++++++++-------
src/orca/scripts/web/script_utilities.py | 41 ++++------------------
2 files changed, 52 insertions(+), 47 deletions(-)
---
diff --git a/src/orca/script_utilities.py b/src/orca/script_utilities.py
index 42f9a1d88..c9dec3e8b 100644
--- a/src/orca/script_utilities.py
+++ b/src/orca/script_utilities.py
@@ -3236,32 +3236,64 @@ class Utilities:
return "%s: %s" % (localizedKey, localizedValue)
+ def getLanguageAndDialectForObject(self, obj):
+ """Returns a (language, dialect) tuple for obj."""
+
+ locale, encoding = obj.objectLocale.split(".")
+ if not locale:
+ locale, encoding = local.getdefaultlocale()
+
+ language, dialect = locale.split("_")
+ return language, dialect
+
+ def splitSubstringByLanguage(self, obj, start, end):
+ """Returns a list of (start, end, string, language, dialect) tuples."""
+
+ rv = []
+ allSubstrings = self.getLanguageAndDialectFromTextAttributes(obj)
+ for startOffset, endOffset, language, dialect in allSubstrings:
+ if start >= endOffset:
+ continue
+ if end <= startOffset:
+ break
+ startOffset = max(start, startOffset)
+ endOffset = min(end, endOffset)
+ string = self.substring(obj, startOffset, endOffset)
+ rv.append([startOffset, endOffset, string, language, dialect])
+
+ return rv
+
def getLanguageAndDialectForSubstring(self, obj, start, end):
"""Returns a (language, dialect) tuple. If multiple languages apply to
the substring, language and dialect will be empty strings. Callers must
do any preprocessing to avoid that condition."""
- allSubstrings = self.getLanguageAndDialectForObject(obj)
+ allSubstrings = self.getLanguageAndDialectFromTextAttributes(obj)
for startOffset, endOffset, language, dialect in allSubstrings:
if startOffset <= start and endOffset >= end:
return language, dialect
return "", ""
- def getLanguageAndDialectForObject(self, obj):
- """Returns a list of (start, end, language, dialect) tuples for obj.
- This default implementation assumes there can be exactly one language
- plus dialect that applies to the entire object. Support for apps in
- which that assumption is not valid must override this method.
- """
+ def getLanguageAndDialectFromTextAttributes(self, obj):
+ """Returns a list of (start, end, language, dialect) tuples for obj
+ based on what is exposed via text attributes."""
- locale, encoding = obj.objectLocale.split(".")
- if not locale:
- locale, encoding = local.getdefaultlocale()
+ rv = []
+ attributeSet = self.getAllTextAttributesForObject(obj)
+ lastLanguage = lastDialect = ""
+ for (start, end, attrs) in attributeSet:
+ language = attrs.get("language", "")
+ dialect = ""
+ if "-" in language:
+ language, dialect = language.split("-")
+ if rv and lastLanguage == language and lastDialect == dialect:
+ rv[-1] = rv[-1][0], end, language, dialect
+ else:
+ rv.append((start, end, language, dialect))
+ lastLanguage, lastDialect = language, dialect
- language, dialect = locale.split("_")
- start, end = 0, -1
- return [(start, end, language, dialect)]
+ return rv
def willEchoCharacter(self, event):
"""Given a keyboard event containing an alphanumeric key,
diff --git a/src/orca/scripts/web/script_utilities.py b/src/orca/scripts/web/script_utilities.py
index c3f0116b9..ad9480863 100644
--- a/src/orca/scripts/web/script_utilities.py
+++ b/src/orca/scripts/web/script_utilities.py
@@ -953,52 +953,25 @@ class Utilities(script_utilities.Utilities):
def adjustContentsForLanguage(self, contents):
rv = []
for content in contents:
- rv.extend(self.splitSubstringByLanguage(*content[0:3]))
+ split = self.splitSubstringByLanguage(*content[0:3])
+ for start, end, string, language, dialect in split:
+ rv.append([content[0], start, end, string])
return rv
- def splitSubstringByLanguage(self, obj, start, end):
- rv = []
- allSubstrings = self.getLanguageAndDialectForObject(obj)
- for startOffset, endOffset, language, dialect in allSubstrings:
- if start >= endOffset:
- continue
- if end <= startOffset:
- break
- startOffset = max(start, startOffset)
- endOffset = min(end, endOffset)
- string = self.substring(obj, startOffset, endOffset)
- rv.append([obj, startOffset, endOffset, string])
-
- return rv
-
- def getLanguageAndDialectForObject(self, obj):
- """Returns a list of (start, end, language, dialect) tuples for obj."""
-
+ def getLanguageAndDialectFromTextAttributes(self, obj):
if not self.inDocumentContent(obj):
- return super().getLanguageAndDialectForObject(obj)
+ return super().getLanguageAndDialectFromTextAttributes(obj)
rv = self._languageAndDialects.get(hash(obj))
if rv is not None:
return rv
- rv = []
- attributeSet = self.getAllTextAttributesForObject(obj)
- lastLanguage = lastDialect = ""
- for (start, end, attrs) in attributeSet:
- language = attrs.get("language", "")
- dialect = ""
- if "-" in language:
- language, dialect = language.split("-")
- if rv and lastLanguage == language and lastDialect == dialect:
- rv[-1] = rv[-1][0], end, language, dialect
- else:
- rv.append((start, end, language, dialect))
- lastLanguage, lastDialect = language, dialect
+ rv = super().getLanguageAndDialectFromTextAttributes(obj)
# Embedded objects such as images and certain widgets won't implement the text interface
# and thus won't expose text attributes. Therefore try to get the info from the parent.
- if not attributeSet:
+ if not rv:
start, end = self.getHyperlinkRange(obj)
language, dialect = self.getLanguageAndDialectForSubstring(obj.parent, start, end)
rv.append((0, 1, language, dialect))
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]