[orca] Begin addition of support for language identification
- From: Joanmarie Diggs <joanied src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [orca] Begin addition of support for language identification
- Date: Wed, 5 Jan 2022 15:29:11 +0000 (UTC)
commit c6cbefcea5cdb4eb3c12c67fa0e55c1eb238e5da
Author: Joanmarie Diggs <jdiggs igalia com>
Date: Wed Jan 5 16:25:52 2022 +0100
Begin addition of support for language identification
Please note: This is a work in progress, and end users should not
experience any changes in behavior. It is part of the foundation needed to
eventually make automatic language switching an optional feature.
src/orca/script_utilities.py | 59 ++++++++++++++++++++++++++
src/orca/scripts/web/script_utilities.py | 73 +++++++++++++++++++++++++++++++-
2 files changed, 130 insertions(+), 2 deletions(-)
---
diff --git a/src/orca/script_utilities.py b/src/orca/script_utilities.py
index 290cb8874..c5015ec2a 100644
--- a/src/orca/script_utilities.py
+++ b/src/orca/script_utilities.py
@@ -3146,6 +3146,38 @@ class Utilities:
return self._script.attributeNamesDict.get(attribName, attribName)
def getAllTextAttributesForObject(self, obj):
    """Returns a list of (start, end, attrsDict) tuples for obj.

    Each tuple describes one attribute run reported by obj's text
    interface, walking from offset 0 up to the text's characterCount.
    attrsDict maps attribute names to values, parsed from the run's
    "name:value" strings. Returns an empty list if the text interface
    cannot be obtained from obj."""

    try:
        text = obj.queryText()
    except Exception:
        # Best-effort: an object without a usable text interface simply has
        # no text attributes. Catching Exception rather than everything
        # lets KeyboardInterrupt and SystemExit propagate.
        return []

    msg = "INFO: Getting all text attributes for %s" % obj
    debug.println(debug.LEVEL_INFO, msg, True)

    rv = []
    offset = 0
    while offset < text.characterCount:
        attrList, start, end = text.getAttributeRun(offset)
        if start == end:
            # An empty run would never advance offset; stop instead of looping.
            msg = "INFO: start and end offsets should not be equal in attribute run"
            debug.println(debug.LEVEL_INFO, msg, True)
            break

        if start < offset:
            # The run claims to begin before the offset we asked about.
            msg = "INFO: Unexpected start offset less than offset in attribute run"
            debug.println(debug.LEVEL_INFO, msg, True)
            break

        # Split on the first colon only, so values may themselves contain
        # colons. Entries without any colon are skipped rather than being
        # allowed to raise ValueError and abort the whole walk.
        attrDict = {}
        for attr in attrList:
            name, separator, value = attr.partition(':')
            if separator:
                attrDict[name] = value

        rv.append((start, end, attrDict))
        offset = end

    msg = "INFO: Result: %s" % rv
    debug.println(debug.LEVEL_INFO, msg, True)
    return rv
+
def textAttributes(self, acc, offset=None, get_defaults=False):
"""Get the text attributes run for a given offset in a given accessible
@@ -3211,6 +3243,33 @@ class Utilities:
return "%s: %s" % (localizedKey, localizedValue)
def getLanguageAndDialectForSubstring(self, obj, start, end):
    """Returns a (language, dialect) tuple. If multiple languages apply to
    the substring, language and dialect will be empty strings. Callers must
    do any preprocessing to avoid that condition.

    The tuple comes from the first entry of getLanguageAndDialectForObject
    whose range fully contains [start, end]. An entry whose end offset is
    -1 is treated as extending to the end of the object."""

    allSubstrings = self.getLanguageAndDialectForObject(obj)
    for startOffset, endOffset, language, dialect in allSubstrings:
        if startOffset > start:
            continue

        # The default getLanguageAndDialectForObject reports the whole object
        # as the range (0, -1). Comparing that -1 literally against end would
        # mean the whole-object entry could never match a real substring.
        if endOffset == -1 or endOffset >= end:
            return language, dialect

    return "", ""
+
def getLanguageAndDialectForObject(self, obj):
    """Returns a list of (start, end, language, dialect) tuples for obj.
    This default implementation assumes there can be exactly one language
    plus dialect that applies to the entire object. Support for apps in
    which that assumption is not valid must override this method.

    The single tuple returned uses the range (0, -1) and is derived from
    obj.objectLocale, falling back to the process locale when that is
    empty. dialect is an empty string if the locale has no territory part.
    """

    # Imported here under an alias so the module cannot be confused with
    # the locale string handled below.
    import locale as localeModule

    # objectLocale is expected to look like "en_US.UTF-8", but the encoding
    # and the territory are both optional (e.g. "en_US", "C"). partition()
    # copes with the missing pieces where tuple-unpacking a split() would
    # raise ValueError. Any "@modifier" suffix is dropped as well.
    localeName = (obj.objectLocale or "").partition(".")[0].partition("@")[0]
    if not localeName:
        # locale.getdefaultlocale() is deprecated since Python 3.11;
        # getlocale() is its documented replacement. It may report
        # (None, None), and raises ValueError for a locale it cannot parse.
        try:
            localeName = localeModule.getlocale()[0] or ""
        except ValueError:
            localeName = ""

    language, _, dialect = localeName.partition("_")
    start, end = 0, -1
    return [(start, end, language, dialect)]
+
def willEchoCharacter(self, event):
"""Given a keyboard event containing an alphanumeric key,
determine if the script is likely to echo it as a character.
diff --git a/src/orca/scripts/web/script_utilities.py b/src/orca/scripts/web/script_utilities.py
index 5509c8835..c603b1115 100644
--- a/src/orca/scripts/web/script_utilities.py
+++ b/src/orca/scripts/web/script_utilities.py
@@ -53,6 +53,8 @@ class Utilities(script_utilities.Utilities):
self._objectAttributes = {}
self._currentTextAttrs = {}
+ self._allTextAttrs = {}
+ self._languageAndDialects = {}
self._caretContexts = {}
self._priorContexts = {}
self._contextPathsRolesAndNames = {}
@@ -152,6 +154,8 @@ class Utilities(script_utilities.Utilities):
def clearCachedObjects(self):
debug.println(debug.LEVEL_INFO, "WEB: cleaning up cached objects", True)
self._objectAttributes = {}
+ self._allTextAttrs = {}
+ self._languageAndDialects = {}
self._inDocumentContent = {}
self._inTopLevelWebApp = {}
self._isTextBlockElement = {}
@@ -932,6 +936,69 @@ class Utilities(script_utilities.Utilities):
return super().localizeTextAttribute(key, value)
def getAllTextAttributesForObject(self, obj):
    """Returns a list of (start, end, attrsDict) tuples for obj.

    Results for objects in document content are stored in
    self._allTextAttrs, keyed by hash(obj), and reused on later calls.
    Anything else is passed straight to the parent implementation."""

    if obj and self.inDocumentContent(obj):
        key = hash(obj)
        attributeRuns = self._allTextAttrs.get(key)
        if attributeRuns is None:
            # Nothing stored yet: ask the parent class and remember the answer.
            attributeRuns = super().getAllTextAttributesForObject(obj)
            self._allTextAttrs[key] = attributeRuns
        return attributeRuns

    return super().getAllTextAttributesForObject(obj)
+
def adjustContentsForLanguage(self, contents):
    """Returns contents with each entry replaced by the pieces that
    splitSubstringByLanguage produces for it.

    Only the first three items of each entry are used; they are passed
    positionally as the object, start offset and end offset."""

    return [
        piece
        for entry in contents
        for piece in self.splitSubstringByLanguage(*entry[0:3])
    ]
+
def splitSubstringByLanguage(self, obj, start, end):
    """Returns a list of [obj, start, end, string] items covering the range
    from start to end in obj, with one item per language run reported by
    getLanguageAndDialectForObject.

    Each item is clamped to the requested range, so the result never
    extends beyond start and end. A run whose end offset is -1 is treated
    as reaching the end of the requested range. If no run overlaps the
    range, the range is returned as a single unsplit item so that no
    content is lost."""

    rv = []
    allSubstrings = self.getLanguageAndDialectForObject(obj)
    for startOffset, endOffset, _language, _dialect in allSubstrings:
        if endOffset == -1:
            # Whole-object run, as the default implementation reports it.
            endOffset = end

        # A run that ends exactly where the range begins does not overlap it.
        if start >= endOffset:
            continue
        if end <= startOffset:
            break

        # Keep only the part of the run that lies inside the requested range.
        pieceStart = max(start, startOffset)
        pieceEnd = min(end, endOffset)
        string = self.substring(obj, pieceStart, pieceEnd)
        rv.append([obj, pieceStart, pieceEnd, string])

    if not rv:
        rv.append([obj, start, end, self.substring(obj, start, end)])

    return rv
+
def getLanguageAndDialectForObject(self, obj):
    """Returns a list of (start, end, language, dialect) tuples for obj.

    For objects in document content there is one tuple per text attribute
    run, taken from the run's "language" attribute. Everything before the
    first hyphen is the language and everything after it is the dialect;
    both are empty strings when the run has no such attribute. Results are
    stored in self._languageAndDialects, keyed by hash(obj). Objects
    outside document content are handled by the parent implementation."""

    if not self.inDocumentContent(obj):
        return super().getLanguageAndDialectForObject(obj)

    key = hash(obj)
    rv = self._languageAndDialects.get(key)
    if rv is not None:
        return rv

    rv = []
    attributeSet = self.getAllTextAttributesForObject(obj)
    for (start, end, attrs) in attributeSet:
        # Split on the first hyphen only: a tag may contain several
        # (e.g. "zh-Hant-TW"), and unpacking a plain split("-") into two
        # names would raise ValueError for those.
        language, _, dialect = attrs.get("language", "").partition("-")
        rv.append((start, end, language, dialect))

    # Embedded objects such as images and certain widgets won't implement the text interface
    # and thus won't expose text attributes. Therefore try to get the info from the parent.
    if not attributeSet:
        start, end = self.getHyperlinkRange(obj)
        language, dialect = self.getLanguageAndDialectForSubstring(obj.parent, start, end)
        rv.append((0, 1, language, dialect))

    self._languageAndDialects[key] = rv
    return rv
+
def findObjectInContents(self, obj, offset, contents, usingCache=False):
if not obj or not contents:
return -1
@@ -1442,7 +1509,7 @@ class Utilities(script_utilities.Utilities):
string = string[rangeStart:rangeEnd]
end = start + len(string)
- return [[obj, start, end, string]]
+ return self.adjustContentsForLanguage([[obj, start, end, string]])
def getSentenceContentsAtOffset(self, obj, offset, useCache=True):
if not obj:
@@ -1690,7 +1757,9 @@ class Utilities(script_utilities.Utilities):
extents = self.getExtents(acc, start, end)
except:
extents = "(exception)"
- msg = " %i. chars: %i-%i: '%s' extents=%s\n" % (i, start, end, string, extents)
+ language, dialect = self.getLanguageAndDialectForSubstring(acc, start, end)
+ msg = " %i. chars: %i-%i: '%s' extents=%s language='%s' dialect='%s'\n" % \
+ (i, start, end, string, extents, language, dialect)
msg += debug.getAccessibleDetails(debug.LEVEL_INFO, acc, indent)
debug.println(debug.LEVEL_INFO, msg, True)
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]