[ocrfeeder] Add support for migrating the OCR engines' configuration
- From: Joaquim Manuel Pereira Rocha <jrocha src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [ocrfeeder] Add support for migrating the OCR engines' configuration
- Date: Sat, 10 Mar 2012 19:03:40 +0000 (UTC)
commit dda43c5f75885b7f78617e3e4241e6f922a97538
Author: Joaquim Rocha <jrocha igalia com>
Date: Sat Mar 10 19:59:43 2012 +0100
Add support for migrating the OCR engines' configuration
It detects whether the configuration needs an update and whether the update
can be performed automatically, and then asks the user for their decision.
src/ocrfeeder/feeder/ocrEngines.py | 38 +++++++++++++++++++--
src/ocrfeeder/studio/studioBuilder.py | 56 ++++++++++++++++++++++++++++++-
src/ocrfeeder/studio/widgetPresenter.py | 23 ++++++++++--
src/ocrfeeder/util/configuration.py | 26 ++++++++++++--
4 files changed, 130 insertions(+), 13 deletions(-)
---
diff --git a/src/ocrfeeder/feeder/ocrEngines.py b/src/ocrfeeder/feeder/ocrEngines.py
index 07c791d..3d73a1c 100644
--- a/src/ocrfeeder/feeder/ocrEngines.py
+++ b/src/ocrfeeder/feeder/ocrEngines.py
@@ -31,11 +31,15 @@ FILE_ARGUMENT = '$FILE'
class Engine:
- def __init__(self, name, engine_path, arguments, image = None, temporary_folder = '/tmp/', image_format = 'PPM', failure_string = ''):
+ def __init__(self, name, engine_path, arguments,
+ image = None, temporary_folder = '/tmp/',
+ image_format = 'PPM', failure_string = '',
+ version = 0.0):
self.name = name
self.engine_path = engine_path
self.arguments = arguments
+ self.version = version
if not self.name:
raise WrongSettingsForEngine("The engine's name cannot be empty!")
if not self.engine_path or not os.path.isfile(self.engine_path):
@@ -109,7 +113,8 @@ class Engine:
'engine_path': self.engine_path,
'arguments': self.arguments,
'image_format': self.image_format,
- 'failure_string': self.failure_string}
+ 'failure_string': self.failure_string,
+ 'version': self.version}
root = ET.Element('engine')
for key, value in engine_info.items():
if not key or not value:
@@ -158,6 +163,29 @@ class OcrEnginesManager:
lib.debug("Warning: no engines found!")
elif not favorite_engine_exists:
self.configuration_manager.favorite_engine = self.ocr_engines[0][0].name
+ engines_needing_update = {'auto': [],
+ 'manual': []}
+ for engine, path in self.ocr_engines:
+ path = engine.engine_path
+ default_conf = \
+ self.configuration_manager.getEngineDefaultConfiguration(path)
+ if default_conf is None:
+ continue
+ if float(engine.version) < float(default_conf['version']):
+ update_type = 'manual'
+ for arguments in default_conf['old_arguments']:
+ if engine.arguments == arguments:
+ update_type = 'auto'
+ break
+ engines_needing_update[update_type].append({'engine': engine,
+ 'configuration': default_conf})
+ return engines_needing_update
+
+ def migrateEngine(self, engine, configuration, only_version = False):
+ if not only_version:
+ engine.arguments = configuration['arguments']
+ engine.version = configuration['version']
+ self.replaceEngine(engine, engine)
def getEngineFromXml(self, xml_file_name):
document = ET.parse(xml_file_name)
@@ -174,11 +202,13 @@ class OcrEnginesManager:
def getXmlFilesInFolder(self, folder):
return [os.path.join(folder, file) for file in os.listdir(folder) if file.endswith('.xml')]
- def newEngine(self, name, engine_path, arguments, image_format, failure_string):
+ def newEngine(self, name, engine_path, arguments,
+ image_format, failure_string, version):
engine = Engine(name = name, engine_path = engine_path,
arguments = arguments, image_format = image_format,
temporary_folder = self.configuration_manager.TEMPORARY_FOLDER,
- failure_string = failure_string)
+ failure_string = failure_string,
+ version = version)
return engine
def delete(self, index):
diff --git a/src/ocrfeeder/studio/studioBuilder.py b/src/ocrfeeder/studio/studioBuilder.py
index a3145cb..c0efda5 100644
--- a/src/ocrfeeder/studio/studioBuilder.py
+++ b/src/ocrfeeder/studio/studioBuilder.py
@@ -69,7 +69,9 @@ class Studio:
self.configuration_manager = ConfigurationManager()
self.ocr_engines_manager = OcrEnginesManager(self.configuration_manager)
self.configuration_manager.loadConfiguration()
- self.ocr_engines_manager.makeEnginesFromFolder(self.configuration_manager.user_engines_folder)
+ user_engines_folder = self.configuration_manager.user_engines_folder
+ self.engines_needing_update = \
+ self.ocr_engines_manager.makeEnginesFromFolder(user_engines_folder)
self.ocr_engines = self.ocr_engines_manager.ocr_engines
self.source_images_list_store = SourceImagesListStore()
self.source_images_icon_view = SourceImagesSelectorIconView(self.source_images_list_store)
@@ -146,6 +148,9 @@ class Studio:
self.ocr_engines_manager.addNewEngine(engine)
add_engines_dialog.destroy()
+ else:
+ self.__askForEnginesMigration()
+
def run(self):
gtk.gdk.threads_init()
gtk.main()
@@ -440,6 +445,55 @@ class Studio:
def zoomFit(self, widget = None):
self.source_images_controler.zoomFit()
+ def __askForEnginesMigration(self):
+ auto_update = self.engines_needing_update['auto']
+ if auto_update:
+ names = []
+ for migration in auto_update:
+ names.append(migration['engine'].name)
+ dialog = gtk.MessageDialog(self.main_window.window,
+ gtk.DIALOG_MODAL |
+ gtk.DIALOG_DESTROY_WITH_PARENT,
+ gtk.MESSAGE_WARNING,
+ buttons = gtk.BUTTONS_YES_NO)
+ message = _('The following engines\' arguments '
+ 'might need to be updated:\n <b>%(engines)s</b> '
+ '\nDo you want to update them automatically?') % \
+ {'engines': '\n'.join(names)}
+ dialog.set_markup(message)
+ if dialog.run() == gtk.RESPONSE_YES:
+ for migration in auto_update:
+ self.ocr_engines_manager.migrateEngine(migration['engine'],
+ migration['configuration'])
+ dialog.destroy()
+
+ manual_update = self.engines_needing_update['manual']
+ if manual_update:
+ names = []
+ for migration in manual_update:
+ names.append(migration['engine'].name)
+ dialog = gtk.MessageDialog(self.main_window.window,
+ gtk.DIALOG_MODAL |
+ gtk.DIALOG_DESTROY_WITH_PARENT,
+ gtk.MESSAGE_WARNING)
+ dialog.add_buttons(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
+ _('_Open OCR Engines Manager Dialog'),
+ gtk.RESPONSE_OK)
+ message = _('The following engines\' arguments '
+ 'might need to be updated but it appears '
+ 'you have changed their default configuration so '
+ 'they need to be updated manually:\n '
+ '<b>%(engines)s</b> ') % \
+ {'engines': '\n'.join(names)}
+ dialog.set_markup(message)
+ response = dialog.run()
+ dialog.destroy()
+ for migration in manual_update:
+ self.ocr_engines_manager.migrateEngine(migration['engine'],
+ migration['configuration'])
+ if response == gtk.RESPONSE_OK:
+ self.ocrEngines()
+
def quit(self, widget = None, data = None):
if not self.project_name and not self.source_images_list_store.isEmpty():
quit_dialog = widgetPresenter.QuestionDialog('<b>' + _("The project hasn't been saved.") + '</b>', gtk.BUTTONS_NONE)
diff --git a/src/ocrfeeder/studio/widgetPresenter.py b/src/ocrfeeder/studio/widgetPresenter.py
index 279dee4..c8e1dc3 100644
--- a/src/ocrfeeder/studio/widgetPresenter.py
+++ b/src/ocrfeeder/studio/widgetPresenter.py
@@ -1814,10 +1814,25 @@ class OcrSettingsDialog(gtk.Dialog):
def setEngine(self):
try:
- engine = self.engine_manager.newEngine(self.name_entry.get_text(), self.engine_path_entry.get_text(),
- self.arguments_entry.get_text(), self.image_format_entry.get_text(),
- self.failure_string_entry.get_text()
- )
+ path = self.engine_path_entry.get_text()
+ if self.engine:
+ version = self.engine.version
+ else:
+ configuration = \
+ self.configuration_manager.getEngineDefaultConfiguration(path)
+ if configuration:
+ version = configuration['version']
+ else:
+ version = 0.0
+ arguments = self.arguments_entry.get_text()
+ image_format = self.image_format_entry.get_text()
+ failure_string = self.failure_string_entry.get_text()
+ engine = self.engine_manager.newEngine(self.name_entry.get_text(),
+ path,
+ arguments,
+ image_format,
+ failure_string,
+ version)
if self.engine:
self.engine_manager.replaceEngine(self.engine, engine)
self.engine = engine
diff --git a/src/ocrfeeder/util/configuration.py b/src/ocrfeeder/util/configuration.py
index 74eb852..f81ee42 100644
--- a/src/ocrfeeder/util/configuration.py
+++ b/src/ocrfeeder/util/configuration.py
@@ -31,21 +31,32 @@ PREDEFINED_ENGINES = {'tesseract': {'name': 'Tesseract',
'engine_path': 'tesseract',
'arguments': '$IMAGE $FILE >'
' /dev/null 2> /dev/null; cat '
- '$FILE.txt; rm $FILE $FILE.txt'},
+ '$FILE.txt; rm $FILE $FILE.txt',
+ 'old_arguments': ['$IMAGE $FILE; cat '
+ '$FILE.txt; rm $FILE $FILE.txt'],
+ 'version': 0.1},
'ocrad': {'name': 'Ocrad',
'image_format': 'PPM',
'engine_path': 'ocrad',
- 'arguments': '-F utf8 $IMAGE'},
+ 'arguments': '-F utf8 $IMAGE',
+ 'old_arguments': ['-F utf8 $IMAGE'],
+ 'version': 0.1},
'gocr': {'name': 'GOCR',
'image_format': 'PPM',
'engine_path': 'gocr',
- 'arguments': '-f UTF8 $IMAGE'},
+ 'arguments': '-f UTF8 $IMAGE',
+ 'old_arguments': ['-f UTF8 $IMAGE'],
+ 'version': 0.1},
'cuneiform': {'name': 'Cuneiform',
'image_format': 'BMP',
'engine_path': 'cuneiform',
'arguments': '-f text -o $FILE $IMAGE >'
' /dev/null 2> /dev/null && cat $FILE'
- ' && rm $FILE'},
+ ' && rm $FILE',
+ 'old_arguments': ['-f text -o $FILE $IMAGE >'
+ ' /dev/null 2> /dev/null && cat $FILE'
+ ' && rm $FILE'],
+ 'version': 0.1},
}
class ConfigurationManager(object):
@@ -304,6 +315,13 @@ class ConfigurationManager(object):
else:
return ''
+ def getEngineDefaultConfiguration(self, engine_path):
+ path = os.path.basename(engine_path)
+ for name, conf in PREDEFINED_ENGINES.items():
+ if conf['engine_path'] == path:
+ return conf
+ return None
+
def loadConfiguration(self):
configuration_file = os.path.join(self.user_configuration_folder, 'preferences.xml')
if not os.path.isfile(configuration_file):
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]