[ocrfeeder] Add support for migrating the OCR engines' configuration



commit dda43c5f75885b7f78617e3e4241e6f922a97538
Author: Joaquim Rocha <jrocha igalia com>
Date:   Sat Mar 10 19:59:43 2012 +0100

    Add support for migrating the OCR engines' configuration
    
    It detects whether the configuration needs an update and whether it
    can be performed automatically, then asks the user for a decision.

 src/ocrfeeder/feeder/ocrEngines.py      |   38 +++++++++++++++++++--
 src/ocrfeeder/studio/studioBuilder.py   |   56 ++++++++++++++++++++++++++++++-
 src/ocrfeeder/studio/widgetPresenter.py |   23 ++++++++++--
 src/ocrfeeder/util/configuration.py     |   26 ++++++++++++--
 4 files changed, 130 insertions(+), 13 deletions(-)
---
diff --git a/src/ocrfeeder/feeder/ocrEngines.py b/src/ocrfeeder/feeder/ocrEngines.py
index 07c791d..3d73a1c 100644
--- a/src/ocrfeeder/feeder/ocrEngines.py
+++ b/src/ocrfeeder/feeder/ocrEngines.py
@@ -31,11 +31,15 @@ FILE_ARGUMENT = '$FILE'
 
 class Engine:
 
-    def __init__(self, name, engine_path, arguments, image = None, temporary_folder = '/tmp/', image_format = 'PPM', failure_string = ''):
+    def __init__(self, name, engine_path, arguments,
+                 image = None, temporary_folder = '/tmp/',
+                 image_format = 'PPM', failure_string = '',
+                 version = 0.0):
 
         self.name = name
         self.engine_path = engine_path
         self.arguments = arguments
+        self.version = version
         if not self.name:
             raise WrongSettingsForEngine("The engine's name cannot be empty!")
         if not self.engine_path or not os.path.isfile(self.engine_path):
@@ -109,7 +113,8 @@ class Engine:
                        'engine_path': self.engine_path,
                        'arguments': self.arguments,
                        'image_format': self.image_format,
-                       'failure_string': self.failure_string}
+                       'failure_string': self.failure_string,
+                       'version': self.version}
         root = ET.Element('engine')
         for key, value in engine_info.items():
             if not key or not value:
@@ -158,6 +163,29 @@ class OcrEnginesManager:
             lib.debug("Warning: no engines found!")
         elif not favorite_engine_exists:
             self.configuration_manager.favorite_engine = self.ocr_engines[0][0].name
+        engines_needing_update = {'auto': [],
+                                  'manual': []}
+        for engine, path in self.ocr_engines:
+            path = engine.engine_path
+            default_conf = \
+                self.configuration_manager.getEngineDefaultConfiguration(path)
+            if default_conf is None:
+                continue
+            if float(engine.version) < float(default_conf['version']):
+                update_type = 'manual'
+                for arguments in default_conf['old_arguments']:
+                    if engine.arguments == arguments:
+                        update_type = 'auto'
+                        break
+                engines_needing_update[update_type].append({'engine': engine,
+                                               'configuration': default_conf})
+        return engines_needing_update
+
+    def migrateEngine(self, engine, configuration, only_version = False):
+        if not only_version:
+            engine.arguments = configuration['arguments']
+        engine.version = configuration['version']
+        self.replaceEngine(engine, engine)
 
     def getEngineFromXml(self, xml_file_name):
         document = ET.parse(xml_file_name)
@@ -174,11 +202,13 @@ class OcrEnginesManager:
     def getXmlFilesInFolder(self, folder):
         return [os.path.join(folder, file) for file in os.listdir(folder) if file.endswith('.xml')]
 
-    def newEngine(self, name, engine_path, arguments, image_format, failure_string):
+    def newEngine(self, name, engine_path, arguments,
+                  image_format, failure_string, version):
         engine = Engine(name = name, engine_path = engine_path,
                         arguments = arguments, image_format = image_format,
                         temporary_folder = self.configuration_manager.TEMPORARY_FOLDER,
-                        failure_string = failure_string)
+                        failure_string = failure_string,
+                        version = version)
         return engine
 
     def delete(self, index):
diff --git a/src/ocrfeeder/studio/studioBuilder.py b/src/ocrfeeder/studio/studioBuilder.py
index a3145cb..c0efda5 100644
--- a/src/ocrfeeder/studio/studioBuilder.py
+++ b/src/ocrfeeder/studio/studioBuilder.py
@@ -69,7 +69,9 @@ class Studio:
         self.configuration_manager = ConfigurationManager()
         self.ocr_engines_manager = OcrEnginesManager(self.configuration_manager)
         self.configuration_manager.loadConfiguration()
-        self.ocr_engines_manager.makeEnginesFromFolder(self.configuration_manager.user_engines_folder)
+        user_engines_folder = self.configuration_manager.user_engines_folder
+        self.engines_needing_update = \
+            self.ocr_engines_manager.makeEnginesFromFolder(user_engines_folder)
         self.ocr_engines = self.ocr_engines_manager.ocr_engines
         self.source_images_list_store = SourceImagesListStore()
         self.source_images_icon_view = SourceImagesSelectorIconView(self.source_images_list_store)
@@ -146,6 +148,9 @@ class Studio:
                         self.ocr_engines_manager.addNewEngine(engine)
                 add_engines_dialog.destroy()
 
+        else:
+            self.__askForEnginesMigration()
+
     def run(self):
         gtk.gdk.threads_init()
         gtk.main()
@@ -440,6 +445,55 @@ class Studio:
     def zoomFit(self, widget = None):
         self.source_images_controler.zoomFit()
 
+    def __askForEnginesMigration(self):
+        auto_update = self.engines_needing_update['auto']
+        if auto_update:
+            names = []
+            for migration in auto_update:
+                names.append(migration['engine'].name)
+            dialog = gtk.MessageDialog(self.main_window.window,
+                                       gtk.DIALOG_MODAL |
+                                       gtk.DIALOG_DESTROY_WITH_PARENT,
+                                       gtk.MESSAGE_WARNING,
+                                       buttons = gtk.BUTTONS_YES_NO)
+            message = _('The following engines\' arguments '
+                        'might need to be updated:\n  <b>%(engines)s</b> '
+                        '\nDo you want to update them automatically?') % \
+                        {'engines': '\n'.join(names)}
+            dialog.set_markup(message)
+            if dialog.run() == gtk.RESPONSE_YES:
+                for migration in auto_update:
+                    self.ocr_engines_manager.migrateEngine(migration['engine'],
+                                                    migration['configuration'])
+            dialog.destroy()
+
+        manual_update = self.engines_needing_update['manual']
+        if manual_update:
+            names = []
+            for migration in manual_update:
+                names.append(migration['engine'].name)
+            dialog = gtk.MessageDialog(self.main_window.window,
+                                       gtk.DIALOG_MODAL |
+                                       gtk.DIALOG_DESTROY_WITH_PARENT,
+                                       gtk.MESSAGE_WARNING)
+            dialog.add_buttons(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
+                               _('_Open OCR Engines Manager Dialog'),
+                               gtk.RESPONSE_OK)
+            message = _('The following engines\' arguments '
+                        'might need to be updated but it appears '
+                        'you have changed their default configuration so '
+                        'they need to be updated manually:\n  '
+                        '<b>%(engines)s</b> ') % \
+                        {'engines': '\n'.join(names)}
+            dialog.set_markup(message)
+            response = dialog.run()
+            dialog.destroy()
+            for migration in manual_update:
+                self.ocr_engines_manager.migrateEngine(migration['engine'],
+                                                migration['configuration'])
+            if response == gtk.RESPONSE_OK:
+                self.ocrEngines()
+
     def quit(self, widget = None, data = None):
         if not self.project_name and not self.source_images_list_store.isEmpty():
             quit_dialog = widgetPresenter.QuestionDialog('<b>' + _("The project hasn't been saved.") + '</b>', gtk.BUTTONS_NONE)
diff --git a/src/ocrfeeder/studio/widgetPresenter.py b/src/ocrfeeder/studio/widgetPresenter.py
index 279dee4..c8e1dc3 100644
--- a/src/ocrfeeder/studio/widgetPresenter.py
+++ b/src/ocrfeeder/studio/widgetPresenter.py
@@ -1814,10 +1814,25 @@ class OcrSettingsDialog(gtk.Dialog):
 
     def setEngine(self):
         try:
-            engine = self.engine_manager.newEngine(self.name_entry.get_text(), self.engine_path_entry.get_text(),
-                                             self.arguments_entry.get_text(), self.image_format_entry.get_text(),
-                                             self.failure_string_entry.get_text()
-                                             )
+            path = self.engine_path_entry.get_text()
+            if self.engine:
+                version = self.engine.version
+            else:
+                configuration = \
+                  self.configuration_manager.getEngineDefaultConfiguration(path)
+                if configuration:
+                    version = configuration['version']
+                else:
+                    version = 0.0
+            arguments = self.arguments_entry.get_text()
+            image_format = self.image_format_entry.get_text()
+            failure_string = self.failure_string_entry.get_text()
+            engine = self.engine_manager.newEngine(self.name_entry.get_text(),
+                                                   path,
+                                                   arguments,
+                                                   image_format,
+                                                   failure_string,
+                                                   version)
             if self.engine:
                 self.engine_manager.replaceEngine(self.engine, engine)
                 self.engine = engine
diff --git a/src/ocrfeeder/util/configuration.py b/src/ocrfeeder/util/configuration.py
index 74eb852..f81ee42 100644
--- a/src/ocrfeeder/util/configuration.py
+++ b/src/ocrfeeder/util/configuration.py
@@ -31,21 +31,32 @@ PREDEFINED_ENGINES = {'tesseract': {'name': 'Tesseract',
                                     'engine_path': 'tesseract',
                                     'arguments': '$IMAGE $FILE >'
                                     ' /dev/null 2> /dev/null; cat '
-                                    '$FILE.txt; rm $FILE $FILE.txt'},
+                                    '$FILE.txt; rm $FILE $FILE.txt',
+                                    'old_arguments': ['$IMAGE $FILE; cat '
+                                       '$FILE.txt; rm $FILE $FILE.txt'],
+                                    'version': 0.1},
                       'ocrad': {'name': 'Ocrad',
                                 'image_format': 'PPM',
                                 'engine_path': 'ocrad',
-                                'arguments': '-F utf8 $IMAGE'},
+                                'arguments': '-F utf8 $IMAGE',
+                                'old_arguments': ['-F utf8 $IMAGE'],
+                                'version': 0.1},
                       'gocr': {'name': 'GOCR',
                                'image_format': 'PPM',
                                'engine_path': 'gocr',
-                               'arguments': '-f UTF8 $IMAGE'},
+                               'arguments': '-f UTF8 $IMAGE',
+                               'old_arguments': ['-f UTF8 $IMAGE'],
+                               'version': 0.1},
                       'cuneiform': {'name': 'Cuneiform',
                                'image_format': 'BMP',
                                'engine_path': 'cuneiform',
                                'arguments': '-f text -o $FILE $IMAGE >'
                                ' /dev/null 2> /dev/null && cat $FILE'
-                               ' && rm $FILE'},
+                               ' && rm $FILE',
+                               'old_arguments': ['-f text -o $FILE $IMAGE >'
+                                      ' /dev/null 2> /dev/null && cat $FILE'
+                                      ' && rm $FILE'],
+                               'version': 0.1},
                      }
 
 class ConfigurationManager(object):
@@ -304,6 +315,13 @@ class ConfigurationManager(object):
         else:
             return ''
 
+    def getEngineDefaultConfiguration(self, engine_path):
+        path = os.path.basename(engine_path)
+        for name, conf in PREDEFINED_ENGINES.items():
+            if conf['engine_path'] == path:
+                return conf
+        return None
+
     def loadConfiguration(self):
         configuration_file = os.path.join(self.user_configuration_folder, 'preferences.xml')
         if not os.path.isfile(configuration_file):



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]