[ocrfeeder] Add OCR engine option to the CLI version
- From: Joaquim Manuel Pereira Rocha <jrocha src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [ocrfeeder] Add OCR engine option to the CLI version
- Date: Wed, 6 Feb 2013 06:51:17 +0000 (UTC)
commit fede8c285aec4b4a6e72a63afdc87a72b7e38996
Author: Joaquim Rocha <me joaquimrocha com>
Date: Tue Feb 5 22:34:23 2013 -0800
Add OCR engine option to the CLI version
bin/ocrfeeder-cli.in | 37 ++++++++++++++++++++++++++++---------
1 files changed, 28 insertions(+), 9 deletions(-)
---
diff --git a/bin/ocrfeeder-cli.in b/bin/ocrfeeder-cli.in
index 4cae370..4fae83e 100644
--- a/bin/ocrfeeder-cli.in
+++ b/bin/ocrfeeder-cli.in
@@ -42,6 +42,20 @@ from optparse import OptionParser
document_generator_manager = DocumentGeneratorManager()
formats = document_generator_manager.getFormats()
+configuration_manager = ConfigurationManager()
+ocr_engines_manager = OcrEnginesManager(configuration_manager)
+ocr_engines_manager.makeEnginesFromFolder(configuration_manager.user_engines_folder)
+ocr_engines = ocr_engines_manager.ocr_engines
+
+ocr_engines_help_text = 'the OCR engine to be used.'
+if not ocr_engines:
+ ocr_engines_help_text += ' No engines configured!'
+else:
+ ocr_engines_help_text += ' Options are: %s' % \
+ ', '.join([engine[0].name for engine in ocr_engines[:-1]])
+ if len(ocr_engines) > 1:
+ ocr_engines_help_text += ' or %s' % ocr_engines[-1][0].name
+
parser = OptionParser(usage = 'Usage: %prog -i IMAGE1 [-i IMAGE2, ...] -o FILE',
version = '%prog ' + OCRFEEDER_STUDIO_VERSION)
parser.add_option('-i', '--image', dest = 'images',
@@ -55,6 +69,9 @@ parser.add_option('-f', '--format', dest = 'format',
parser.add_option('-o', '--output', dest = 'output',
action = 'store', type = 'string',
help = 'the document to be generated')
+parser.add_option('-e', '--engine', dest = 'engine',
+ action = 'store', type = 'string',
+ help = ocr_engines_help_text)
parser.add_option('--window-size', dest = 'window_size', default = 'auto',
action = 'store', type = 'string', metavar= 'auto or an integer value',
help = 'the segmentation algorithm window size')
@@ -85,17 +102,19 @@ file_name = options.output
if not file_name:
parser.error('Please choose the output name.')
-configuration_manager = ConfigurationManager()
-ocr_engines_manager = OcrEnginesManager(configuration_manager)
-ocr_engines_manager.makeEnginesFromFolder(configuration_manager.user_engines_folder)
-ocr_engines = ocr_engines_manager.ocr_engines
-
-if len(ocr_engines):
- engine = ocr_engines[0]
-else:
+if not len(ocr_engines):
parser.error('No OCR engines configured.')
exit(0)
+engine_name = options.engine.lower()
+if engine_name:
+ for engine in ocr_engines:
+ if engine[0].name.lower() == engine_name:
+ ocr_engine = engine[0]
+ break
+else:
+ ocr_engine = ocr_engines[0][0]
+
pages = []
for image in images:
if not os.path.isfile(image):
@@ -105,7 +124,7 @@ for image in images:
page_data = PageData(image)
data_boxes = []
image_obj = Image.open(image)
- layout_analysis = LayoutAnalysis(engine[0],
+ layout_analysis = LayoutAnalysis(ocr_engine,
window_size)
resolution = getImageResolution(image_obj)[1]
page_data.data_boxes = layout_analysis.recognize(image,
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]