[ocrfeeder] Add the LayoutAnalysis class that provides easy ways of recognizing images
- From: Joaquim Manuel Pereira Rocha <jrocha src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [ocrfeeder] Add the LayoutAnalysis class that provides easy ways of recognizing images
- Date: Tue, 27 Jul 2010 22:05:35 +0000 (UTC)
commit 6ff17ac2fd35baa796a31b90c961d59be4fd1872
Author: Joaquim Rocha <jrocha igalia com>
Date: Wed Jul 7 19:12:03 2010 +0200
Add the LayoutAnalysis class that provides easy ways of recognizing images
layoutAnalysis.LayoutAnalysis: This new class abstracts the usage of
the OCR engines' classes and the page segmentation methods.
feeder/layoutAnalysis.py | 84 +++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 83 insertions(+), 1 deletions(-)
---
diff --git a/feeder/layoutAnalysis.py b/feeder/layoutAnalysis.py
index 307c6fb..5da0498 100644
--- a/feeder/layoutAnalysis.py
+++ b/feeder/layoutAnalysis.py
@@ -19,7 +19,10 @@
###########################################################################
from util.lib import debug
-from util.constants import OCRFEEDER_DEBUG
+from util import graphics
+from util.constants import OCRFEEDER_DEBUG, DTP
+from studio.dataHolder import DataBox
+from imageManipulation import ImageProcessor
NONE = 0
TOP = -1
@@ -370,3 +373,82 @@ class BlockRetriever:
for block in blocks:
debug(block)
return blocks
+
+class LayoutAnalysis(object):
+
+ def __init__(self,
+ ocr_engine,
+ window_size = None,
+ improve_column_detection = True,
+ column_size = None):
+ self.ocr_engine = ocr_engine
+ self.window_size = window_size
+ self.column_size = column_size
+ self.improve_column_detection = improve_column_detection
+
+ def recognize(self, path_to_image, page_resolution):
+ image_processor = ImageProcessor(path_to_image,
+ self.window_size)
+ block_retriever = BlockRetriever(image_processor.imageToBinary())
+
+ # Get "untouched" block bounds
+ blocks = block_retriever.getAllBlocks()
+ block_bounds = [block.translateToUnits(image_processor.window_size) \
+ for block in blocks]
+
+ # Perform column subdivision (optimization of results)
+ if self.improve_column_detection:
+ bounds_optimized = []
+ for bounds in block_bounds:
+ bounds_divided = image_processor.divideImageClipInColumns(bounds,
+ self.column_size)
+ bounds_optimized.extend(bounds_divided)
+ block_bounds = bounds_optimized
+
+ # Adjust margins (optimization of results)
+ block_bounds = [image_processor.adjustImageClipMargins(bounds, \
+ self.column_size) \
+ for bounds in block_bounds]
+
+ image = image_processor.original_image
+ data_boxes = [self.__recognizeImageFromBounds(image,
+ bounds,
+ page_resolution) \
+ for bounds in block_bounds]
+ return data_boxes
+
+ def __recognizeImageFromBounds(self, image, bounds, page_resolution):
+ clip = image.crop(bounds)
+ text = clip_type = None
+ if self.ocr_engine:
+ text = self.readImage(clip)
+ clip_type = self.ocr_engine.classify(text)
+
+ x0, y0, x1, y1 = bounds
+ x, y, width, height = graphics.getBoundsFromStartEndPoints((x0, y0),
+ (x1, y1))
+
+ data_box = DataBox(x, y, width, height, clip)
+ if text:
+ data_box.setText(text)
+ data_box.setType(clip_type)
+
+ text_size = self.getTextSizeFromImage(clip, page_resolution)
+ if text_size:
+ data_box.setFontSize(text_size)
+
+ return data_box
+
+ def getTextSizeFromImage(self, image, page_resolution):
+ text_size = graphics.getTextSizeFromImage(image)
+ if not text_size:
+ return None
+ y_resolution = float(page_resolution)
+ text_size /= y_resolution
+ text_size *= DTP
+ print text_size
+ return round(text_size)
+
+ def readImage(self, image):
+ self.ocr_engine.setImage(image)
+ return self.ocr_engine.read()
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]