From 3d6c3de3161d5f53c75ab4623eb5ccfd984c1c57 Mon Sep 17 00:00:00 2001 From: Andrey Golovizin Date: Fri, 8 Aug 2014 15:53:23 +0200 Subject: [PATCH] Add Page, Line and Letter classes. --- pixelocr/image.py | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/pixelocr/image.py b/pixelocr/image.py index 6f2e407..4c03c28 100644 --- a/pixelocr/image.py +++ b/pixelocr/image.py @@ -7,9 +7,11 @@ from skimage.color import rgb2gray from .utils import cached_property -class Image(IPythonImageMixin): +class Image(object): """Basic image class.""" + child_cls = None + def __init__(self, data): self._data = data @@ -77,7 +79,10 @@ class Image(IPythonImageMixin): def unframe(self, width=2): return self[width:-width,width:-width] - def iter_lines(self, min_space=200): + def _iter_children(self, min_space): + if self.child_cls is None: + raise NotImplementedError + line_start = None prev_line_end = 0 for i, row in enumerate(self.bitmap): @@ -85,13 +90,28 @@ class Image(IPythonImageMixin): if line_start is None: line_start = i if line_start - prev_line_end >= min_space: - yield Image.space(line_start - prev_line_end, self.width) + yield self.child_cls.space(line_start - prev_line_end, self.width) else: if line_start is not None: - yield self[line_start:i, :] + yield self.child_cls(self._data[line_start:i, :]) line_start = None prev_line_end = i - def iter_letters(self, min_space=10): - for letter in self.T.iter_lines(min_space=7): - yield letter.T + +class Page(Image): + child_cls = Line + + def __iter__(self): + return self._iter_children(min_space=200) + + +class Line(Image): + child_cls = Letter + + def __iter__(self): + for rotated_letter in self.T._iter_children(min_space=10): + yield rotated_letter.T + + +class Letter(Image): + pass