From ee6aca6c80ed241de3891e9b216eb46b0172b514 Mon Sep 17 00:00:00 2001 From: Andrey Golovizin Date: Tue, 26 Aug 2014 17:09:52 +0200 Subject: [PATCH] Move Image._iter_lines() to Page class. --- pixelocr/image.py | 37 +++---------------------------------- pixelocr/page.py | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 37 deletions(-) diff --git a/pixelocr/image.py b/pixelocr/image.py index 83f554e..99895da 100644 --- a/pixelocr/image.py +++ b/pixelocr/image.py @@ -59,7 +59,7 @@ def combine(*images): return Image(data, left, top) -def _is_nonblank(bitmap): +def is_nonblank(bitmap): """Return True if bitmap contains at least one black (=1) pixel.""" return bitmap.any() @@ -159,7 +159,7 @@ class Image(object): @cached_property def isspace(self): - return not _is_nonblank(self.bitmap) + return not is_nonblank(self.bitmap) def serialize(self): """Serialize the image as some hashable object.""" @@ -196,7 +196,7 @@ class Image(object): def _get_margin_height(rows): for i, row in enumerate(rows): - if _is_nonblank(row): + if is_nonblank(row): return i return 0 @@ -218,34 +218,3 @@ class Image(object): and self.right <= right and self.bottom <= bottom ) - - def _iter_lines(self, min_space): - def iter_lines(): - line_start = None - prev_line_end = 0 - - for i, row in enumerate(self.bitmap): - if _is_nonblank(row): - if line_start is None: - line_start = i - else: - if line_start is not None: - yield self[line_start:i,:] - line_start = None - prev_line_end = i - - def merge_lines(lines): - prev_line = None - for line in lines: - if prev_line is None: - prev_line = line - else: - if line.top - prev_line.bottom < min_space: - prev_line = self[prev_line.top:line.bottom] - else: - yield prev_line - prev_line = line - if prev_line is not None: - yield prev_line - - return merge_lines(iter_lines()) diff --git a/pixelocr/page.py b/pixelocr/page.py index 4b6982f..54d4789 100644 --- a/pixelocr/page.py +++ b/pixelocr/page.py @@ -22,7 +22,7 @@ from scipy import ndimage from scipy.ndimage import filters from .utils import cached_property, collect_iterable, pairwise, neighbourhood -from .image import Image, combine +from .image import Image, combine, is_nonblank CONNECTIVITY8 = ndimage.generate_binary_structure(2, 2) @@ -111,8 +111,36 @@ class Page(PageObject): @cached_property @collect_iterable def lines(self): - for line_img in self.image._iter_lines(min_space=2): - yield Line(line_img) + return self._merge_lines(self._iter_lines()) + + def _iter_lines(self): + line_start = None + prev_line_end = 0 + + for i, row in enumerate(self.image.bitmap): + if is_nonblank(row): + if line_start is None: + line_start = i + else: + if line_start is not None: + yield Line(self.image[line_start:i,:]) + line_start = None + prev_line_end = i + + def _merge_lines(self, lines, min_space=2): + prev_line = None + for line in lines: + if prev_line is None: + prev_line = line + else: + if line.top - prev_line.bottom < min_space: + prev_line = Line(self.image[prev_line.top:line.bottom]) + else: + yield prev_line + prev_line = line + if prev_line is not None: + yield prev_line + class Line(PageObject):