From ee220088207b68303e39583456fe664bbe0a067a Mon Sep 17 00:00:00 2001
From: Andrey Golovizin
Date: Fri, 8 Aug 2014 17:24:32 +0200
Subject: [PATCH] Strip vertical whitespace from letters.

---
 pixelocr/image.py | 35 ++++++++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/pixelocr/image.py b/pixelocr/image.py
index 274063b..5fdb84d 100644
--- a/pixelocr/image.py
+++ b/pixelocr/image.py
@@ -7,6 +7,11 @@ from skimage.color import rgb2gray
 from .utils import cached_property
 
 
+def _is_nonblank(bitmap):
+    """Return True if bitmap contains at least one black (=1) pixel."""
+    return bitmap.any()
+
+
 class Image(object):
     """Basic image class."""
 
@@ -58,6 +63,10 @@ class Image(object):
         grayscale = rgb2gray(self._data)
         return (grayscale < 1).astype('b')
 
+    @cached_property
+    def isspace(self):
+        return not _is_nonblank(self.bitmap)
+
     @property
     def key(self):
         """Return a byte string uniquely representing the image."""
@@ -79,6 +88,25 @@ class Image(object):
     def unframe(self, width=2):
         return self[width:-width,width:-width]
 
+    def strip(self):
+        """Strip top and bottom blank space.
+
+        All-whitespace images are not stripped.
+        """
+
+        if self.isspace:
+            return self
+
+        def _get_margin_height(rows):
+            for i, row in enumerate(rows):
+                if _is_nonblank(row):
+                    return i
+            return 0
+
+        top_margin = _get_margin_height(self.bitmap)
+        bottom_margin = _get_margin_height(reversed(self.bitmap))
+        return self[top_margin:self.height - bottom_margin, :]
+
     def _iter_children(self, min_space):
         if self.child_cls is None:
             raise NotImplementedError
@@ -86,11 +114,8 @@ class Image(object):
         line_start = None
         prev_line_end = 0
 
-        def is_nonblank(row):
-            return row.any()
-
         for i, row in enumerate(self.bitmap):
-            if is_nonblank(row):
+            if _is_nonblank(row):
                 if line_start is None:
                     line_start = i
                 height = line_start - prev_line_end
@@ -112,7 +137,7 @@ class Line(Image):
 
     def __iter__(self):
         for rotated_letter in self.T._iter_children(min_space=10):
-            yield rotated_letter.T
+            yield rotated_letter.T.strip()
 
 
 class Page(Image):