Strip vertical whitespace from letters.

This commit is contained in:
Andrey Golovizin 2014-08-08 17:24:32 +02:00
parent 81147f8a50
commit ee22008820

View file

@@ -7,6 +7,11 @@ from skimage.color import rgb2gray
from .utils import cached_property
def _is_nonblank(bitmap):
    """Tell whether any pixel of the bitmap is set.

    A set (=1) pixel is a black one, so a bitmap without any set pixel
    counts as blank.
    """
    has_black_pixel = bitmap.any()
    return has_black_pixel
class Image(object):
"""Basic image class."""
@@ -58,6 +63,10 @@ class Image(object):
grayscale = rgb2gray(self._data)
return (grayscale < 1).astype('b')
@cached_property
def isspace(self):
    """True when the image bitmap holds no black pixel at all."""
    has_ink = _is_nonblank(self.bitmap)
    return not has_ink
@property
def key(self):
"""Return a byte string uniquely representing the image."""
@@ -79,6 +88,25 @@ class Image(object):
def unframe(self, width=2):
    """Return the image with ``width`` rows and columns cut from every edge.

    The result is produced by slicing ``self`` on both axes, so its type is
    whatever ``self[...]`` returns.

    ``width`` defaults to 2. A width of 0 returns the full extent on both
    axes instead of an empty slice.
    """
    # ``-width`` evaluates to 0 when width == 0, and ``self[0:0, 0:0]`` would
    # be empty; an open-ended stop (None) keeps the whole image in that case.
    stop = -width or None
    return self[width:stop, width:stop]
def strip(self):
    """Return the image without its blank top and bottom rows.

    An image that is entirely whitespace is handed back untouched.
    """
    if self.isspace:
        return self

    def _count_leading_blank(rows):
        # Index of the first row holding a black pixel; 0 when none does.
        first_inked = (
            index for index, row in enumerate(rows) if _is_nonblank(row)
        )
        return next(first_inked, 0)

    top = _count_leading_blank(self.bitmap)
    # Scanning the rows bottom-up gives the height of the bottom margin.
    bottom = _count_leading_blank(reversed(self.bitmap))
    return self[top:self.height - bottom, :]
def _iter_children(self, min_space):
if self.child_cls is None:
raise NotImplementedError
@@ -86,11 +114,8 @@ class Image(object):
line_start = None
prev_line_end = 0
def is_nonblank(row):
return row.any()
for i, row in enumerate(self.bitmap):
if is_nonblank(row):
if _is_nonblank(row):
if line_start is None:
line_start = i
height = line_start - prev_line_end
@@ -112,7 +137,7 @@ class Line(Image):
def __iter__(self):
# Iterate over the children that self.T._iter_children() produces with
# min_space=10 and yield each child's own .T (presumably undoing the
# rotation applied via self.T -- confirm against the Image class).
for rotated_letter in self.T._iter_children(min_space=10):
# NOTE(review): two yields are shown here -- the plain .T and the .T with
# strip() applied. This looks like the removed and the added line of a diff
# rendered together; confirm which one the file really keeps.
yield rotated_letter.T
yield rotated_letter.T.strip()
class Page(Image):