Strip vertical whitespace from letters.
This commit is contained in:
parent
81147f8a50
commit
ee22008820
1 changed file with 30 additions and 5 deletions
|
|
@ -7,6 +7,11 @@ from skimage.color import rgb2gray
|
|||
from .utils import cached_property
|
||||
|
||||
|
||||
def _is_nonblank(bitmap):
    """Tell whether ``bitmap`` holds any black (=1) pixel.

    ``bitmap`` may be a whole image bitmap or a single row of one; the
    result is whatever its ``any()`` method reports.
    """
    has_ink = bitmap.any()
    return has_ink
|
||||
|
||||
|
||||
class Image(object):
|
||||
"""Basic image class."""
|
||||
|
||||
|
|
@ -58,6 +63,10 @@ class Image(object):
|
|||
grayscale = rgb2gray(self._data)
|
||||
return (grayscale < 1).astype('b')
|
||||
|
||||
@cached_property
def isspace(self):
    """True when the image's bitmap has no black pixel at all."""
    has_ink = _is_nonblank(self.bitmap)
    return not has_ink
|
||||
|
||||
@property
|
||||
def key(self):
|
||||
"""Return a byte string uniquely representing the image."""
|
||||
|
|
@ -79,6 +88,25 @@ class Image(object):
|
|||
def unframe(self, width=2):
    """Return the image with a ``width``-thick border cut off on every side.

    The same margin is removed from the start and the end of both axes.
    ``width=0`` returns the full, uncropped selection.
    """
    # ``-0`` equals ``0``, so ``self[0:-0, 0:-0]`` would select nothing;
    # an open-ended stop keeps the whole axis when there is no frame.
    stop = -width or None
    return self[width:stop, width:stop]
|
||||
|
||||
def strip(self):
    """Crop away the blank rows above and below the inked area.

    An image that is entirely blank is returned unchanged.
    """
    if self.isspace:
        return self

    def _first_inked_row(rows):
        # Index of the first row holding a black pixel; 0 when none does.
        return next(
            (index for index, row in enumerate(rows) if _is_nonblank(row)),
            0,
        )

    blank_above = _first_inked_row(self.bitmap)
    # Scanning the rows bottom-up gives the height of the bottom margin.
    blank_below = _first_inked_row(reversed(self.bitmap))
    return self[blank_above:self.height - blank_below, :]
|
||||
|
||||
def _iter_children(self, min_space):
|
||||
if self.child_cls is None:
|
||||
raise NotImplementedError
|
||||
|
|
@ -86,11 +114,8 @@ class Image(object):
|
|||
line_start = None
|
||||
prev_line_end = 0
|
||||
|
||||
def is_nonblank(row):
|
||||
return row.any()
|
||||
|
||||
for i, row in enumerate(self.bitmap):
|
||||
if is_nonblank(row):
|
||||
if _is_nonblank(row):
|
||||
if line_start is None:
|
||||
line_start = i
|
||||
height = line_start - prev_line_end
|
||||
|
|
@ -112,7 +137,7 @@ class Line(Image):
|
|||
|
||||
def __iter__(self):
    """Yield each letter of the line with its blank top/bottom rows removed.

    The line is transposed and split via ``_iter_children`` (with
    ``min_space=10``). Each piece is transposed back and passed through
    ``strip()`` before being yielded.
    """
    for rotated_letter in self.T._iter_children(min_space=10):
        # Yield only the stripped letter: also yielding the unstripped
        # ``rotated_letter.T`` would emit every letter twice.
        yield rotated_letter.T.strip()
|
||||
|
||||
|
||||
class Page(Image):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue