Strip vertical whitespace from letters.
This commit is contained in:
parent
81147f8a50
commit
ee22008820
1 changed file with 30 additions and 5 deletions
|
|
@@ -7,6 +7,11 @@ from skimage.color import rgb2gray
|
||||||
from .utils import cached_property
|
from .utils import cached_property
|
||||||
|
|
||||||
|
|
||||||
|
def _is_nonblank(bitmap):
|
||||||
|
"""Return True if bitmap contains at least one black (=1) pixel."""
|
||||||
|
return bitmap.any()
|
||||||
|
|
||||||
|
|
||||||
class Image(object):
|
class Image(object):
|
||||||
"""Basic image class."""
|
"""Basic image class."""
|
||||||
|
|
||||||
|
|
@@ -58,6 +63,10 @@ class Image(object):
|
||||||
grayscale = rgb2gray(self._data)
|
grayscale = rgb2gray(self._data)
|
||||||
return (grayscale < 1).astype('b')
|
return (grayscale < 1).astype('b')
|
||||||
|
|
||||||
|
@cached_property
def isspace(self):
    """True when the image's bitmap has no black pixel at all."""
    has_ink = _is_nonblank(self.bitmap)
    return not has_ink
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def key(self):
|
def key(self):
|
||||||
"""Return a byte string uniquely representing the image."""
|
"""Return a byte string uniquely representing the image."""
|
||||||
|
|
@@ -79,6 +88,25 @@ class Image(object):
|
||||||
def unframe(self, width=2):
|
def unframe(self, width=2):
|
||||||
return self[width:-width,width:-width]
|
return self[width:-width,width:-width]
|
||||||
|
|
||||||
|
def strip(self):
    """Trim blank rows from the top and bottom of the image.

    Only rows are removed; every column is kept.  An image that is
    entirely blank is returned as is rather than being cut down to
    nothing.

    """
    if self.isspace:
        return self

    def _count_leading_blank(rows):
        # Position of the first row that has a black pixel, i.e. the
        # number of blank rows in front of it; 0 when no such row exists.
        return next(
            (index for index, row in enumerate(rows) if _is_nonblank(row)),
            0,
        )

    blank_above = _count_leading_blank(self.bitmap)
    # Walking the rows bottom-up turns the trailing margin into a leading one.
    blank_below = _count_leading_blank(reversed(self.bitmap))
    last_row = self.height - blank_below
    return self[blank_above:last_row, :]
|
||||||
|
|
||||||
def _iter_children(self, min_space):
|
def _iter_children(self, min_space):
|
||||||
if self.child_cls is None:
|
if self.child_cls is None:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
@@ -86,11 +114,8 @@ class Image(object):
|
||||||
line_start = None
|
line_start = None
|
||||||
prev_line_end = 0
|
prev_line_end = 0
|
||||||
|
|
||||||
def is_nonblank(row):
|
|
||||||
return row.any()
|
|
||||||
|
|
||||||
for i, row in enumerate(self.bitmap):
|
for i, row in enumerate(self.bitmap):
|
||||||
if is_nonblank(row):
|
if _is_nonblank(row):
|
||||||
if line_start is None:
|
if line_start is None:
|
||||||
line_start = i
|
line_start = i
|
||||||
height = line_start - prev_line_end
|
height = line_start - prev_line_end
|
||||||
|
|
@@ -112,7 +137,7 @@ class Line(Image):
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
for rotated_letter in self.T._iter_children(min_space=10):
|
for rotated_letter in self.T._iter_children(min_space=10):
|
||||||
yield rotated_letter.T
|
yield rotated_letter.T.strip()
|
||||||
|
|
||||||
|
|
||||||
class Page(Image):
|
class Page(Image):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue