Move Image._iter_lines() to Page class.

This commit is contained in:
Andrey Golovizin 2014-08-26 17:09:52 +02:00
parent 22f5c5e2b1
commit ee6aca6c80
2 changed files with 34 additions and 37 deletions

View file

@ -59,7 +59,7 @@ def combine(*images):
return Image(data, left, top)
def _is_nonblank(bitmap):
def is_nonblank(bitmap):
"""Return True if bitmap contains at least one black (=1) pixel."""
return bitmap.any()
@ -159,7 +159,7 @@ class Image(object):
@cached_property
def isspace(self):
return not _is_nonblank(self.bitmap)
return not is_nonblank(self.bitmap)
def serialize(self):
"""Serialize the image as some hashable object."""
@ -196,7 +196,7 @@ class Image(object):
def _get_margin_height(rows):
for i, row in enumerate(rows):
if _is_nonblank(row):
if is_nonblank(row):
return i
return 0
@ -218,34 +218,3 @@ class Image(object):
and self.right <= right
and self.bottom <= bottom
)
def _iter_lines(self, min_space):
    """Return an iterator over the line sub-images of this image.

    A line is a maximal run of consecutive bitmap rows that each contain
    at least one black pixel. Runs are cut out of this image by row
    slicing, and neighbouring runs whose gap
    (``line.top - prev_line.bottom``) is smaller than ``min_space`` are
    merged into a single slice spanning both.
    """

    def iter_lines():
        # Index of the first row of the run currently being scanned,
        # or None while scanning blank rows.
        line_start = None
        row_count = 0
        for i, row in enumerate(self.bitmap):
            row_count = i + 1
            if _is_nonblank(row):
                if line_start is None:
                    line_start = i
            elif line_start is not None:
                yield self[line_start:i, :]
                line_start = None
        # A run that reaches the last row has no blank row after it to
        # close it, so emit it here instead of silently dropping it.
        if line_start is not None:
            yield self[line_start:row_count, :]

    def merge_lines(lines):
        prev_line = None
        for line in lines:
            if prev_line is None:
                prev_line = line
            elif line.top - prev_line.bottom < min_space:
                # Too close to be separate lines: re-slice across both.
                prev_line = self[prev_line.top:line.bottom]
            else:
                yield prev_line
                prev_line = line
        if prev_line is not None:
            yield prev_line

    return merge_lines(iter_lines())

View file

@ -22,7 +22,7 @@ from scipy import ndimage
from scipy.ndimage import filters
from .utils import cached_property, collect_iterable, pairwise, neighbourhood
from .image import Image, combine
from .image import Image, combine, is_nonblank
CONNECTIVITY8 = ndimage.generate_binary_structure(2, 2)
@ -111,8 +111,36 @@ class Page(PageObject):
@cached_property
@collect_iterable
def lines(self):
for line_img in self.image._iter_lines(min_space=2):
yield Line(line_img)
return self._merge_lines(self._iter_lines())
def _iter_lines(self):
    """Yield a ``Line`` for every run of non-blank rows in the page image.

    Rows of ``self.image.bitmap`` are scanned in order. Each maximal run
    of consecutive rows for which ``is_nonblank`` is true is cut out of
    ``self.image`` by row slicing and wrapped in a ``Line``.
    """
    # Index of the first row of the run currently being scanned, or None
    # while scanning blank rows.
    line_start = None
    row_count = 0
    for i, row in enumerate(self.image.bitmap):
        row_count = i + 1
        if is_nonblank(row):
            if line_start is None:
                line_start = i
        elif line_start is not None:
            yield Line(self.image[line_start:i, :])
            line_start = None
    # A run that reaches the last row has no blank row after it to close
    # it, so emit it here instead of silently dropping it.
    if line_start is not None:
        yield Line(self.image[line_start:row_count, :])
def _merge_lines(self, lines, min_space=2):
    """Join lines that sit closer together than ``min_space``.

    Walks ``lines`` in order while holding one pending line. When the
    next line's ``top`` minus the pending line's ``bottom`` is below
    ``min_space``, the two are replaced by a single ``Line`` cut from
    ``self.image`` spanning both; otherwise the pending line is yielded
    and the next line becomes the pending one.
    """
    pending = None
    for line in lines:
        if pending is None:
            # First line seen: nothing to compare against yet.
            pending = line
            continue
        gap = line.top - pending.bottom
        if gap < min_space:
            pending = Line(self.image[pending.top:line.bottom])
            continue
        yield pending
        pending = line
    # Flush whatever is still held once the input is exhausted.
    if pending is not None:
        yield pending
class Line(PageObject):