Move Image._iter_lines() to Page class.
This commit is contained in:
parent
22f5c5e2b1
commit
ee6aca6c80
2 changed files with 34 additions and 37 deletions
|
|
@@ -59,7 +59,7 @@ def combine(*images):
|
||||||
return Image(data, left, top)
|
return Image(data, left, top)
|
||||||
|
|
||||||
|
|
||||||
def _is_nonblank(bitmap):
|
def is_nonblank(bitmap):
|
||||||
"""Return True if bitmap contains at least one black (=1) pixel."""
|
"""Return True if bitmap contains at least one black (=1) pixel."""
|
||||||
return bitmap.any()
|
return bitmap.any()
|
||||||
|
|
||||||
|
|
@@ -159,7 +159,7 @@ class Image(object):
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def isspace(self):
|
def isspace(self):
|
||||||
return not _is_nonblank(self.bitmap)
|
return not is_nonblank(self.bitmap)
|
||||||
|
|
||||||
def serialize(self):
|
def serialize(self):
|
||||||
"""Serialize the image as some hashable object."""
|
"""Serialize the image as some hashable object."""
|
||||||
|
|
@@ -196,7 +196,7 @@ class Image(object):
|
||||||
|
|
||||||
def _get_margin_height(rows):
|
def _get_margin_height(rows):
|
||||||
for i, row in enumerate(rows):
|
for i, row in enumerate(rows):
|
||||||
if _is_nonblank(row):
|
if is_nonblank(row):
|
||||||
return i
|
return i
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
@@ -218,34 +218,3 @@ class Image(object):
|
||||||
and self.right <= right
|
and self.right <= right
|
||||||
and self.bottom <= bottom
|
and self.bottom <= bottom
|
||||||
)
|
)
|
||||||
|
|
||||||
def _iter_lines(self, min_space):
|
|
||||||
def iter_lines():
|
|
||||||
line_start = None
|
|
||||||
prev_line_end = 0
|
|
||||||
|
|
||||||
for i, row in enumerate(self.bitmap):
|
|
||||||
if _is_nonblank(row):
|
|
||||||
if line_start is None:
|
|
||||||
line_start = i
|
|
||||||
else:
|
|
||||||
if line_start is not None:
|
|
||||||
yield self[line_start:i,:]
|
|
||||||
line_start = None
|
|
||||||
prev_line_end = i
|
|
||||||
|
|
||||||
def merge_lines(lines):
|
|
||||||
prev_line = None
|
|
||||||
for line in lines:
|
|
||||||
if prev_line is None:
|
|
||||||
prev_line = line
|
|
||||||
else:
|
|
||||||
if line.top - prev_line.bottom < min_space:
|
|
||||||
prev_line = self[prev_line.top:line.bottom]
|
|
||||||
else:
|
|
||||||
yield prev_line
|
|
||||||
prev_line = line
|
|
||||||
if prev_line is not None:
|
|
||||||
yield prev_line
|
|
||||||
|
|
||||||
return merge_lines(iter_lines())
|
|
||||||
|
|
|
||||||
|
|
@@ -22,7 +22,7 @@ from scipy import ndimage
|
||||||
from scipy.ndimage import filters
|
from scipy.ndimage import filters
|
||||||
|
|
||||||
from .utils import cached_property, collect_iterable, pairwise, neighbourhood
|
from .utils import cached_property, collect_iterable, pairwise, neighbourhood
|
||||||
from .image import Image, combine
|
from .image import Image, combine, is_nonblank
|
||||||
|
|
||||||
|
|
||||||
CONNECTIVITY8 = ndimage.generate_binary_structure(2, 2)
|
CONNECTIVITY8 = ndimage.generate_binary_structure(2, 2)
|
||||||
|
|
@@ -111,8 +111,36 @@ class Page(PageObject):
|
||||||
@cached_property
|
@cached_property
|
||||||
@collect_iterable
|
@collect_iterable
|
||||||
def lines(self):
|
def lines(self):
|
||||||
for line_img in self.image._iter_lines(min_space=2):
|
return self._merge_lines(self._iter_lines())
|
||||||
yield Line(line_img)
|
|
||||||
|
def _iter_lines(self):
|
||||||
|
line_start = None
|
||||||
|
prev_line_end = 0
|
||||||
|
|
||||||
|
for i, row in enumerate(self.image.bitmap):
|
||||||
|
if is_nonblank(row):
|
||||||
|
if line_start is None:
|
||||||
|
line_start = i
|
||||||
|
else:
|
||||||
|
if line_start is not None:
|
||||||
|
yield Line(self.image[line_start:i,:])
|
||||||
|
line_start = None
|
||||||
|
prev_line_end = i
|
||||||
|
|
||||||
|
def _merge_lines(self, lines, min_space=2):
|
||||||
|
prev_line = None
|
||||||
|
for line in lines:
|
||||||
|
if prev_line is None:
|
||||||
|
prev_line = line
|
||||||
|
else:
|
||||||
|
if line.top - prev_line.bottom < min_space:
|
||||||
|
prev_line = Line(self.image[prev_line.top:line.bottom])
|
||||||
|
else:
|
||||||
|
yield prev_line
|
||||||
|
prev_line = line
|
||||||
|
if prev_line is not None:
|
||||||
|
yield prev_line
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Line(PageObject):
|
class Line(PageObject):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue