Consider minimum line height when splitting lines.

This commit is contained in:
Andrey Golovizin 2014-08-26 17:29:40 +02:00
parent 85180a4ec1
commit 038e4f06e9

View file

@ -127,18 +127,20 @@ class Page(PageObject):
line_start = None
prev_line_end = i
def _merge_lines(self, lines, min_space=2):
def _merge_lines(self, lines, min_space=2, min_height=5):
prev_line = None
for line in lines:
if prev_line is None:
prev_line = line
else:
distance = line.top - prev_line.bottom
if (
# avoid unnecessary calling optical_distance() which may be expensive
distance < min_space
too_close = (
# the first line is to avoid unnecessary calling optical_distance()
# which may be expensive
line.top - prev_line.bottom < min_space
and prev_line.optical_distance(line) < min_space
):
)
not_high_enough = prev_line.height < min_height
if too_close or not_high_enough:
prev_line = Line(self.image[prev_line.top:line.bottom])
else:
yield prev_line