Consider minimum line height when splitting lines.

This commit is contained in:
Andrey Golovizin 2014-08-26 17:29:40 +02:00
parent 85180a4ec1
commit 038e4f06e9

View file

@@ -127,18 +127,20 @@ class Page(PageObject):
line_start = None line_start = None
prev_line_end = i prev_line_end = i
def _merge_lines(self, lines, min_space=2): def _merge_lines(self, lines, min_space=2, min_height=5):
prev_line = None prev_line = None
for line in lines: for line in lines:
if prev_line is None: if prev_line is None:
prev_line = line prev_line = line
else: else:
distance = line.top - prev_line.bottom too_close = (
if ( # the first line is to avoid unnecessary calling optical_distance()
# avoid unnecessary calling optical_distance() which may be expensive # which may be expensive
distance < min_space line.top - prev_line.bottom < min_space
and prev_line.optical_distance(line) < min_space and prev_line.optical_distance(line) < min_space
): )
not_high_enough = prev_line.height < min_height
if too_close or not_high_enough:
prev_line = Line(self.image[prev_line.top:line.bottom]) prev_line = Line(self.image[prev_line.top:line.bottom])
else: else:
yield prev_line yield prev_line