Consider minimum line height when splitting lines.
This commit is contained in:
parent
85180a4ec1
commit
038e4f06e9
1 changed file with 8 additions and 6 deletions
|
|
@@ -127,18 +127,20 @@ class Page(PageObject):
|
||||||
line_start = None
|
line_start = None
|
||||||
prev_line_end = i
|
prev_line_end = i
|
||||||
|
|
||||||
def _merge_lines(self, lines, min_space=2):
|
def _merge_lines(self, lines, min_space=2, min_height=5):
|
||||||
prev_line = None
|
prev_line = None
|
||||||
for line in lines:
|
for line in lines:
|
||||||
if prev_line is None:
|
if prev_line is None:
|
||||||
prev_line = line
|
prev_line = line
|
||||||
else:
|
else:
|
||||||
distance = line.top - prev_line.bottom
|
too_close = (
|
||||||
if (
|
# the first line is to avoid unnecessary calling optical_distance()
|
||||||
# avoid unnecessary calling optical_distance() which may be expensive
|
# which may be expensive
|
||||||
distance < min_space
|
line.top - prev_line.bottom < min_space
|
||||||
and prev_line.optical_distance(line) < min_space
|
and prev_line.optical_distance(line) < min_space
|
||||||
):
|
)
|
||||||
|
not_high_enough = prev_line.height < min_height
|
||||||
|
if too_close or not_high_enough:
|
||||||
prev_line = Line(self.image[prev_line.top:line.bottom])
|
prev_line = Line(self.image[prev_line.top:line.bottom])
|
||||||
else:
|
else:
|
||||||
yield prev_line
|
yield prev_line
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue