From 038e4f06e9066336dc555b7261fec5c54f2d022d Mon Sep 17 00:00:00 2001 From: Andrey Golovizin Date: Tue, 26 Aug 2014 17:29:40 +0200 Subject: [PATCH] Consider minimum line height when splitting lines. --- pixelocr/page.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pixelocr/page.py b/pixelocr/page.py index 6a3c2f4..91b593c 100644 --- a/pixelocr/page.py +++ b/pixelocr/page.py @@ -127,18 +127,20 @@ class Page(PageObject): line_start = None prev_line_end = i - def _merge_lines(self, lines, min_space=2): + def _merge_lines(self, lines, min_space=2, min_height=5): prev_line = None for line in lines: if prev_line is None: prev_line = line else: - distance = line.top - prev_line.bottom - if ( - # avoid unnecessary calling optical_distance() which may be expensive - distance < min_space + too_close = ( + # the first line is to avoid unnecessarily calling optical_distance() + # which may be expensive + line.top - prev_line.bottom < min_space and prev_line.optical_distance(line) < min_space - ): + ) + not_high_enough = prev_line.height < min_height + if too_close or not_high_enough: prev_line = Line(self.image[prev_line.top:line.bottom]) else: yield prev_line