diff --git a/pixelocr/config.py b/pixelocr/config.py index ebb68bb..0a61b74 100644 --- a/pixelocr/config.py +++ b/pixelocr/config.py @@ -21,6 +21,7 @@ class Configuration(BaseConfiguration): allow_bold_italic = True min_body_height = 10 + min_word_distance = 15 diacritic_box_left = -3 diacritic_box_right = +3 diff --git a/pixelocr/page.py b/pixelocr/page.py index 5fafc7e..5053a60 100644 --- a/pixelocr/page.py +++ b/pixelocr/page.py @@ -235,7 +235,8 @@ class Line(PageObject): if glyph not in bodies: # freestanding diacritic-like glyph without a body yield glyph - def _detect_words(self, glyphs, min_distance=15): + def _detect_words(self, glyphs): + min_distance = self.document.config.min_word_distance current_word_glyphs = [] for glyph, next_glyph in pairwise(glyphs): current_word_glyphs.append(glyph)