From 3f0837d7dde80946a6ed09a16cf48d972b8dfc1d Mon Sep 17 00:00:00 2001 From: Andrey Golovizin Date: Tue, 16 Sep 2014 11:47:39 +0200 Subject: [PATCH] Add min_word_distance config variable. --- pixelocr/config.py | 1 + pixelocr/page.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pixelocr/config.py b/pixelocr/config.py index ebb68bb..0a61b74 100644 --- a/pixelocr/config.py +++ b/pixelocr/config.py @@ -21,6 +21,7 @@ class Configuration(BaseConfiguration): allow_bold_italic = True min_body_height = 10 + min_word_distance = 15 diacritic_box_left = -3 diacritic_box_right = +3 diff --git a/pixelocr/page.py b/pixelocr/page.py index 5fafc7e..5053a60 100644 --- a/pixelocr/page.py +++ b/pixelocr/page.py @@ -235,7 +235,8 @@ class Line(PageObject): if glyph not in bodies: # freestanding diacritic-like glyph without a body yield glyph - def _detect_words(self, glyphs, min_distance=15): + def _detect_words(self, glyphs): + min_distance = self.document.config.min_word_distance current_word_glyphs = [] for glyph, next_glyph in pairwise(glyphs): current_word_glyphs.append(glyph)