From 9b8a52e4c04516520ebba59a3e6416eb2c883e2f Mon Sep 17 00:00:00 2001
From: Andrey Golovizin
Date: Mon, 25 Aug 2014 16:02:26 +0200
Subject: [PATCH] Line._combine_diacritics(): do not lose freestanding diacritic-like symbols.

---
 pixelocr/page.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pixelocr/page.py b/pixelocr/page.py
index 43c47d1..dd6df57 100644
--- a/pixelocr/page.py
+++ b/pixelocr/page.py
@@ -146,8 +146,6 @@ class Line(PageObject):
         bodies = defaultdict(list)
         diacritics = defaultdict(list)
         for i, glyph in enumerate(glyphs):
-            if glyph.is_body():
-                continue
             neighbours = neighbourhood(glyphs, i, 5)
             possible_bodies = [body for body in neighbours if body.detect_diacritic(glyph)]
             if possible_bodies:
@@ -204,6 +202,8 @@ class Glyph(PageObject):
         Higher score means higher probability.
         Zero means "absolutely not".
         """
+        if not self.is_body():
+            return 0
         if glyph.elevation > 0 and glyph.elevation < self.DIACRITIC_MIN_ELEVATION:
             return 0
         if glyph.top >= self.top and glyph.top < self.bottom: