From dd847b4e98f7a64ec2b7a26d7df849fcc1e03721 Mon Sep 17 00:00:00 2001 From: Andrey Golovizin Date: Mon, 25 Aug 2014 15:30:28 +0200 Subject: [PATCH] Tweak diacritic detection to include ? and ! characters. --- pixelocr/page.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pixelocr/page.py b/pixelocr/page.py index fbf2cb5..6ad9482 100644 --- a/pixelocr/page.py +++ b/pixelocr/page.py @@ -183,6 +183,7 @@ class Glyph(PageObject): DIACRITIC_WINDOW_LEFT = 3 DIACRITIC_WINDOW_RIGHT = 3 DIACRITIC_MIN_ELEVATION = 5 + MIN_BODY_HEIGHT = 10 def __init__(self, image, elevation): super().__init__(image) @@ -195,7 +196,7 @@ class Glyph(PageObject): def is_body(self): """Return True if the glyph is definitely not diacritic.""" - return self.elevation <= 0 + return self.height >= self.MIN_BODY_HEIGHT def detect_diacritic(self, glyph): """Check if the given glyph can be our diacritic and return a numeric score. @@ -203,9 +204,9 @@ class Glyph(PageObject): Higher score means higher probability. Zero means "absolutely not". """ - if glyph.elevation < self.DIACRITIC_MIN_ELEVATION: + if glyph.elevation > 0 and glyph.elevation < self.DIACRITIC_MIN_ELEVATION: return 0 - if glyph.top >= self.top: + if glyph.top >= self.top and glyph.top < self.bottom: return 0 if ( glyph.left < self.left - self.DIACRITIC_WINDOW_LEFT