From 0e44ad8f6a3645a0c902fd59214d47590e311c75 Mon Sep 17 00:00:00 2001 From: Andrey Golovizin Date: Fri, 12 Sep 2014 13:00:28 +0200 Subject: [PATCH] Replace hardcoded values in the diacritic detection code with config variables. --- pixelocr/config.py | 17 +++++++++++++++++ pixelocr/page.py | 38 +++++++++++++++++++------------------- 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/pixelocr/config.py b/pixelocr/config.py index 5b700cc..5d54d31 100644 --- a/pixelocr/config.py +++ b/pixelocr/config.py @@ -20,6 +20,23 @@ from confire import Configuration as BaseConfiguration class Configuration(BaseConfiguration): min_body_height = 10 + diacritic_box_left = -3 + diacritic_box_right = +3 + diacritic_box_top = -10 + diacritic_box_bottom = +10 + + apostrophe_box_left = -5 + apostrophe_box_right = +7 + apostrophe_box_top = -5 + apostrophe_box_bottom = +10 + apostrophe_min_height = +3 + apostrophe_max_distance = +4 + + dot_box_left = -3 + dot_box_right = +3 + dot_box_top = +1 + dot_box_bottom = +10 + @classmethod def load_file(cls, filename): class MyConfiguration(cls): diff --git a/pixelocr/page.py b/pixelocr/page.py index 88c75e8..88317be 100644 --- a/pixelocr/page.py +++ b/pixelocr/page.py @@ -246,8 +246,6 @@ class Line(PageObject): class Glyph(PageObject): - MIN_BODY_HEIGHT = 10 - def __init__(self, line, image, elevation): super().__init__(line.document, image) self.elevation = elevation @@ -255,7 +253,7 @@ class Glyph(PageObject): def is_body(self): """Return True if the glyph is definitely not diacritic.""" - return self.height >= self.MIN_BODY_HEIGHT + return self.height >= self.config.min_body_height def optical_distance(self, other): distance = other.left - self.right @@ -267,32 +265,34 @@ class Glyph(PageObject): if not self.is_body(): return False - #TODO remove hardcoded sizes - # diacritic above the letter if glyph.fits( - self.left - 3, - self.top - 10, - self.right + 3, - self.top + 10, + self.left + self.config.diacritic_box_left, + self.top + self.config.diacritic_box_top, + self.right + self.config.diacritic_box_right, + self.top + self.config.diacritic_box_bottom, ): return True # apostrophe, like in ť - if glyph.fits( - self.right - 5, - self.top - 5, - self.right + 7, - self.top + 10, - ) and glyph.height > 3 and self.optical_distance(glyph) < 4: + if ( + glyph.fits( + self.right + self.config.apostrophe_box_left, + self.top + self.config.apostrophe_box_top, + self.right + self.config.apostrophe_box_right, + self.top + self.config.apostrophe_box_bottom, + ) + and glyph.height > self.config.apostrophe_min_height + and self.optical_distance(glyph) < self.config.apostrophe_max_distance + ): return True # dot in ? and ! if glyph.fits( - self.left - 3, - self.bottom + 1, - self.right + 3, - self.bottom + 10, + self.left + self.config.dot_box_left, + self.bottom + self.config.dot_box_top, + self.right + self.config.dot_box_right, + self.bottom + self.config.dot_box_bottom, ): return True