Tweak diacritic detection to include ? and ! characters.

This commit is contained in:
Andrey Golovizin 2014-08-25 15:30:28 +02:00
parent 4f621493e5
commit dd847b4e98

View file

@@ -183,6 +183,7 @@ class Glyph(PageObject):
     DIACRITIC_WINDOW_LEFT = 3
     DIACRITIC_WINDOW_RIGHT = 3
     DIACRITIC_MIN_ELEVATION = 5
+    MIN_BODY_HEIGHT = 10
 
     def __init__(self, image, elevation):
         super().__init__(image)
@@ -195,7 +196,7 @@ class Glyph(PageObject):
 
     def is_body(self):
         """Return True if the glyph is definitely not diacritic."""
-        return self.elevation <= 0
+        return self.height >= self.MIN_BODY_HEIGHT
 
     def detect_diacritic(self, glyph):
         """Check if the given glyph can be our diacritic and return a numeric score.
@@ -203,9 +204,9 @@ class Glyph(PageObject):
         Higher score means higher probability. Zero means "absolutely not".
         """
 
-        if glyph.elevation < self.DIACRITIC_MIN_ELEVATION:
+        if glyph.elevation > 0 and glyph.elevation < self.DIACRITIC_MIN_ELEVATION:
             return 0
-        if glyph.top >= self.top:
+        if glyph.top >= self.top and glyph.top < self.bottom:
             return 0
         if (
             glyph.left < self.left - self.DIACRITIC_WINDOW_LEFT