Rewrite diacritic detection code; apostrophes are now detected correctly.

This commit is contained in:
Andrey Golovizin 2014-08-25 16:23:17 +02:00
parent c5d04ff6aa
commit 3ed91dff53

View file

@ -183,9 +183,6 @@ class Line(PageObject):
class Glyph(PageObject):
DIACRITIC_WINDOW_LEFT = 3
DIACRITIC_WINDOW_RIGHT = 3
DIACRITIC_MIN_ELEVATION = 5
MIN_BODY_HEIGHT = 10
def __init__(self, image, elevation):
@ -202,23 +199,41 @@ class Glyph(PageObject):
return self.height >= self.MIN_BODY_HEIGHT
def detect_diacritic(self, glyph):
"""Check if the given glyph can be our diacritic and return a numeric score.
Higher score means higher probability. Zero means "absolutely not".
"""
"""Check if the given glyph can be our diacritic and return a numeric score."""
if not self.is_body():
return 0
if glyph.elevation > 0 and glyph.elevation < self.DIACRITIC_MIN_ELEVATION:
return 0
if glyph.top >= self.top and glyph.top < self.bottom:
return 0
if (
glyph.left < self.left - self.DIACRITIC_WINDOW_LEFT
or glyph.right > self.right + self.DIACRITIC_WINDOW_RIGHT
return False
#TODO remove hardcoded sizes
# diacritic above the letter
if glyph.fits(
self.left - 3,
self.top - 10,
self.right + 3,
self.top + 3,
):
return 0
return 100 - abs(self.xcenter - glyph.xcenter)
return True
# apostrophe, like in ť
if glyph.fits(
self.right - 5,
self.top - 5,
self.right + 5,
self.top + 5,
) and glyph.height > 3:
return True
# dot in ? and !
if glyph.fits(
self.left - 3,
self.bottom + 1,
self.right + 3,
self.bottom + 10,
):
return True
return False
def add_diacritics(self, *diacritics):
if not diacritics: