Rewrite diacritic detection code; apostrophes are now detected correctly.
This commit is contained in:
parent
c5d04ff6aa
commit
3ed91dff53
1 changed file with 32 additions and 17 deletions
|
|
@@ -183,9 +183,6 @@ class Line(PageObject):
|
||||||
|
|
||||||
|
|
||||||
class Glyph(PageObject):
|
class Glyph(PageObject):
|
||||||
DIACRITIC_WINDOW_LEFT = 3
|
|
||||||
DIACRITIC_WINDOW_RIGHT = 3
|
|
||||||
DIACRITIC_MIN_ELEVATION = 5
|
|
||||||
MIN_BODY_HEIGHT = 10
|
MIN_BODY_HEIGHT = 10
|
||||||
|
|
||||||
def __init__(self, image, elevation):
|
def __init__(self, image, elevation):
|
||||||
|
|
@@ -202,23 +199,41 @@ class Glyph(PageObject):
|
||||||
return self.height >= self.MIN_BODY_HEIGHT
|
return self.height >= self.MIN_BODY_HEIGHT
|
||||||
|
|
||||||
def detect_diacritic(self, glyph):
|
def detect_diacritic(self, glyph):
|
||||||
"""Check if the given glyph can be our diacritic and return a numeric score.
|
"""Check if the given glyph can be our diacritic and return a numeric score."""
|
||||||
|
|
||||||
Higher score means higher probability. Zero means "absolutely not".
|
|
||||||
"""
|
|
||||||
|
|
||||||
if not self.is_body():
|
if not self.is_body():
|
||||||
return 0
|
return False
|
||||||
if glyph.elevation > 0 and glyph.elevation < self.DIACRITIC_MIN_ELEVATION:
|
|
||||||
return 0
|
#TODO remove hardcoded sizes
|
||||||
if glyph.top >= self.top and glyph.top < self.bottom:
|
|
||||||
return 0
|
# diacritic above the letter
|
||||||
if (
|
if glyph.fits(
|
||||||
glyph.left < self.left - self.DIACRITIC_WINDOW_LEFT
|
self.left - 3,
|
||||||
or glyph.right > self.right + self.DIACRITIC_WINDOW_RIGHT
|
self.top - 10,
|
||||||
|
self.right + 3,
|
||||||
|
self.top + 3,
|
||||||
):
|
):
|
||||||
return 0
|
return True
|
||||||
return 100 - abs(self.xcenter - glyph.xcenter)
|
|
||||||
|
# apostrophe, like in ť
|
||||||
|
if glyph.fits(
|
||||||
|
self.right - 5,
|
||||||
|
self.top - 5,
|
||||||
|
self.right + 5,
|
||||||
|
self.top + 5,
|
||||||
|
) and glyph.height > 3:
|
||||||
|
return True
|
||||||
|
|
||||||
|
# dot in ? and !
|
||||||
|
if glyph.fits(
|
||||||
|
self.left - 3,
|
||||||
|
self.bottom + 1,
|
||||||
|
self.right + 3,
|
||||||
|
self.bottom + 10,
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
def add_diacritics(self, *diacritics):
|
def add_diacritics(self, *diacritics):
|
||||||
if not diacritics:
|
if not diacritics:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue