Tweak diacritic detection to include ? and ! characters.
This commit is contained in:
parent
4f621493e5
commit
dd847b4e98
1 changed file with 4 additions and 3 deletions
|
|
@@ -183,6 +183,7 @@ class Glyph(PageObject):
|
|||
DIACRITIC_WINDOW_LEFT = 3
|
||||
DIACRITIC_WINDOW_RIGHT = 3
|
||||
DIACRITIC_MIN_ELEVATION = 5
|
||||
MIN_BODY_HEIGHT = 10
|
||||
|
||||
def __init__(self, image, elevation):
|
||||
super().__init__(image)
|
||||
|
|
@@ -195,7 +196,7 @@ class Glyph(PageObject):
|
|||
|
||||
def is_body(self):
|
||||
"""Return True if the glyph is definitely not diacritic."""
|
||||
return self.elevation <= 0
|
||||
return self.height >= self.MIN_BODY_HEIGHT
|
||||
|
||||
def detect_diacritic(self, glyph):
|
||||
"""Check if the given glyph can be our diacritic and return a numeric score.
|
||||
|
|
@@ -203,9 +204,9 @@ class Glyph(PageObject):
|
|||
Higher score means higher probability. Zero means "absolutely not".
|
||||
"""
|
||||
|
||||
if glyph.elevation < self.DIACRITIC_MIN_ELEVATION:
|
||||
if glyph.elevation > 0 and glyph.elevation < self.DIACRITIC_MIN_ELEVATION:
|
||||
return 0
|
||||
if glyph.top >= self.top:
|
||||
if glyph.top >= self.top and glyph.top < self.bottom:
|
||||
return 0
|
||||
if (
|
||||
glyph.left < self.left - self.DIACRITIC_WINDOW_LEFT
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue