Tweak diacritic detection to include ? and ! characters.
This commit is contained in:
parent
4f621493e5
commit
dd847b4e98
1 changed file with 4 additions and 3 deletions
|
|
@@ -183,6 +183,7 @@ class Glyph(PageObject):
|
||||||
DIACRITIC_WINDOW_LEFT = 3
|
DIACRITIC_WINDOW_LEFT = 3
|
||||||
DIACRITIC_WINDOW_RIGHT = 3
|
DIACRITIC_WINDOW_RIGHT = 3
|
||||||
DIACRITIC_MIN_ELEVATION = 5
|
DIACRITIC_MIN_ELEVATION = 5
|
||||||
|
MIN_BODY_HEIGHT = 10
|
||||||
|
|
||||||
def __init__(self, image, elevation):
|
def __init__(self, image, elevation):
|
||||||
super().__init__(image)
|
super().__init__(image)
|
||||||
|
|
@@ -195,7 +196,7 @@ class Glyph(PageObject):
|
||||||
|
|
||||||
def is_body(self):
|
def is_body(self):
|
||||||
"""Return True if the glyph is definitely not diacritic."""
|
"""Return True if the glyph is definitely not diacritic."""
|
||||||
return self.elevation <= 0
|
return self.height >= self.MIN_BODY_HEIGHT
|
||||||
|
|
||||||
def detect_diacritic(self, glyph):
|
def detect_diacritic(self, glyph):
|
||||||
"""Check if the given glyph can be our diacritic and return a numeric score.
|
"""Check if the given glyph can be our diacritic and return a numeric score.
|
||||||
|
|
@@ -203,9 +204,9 @@ class Glyph(PageObject):
|
||||||
Higher score means higher probability. Zero means "absolutely not".
|
Higher score means higher probability. Zero means "absolutely not".
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if glyph.elevation < self.DIACRITIC_MIN_ELEVATION:
|
if glyph.elevation > 0 and glyph.elevation < self.DIACRITIC_MIN_ELEVATION:
|
||||||
return 0
|
return 0
|
||||||
if glyph.top >= self.top:
|
if glyph.top >= self.top and glyph.top < self.bottom:
|
||||||
return 0
|
return 0
|
||||||
if (
|
if (
|
||||||
glyph.left < self.left - self.DIACRITIC_WINDOW_LEFT
|
glyph.left < self.left - self.DIACRITIC_WINDOW_LEFT
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue