Attach diacritic to the closest body if multiple possibilities found.
This commit is contained in:
parent
5babd24450
commit
9656abe9b2
1 changed file with 28 additions and 14 deletions
|
|
@ -71,6 +71,14 @@ class PageObject(object):
|
|||
def bottom(self):
|
||||
return self.image.bottom
|
||||
|
||||
@property
def xcenter(self):
    """Return the horizontal coordinate of the object's centre.

    The value is the midpoint between the ``left`` and ``right`` edges.
    """
    # The midpoint is the mean of the two edges. ``(right - left) / 2``
    # would be half the width, which is a size rather than a position and
    # cannot be compared between objects to measure how close they are.
    return (self.left + self.right) / 2
|
||||
|
||||
@property
def ycenter(self):
    """Return the vertical coordinate of the object's centre.

    The value is the midpoint between the ``top`` and ``bottom`` edges.
    """
    # The midpoint is the mean of the two edges. ``(bottom - top) / 2``
    # would be half the height, which is a size rather than a position.
    return (self.top + self.bottom) / 2
|
||||
|
||||
|
||||
class Page(PageObject):
|
||||
def __iter__(self):
|
||||
|
|
@ -131,21 +139,20 @@ class Line(PageObject):
|
|||
return margins.min()
|
||||
|
||||
def _combine_diacritics(self, glyphs):
|
||||
|
||||
def find_diacritics(glyphs):
|
||||
def find_correspondence(glyphs):
|
||||
bodies = defaultdict(list)
|
||||
diacritics = defaultdict(list)
|
||||
for i, glyph in enumerate(glyphs):
|
||||
if not glyph.is_body():
|
||||
if glyph.is_body():
|
||||
continue
|
||||
neighbours = glyphs[i - 5: i] + glyphs[i + 1: i + 6]
|
||||
for neighbour in neighbours:
|
||||
if glyph.detect_diacritic(neighbour):
|
||||
diacritics[glyph].append(neighbour)
|
||||
bodies[neighbour].append(glyph)
|
||||
body = max(neighbours, key=lambda neighbour: neighbour.detect_diacritic(glyph))
|
||||
if body.detect_diacritic(glyph):
|
||||
diacritics[body].append(glyph)
|
||||
bodies[glyph].append(body)
|
||||
return bodies, diacritics
|
||||
|
||||
bodies, diacritics = find_diacritics(glyphs)
|
||||
bodies, diacritics = find_correspondence(glyphs)
|
||||
for glyph in glyphs:
|
||||
if glyph.is_body():
|
||||
yield glyph.add_diacritics(*diacritics[glyph])
|
||||
|
|
@ -187,12 +194,19 @@ class Glyph(PageObject):
|
|||
return self.elevation <= 0
|
||||
|
||||
def detect_diacritic(self, glyph):
    """Check if the given glyph can be our diacritic and return a numeric score.

    Higher score means higher probability. Zero means "absolutely not".

    A glyph is rejected (score 0) when its elevation is below
    ``DIACRITIC_MIN_ELEVATION`` or when it sticks out of the horizontal
    window defined by ``DIACRITIC_WINDOW_LEFT`` / ``DIACRITIC_WINDOW_RIGHT``
    around this glyph. Otherwise the score is ``100`` minus the distance
    between the two ``xcenter`` values, kept strictly positive.
    """
    if glyph.elevation < self.DIACRITIC_MIN_ELEVATION:
        return 0
    if (
        glyph.left < self.left - self.DIACRITIC_WINDOW_LEFT
        or glyph.right > self.right + self.DIACRITIC_WINDOW_RIGHT
    ):
        return 0
    score = 100 - abs(self.xcenter - glyph.xcenter)
    # An accepted glyph must never score zero or below: zero is reserved
    # for "absolutely not", and a negative score would rank beneath the 0
    # of a rejected candidate when callers pick the best match with max().
    return max(score, 1e-9)
|
||||
|
||||
def add_diacritics(self, *diacritics):
|
||||
if not diacritics:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue