Attach diacritic to the closest body if multiple possibilities found.

This commit is contained in:
Andrey Golovizin 2014-08-21 21:40:37 +02:00
parent 5babd24450
commit 9656abe9b2

View file

@@ -71,6 +71,14 @@ class PageObject(object):
def bottom(self):
return self.image.bottom
@property
def xcenter(self):
    """Horizontal coordinate of the point midway between the left and right edges."""
    # The midpoint is the average of the two edges.  (right - left) / 2 would
    # be the half-width, which does not depend on where the object is placed
    # and therefore cannot be used to compare positions of two objects.
    return (self.left + self.right) / 2
@property
def ycenter(self):
    """Vertical coordinate of the point midway between the top and bottom edges."""
    # Average of the two edges; (bottom - top) / 2 would only be the
    # half-height and would ignore the object's vertical position.
    return (self.top + self.bottom) / 2
class Page(PageObject):
def __iter__(self):
@@ -131,21 +139,20 @@ class Line(PageObject):
return margins.min()
def _combine_diacritics(self, glyphs):
def find_diacritics(glyphs):
def find_correspondence(glyphs):
bodies = defaultdict(list)
diacritics = defaultdict(list)
for i, glyph in enumerate(glyphs):
if not glyph.is_body():
if glyph.is_body():
continue
neighbours = glyphs[i - 5: i] + glyphs[i + 1: i + 6]
for neighbour in neighbours:
if glyph.detect_diacritic(neighbour):
diacritics[glyph].append(neighbour)
bodies[neighbour].append(glyph)
body = max(neighbours, key=lambda neighbour: neighbour.detect_diacritic(glyph))
if body.detect_diacritic(glyph):
diacritics[body].append(glyph)
bodies[glyph].append(body)
return bodies, diacritics
bodies, diacritics = find_diacritics(glyphs)
bodies, diacritics = find_correspondence(glyphs)
for glyph in glyphs:
if glyph.is_body():
yield glyph.add_diacritics(*diacritics[glyph])
@@ -187,12 +194,19 @@ class Glyph(PageObject):
return self.elevation <= 0
def detect_diacritic(self, glyph):
    """Check if the given glyph can be our diacritic and return a numeric score.

    Higher score means higher probability. Zero means "absolutely not".

    The glyph is rejected with a score of 0 when its ``elevation`` is below
    ``DIACRITIC_MIN_ELEVATION``, or when it extends further than
    ``DIACRITIC_WINDOW_LEFT`` to the left of our left edge or further than
    ``DIACRITIC_WINDOW_RIGHT`` to the right of our right edge.  Otherwise the
    score is 100 minus the absolute difference of the two ``xcenter`` values,
    so the better the glyph is centred over us, the higher the score.
    """
    if glyph.elevation < self.DIACRITIC_MIN_ELEVATION:
        return 0

    sticks_out_left = glyph.left < self.left - self.DIACRITIC_WINDOW_LEFT
    sticks_out_right = glyph.right > self.right + self.DIACRITIC_WINDOW_RIGHT
    if sticks_out_left or sticks_out_right:
        return 0

    # NOTE(review): the score is not clamped, so it becomes zero or negative
    # once the centres are 100 or more units apart even though the glyph
    # passed both checks above -- confirm the window constants rule that out.
    return 100 - abs(self.xcenter - glyph.xcenter)
def add_diacritics(self, *diacritics):
if not diacritics: