Attach diacritic to the closest body if multiple possibilities found.
This commit is contained in:
parent
5babd24450
commit
9656abe9b2
1 changed file with 28 additions and 14 deletions
|
|
@ -71,6 +71,14 @@ class PageObject(object):
|
||||||
def bottom(self):
|
def bottom(self):
|
||||||
return self.image.bottom
|
return self.image.bottom
|
||||||
|
|
||||||
|
@property
def xcenter(self):
    """Return the horizontal midpoint between the ``left`` and ``right`` edges.

    The result is a coordinate, so the ``xcenter`` values of two objects can
    be subtracted to measure how far apart they are horizontally (this is
    what ``detect_diacritic`` does).
    """
    # (right - left) / 2 would be half the *width*, which says nothing about
    # where the object is; the midpoint is the average of the two edges.
    return (self.left + self.right) / 2
|
||||||
|
|
||||||
|
@property
def ycenter(self):
    """Return the vertical midpoint between the ``top`` and ``bottom`` edges.

    The result is a coordinate, so it can be compared between objects
    regardless of their heights.
    """
    # (bottom - top) / 2 would be half the *height*, not a position; the
    # midpoint is the average of the two edges.
    return (self.top + self.bottom) / 2
|
||||||
|
|
||||||
|
|
||||||
class Page(PageObject):
|
class Page(PageObject):
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
|
|
@ -131,21 +139,20 @@ class Line(PageObject):
|
||||||
return margins.min()
|
return margins.min()
|
||||||
|
|
||||||
def _combine_diacritics(self, glyphs):
|
def _combine_diacritics(self, glyphs):
|
||||||
|
def find_correspondence(glyphs):
    """Pair every non-body glyph with its best-scoring neighbour.

    For each glyph whose ``is_body()`` is false, up to five glyphs on either
    side are scored with ``neighbour.detect_diacritic(glyph)`` and the
    highest-scoring one is chosen, provided its score is truthy.

    Returns a ``(bodies, diacritics)`` pair of ``defaultdict(list)``:
    ``bodies`` maps a non-body glyph to the neighbour it was attached to, and
    ``diacritics`` maps that neighbour to the glyphs attached to it.
    """
    bodies = defaultdict(list)
    diacritics = defaultdict(list)
    for i, glyph in enumerate(glyphs):
        if glyph.is_body():
            continue
        # Clamp the window start at 0: for i < 5 a negative start index would
        # count from the END of the list, so the slice would skip the real
        # left-hand neighbours (and usually come out empty).
        neighbours = glyphs[max(0, i - 5): i] + glyphs[i + 1: i + 6]
        if not neighbours:
            # A lone glyph has nothing to attach to; max() would raise
            # ValueError on the empty list.
            continue
        # NOTE(review): the window may contain other non-body glyphs; confirm
        # whether candidates should be restricted to neighbour.is_body().
        body = max(neighbours, key=lambda neighbour: neighbour.detect_diacritic(glyph))
        if body.detect_diacritic(glyph):
            diacritics[body].append(glyph)
            bodies[glyph].append(body)
    return bodies, diacritics
|
||||||
|
|
||||||
bodies, diacritics = find_diacritics(glyphs)
|
bodies, diacritics = find_correspondence(glyphs)
|
||||||
for glyph in glyphs:
|
for glyph in glyphs:
|
||||||
if glyph.is_body():
|
if glyph.is_body():
|
||||||
yield glyph.add_diacritics(*diacritics[glyph])
|
yield glyph.add_diacritics(*diacritics[glyph])
|
||||||
|
|
@ -187,12 +194,19 @@ class Glyph(PageObject):
|
||||||
return self.elevation <= 0
|
return self.elevation <= 0
|
||||||
|
|
||||||
def detect_diacritic(self, glyph):
    """Check if the given glyph can be our diacritic and return a numeric score.

    Higher score means higher probability. Zero means "absolutely not";
    every glyph that passes the elevation and window checks gets a strictly
    positive score, and the score decreases as the horizontal distance
    between the two ``xcenter`` values grows.
    """
    if glyph.elevation < self.DIACRITIC_MIN_ELEVATION:
        return 0
    if (
        glyph.left < self.left - self.DIACRITIC_WINDOW_LEFT
        or glyph.right > self.right + self.DIACRITIC_WINDOW_RIGHT
    ):
        return 0
    distance = abs(self.xcenter - glyph.xcenter)
    # The linear score alone drops to zero or below once the centres are 100
    # or more units apart, which would make an accepted candidate look like a
    # rejected one (or worse). Keep the linear value wherever it is the
    # larger of the two, and otherwise fall back to a small reciprocal term
    # that is always positive and still ordered by distance.
    return max(100 - distance, 1.0 / (1.0 + distance))
|
||||||
|
|
||||||
def add_diacritics(self, *diacritics):
|
def add_diacritics(self, *diacritics):
|
||||||
if not diacritics:
|
if not diacritics:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue