Add Line.words property.
This commit is contained in:
parent
2f06b66b89
commit
1847bbcd9e
1 changed file with 32 additions and 5 deletions
|
|
@ -191,7 +191,7 @@ class Line(PageObject):
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
@collect_iterable
|
@collect_iterable
|
||||||
def glyphs(self):
|
def words(self):
|
||||||
labels, max_label = ndimage.label(self.image.bitmap, CONNECTIVITY8)
|
labels, max_label = ndimage.label(self.image.bitmap, CONNECTIVITY8)
|
||||||
blob_slices = enumerate(ndimage.find_objects(labels, max_label), 1)
|
blob_slices = enumerate(ndimage.find_objects(labels, max_label), 1)
|
||||||
glyph_images = (
|
glyph_images = (
|
||||||
|
|
@ -204,7 +204,15 @@ class Line(PageObject):
|
||||||
)
|
)
|
||||||
glyphs = sorted(glyphs, key=lambda glyph: (glyph.left, -glyph.bottom))
|
glyphs = sorted(glyphs, key=lambda glyph: (glyph.left, -glyph.bottom))
|
||||||
glyphs = self._combine_diacritics(glyphs)
|
glyphs = self._combine_diacritics(glyphs)
|
||||||
return self._insert_spaces(glyphs)
|
return self._detect_words(glyphs)
|
||||||
|
|
||||||
|
@property
@collect_iterable
def glyphs(self):
    """Yield the line's glyphs word by word, each word followed by its space.

    Walks ``self.words`` in order; for every word, emits its glyphs and
    then the word's ``space_after`` object when one is set.
    NOTE(review): ``collect_iterable`` is defined outside this view; it
    presumably materializes the generator into a sequence -- confirm.
    """
    for word in self.words:
        for glyph in word.glyphs:
            yield glyph
        trailing_space = word.space_after
        if trailing_space is None:
            # No space object is attached to this word, so nothing to emit.
            continue
        yield trailing_space
|
||||||
|
|
||||||
def _combine_diacritics(self, glyphs):
|
def _combine_diacritics(self, glyphs):
|
||||||
def find_correspondence(glyphs):
|
def find_correspondence(glyphs):
|
||||||
|
|
@ -227,13 +235,18 @@ class Line(PageObject):
|
||||||
if glyph not in bodies: # freestanding diacritic-like glyph without a body
|
if glyph not in bodies: # freestanding diacritic-like glyph without a body
|
||||||
yield glyph
|
yield glyph
|
||||||
|
|
||||||
def _insert_spaces(self, glyphs, min_distance=15):
|
def _detect_words(self, glyphs, min_distance=15):
    """Group consecutive glyphs into ``Word`` objects, splitting at wide gaps.

    Glyphs are accumulated until the optical distance to the following
    glyph is at least ``min_distance``; at that point a ``Word`` carrying a
    trailing ``Space`` is yielded and a fresh accumulator is started. Any
    glyphs left over after the loop form a final ``Word`` without a space.

    NOTE(review): ``pairwise`` is a helper defined outside this view; the
    ``None`` check below implies it pairs the last glyph with ``None`` --
    confirm it is not ``itertools.pairwise``.
    """
    pending = []
    for current, following in pairwise(glyphs):
        pending.append(current)
        if following is None:
            # Last glyph: there is no gap to measure.
            continue
        gap = current.optical_distance(following)
        if gap >= min_distance:
            space_image = self.image.space(current.right, self.top, gap, self.height)
            trailing_space = Space(self, space_image, self.baseline - self.top)
            yield Word(self, pending, trailing_space)
            # Rebind instead of clearing: the yielded Word keeps the old list.
            pending = []
    if pending:
        yield Word(self, pending)
|
||||||
|
|
||||||
def _extract_blob(self, blob_slice, label, labels):
|
def _extract_blob(self, blob_slice, label, labels):
|
||||||
image = self.image[blob_slice]
|
image = self.image[blob_slice]
|
||||||
|
|
@ -245,7 +258,21 @@ class Line(PageObject):
|
||||||
return distance + self._optical_correction(other, T=True)
|
return distance + self._optical_correction(other, T=True)
|
||||||
|
|
||||||
|
|
||||||
|
class Word(PageObject):
    """A group of glyphs on a line, optionally followed by a space object.

    The word's image is the slice of the line image that runs from the
    first glyph's ``left`` to the ``right`` of the trailing space when one
    is given, otherwise to the ``right`` of the last glyph.
    """

    def __init__(self, line, glyphs, space_after=None):
        """Store the glyphs, link them back to this word, and crop the image.

        ``glyphs`` must be an indexable, non-empty sequence: its first and
        last items are read to determine the horizontal extent.
        """
        self.glyphs = glyphs
        # Back-reference so each glyph knows which word it belongs to.
        for member in glyphs:
            member.word = self
        self.space_after = space_after

        first = self.glyphs[0]
        if space_after is None:
            last = self.glyphs[-1]
        else:
            # The trailing space is part of the word's extent.
            last = space_after
        cropped = line.image[:, first.left:last.right]
        super().__init__(line.document, cropped)
|
||||||
|
|
||||||
|
|
||||||
class Glyph(PageObject):
|
class Glyph(PageObject):
|
||||||
|
word = None
|
||||||
|
|
||||||
def __init__(self, line, image, elevation):
|
def __init__(self, line, image, elevation):
|
||||||
super().__init__(line.document, image)
|
super().__init__(line.document, image)
|
||||||
self.elevation = elevation
|
self.elevation = elevation
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue