diff --git a/pixelocr/page.py b/pixelocr/page.py index fdf6b53..30059a1 100644 --- a/pixelocr/page.py +++ b/pixelocr/page.py @@ -124,11 +124,11 @@ class Page(PageObject): line_start = i else: if line_start is not None: - yield Line(self.image[line_start:i,:]) + yield Line(self, self.image[line_start:i,:]) line_start = None prev_line_end = i if line_start is not None: - yield Line(self.image[line_start:,:]) + yield Line(self, self.image[line_start:,:]) def _merge_lines(self, lines, min_space=2, min_height=10): prev_line = None @@ -144,7 +144,7 @@ class Page(PageObject): ) not_high_enough = prev_line.height < min_height if too_close or not_high_enough: - prev_line = Line(self.image[prev_line.top:line.bottom]) + prev_line = Line(self, self.image[prev_line.top:line.bottom]) else: yield prev_line prev_line = line @@ -153,6 +153,10 @@ class Page(PageObject): class Line(PageObject): + def __init__(self, page, image): + super().__init__(image) + self.page = page + def __iter__(self): return iter(self.glyphs) @@ -180,7 +184,7 @@ class Line(PageObject): for (label, blob_slice) in blob_slices ) glyphs = ( - Glyph(image, self.baseline - image.bottom) + Glyph(self, image, self.baseline - image.bottom) for image in glyph_images ) glyphs = sorted(glyphs, key=lambda glyph: (glyph.left, -glyph.bottom)) @@ -214,7 +218,7 @@ class Line(PageObject): if next_glyph is not None: distance = glyph.optical_distance(next_glyph) if distance >= min_distance: - yield Space(self.image.space(glyph.right, self.top, distance, self.height), self.baseline - self.top) + yield Space(self, self.image.space(glyph.right, self.top, distance, self.height), self.baseline - self.top) def _extract_blob(self, blob_slice, label, labels): image = self.image[blob_slice] @@ -229,9 +233,10 @@ class Line(PageObject): class Glyph(PageObject): MIN_BODY_HEIGHT = 10 - def __init__(self, image, elevation): + def __init__(self, line, image, elevation): super().__init__(image) self.elevation = elevation + self.line = line @property def key(self): @@ -287,7 +292,7 @@ class Glyph(PageObject): if not diacritics: return self diacritic_images = (diacritic.image for diacritic in diacritics) - return Glyph(combine(self.image, *diacritic_images), self.elevation) + return Glyph(self.line, combine(self.image, *diacritic_images), self.elevation) class Space(Glyph):