Add Page.lines, Line.words, Line.letters and Word.letters properties.

This commit is contained in:
Andrey Golovizin 2014-08-13 15:20:35 +02:00
parent e8d7d1f4d1
commit b756ea484b

View file

@@ -16,6 +16,7 @@
from scipy import ndimage
from .utils import cached_property, collect_iterable
from .image import Image
@@ -60,18 +61,38 @@ class PageObject(object):
class Page(PageObject):
    """A page object whose iteration yields its ``Line`` objects."""

    @cached_property
    @collect_iterable
    def lines(self):
        """Produce one ``Line`` per image from ``self.image._iter_lines``.

        The page image is split with ``min_space=5``; each resulting
        sub-image is wrapped in a ``Line``.
        """
        # NOTE(review): presumably ``collect_iterable`` materialises the
        # generator and ``cached_property`` stores the result -- confirm
        # against ``.utils``.
        line_images = self.image._iter_lines(min_space=5)
        yield from map(Line, line_images)

    def __iter__(self):
        """Return an iterator over ``self.lines``."""
        return iter(self.lines)
class Line(PageObject):
    """A page object whose iteration yields its ``Word`` objects."""

    @cached_property
    @collect_iterable
    def words(self):
        """Produce one ``Word`` per segment of the transposed line image.

        The line image is transposed, split with ``min_space=10`` using
        the same ``_iter_lines`` routine that ``Page`` uses, and every
        segment is transposed back before being wrapped in a ``Word``.
        """
        # NOTE(review): presumably ``collect_iterable`` materialises the
        # generator and ``cached_property`` stores the result -- confirm
        # against ``.utils``.
        transposed_image = self.image.T
        for word_segment in transposed_image._iter_lines(min_space=10):
            yield Word(word_segment.T)

    @property
    def letters(self):
        """Lazily yield the letters of each word in ``self.words``, in order.

        This is a plain property over a generator function, so each
        access returns a fresh generator.
        """
        for current_word in self.words:
            yield from current_word.letters

    def __iter__(self):
        """Return an iterator over ``self.words``."""
        return iter(self.words)
class Word(PageObject):
def __iter__(self):
return iter(self.letters)
@cached_property
@collect_iterable
def letters(self):
labels, max_label = ndimage.label(self.image.bitmap, CONNECTIVITY8)
obj_indices = ndimage.find_objects(labels, max_label)
letter_images = (self.image[obj_index] for obj_index in obj_indices)