Add Page.lines, Line.words, Line.letters and Word.letters properties.
This commit is contained in:
parent
e8d7d1f4d1
commit
b756ea484b
1 changed file with 21 additions and 0 deletions
|
|
@@ -16,6 +16,7 @@
|
|||
|
||||
from scipy import ndimage
|
||||
|
||||
from .utils import cached_property, collect_iterable
|
||||
from .image import Image
|
||||
|
||||
|
||||
|
|
@@ -60,18 +61,38 @@ class PageObject(object):
|
|||
|
||||
class Page(PageObject):
    """A page object; iterating over it walks its ``lines``."""

    def __iter__(self):
        """Return an iterator over ``self.lines``."""
        page_lines = self.lines
        return iter(page_lines)

    # NOTE(review): cached_property and collect_iterable are imported from
    # .utils; presumably the generator below is materialized into a
    # collection and cached on first access -- confirm against .utils.
    @cached_property
    @collect_iterable
    def lines(self):
        """Yield one ``Line`` per image from ``self.image._iter_lines``.

        ``_iter_lines`` is called with ``min_space=5``.
        """
        yield from map(Line, self.image._iter_lines(min_space=5))
|
||||
|
||||
|
||||
class Line(PageObject):
    """A line object; iterating over it walks its ``words``."""

    def __iter__(self):
        """Return an iterator over ``self.words``."""
        line_words = self.words
        return iter(line_words)

    # NOTE(review): cached_property and collect_iterable are imported from
    # .utils; presumably the generator below is materialized into a
    # collection and cached on first access -- confirm against .utils.
    @cached_property
    @collect_iterable
    def words(self):
        """Yield one ``Word`` per segment found on ``self.image.T``.

        ``_iter_lines`` runs on ``self.image.T`` with ``min_space=10``, and
        ``.T`` is applied again to each result before wrapping it in a
        ``Word`` (presumably ``.T`` is a transpose -- confirm in Image).
        """
        flipped = self.image.T
        for segment in flipped._iter_lines(min_space=10):
            yield Word(segment.T)

    @property
    def letters(self):
        """Yield the ``letters`` of every word in ``self.words``, in order.

        This is a plain property: each access builds a new generator.
        """
        for current_word in self.words:
            yield from current_word.letters
|
||||
|
||||
|
||||
class Word(PageObject):
|
||||
def __iter__(self):
|
||||
return iter(self.letters)
|
||||
|
||||
@cached_property
|
||||
@collect_iterable
|
||||
def letters(self):
|
||||
labels, max_label = ndimage.label(self.image.bitmap, CONNECTIVITY8)
|
||||
obj_indices = ndimage.find_objects(labels, max_label)
|
||||
letter_images = (self.image[obj_index] for obj_index in obj_indices)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue