Add Page.lines, Line.words, Line.letters and Word.letters properties.
This commit is contained in:
parent
e8d7d1f4d1
commit
b756ea484b
1 changed files with 21 additions and 0 deletions
|
|
@ -16,6 +16,7 @@
|
||||||
|
|
||||||
from scipy import ndimage
|
from scipy import ndimage
|
||||||
|
|
||||||
|
from .utils import cached_property, collect_iterable
|
||||||
from .image import Image
|
from .image import Image
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -60,18 +61,38 @@ class PageObject(object):
|
||||||
|
|
||||||
class Page(PageObject):
    """A page whose image can be iterated line by line."""

    def __iter__(self):
        """Return an iterator over ``self.lines``."""
        return iter(self.lines)

    # NOTE(review): ``collect_iterable`` (from .utils) presumably materialises
    # the generator so that ``cached_property`` stores a reusable collection
    # rather than a one-shot generator -- confirm against .utils.
    @cached_property
    @collect_iterable
    def lines(self):
        """Yield a ``Line`` for each image from ``self.image._iter_lines``.

        The splitter is called with ``min_space=5``.
        """
        for line_img in self.image._iter_lines(min_space=5):
            yield Line(line_img)
|
||||||
|
|
||||||
|
|
||||||
class Line(PageObject):
    """A single text line that can be iterated word by word."""

    def __iter__(self):
        """Return an iterator over ``self.words``."""
        return iter(self.words)

    # NOTE(review): ``collect_iterable`` (from .utils) presumably materialises
    # the generator so that ``cached_property`` stores a reusable collection
    # rather than a one-shot generator -- confirm against .utils.
    @cached_property
    @collect_iterable
    def words(self):
        """Yield a ``Word`` for each piece of this line's image.

        The image is accessed through ``.T`` before ``_iter_lines`` is called
        (with ``min_space=10``), and each resulting piece is accessed through
        ``.T`` again before being wrapped.
        """
        # NOTE(review): ``.T`` presumably transposes the image so the line
        # splitter cuts along the other axis, and the second ``.T`` restores
        # the original orientation -- confirm against Image.
        for rotated_word_img in self.image.T._iter_lines(min_space=10):
            yield Word(rotated_word_img.T)

    @property
    def letters(self):
        """Yield the letters of every word in ``self.words``, in order.

        This is a plain ``property`` on a generator function, so each access
        returns a fresh generator; the result is not cached here.
        """
        for word in self.words:
            yield from word.letters
|
||||||
|
|
||||||
|
|
||||||
class Word(PageObject):
|
class Word(PageObject):
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
|
return iter(self.letters)
|
||||||
|
|
||||||
|
@cached_property
|
||||||
|
@collect_iterable
|
||||||
|
def letters(self):
|
||||||
labels, max_label = ndimage.label(self.image.bitmap, CONNECTIVITY8)
|
labels, max_label = ndimage.label(self.image.bitmap, CONNECTIVITY8)
|
||||||
obj_indices = ndimage.find_objects(labels, max_label)
|
obj_indices = ndimage.find_objects(labels, max_label)
|
||||||
letter_images = (self.image[obj_index] for obj_index in obj_indices)
|
letter_images = (self.image[obj_index] for obj_index in obj_indices)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue