pixelocr/pixelocr/page.py
2014-08-13 15:30:04 +02:00

117 lines
3 KiB
Python

# Copyright (C) 2014 Andrey Golovizin
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import numpy as np
from scipy import ndimage
from scipy.ndimage import filters
from .utils import cached_property, collect_iterable
from .image import Image
# Structuring element for connected-component labelling of 2-D bitmaps:
# rank 2 with connectivity 2 marks diagonal neighbours as connected too.
CONNECTIVITY8 = ndimage.generate_binary_structure(rank=2, connectivity=2)
class PageObject:
    """Base wrapper around an image region.

    Every geometric attribute is a read-only property that forwards to the
    attribute of the same name on the wrapped ``image``.
    """

    def __init__(self, image):
        """Keep a reference to ``image``; nothing is copied or computed."""
        self.image = image

    def _repr_png_(self):
        """Return whatever the wrapped image's own ``_repr_png_`` returns."""
        return self.image._repr_png_()

    # --- size ---------------------------------------------------------

    @property
    def shape(self):
        """``shape`` of the wrapped image."""
        return self.image.shape

    @property
    def height(self):
        """``height`` of the wrapped image."""
        return self.image.height

    @property
    def width(self):
        """``width`` of the wrapped image."""
        return self.image.width

    # --- bounding coordinates -------------------------------------------

    @property
    def x1(self):
        """``x1`` of the wrapped image."""
        return self.image.x1

    @property
    def x2(self):
        """``x2`` of the wrapped image."""
        return self.image.x2

    @property
    def y1(self):
        """``y1`` of the wrapped image."""
        return self.image.y1

    @property
    def y2(self):
        """``y2`` of the wrapped image."""
        return self.image.y2
class Page(PageObject):
    """A page image; iterating over it walks its ``lines``."""

    def __iter__(self):
        """Return an iterator over ``self.lines``."""
        return iter(self.lines)

    @cached_property
    @collect_iterable
    def lines(self):
        """Wrap each piece yielded by the image's line splitter in a ``Line``.

        The splitter is called with ``min_space=5`` (presumably the minimum
        gap, in pixels, that separates two lines -- confirm against
        ``Image._iter_lines``).
        """
        yield from map(Line, self.image._iter_lines(min_space=5))
class Line(PageObject):
    """A text line image; iterating over it walks its ``words``."""

    def __iter__(self):
        """Return an iterator over ``self.words``."""
        return iter(self.words)

    @cached_property
    @collect_iterable
    def words(self):
        """Split the line into ``Word`` objects.

        The image is transposed so that the same splitter used for lines can
        be applied along the other axis; every resulting piece is transposed
        back before being wrapped. ``min_space=10`` is presumably the minimum
        gap, in pixels, between two words -- confirm against
        ``Image._iter_lines``.
        """
        for rotated_word_img in self.image.T._iter_lines(min_space=10):
            yield Word(rotated_word_img.T)

    @property
    def letters(self):
        """Yield the letters of every word, one word after another.

        This is a plain property, so each access builds a new generator.
        """
        for word in self.words:
            yield from word.letters

    @cached_property
    def baseline(self):
        """Detect baseline height, relative to the top.

        Sums the bitmap along axis 1 into a per-row histogram, convolves it
        with ``[1, -1]`` and returns the index at which the result is
        smallest.
        """
        # Builtin ``float`` is what the ``np.float`` alias used to point to;
        # the alias itself was removed in NumPy 1.24.
        bitmap = self.image.bitmap.astype(float)
        histogram = bitmap.sum(axis=1)
        # Call convolve1d through ``scipy.ndimage`` directly: the
        # ``scipy.ndimage.filters`` namespace is deprecated.
        gradient = ndimage.convolve1d(histogram, [1, -1], axis=0)
        return gradient.argmin()
class Word(PageObject):
    """A word image; iterating over it walks its ``letters``."""

    def __iter__(self):
        """Return an iterator over ``self.letters``."""
        return iter(self.letters)

    @cached_property
    @collect_iterable
    def letters(self):
        """Cut the word into ``Letter`` objects, one per connected component.

        Components are labelled on the image bitmap using ``CONNECTIVITY8``
        as the structuring element. The letters are ordered by ascending
        ``x1``; ties are broken by descending ``y1``.
        """
        labelled, count = ndimage.label(self.image.bitmap, CONNECTIVITY8)
        found = [
            Letter(self.image[region])
            for region in ndimage.find_objects(labelled, count)
        ]
        found.sort(key=lambda letter: (letter.x1, -letter.y1))
        return iter(found)
class Letter(PageObject):
    """A single letter image; adds nothing on top of ``PageObject``."""