pixelocr/pixelocr/page.py
2014-08-21 23:11:31 +02:00

219 lines
6.7 KiB
Python

# Copyright (C) 2014 Andrey Golovizin
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import itertools
from collections import defaultdict
import numpy as np
from scipy import ndimage
from scipy.ndimage import filters
from .utils import cached_property, collect_iterable, pairwise
from .image import Image, combine
# 3x3 structuring element for 2-D labelling in which diagonal neighbours
# count as connected (8-connectivity).
CONNECTIVITY8 = ndimage.generate_binary_structure(rank=2, connectivity=2)
class PageObject:
    """Base class for anything cut out of a page image.

    Wraps an image object and forwards its geometry attributes, so that
    pages, lines and glyphs can all be queried the same way.
    """

    def __init__(self, image):
        """Store the image this object is backed by."""
        self.image = image

    def _repr_png_(self):
        """Return the PNG representation produced by the wrapped image."""
        return self.image._repr_png_()

    @property
    def shape(self):
        """Shape of the wrapped image."""
        return self.image.shape

    @property
    def height(self):
        """Height of the wrapped image."""
        return self.image.height

    @property
    def width(self):
        """Width of the wrapped image."""
        return self.image.width

    @property
    def x(self):
        """The ``x`` attribute of the wrapped image."""
        return self.image.x

    @property
    def y(self):
        """The ``y`` attribute of the wrapped image."""
        return self.image.y

    @property
    def left(self):
        """Left edge of the wrapped image."""
        return self.image.left

    @property
    def right(self):
        """Right edge of the wrapped image."""
        return self.image.right

    @property
    def top(self):
        """Top edge of the wrapped image."""
        return self.image.top

    @property
    def bottom(self):
        """Bottom edge of the wrapped image."""
        return self.image.bottom

    @property
    def xcenter(self):
        """Horizontal midpoint between the left and right edges."""
        # The midpoint is the average of the two edges; half of their
        # difference would be half the width, not a position.
        return (self.left + self.right) / 2

    @property
    def ycenter(self):
        """Vertical midpoint between the top and bottom edges."""
        return (self.top + self.bottom) / 2
class Page(PageObject):
    """A whole page image; iterating it yields its text lines."""

    def __iter__(self):
        """Iterate over the ``Line`` objects of this page."""
        return iter(self.lines)

    @cached_property
    @collect_iterable
    def lines(self):
        """Wrap every line image reported by the page image in a ``Line``.

        The page image is asked to split itself with ``min_space=5``.
        """
        line_images = self.image._iter_lines(min_space=5)
        return (Line(line_image) for line_image in line_images)
class Line(PageObject):
    """A single text line; iterating it yields its glyphs and spaces."""

    # Number of glyphs on each side of a diacritic candidate that are
    # examined when looking for the body it belongs to.
    _DIACRITIC_NEIGHBOURHOOD = 5

    # A gap wider than this (after optical correction) becomes a Space.
    _MIN_SPACE_WIDTH = 5

    def __iter__(self):
        """Iterate over the glyphs (and spaces) of this line."""
        return iter(self.glyphs)

    @cached_property
    def baseline(self):
        """Detect the baseline of the line.

        Rows of the line bitmap are summed, and the row where that sum
        falls most sharply compared with the previous row is taken as the
        baseline. The result is that row index added to ``self.y``.
        """
        # np.float was removed from NumPy; the builtin is the same dtype.
        bitmap = self.image.bitmap.astype(float)
        histogram = bitmap.sum(axis=1)
        # With weights [-1, 1] each output element is the difference between
        # a row sum and the preceding one. ndimage.correlate1d is the
        # supported spelling of the deprecated ndimage.filters.correlate1d.
        gradient = ndimage.correlate1d(histogram, [-1, 1], axis=0)
        bottom = gradient.argmin()
        return self.y + bottom

    @property
    @collect_iterable
    def glyphs(self):
        """Segment the line into glyphs, merge diacritics and add spaces.

        Connected components of the line bitmap (8-connectivity) become
        glyph candidates. They are ordered by left edge, with ties broken
        so that the larger ``bottom`` comes first. Diacritics are then
        merged into their bodies and spaces are inserted between glyphs.
        """
        labels, max_label = ndimage.label(self.image.bitmap, CONNECTIVITY8)
        blob_slices = enumerate(ndimage.find_objects(labels, max_label), 1)
        glyph_images = (
            self._extract_blob(blob_slice, label, labels)
            for (label, blob_slice) in blob_slices
        )
        glyphs = (
            Glyph(image, self.baseline - image.bottom)
            for image in glyph_images
        )
        glyphs = sorted(glyphs, key=lambda glyph: (glyph.left, -glyph.bottom))
        glyphs = self._combine_diacritics(glyphs)
        return self._insert_spaces(glyphs)

    def _optical_correction(self, glyph1, glyph2):
        """Return the smallest per-row blank margin between two glyphs.

        For every row covered by either glyph, the blank columns at the
        right side of ``glyph1`` and at the left side of ``glyph2`` are
        added up; rows a glyph does not cover count as its full width.
        The minimum of those sums is returned.
        """
        base = min(glyph1.top, glyph2.top)
        height = max(glyph1.bottom, glyph2.bottom) - base
        # A sentinel column of ones guarantees argmax finds something even
        # in rows where the glyph bitmap is empty.
        bitmap1 = np.hstack([np.ones((glyph1.height, 1)), glyph1.image.bitmap])
        bitmap2 = np.hstack([glyph2.image.bitmap, np.ones((glyph2.height, 1))])
        # np.int was removed from NumPy; the builtin int is equivalent.
        margin1 = np.full(height, glyph1.width, dtype=int)
        margin2 = np.full(height, glyph2.width, dtype=int)
        margin1[glyph1.top - base: glyph1.bottom - base] = np.fliplr(bitmap1).argmax(axis=1)
        margin2[glyph2.top - base: glyph2.bottom - base] = bitmap2.argmax(axis=1)
        margins = margin1 + margin2
        return margins.min()

    def _match_diacritics(self, glyphs):
        """Pair every non-body glyph with the best nearby body, if any.

        Returns ``(bodies, diacritics)``: ``bodies`` maps a diacritic to the
        list of bodies it was attached to, ``diacritics`` maps a body to the
        list of diacritics attached to it.
        """
        reach = self._DIACRITIC_NEIGHBOURHOOD
        bodies = defaultdict(list)
        diacritics = defaultdict(list)
        for i, glyph in enumerate(glyphs):
            if glyph.is_body():
                continue
            # Clamp the start: a negative slice start would wrap around to
            # the end of the list and lose the left-hand neighbours of the
            # first few glyphs.
            start = max(i - reach, 0)
            neighbours = glyphs[start:i] + glyphs[i + 1:i + 1 + reach]
            # Only real bodies ever absorb diacritics in _combine_diacritics,
            # so matching against anything else would drop the glyph.
            candidates = [
                neighbour for neighbour in neighbours if neighbour.is_body()
            ]
            if not candidates:
                continue
            body = max(
                candidates,
                key=lambda candidate: candidate.detect_diacritic(glyph),
            )
            if body.detect_diacritic(glyph):
                diacritics[body].append(glyph)
                bodies[glyph].append(body)
        return bodies, diacritics

    def _combine_diacritics(self, glyphs):
        """Yield glyphs with matched diacritics merged into their bodies.

        ``glyphs`` must be a list (it is sliced). Bodies are yielded with
        their diacritics added; non-body glyphs that were merged into a
        body are skipped; unmatched non-body glyphs are yielded unchanged.
        """
        bodies, diacritics = self._match_diacritics(glyphs)
        for glyph in glyphs:
            if glyph.is_body():
                yield glyph.add_diacritics(*diacritics[glyph])
            elif glyph not in bodies:
                # Freestanding diacritic-like glyph without a body.
                yield glyph

    def _insert_spaces(self, glyphs):
        """Yield the glyphs, inserting a ``Space`` into every wide gap."""
        for glyph, next_glyph in pairwise(glyphs):
            yield glyph
            # NOTE(review): presumably pairwise() pairs the last glyph with
            # None; confirm against utils.pairwise.
            if next_glyph is not None:
                correction = self._optical_correction(glyph, next_glyph)
                distance = next_glyph.left - glyph.right + correction
                if distance > self._MIN_SPACE_WIDTH:
                    yield Space(
                        self.image.space(glyph.right, self.top, distance, self.height),
                        self.baseline - self.top,
                    )

    def _extract_blob(self, blob_slice, label, labels):
        """Cut one labelled blob out of the line image.

        Pixels inside the bounding slice whose label differs from ``label``
        are passed to ``image.mask`` so that neighbouring blobs are excluded.
        """
        image = self.image[blob_slice]
        mask = labels[blob_slice] != label
        return image.mask(mask)
class Glyph(PageObject):
    """A glyph image together with its elevation.

    The elevation is whatever the caller supplies; ``is_body`` and
    ``detect_diacritic`` compare it against zero and
    ``DIACRITIC_MIN_ELEVATION`` respectively.
    """

    # Slack allowed to the left / right of a body within which a diacritic
    # must lie, and the minimum elevation a diacritic must have.
    DIACRITIC_WINDOW_LEFT = 3
    DIACRITIC_WINDOW_RIGHT = 5
    DIACRITIC_MIN_ELEVATION = 5

    def __init__(self, image, elevation):
        """Wrap ``image`` and remember its ``elevation``."""
        super().__init__(image)
        self.elevation = elevation

    @property
    def key(self):
        """Return a dictionary key uniquely representing this glyph."""
        serialized = self.image.serialize()
        return self.elevation, serialized

    def is_body(self):
        """Return True if the glyph is definitely not diacritic."""
        return self.elevation <= 0

    def detect_diacritic(self, glyph):
        """Score how likely ``glyph`` is to be a diacritic of this glyph.

        Higher score means higher probability. Zero means "absolutely not".
        """
        # Too low to be a diacritic at all.
        if glyph.elevation < self.DIACRITIC_MIN_ELEVATION:
            return 0

        # The candidate has to fit inside our horizontal extent, widened by
        # the window constants on each side.
        leftmost_allowed = self.left - self.DIACRITIC_WINDOW_LEFT
        rightmost_allowed = self.right + self.DIACRITIC_WINDOW_RIGHT
        fits_window = (
            glyph.left >= leftmost_allowed
            and glyph.right <= rightmost_allowed
        )
        if not fits_window:
            return 0

        offset = abs(self.xcenter - glyph.xcenter)
        return 100 - offset

    def add_diacritics(self, *diacritics):
        """Return a glyph with the given diacritics merged into the image.

        With no diacritics the glyph itself is returned; otherwise a new
        ``Glyph`` with the combined image and the same elevation is built.
        """
        if diacritics:
            extra_images = (mark.image for mark in diacritics)
            merged = combine(self.image, *extra_images)
            return Glyph(merged, self.elevation)
        return self
class Space(Glyph):
    """A glyph standing for blank space; it adds nothing to ``Glyph``."""