pixelocr/pixelocr/image.py
2014-08-21 23:20:30 +02:00

244 lines
6.8 KiB
Python

# Copyright (C) 2014 Andrey Golovizin
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import itertools
from io import BytesIO
import numpy as np
from skimage.io import imread, imsave
from skimage.color import rgb2gray
from .utils import cached_property, pairwise
def bbox(*images):
    """Return the bounding box enclosing all the given images.

    The result is a ``(left, top, right, bottom)`` tuple built from the
    smallest ``left``/``top`` and the largest ``right``/``bottom`` found
    among ``images``.
    """
    leftmost = min([picture.left for picture in images])
    topmost = min([picture.top for picture in images])
    rightmost = max([picture.right for picture in images])
    lowest = max([picture.bottom for picture in images])
    return (leftmost, topmost, rightmost, lowest)
def overlap(*images):
    """Return True if at least two of the given images intersect.

    Two images intersect when their horizontal extents and their vertical
    extents both overlap; images that merely touch along an edge do not
    count as overlapping.
    """
    return any(
        first.right > second.left and second.right > first.left
        and first.bottom > second.top and second.bottom > first.top
        for first, second in itertools.combinations(images, 2)
    )
def combine(*images):
    """Paste several non-overlapping images onto one shared canvas.

    The canvas spans the bounding box of ``images``, takes its channel
    count and dtype from the first image, and is pre-filled with 255 so
    that any area not covered by an image stays white.  The result is
    positioned at the top-left corner of the bounding box.

    Raises NotImplementedError if any two of the images overlap.
    """
    if overlap(*images):
        raise NotImplementedError

    left, top, right, bottom = bbox(*images)
    reference = images[0].data
    canvas = np.empty(
        (bottom - top, right - left, reference.shape[2]),
        reference.dtype,
    )
    canvas.fill(255)

    for piece in images:
        # Position of this piece relative to the canvas origin.
        row = piece.top - top
        column = piece.left - left
        canvas[row:row + piece.height, column:column + piece.width] = piece.data
    return Image(canvas, left, top)
def _is_nonblank(bitmap):
    """Tell whether ``bitmap`` holds any black (=1) pixel.

    The answer is whatever ``bitmap.any()`` reports, i.e. truthy as soon
    as a single entry is nonzero.
    """
    has_ink = bitmap.any()
    return has_ink
class Image(object):
    """A rectangular block of pixels together with its position.

    ``data`` is a NumPy array indexed as ``[row, column, channel]`` and
    ``x``/``y`` are the coordinates of its top-left pixel.  Slicing an
    image yields a new image whose position is shifted accordingly, so
    sub-images keep the coordinates they had inside their parent.
    """

    def __init__(self, data, x=0, y=0):
        """Wrap the pixel array ``data`` positioned at (``x``, ``y``)."""
        self.data = data
        self.x = x
        self.y = y

    def __getitem__(self, key):
        """Return an Image for the specified region.

        ``key`` is either a row selector or a ``(rows, columns)`` tuple.
        Each selector may be a slice with step 1, an integer (selecting a
        one pixel thick strip; negative values count from the end) or
        None (the whole axis).  The result always keeps its row and
        column axes and is positioned at the selected region's top-left
        corner.

        Raises NotImplementedError for slices with a step other than 1
        and for unsupported selector types, and IndexError for integers
        outside the image.
        """
        def indices(selector, length):
            """Decode a selector and return a pair of start:end indices."""
            if selector is None:
                return 0, length
            if isinstance(selector, (int, np.integer)):
                index = int(selector)
                if index < 0:
                    index += length
                if not 0 <= index < length:
                    raise IndexError(
                        'index %d is out of bounds for axis with size %d'
                        % (selector, length)
                    )
                return index, index + 1
            if isinstance(selector, slice):
                start, end, stride = selector.indices(length)
                if stride != 1:
                    raise NotImplementedError
                return start, end
            raise NotImplementedError(selector)

        if isinstance(key, tuple):
            yslice, xslice = key
        else:
            yslice, xslice = key, None

        xstart, xend = indices(xslice, self.width)
        ystart, yend = indices(yslice, self.height)

        # Index with the decoded ranges rather than the raw key: a raw
        # integer would drop an axis and a raw None would insert one,
        # leaving an array that is no longer a valid image.
        region = self.data[ystart:yend, xstart:xend]
        return Image(region, self.x + xstart, self.y + ystart)

    def space(self, x, y, width, height):
        """Return a blank (all 255) image of the given size and position.

        The new image has as many channels and the same dtype as this one.
        """
        data = np.full((height, width, self.shape[2]), 255, dtype=self.data.dtype)
        return Image(data, x, y)

    def _repr_png_(self):
        """Return the image encoded by ``imsave`` as a byte string.

        NOTE(review): the name suggests PNG output for rich display, but
        the format actually written is decided by ``imsave`` -- confirm.
        """
        buf = BytesIO()
        imsave(buf, self.data)
        return buf.getvalue()

    @classmethod
    def fromfile(cls, filename):
        """Load an image from ``filename``, positioned at (0, 0)."""
        return cls(imread(filename))

    @property
    def left(self):
        """X coordinate of the leftmost pixel column."""
        return self.x

    @property
    def right(self):
        """X coordinate just past the rightmost pixel column."""
        return self.x + self.width

    @property
    def top(self):
        """Y coordinate of the topmost pixel row."""
        return self.y

    @property
    def bottom(self):
        """Y coordinate just past the bottommost pixel row."""
        return self.y + self.height

    @property
    def shape(self):
        """Shape of the underlying pixel array."""
        return self.data.shape

    @property
    def height(self):
        """Number of pixel rows."""
        return self.data.shape[0]

    @property
    def width(self):
        """Number of pixel columns."""
        return self.data.shape[1]

    @cached_property
    def T(self):
        """The transposed image: rows and columns (and x and y) swapped."""
        return type(self)(self.data.swapaxes(0, 1), x=self.y, y=self.x)

    @cached_property
    def bitmap(self):
        """Return a two-color version of the image.

        0 = white (blank) pixel
        1 = black (glyph) pixel
        """
        grayscale = rgb2gray(self.data)
        return (grayscale < 1).astype('b')

    @cached_property
    def isspace(self):
        """True if the image contains no black pixels at all."""
        return not _is_nonblank(self.bitmap)

    def serialize(self):
        """Serialize the image as some hashable object.

        Returns a ``(shape, bytes)`` tuple holding the pixel data
        converted to uint8.  The position of the image is not included.
        """
        # tobytes() replaces tostring(), which is deprecated and has been
        # dropped from recent NumPy releases; the bytes are the same.
        bitmap = self.data.astype(np.uint8).tobytes()
        return self.shape, bitmap

    @classmethod
    def deserialize(cls, obj):
        """Deserialize an image produced by ``serialize``.

        The resulting image is positioned at (0, 0).
        """
        shape, data = obj
        # frombuffer() replaces the deprecated binary mode of fromstring().
        # It returns a read-only view of ``data``, so copy to obtain an
        # independent, writable array like fromstring() used to return.
        array = np.frombuffer(data, dtype=np.uint8).reshape(shape).copy()
        return cls(array)

    def toqimage(self):
        """Convert the image to a PyQt4 ``QImage`` in RGB888 format.

        The bytes-per-line value assumes three one-byte channels per
        pixel.
        """
        from PyQt4.QtGui import QImage
        return QImage(
            np.ascontiguousarray(self.data.astype(np.uint8)).data,
            self.data.shape[1], self.data.shape[0],
            self.data.shape[1] * 3,
            QImage.Format_RGB888,
        )

    def unframe(self, width=2):
        """Return the image with a border ``width`` pixels wide cut off.

        The result is positioned at (0, 0).  A ``width`` of 0 returns the
        whole picture.
        """
        # Explicit end indices instead of ``-width``: with width == 0 the
        # slice ``0:-0`` would be empty rather than cover everything.
        yend = max(self.height - width, 0)
        xend = max(self.width - width, 0)
        return Image(self.data[width:yend, width:xend])

    def strip(self):
        """Strip top and bottom blank space.

        All-whitespace images are not stripped.
        """
        if self.isspace:
            return self

        # One flag per pixel row: does the row contain a black pixel?
        inked_rows = self.bitmap.any(axis=1)
        # argmax() gives the position of the first True, i.e. the number
        # of blank rows before the first inked one (from either end).
        top_margin = int(inked_rows.argmax())
        bottom_margin = int(inked_rows[::-1].argmax())
        return self[top_margin:self.height - bottom_margin, :]

    def mask(self, mask):
        """Return a copy with the pixels selected by ``mask`` set to 255.

        ``mask`` is a two-dimensional (rows x columns) array; it is
        replicated across all channels of the image.  The position of
        the image is preserved.
        """
        mask_per_channel = np.dstack([mask] * self.shape[2])
        data = np.ma.masked_array(
            self.data, mask_per_channel, fill_value=255
        ).filled()
        return Image(data, self.x, self.y)

    def _iter_lines(self, min_space):
        """Iterate over the horizontal text lines of the image.

        A line is a maximal run of consecutive pixel rows that each
        contain at least one black pixel.  Neighbouring lines separated
        by fewer than ``min_space`` blank rows are merged into one,
        including the blank rows between them.  Every line is returned
        as a full-width sub-image.
        """
        def iter_lines():
            line_start = None
            for i, has_ink in enumerate(self.bitmap.any(axis=1)):
                if has_ink:
                    if line_start is None:
                        line_start = i
                elif line_start is not None:
                    yield self[line_start:i, :]
                    line_start = None
            # A line touching the bottom edge has no blank row after it
            # to close it, so emit it here.
            if line_start is not None:
                yield self[line_start:self.height, :]

        def merge_lines(lines):
            prev_line = None
            for line in lines:
                if prev_line is None:
                    prev_line = line
                elif line.top - prev_line.bottom < min_space:
                    # top/bottom are absolute coordinates; convert them
                    # back to row indices relative to this image.
                    prev_line = self[prev_line.top - self.y:line.bottom - self.y]
                else:
                    yield prev_line
                    prev_line = line
            if prev_line is not None:
                yield prev_line

        return merge_lines(iter_lines())