# pixelocr/pixelocr/image.py

# Copyright (C) 2014  Andrey Golovizin
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


import itertools
from io import BytesIO

import numpy as np
from skimage.io import imread, imsave
from skimage.color import rgb2gray

from .utils import cached_property, pairwise


def bbox(*images):
    """Return the bounding box enclosing all the given images.

    The result is a ``(left, top, right, bottom)`` tuple made of the smallest
    ``left`` and ``top`` and the largest ``right`` and ``bottom`` found among
    the images.
    """
    left_edge = min(img.left for img in images)
    top_edge = min(img.top for img in images)
    right_edge = max(img.right for img in images)
    bottom_edge = max(img.bottom for img in images)
    return left_edge, top_edge, right_edge, bottom_edge


def overlap(*images):
    """Return True if at least two of the given images intersect.

    Two images intersect when their extents overlap both horizontally and
    vertically.  Images that only touch along an edge do not intersect.
    """
    def intersects(first, second):
        horizontally = first.right > second.left and second.right > first.left
        vertically = first.bottom > second.top and second.bottom > first.top
        return horizontally and vertically

    return any(
        intersects(first, second)
        for first, second in itertools.combinations(images, 2)
    )


def combine(*images):
    """Merge several non-overlapping images into a single image.

    The result covers the bounding box of all the images and is placed at
    the top-left corner of that box.  Values not covered by any of the
    images are set to 255.  The number of channels and the dtype are taken
    from the first image.

    Raises NotImplementedError if some of the images overlap.
    """
    if overlap(*images):
        raise NotImplementedError

    left, top, right, bottom = bbox(*images)
    first = images[0].data
    canvas = np.zeros((bottom - top, right - left, first.shape[2]), first.dtype)
    canvas.fill(255)

    for image in images:
        # Position of the image relative to the corner of the bounding box.
        row = image.top - top
        col = image.left - left
        canvas[row:row + image.height, col:col + image.width] = image.data
    return Image(canvas, left, top)


def _is_nonblank(bitmap):
    """Return True if bitmap contains at least one black (=1) pixel.

    Any nonzero value counts as black: the result is whatever
    ``bitmap.any()`` returns.
    """
    return bitmap.any()


class Image(object):
    """Basic image class."""

    def __init__(self, data, x=0, y=0):
        self.data = data
        self.x = x
        self.y = y

    def __getitem__(self, key):
        """Return an Image for the specified region."""

        def indices(sliceobj, length):
            """Decode a slice object and return a pair of end:start indices."""
            if sliceobj is None:
                return 0, length
            elif isinstance(sliceobj, int):
                return sliceobj, sliceobj + 1
            elif isinstance(sliceobj, slice):
                start, end, stride = sliceobj.indices(length)
                if stride != 1:
                    raise NotImplementedError
                return start, end
            else:
                raise NotImplementedError(sliceobj)

        if not isinstance(key, tuple):
            yslice = key
            xslice = None
        else:
            yslice, xslice = key

        xstart, xend = indices(xslice, self.width)
        ystart, yend = indices(yslice, self.height)

        x = self.x + xstart
        y = self.y + ystart
        return Image(self.data[key], x, y)

    def space(self, x, y, width, height):
        data = np.zeros((height, width, self.shape[2]), dtype=self.data.dtype)
        data.fill(255)
        return Image(data, x, y)

    def _repr_png_(self):
        """Return the image data written by ``imsave`` as a bytes object.

        The data is saved into an in-memory buffer instead of a file.
        NOTE(review): the name looks like the IPython rich display hook for
        PNG output -- confirm that imsave produces PNG for a file object.
        """
        with BytesIO() as stream:
            imsave(stream, self.data)
            return stream.getvalue()

    @classmethod
    def fromfile(cls, filename):
        """Create an image from the data that ``imread`` loads from filename.

        The image is placed at the default position.
        """
        pixels = imread(filename)
        return cls(pixels)

    @property
    def left(self):
        """X coordinate of the left edge (the x offset of the image)."""
        return self.x

    @property
    def right(self):
        return self.x + self.width

    @property
    def top(self):
        """Y coordinate of the top edge (the y offset of the image)."""
        return self.y

    @property
    def bottom(self):
        return self.y + self.height

    @property
    def shape(self):
        """Shape of the underlying data array."""
        return self.data.shape

    @property
    def height(self):
        """Number of rows: the size of the first axis of the data."""
        return self.data.shape[0]

    @property
    def width(self):
        """Number of columns: the size of the second axis of the data."""
        return self.data.shape[1]

    @cached_property
    def T(self):
        return type(self)(self.data.swapaxes(0, 1), x=self.y, y=self.x)

    @cached_property
    def bitmap(self):
        """Return a two-color version of the image.

        0 = white (blank) pixel
        1 = black (glyph) pixel

        A pixel is black if its grayscale value, as computed by
        ``rgb2gray``, is below 1.
        """

        luminance = rgb2gray(self.data)
        is_glyph = luminance < 1
        return is_glyph.astype('b')

    @cached_property
    def isspace(self):
        """True if the image is blank, i.e. its bitmap has no black pixels."""
        has_glyph_pixels = _is_nonblank(self.bitmap)
        return not has_glyph_pixels

    def serialize(self):
        """Serialize the image as some hashable object."""
        bitmap = self.data.astype(np.uint8).tostring()
        return self.shape, bitmap

    @classmethod
    def deserialize(cls, obj):
        """Deserialize an image."""
        shape, data = obj
        array = np.fromstring(data, dtype=np.uint8).reshape(shape)
        return cls(array)

    def toqimage(self):
        """Convert the image to a PyQt4 QImage in the RGB888 format.

        The data is converted to uint8 and is expected to have 3 values per
        pixel: the line length passed to Qt is ``width * 3`` bytes.
        """
        from PyQt4.QtGui import QImage

        pixels = np.ascontiguousarray(self.data.astype(np.uint8))
        borrowed = QImage(
            pixels.data,
            self.data.shape[1], self.data.shape[0],
            self.data.shape[1] * 3,
            QImage.Format_RGB888,
        )
        # This QImage constructor does not copy the pixels, it only points
        # into the buffer of the temporary array.  Return a deep copy that
        # owns its pixels, made while the array is still alive.
        return borrowed.copy()

    def unframe(self, width=2):
        return Image(self.data[width:-width,width:-width])

    def strip(self):
        """Strip top and bottom blank space.

        All-whitespace images are not stripped.
        """

        if self.isspace:
            return self

        def _leading_blank_rows(rows):
            # Index of the first row with a black pixel, 0 if there is none.
            nonblank = (i for i, row in enumerate(rows) if _is_nonblank(row))
            return next(nonblank, 0)

        above = _leading_blank_rows(self.bitmap)
        below = _leading_blank_rows(reversed(self.bitmap))
        return self[above:self.height - below, :]

    def mask(self, mask):
        mask3 = np.dstack([mask] * 3)
        data = np.ma.masked_array(self.data, mask3, fill_value=255).filled()
        return Image(data, self.x, self.y)

    def _iter_lines(self, min_space):
        def iter_lines():
            line_start = None
            prev_line_end = 0

            for i, row in enumerate(self.bitmap):
                if _is_nonblank(row):
                    if line_start is None:
                        line_start = i
                else:
                    if line_start is not None:
                        yield self[line_start:i,:]
                        line_start = None
                        prev_line_end = i

        def merge_lines(lines):
            prev_line = None
            for line in lines:
                if prev_line is None:
                    prev_line = line
                else:
                    if line.top - prev_line.bottom < min_space:
                        prev_line = self[prev_line.top:line.bottom]
                    else:
                        yield prev_line
                        prev_line = line
            if prev_line is not None:
                yield prev_line

        return merge_lines(iter_lines())