From b934788bc0fd9808f975f3194112b01ce627bcc2 Mon Sep 17 00:00:00 2001
From: Andrey Golovizin
Date: Fri, 8 Aug 2014 12:46:17 +0200
Subject: [PATCH] Initial commit.

---
Review notes (commentary only: this text sits between the "---" separator
and the first diff header, so it is not part of the commit message and does
not change the content that gets applied).

* Image.iter_letters() accepts min_space (default 10) but never uses it;
  it always calls iter_lines(min_space=7).
* Image.iter_lines() yields a line only when it reaches a blank row after
  that line; there is no flush after the loop, so a line whose pixels
  extend to the last row of the bitmap is never yielded.
* Image.iter_lines() builds gap images with Image.space(...), while
  __getitem__ and T construct type(self); a subclass would therefore get
  plain Image objects for gaps.
* Image.unframe(width=0) returns an empty image, because the slice 0:-0
  is the same as 0:0.
* Image.key calls ndarray.tostring(), which NumPy documents as a deprecated
  alias of tobytes() -- TODO confirm against the NumPy version in use.
* NOTE(review): the indentation of the added Python lines is assumed from
  the control flow (for example "prev_line_end = i" is placed inside
  "if line_start is not None:"), and the spacing before the inline "#"
  comment is a guess -- confirm against blobs 3419556 and 0f8ed1c.

 .hgignore            |  6 ++++
 pixelocr/__init__.py |  0
 pixelocr/image.py    | 78 ++++++++++++++++++++++++++++++++++++++++++++
 pixelocr/utils.py    | 15 +++++++++
 4 files changed, 99 insertions(+)
 create mode 100644 .hgignore
 create mode 100644 pixelocr/__init__.py
 create mode 100644 pixelocr/image.py
 create mode 100644 pixelocr/utils.py

diff --git a/.hgignore b/.hgignore
new file mode 100644
index 0000000..655371c
--- /dev/null
+++ b/.hgignore
@@ -0,0 +1,6 @@
+syntax: glob
+
+*~
+*.pyc
+*.swp
+.ipynb_checkpoints
diff --git a/pixelocr/__init__.py b/pixelocr/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pixelocr/image.py b/pixelocr/image.py
new file mode 100644
index 0000000..3419556
--- /dev/null
+++ b/pixelocr/image.py
@@ -0,0 +1,78 @@
+from io import BytesIO
+
+import numpy as np
+from skimage.io import imread, imsave
+from skimage.color import rgb2gray
+
+from .utils import cached_property
+
+
+class Image(object):
+    def __init__(self, data):
+        self._data = data
+
+    def __getitem__(self, key):
+        return type(self)(self._data.__getitem__(key))
+
+    @classmethod
+    def fromfile(cls, filename):
+        return cls(imread(filename))
+
+    @classmethod
+    def space(cls, height, width):
+        return cls(np.ones((height, width)))
+
+    @property
+    def shape(self):
+        return self._data.shape
+
+    @property
+    def T(self):
+        return type(self)(self._data.swapaxes(0, 1))
+
+    def _repr_png_(self):
+        buf = BytesIO()
+        imsave(buf, self._data)
+        return buf.getvalue()
+
+    @property
+    def height(self):
+        return self._data.shape[0]
+
+    @property
+    def width(self):
+        return self._data.shape[1]
+
+    @cached_property
+    def bitmap(self):
+        grayscale = rgb2gray(self._data)
+        return (grayscale < 1).astype('b')
+
+    @property
+    def key(self):
+        height, width, *_ = self.shape
+        shape = '{}x{}'.format(height, width)
+        bitmap = np.packbits(self.bitmap).tostring()
+        return shape.encode('latin1') + b':' + bitmap
+
+    def unframe(self, width=2):
+        return self[width:-width,width:-width]
+
+    def iter_lines(self, min_space=200):
+        line_start = None
+        prev_line_end = 0
+        for i, row in enumerate(self.bitmap):
+            if row.any():  # non-blank row
+                if line_start is None:
+                    line_start = i
+                    if line_start - prev_line_end >= min_space:
+                        yield Image.space(line_start - prev_line_end, self.width)
+            else:
+                if line_start is not None:
+                    yield self[line_start:i, :]
+                    line_start = None
+                    prev_line_end = i
+
+    def iter_letters(self, min_space=10):
+        for letter in self.T.iter_lines(min_space=7):
+            yield letter.T
diff --git a/pixelocr/utils.py b/pixelocr/utils.py
new file mode 100644
index 0000000..0f8ed1c
--- /dev/null
+++ b/pixelocr/utils.py
@@ -0,0 +1,15 @@
+import functools
+
+def cached_property(fun):
+    """A memoize decorator for class properties."""
+    @functools.wraps(fun)
+    def get(self):
+        try:
+            return self._cache[fun]
+        except AttributeError:
+            self._cache = {}
+        except KeyError:
+            pass
+        ret = self._cache[fun] = fun(self)
+        return ret
+    return property(get)