Initial commit.

This commit is contained in:
Andrey Golovizin 2014-08-08 12:46:17 +02:00
commit b934788bc0
4 changed files with 99 additions and 0 deletions

6
.hgignore Normal file
View file

@ -0,0 +1,6 @@
syntax: glob
*~
*.pyc
*.swp
.ipynb_checkpoints

0
pixelocr/__init__.py Normal file
View file

78
pixelocr/image.py Normal file
View file

@ -0,0 +1,78 @@
from io import BytesIO
import numpy as np
from skimage.io import imread, imsave
from skimage.color import rgb2gray
from .utils import cached_property
class Image(object):
    """A raster image wrapped around a NumPy array, with OCR segmentation helpers.

    The wrapped array is indexed ``[row, column, ...]``.  Indexing or
    transposing an ``Image`` returns a new instance of the same class around
    the resulting array.
    """

    def __init__(self, data):
        """Wrap ``data`` (expected to be a NumPy array); no copy is made."""
        self._data = data

    def __getitem__(self, key):
        """Return a new image wrapping ``data[key]``."""
        return type(self)(self._data[key])

    @classmethod
    def fromfile(cls, filename):
        """Load an image from ``filename`` using ``skimage.io.imread``."""
        return cls(imread(filename))

    @classmethod
    def space(cls, height, width):
        """Return a blank image: a ``height`` x ``width`` array of ones.

        ``bitmap`` treats values below 1 as ink, so an all-ones array
        contains no ink at all.
        """
        return cls(np.ones((height, width)))

    @property
    def shape(self):
        """Shape of the underlying array."""
        return self._data.shape

    @property
    def T(self):
        """Return the image with rows and columns swapped (axes 0 and 1)."""
        return type(self)(self._data.swapaxes(0, 1))

    def _repr_png_(self):
        """Save the image with ``imsave`` into memory and return the bytes.

        The method name is the one IPython looks up for PNG rich display.
        """
        buf = BytesIO()
        imsave(buf, self._data)
        return buf.getvalue()

    @property
    def height(self):
        """Number of rows."""
        return self._data.shape[0]

    @property
    def width(self):
        """Number of columns."""
        return self._data.shape[1]

    @cached_property
    def bitmap(self):
        """Return an int8 array holding 1 for ink pixels and 0 for blank ones.

        A pixel counts as ink when its grayscale value is below 1.
        """
        data = self._data
        # Two-dimensional data (e.g. what ``space`` builds) is already
        # single-channel; recent scikit-image releases reject such input in
        # ``rgb2gray``, so only colour data is converted.
        grayscale = data if data.ndim == 2 else rgb2gray(data)
        return (grayscale < 1).astype('b')

    @property
    def key(self):
        """Return a bytes key: ``b'<height>x<width>:'`` plus the packed bitmap."""
        shape = '{}x{}'.format(self.height, self.width)
        # ``tobytes`` replaces ``tostring``, which was removed in NumPy 2.0.
        bitmap = np.packbits(self.bitmap).tobytes()
        return shape.encode('latin1') + b':' + bitmap

    def unframe(self, width=2):
        """Return the image with ``width`` pixels removed from every side.

        The end indices are computed explicitly so that ``width=0`` returns
        the whole image (a ``-0`` slice end would produce an empty one).
        """
        return self[width:self.height - width, width:self.width - width]

    def iter_lines(self, min_space=200):
        """Yield the horizontal bands of the image that contain ink, top to bottom.

        A band is a maximal run of consecutive rows with at least one ink
        pixel.  When the blank gap in front of a band is at least
        ``min_space`` rows tall, a blank image of that height is yielded
        just before the band.  A band reaching the last row is yielded too.
        """
        line_start = None
        prev_line_end = 0
        for i, row in enumerate(self.bitmap):
            if row.any():  # non-blank row
                if line_start is None:
                    line_start = i
                    gap = line_start - prev_line_end
                    if gap >= min_space:
                        yield type(self).space(gap, self.width)
            elif line_start is not None:
                # First blank row after a band closes it.
                yield self[line_start:i, :]
                line_start = None
                prev_line_end = i
        if line_start is not None:
            # The last band runs to the bottom edge; emit it instead of
            # silently dropping it.
            yield self[line_start:, :]

    def iter_letters(self, min_space=7):
        """Yield the vertical bands (letters) of the image, left to right.

        Works by running ``iter_lines`` on the transposed image and
        transposing each result back.  ``min_space`` is the minimum blank
        gap, in columns, that produces a blank image between letters; the
        default of 7 is the value that was previously always used.
        """
        for letter in self.T.iter_lines(min_space=min_space):
            yield letter.T

15
pixelocr/utils.py Normal file
View file

@ -0,0 +1,15 @@
import functools
def cached_property(fun):
    """Turn a method into a read-only property computed once per instance.

    Results are kept in a ``_cache`` dict on the instance, created on first
    use and keyed by the wrapped function.
    """
    @functools.wraps(fun)
    def getter(self):
        try:
            store = self._cache
        except AttributeError:
            # First cached access on this instance: create the storage.
            store = self._cache = {}
        if fun in store:
            return store[fun]
        value = fun(self)
        self._cache[fun] = value
        return value
    return property(getter)