Initial commit.
This commit is contained in:
commit
b934788bc0
4 changed files with 99 additions and 0 deletions
6
.hgignore
Normal file
6
.hgignore
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
syntax: glob
|
||||
|
||||
*~
|
||||
*.pyc
|
||||
*.swp
|
||||
.ipynb_checkpoints
|
||||
0
pixelocr/__init__.py
Normal file
0
pixelocr/__init__.py
Normal file
78
pixelocr/image.py
Normal file
78
pixelocr/image.py
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
from io import BytesIO
|
||||
|
||||
import numpy as np
|
||||
from skimage.io import imread, imsave
|
||||
from skimage.color import rgb2gray
|
||||
|
||||
from .utils import cached_property
|
||||
|
||||
|
||||
class Image(object):
    """Wrapper around an image array with helpers to cut it into pieces.

    The pixel data lives in ``self._data``. The methods below rely on it
    providing ``shape``, ``swapaxes`` and slicing, i.e. a NumPy array such
    as the ones produced by ``imread`` and ``np.ones``.
    """

    def __init__(self, data):
        """Store *data* as the backing array (no copy is made)."""
        self._data = data

    def __getitem__(self, key):
        """Return a new image of the same class wrapping ``data[key]``."""
        return type(self)(self._data[key])

    @classmethod
    def fromfile(cls, filename):
        """Build an image from whatever ``imread(filename)`` returns."""
        return cls(imread(filename))

    @classmethod
    def space(cls, height, width):
        """Return a ``height`` x ``width`` image filled with ones.

        All-ones data yields an all-zero ``bitmap`` (no pixel is ``< 1``),
        so this is used as a blank filler by ``iter_lines``.
        """
        return cls(np.ones((height, width)))

    @property
    def shape(self):
        """Shape of the backing array."""
        return self._data.shape

    @property
    def T(self):
        """Transposed image: the first two axes of the data are swapped."""
        return type(self)(self._data.swapaxes(0, 1))

    def _repr_png_(self):
        """Return the bytes ``imsave`` writes for this image.

        NOTE(review): the name follows IPython's rich-display hook, but no
        format is passed to ``imsave`` here -- confirm it emits PNG when
        given an in-memory buffer.
        """
        buf = BytesIO()
        imsave(buf, self._data)
        return buf.getvalue()

    @property
    def height(self):
        """Size of the first axis of the data."""
        return self._data.shape[0]

    @property
    def width(self):
        """Size of the second axis of the data."""
        return self._data.shape[1]

    @cached_property
    def bitmap(self):
        """Array with 1 where the grayscale value is below 1, else 0.

        The result is computed once per instance (``cached_property``).
        """
        data = np.asarray(self._data)
        if data.ndim == 2:
            # Already single-channel (e.g. images built by ``space``).
            # Recent scikit-image releases reject 2-D input to rgb2gray,
            # so use the values as they are.
            grayscale = data
        else:
            # Only the first three channels are converted, so data with an
            # extra (alpha) channel does not make rgb2gray fail.
            grayscale = rgb2gray(data[..., :3])
        return (grayscale < 1).astype('b')

    @property
    def key(self):
        """Bytes identifying the image: ``b'<height>x<width>:'`` + packed bitmap."""
        height, width, *_ = self.shape
        shape = '{}x{}'.format(height, width)
        # ndarray.tostring() is a deprecated alias that was removed in
        # NumPy 2.0; tobytes() returns the same bytes.
        bitmap = np.packbits(self.bitmap).tobytes()
        return shape.encode('latin1') + b':' + bitmap

    def unframe(self, width=2):
        """Return the image with *width* rows/columns cut off every side.

        ``width=0`` returns the full image (a plain ``[0:-0]`` slice would
        be empty).
        """
        stop = -width or None
        return self[width:stop, width:stop]

    def iter_lines(self, min_space=200):
        """Yield the image cut into horizontal strips of non-blank rows.

        A strip is a maximal run of consecutive rows that have at least one
        set ``bitmap`` pixel. When the run of blank rows in front of a strip
        (counted from the top of the image for the first strip) is at least
        *min_space* rows tall, a blank ``space`` image of that height and
        of this image's width is yielded before the strip.

        A strip that reaches the last row of the image is yielded as well.
        """
        line_start = None
        prev_line_end = 0
        for i, row in enumerate(self.bitmap):
            if row.any():  # non-blank row
                if line_start is None:
                    line_start = i
                    gap = line_start - prev_line_end
                    if gap >= min_space:
                        yield type(self).space(gap, self.width)
            elif line_start is not None:
                yield self[line_start:i, :]
                line_start = None
                prev_line_end = i
        if line_start is not None:
            # The final strip runs to the bottom edge: no blank row ever
            # closed it inside the loop, so emit it here.
            yield self[line_start:, :]

    def iter_letters(self, min_space=7):
        """Yield vertical strips by running ``iter_lines`` on the transpose.

        *min_space* is forwarded to ``iter_lines``. Its default is 7, the
        value that was previously always used regardless of the argument.
        """
        for letter in self.T.iter_lines(min_space=min_space):
            yield letter.T
|
||||
15
pixelocr/utils.py
Normal file
15
pixelocr/utils.py
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
import functools
|
||||
|
||||
def cached_property(fun):
    """Turn method *fun* into a read-only property computed once per instance.

    Results are kept in a per-instance ``_cache`` dict keyed by the wrapped
    function; the dict is created on first use.
    """
    @functools.wraps(fun)
    def get(self):
        try:
            store = self._cache
        except AttributeError:
            # First cached access on this instance: create the dict.
            store = self._cache = {}
        if fun in store:
            return store[fun]
        value = fun(self)
        self._cache[fun] = value
        return value
    return property(get)
|
||||
Loading…
Add table
Add a link
Reference in a new issue