Mask adjacent blobs when extracting blob images.

This commit is contained in:
Andrey Golovizin 2014-08-15 11:57:33 +02:00
parent 4ff37720d9
commit 26ba867089
2 changed files with 17 additions and 3 deletions

View file

@ -20,7 +20,7 @@ import numpy as np
from skimage.io import imread, imsave
from skimage.color import rgb2gray
from .utils import cached_property
from .utils import cached_property, pairwise
def _is_nonblank(bitmap):
@ -162,6 +162,11 @@ class Image(object):
bottom_margin = _get_margin_height(reversed(self.bitmap))
return self[top_margin:self.height - bottom_margin, :]
def mask(self, mask):
    """Return a new image in which the masked pixels are filled with 255.

    ``mask`` is a per-pixel array: truthy entries mark the pixels to
    overwrite, every other pixel keeps its value from ``self.data``.
    The result is wrapped in a new ``Image`` carrying this image's
    ``x`` and ``y``.

    The mask is replicated once per channel of ``self.data`` rather than
    a fixed three times, so 2-D (single-channel) data and data with a
    channel count other than three are handled as well.  For 3-channel
    data the behaviour is unchanged.
    """
    data_ndim = np.ndim(self.data)
    if data_ndim == 2:
        # Single-channel data: the per-pixel mask already has the right shape.
        pixel_mask = mask
    else:
        copies = 3  # historical default, kept for every other layout
        if data_ndim == 3 and np.ndim(mask) <= 2:
            # np.dstack stacks along the third axis, so one copy per channel
            # yields a mask shaped like the data.
            copies = np.shape(self.data)[2]
        pixel_mask = np.dstack([mask] * copies)
    data = np.ma.masked_array(self.data, pixel_mask, fill_value=255).filled()
    return Image(data, self.x, self.y)
def _iter_lines(self, min_space):
def iter_lines():
line_start = None

View file

@ -118,8 +118,11 @@ class Word(PageObject):
@collect_iterable
def letters(self):
labels, max_label = ndimage.label(self.image.bitmap, CONNECTIVITY8)
obj_indices = ndimage.find_objects(labels, max_label)
letter_images = (self.image[obj_index] for obj_index in obj_indices)
blob_slices = enumerate(ndimage.find_objects(labels, max_label), 1)
letter_images = (
self._extract_blob(blob_slice, label, labels)
for (label, blob_slice) in blob_slices
)
letters = (
Letter(image, self.baseline - image.bottom)
for image in letter_images
@ -127,6 +130,12 @@ class Word(PageObject):
sorted_letters = sorted(letters, key=lambda letter: (letter.left, -letter.bottom))
return iter(sorted_letters)
def _extract_blob(self, blob_slice, label, labels):
    """Cut one blob out of the word image, masking everything that is not it.

    ``blob_slice`` selects the same window from ``self.image`` and from the
    ``labels`` array.  Every position in that window whose label differs
    from ``label`` is flagged in the mask handed to the cropped image's
    ``mask`` method, and that method's result is returned.
    """
    cropped = self.image[blob_slice]
    # True wherever the window holds something other than the requested blob.
    not_this_blob = labels[blob_slice] != label
    return cropped.mask(not_this_blob)
class Letter(PageObject):
def __init__(self, image, elevation):