Mask adjacent blobs when extracting blob images.
This commit is contained in:
parent
4ff37720d9
commit
26ba867089
2 changed files with 17 additions and 3 deletions
|
|
@ -20,7 +20,7 @@ import numpy as np
|
||||||
from skimage.io import imread, imsave
|
from skimage.io import imread, imsave
|
||||||
from skimage.color import rgb2gray
|
from skimage.color import rgb2gray
|
||||||
|
|
||||||
from .utils import cached_property
|
from .utils import cached_property, pairwise
|
||||||
|
|
||||||
|
|
||||||
def _is_nonblank(bitmap):
|
def _is_nonblank(bitmap):
|
||||||
|
|
@ -162,6 +162,11 @@ class Image(object):
|
||||||
bottom_margin = _get_margin_height(reversed(self.bitmap))
|
bottom_margin = _get_margin_height(reversed(self.bitmap))
|
||||||
return self[top_margin:self.height - bottom_margin, :]
|
return self[top_margin:self.height - bottom_margin, :]
|
||||||
|
|
||||||
|
def mask(self, mask):
    """Return a new Image in which every masked pixel is replaced by 255.

    mask -- array in which truthy entries mark the pixels to overwrite.
            It is stacked three times along the depth axis before being
            applied, so self.data is presumably (height, width, 3) and
            mask (height, width) -- TODO confirm against callers.

    The new Image is built with this image's own x and y.
    """
    # Repeat the per-pixel mask once per colour channel.
    channel_mask = np.dstack((mask, mask, mask))
    masked_pixels = np.ma.masked_array(
        self.data, channel_mask, fill_value=255
    )
    # filled() substitutes fill_value wherever the mask is set.
    return Image(masked_pixels.filled(), self.x, self.y)
|
||||||
|
|
||||||
def _iter_lines(self, min_space):
|
def _iter_lines(self, min_space):
|
||||||
def iter_lines():
|
def iter_lines():
|
||||||
line_start = None
|
line_start = None
|
||||||
|
|
|
||||||
|
|
@ -118,8 +118,11 @@ class Word(PageObject):
|
||||||
@collect_iterable
|
@collect_iterable
|
||||||
def letters(self):
|
def letters(self):
|
||||||
labels, max_label = ndimage.label(self.image.bitmap, CONNECTIVITY8)
|
labels, max_label = ndimage.label(self.image.bitmap, CONNECTIVITY8)
|
||||||
obj_indices = ndimage.find_objects(labels, max_label)
|
blob_slices = enumerate(ndimage.find_objects(labels, max_label), 1)
|
||||||
letter_images = (self.image[obj_index] for obj_index in obj_indices)
|
letter_images = (
|
||||||
|
self._extract_blob(blob_slice, label, labels)
|
||||||
|
for (label, blob_slice) in blob_slices
|
||||||
|
)
|
||||||
letters = (
|
letters = (
|
||||||
Letter(image, self.baseline - image.bottom)
|
Letter(image, self.baseline - image.bottom)
|
||||||
for image in letter_images
|
for image in letter_images
|
||||||
|
|
@ -127,6 +130,12 @@ class Word(PageObject):
|
||||||
sorted_letters = sorted(letters, key=lambda letter: (letter.left, -letter.bottom))
|
sorted_letters = sorted(letters, key=lambda letter: (letter.left, -letter.bottom))
|
||||||
return iter(sorted_letters)
|
return iter(sorted_letters)
|
||||||
|
|
||||||
|
def _extract_blob(self, blob_slice, label, labels):
    """Cut one labelled blob out of the word image and mask everything else.

    blob_slice -- index (as produced by ndimage.find_objects in the
                  caller) selecting the blob's bounding region.
    label      -- the label value identifying this blob in ``labels``.
    labels     -- label array covering the whole word image.

    Returns the result of calling ``mask`` on the cropped image: every
    pixel in the region whose label differs from ``label`` is masked,
    so parts of other blobs inside the same bounding box are hidden.
    """
    blob_image = self.image[blob_slice]
    # True wherever the pixel does not belong to this blob.
    foreign_pixels = labels[blob_slice] != label
    return blob_image.mask(foreign_pixels)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Letter(PageObject):
|
class Letter(PageObject):
|
||||||
def __init__(self, image, elevation):
|
def __init__(self, image, elevation):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue