Mask adjacent blobs when extracting blob images.

parent 4ff37720d9
commit 26ba867089

2 changed files with 17 additions and 3 deletions
@@ -20,7 +20,7 @@ import numpy as np
 from skimage.io import imread, imsave
 from skimage.color import rgb2gray
 
-from .utils import cached_property
+from .utils import cached_property, pairwise
 
 
 def _is_nonblank(bitmap):
@@ -162,6 +162,11 @@ class Image(object):
         bottom_margin = _get_margin_height(reversed(self.bitmap))
         return self[top_margin:self.height - bottom_margin, :]
 
+    def mask(self, mask):
+        mask3 = np.dstack([mask] * 3)
+        data = np.ma.masked_array(self.data, mask3, fill_value=255).filled()
+        return Image(data, self.x, self.y)
+
     def _iter_lines(self, min_space):
         def iter_lines():
             line_start = None
@@ -118,8 +118,11 @@ class Word(PageObject):
     @collect_iterable
     def letters(self):
         labels, max_label = ndimage.label(self.image.bitmap, CONNECTIVITY8)
-        obj_indices = ndimage.find_objects(labels, max_label)
-        letter_images = (self.image[obj_index] for obj_index in obj_indices)
+        blob_slices = enumerate(ndimage.find_objects(labels, max_label), 1)
+        letter_images = (
+            self._extract_blob(blob_slice, label, labels)
+            for (label, blob_slice) in blob_slices
+        )
         letters = (
             Letter(image, self.baseline - image.bottom)
             for image in letter_images
@@ -127,6 +130,12 @@ class Word(PageObject):
         sorted_letters = sorted(letters, key=lambda letter: (letter.left, -letter.bottom))
         return iter(sorted_letters)
 
+    def _extract_blob(self, blob_slice, label, labels):
+        image = self.image[blob_slice]
+        mask = labels[blob_slice] != label
+        return image.mask(mask)
+
+
 
 class Letter(PageObject):
     def __init__(self, image, elevation):
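For context, a minimal standalone sketch of the technique this commit applies: label the bitmap, take each blob's bounding-box slice, and white out any pixels inside that box that belong to a different label before handing the crop on. The function name extract_blobs, the parameters data and bitmap, and the 3x3 structuring element standing in for CONNECTIVITY8 are illustrative assumptions, not part of the repository; a 3-channel uint8 image on a white background is assumed, matching the fill_value=255 used above.

import numpy as np
from scipy import ndimage

# Assumption: 8-connectivity, standing in for the repository's CONNECTIVITY8.
CONNECTIVITY8 = np.ones((3, 3), dtype=int)


def extract_blobs(data, bitmap):
    """Yield one crop per connected blob, with neighbouring blobs whited out.

    data   -- H x W x 3 uint8 image on a white background (assumed)
    bitmap -- H x W boolean foreground mask for the same image
    """
    labels, max_label = ndimage.label(bitmap, CONNECTIVITY8)
    for label, blob_slice in enumerate(ndimage.find_objects(labels, max_label), 1):
        crop = data[blob_slice]
        # True wherever a pixel inside this bounding box belongs to another blob
        # (or to the background, which is assumed to be white already).
        mask = labels[blob_slice] != label
        mask3 = np.dstack([mask] * 3)
        # Fill the masked pixels with white so only the current blob survives.
        yield np.ma.masked_array(crop, mask3, fill_value=255).filled()

The mask is needed because bounding boxes of neighbouring blobs can overlap: without it, self.image[blob_slice] would carry fragments of adjacent letters into each crop, which is what the commit message refers to.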