diff --git a/pixelocr/page.py b/pixelocr/page.py index 5051fbe..89e99b3 100644 --- a/pixelocr/page.py +++ b/pixelocr/page.py @@ -19,7 +19,7 @@ from collections import defaultdict import numpy as np from scipy import ndimage -from scipy.ndimage import filters +from scipy.ndimage import filters, grey_closing from .utils import cached_property, collect_iterable, pairwise, neighbourhood from .image import Image, combine, is_nonblank @@ -158,6 +158,7 @@ class Line(PageObject): def baseline(self): """Detect baseline height, relative to the top.""" bitmap = self.image.bitmap + bitmap = grey_closing(bitmap, (0, 10), mode='constant') histogram = bitmap.sum(axis=1) gradient = list(filters.correlate1d(histogram, [-1, 1], axis=0, mode='constant')) gradient[0] = histogram[0]