diff --git a/pixelocr/page.py b/pixelocr/page.py index 89e99b3..5dcc198 100644 --- a/pixelocr/page.py +++ b/pixelocr/page.py @@ -85,7 +85,7 @@ class PageObject(object): return self.image.fits(left, top, right, bottom) - def _optical_correction(self, other, T=False): + def _optical_correction(self, other, max_correction=1000, T=False): image1 = self.image.T if T else self.image image2 = other.image.T if T else other.image base = min(image1.top, image2.top) @@ -101,7 +101,8 @@ class PageObject(object): margin1[image1.top - base: image1.bottom - base] = np.fliplr(bitmap1).argmax(axis=1) margin2[image2.top - base: image2.bottom - base] = bitmap2.argmax(axis=1) margins = margin1 + margin2 - return margins.min() + correction = margins.min() + return min(correction, max_correction) class Page(PageObject): @@ -241,7 +242,7 @@ class Glyph(PageObject): def optical_distance(self, other): distance = other.left - self.right - return distance + self._optical_correction(other) + return distance + self._optical_correction(other, max_correction=3) def detect_diacritic(self, glyph): """Check if the given glyph can be our diacritic and return a numeric score."""