diff --git a/pixelocr/page.py b/pixelocr/page.py
index b48a8fa..798187e 100644
--- a/pixelocr/page.py
+++ b/pixelocr/page.py
@@ -85,6 +85,38 @@ class PageObject(object):
 
         return self.image.fits(left, top, right, bottom)
 
+    def _optical_correction(self, other):
+        """Return the smallest per-row sum of the facing margins.
+
+        For each pixel row in the vertical span covered by either image,
+        the margin at the right edge of ``self.image`` is added to the
+        margin at the left edge of ``other.image``; the minimum of these
+        sums is returned.  Rows an image does not cover count as that
+        image's full width.
+
+        NOTE(review): a margin is the ``argmax`` column offset of a row,
+        presumably the first ink pixel of a 0/1 bitmap whose row count
+        equals ``bottom - top`` -- confirm against the image class.
+        """
+        image1 = self.image
+        image2 = other.image
+        base = min(image1.top, image2.top)
+        height = max(image1.bottom, image2.bottom) - base
+        # A sentinel column of ones on the far side of each bitmap makes
+        # argmax return the full width for rows that are entirely zero.
+        bitmap1 = np.hstack([np.ones((image1.height, 1)), image1.bitmap])
+        bitmap2 = np.hstack([image2.bitmap, np.ones((image2.height, 1))])
+
+        margin1 = np.zeros(height, int)
+        margin1.fill(image1.width)
+        margin2 = np.zeros(height, int)
+        margin2.fill(image2.width)
+
+        margin1[image1.top - base: image1.bottom - base] = np.fliplr(bitmap1).argmax(axis=1)
+        margin2[image2.top - base: image2.bottom - base] = bitmap2.argmax(axis=1)
+        margins = margin1 + margin2
+        return margins.min()
+
 
 class Page(PageObject):
     def __iter__(self):
@@ -181,24 +213,6 @@ class Glyph(PageObject):
         """Return True if the glyph is definitely not diacritic."""
         return self.height >= self.MIN_BODY_HEIGHT
 
-    def _optical_correction(self, other):
-        glyph1 = self
-        glyph2 = other
-        base = min(glyph1.top, glyph2.top)
-        height = max(glyph1.bottom, glyph2.bottom) - base
-        bitmap1 = np.hstack([np.ones((glyph1.height, 1)), glyph1.image.bitmap])
-        bitmap2 = np.hstack([glyph2.image.bitmap, np.ones((glyph2.height, 1))])
-
-        margin1 = np.zeros(height, np.int)
-        margin1.fill(glyph1.width)
-        margin2 = np.zeros(height, np.int)
-        margin2.fill(glyph2.width)
-
-        margin1[glyph1.top - base: glyph1.bottom - base] = np.fliplr(bitmap1).argmax(axis=1)
-        margin2[glyph2.top - base: glyph2.bottom - base] = bitmap2.argmax(axis=1)
-        margins = margin1 + margin2
-        return margins.min()
-
     def optical_distance(self, other):
         distance = other.left - self.right
         return distance + self._optical_correction(other)