From 03a481d3a2b4fb1cbbba0f2a51046a251d2baa60 Mon Sep 17 00:00:00 2001 From: Andrey Golovizin Date: Tue, 26 Aug 2014 18:23:28 +0200 Subject: [PATCH] =?UTF-8?q?Limit=20maximal=20optical=20correction=20to=20p?= =?UTF-8?q?revent=20splitting=20glyph=20pairs=20like=20=D0=93=D0=BE.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pixelocr/page.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pixelocr/page.py b/pixelocr/page.py index 89e99b3..5dcc198 100644 --- a/pixelocr/page.py +++ b/pixelocr/page.py @@ -85,7 +85,7 @@ class PageObject(object): return self.image.fits(left, top, right, bottom) - def _optical_correction(self, other, T=False): + def _optical_correction(self, other, max_correction=1000, T=False): image1 = self.image.T if T else self.image image2 = other.image.T if T else other.image base = min(image1.top, image2.top) @@ -101,7 +101,8 @@ class PageObject(object): margin1[image1.top - base: image1.bottom - base] = np.fliplr(bitmap1).argmax(axis=1) margin2[image2.top - base: image2.bottom - base] = bitmap2.argmax(axis=1) margins = margin1 + margin2 - return margins.min() + correction = margins.min() + return min(correction, max_correction) class Page(PageObject): @@ -241,7 +242,7 @@ class Glyph(PageObject): def optical_distance(self, other): distance = other.left - self.right - return distance + self._optical_correction(other) + return distance + self._optical_correction(other, max_correction=3) def detect_diacritic(self, glyph): """Check if the given glyph can be our diacritic and return a numeric score."""