def neighbourhood(lst, index, window=5):
    """Return the items adjacent to position *index* in *lst*.

    The result holds up to *window* items immediately before *index*
    followed by up to *window* items immediately after it, in their
    original order.  The item at *index* itself is never included.  Near
    either end of the sequence the result is simply shorter.

    This helper exists because the naive ``lst[index - window:index]``
    produces a negative slice start whenever ``index < window``; Python
    then counts from the end of the sequence and the leading neighbours
    are lost.  The start is clamped to zero here instead.

    :param lst: a sliceable sequence (list, tuple, ...).
    :param index: position of the central item.  A negative value counts
        from the end, as in ordinary indexing.
    :param window: maximum number of neighbours taken on each side.
    :return: a new sequence of the same type as ``lst[a:b]``.
    :raises ValueError: if *window* is negative.
    :raises IndexError: if a negative *index* reaches before the start.
    """

    if window < 0:
        raise ValueError('window must be non-negative')

    if index < 0:
        # Normalise so that the arithmetic below cannot produce negative
        # slice bounds, which would silently wrap around.
        index += len(lst)
        if index < 0:
            raise IndexError('index out of range')

    # Explicit bounds rather than ``before[-window:]``: ``-0`` equals ``0``,
    # so that form returns everything when window is zero.  Slicing the
    # window directly also avoids copying the whole sequence on each call.
    start = max(index - window, 0)
    before = lst[start:index]
    after = lst[index + 1:index + 1 + window]
    return before + after