From 4e4848e1bbc7c85b30cc85f2f557e8b42bb1e07b Mon Sep 17 00:00:00 2001
From: Andrey Golovizin
Date: Mon, 15 Sep 2014 23:32:24 +0200
Subject: [PATCH] Use last known glyph for guessing bold/italic attributes of totally unknown words.

---
Review notes (placed after the "---" separator, so they are not part of the
commit message):

  * document.py: when the glyph is missing from glyphdb, the bold/italic
    guesses handed to ui.ask_for_help() now fall back to self.last_style.bold
    and self.last_style.italic, and are computed before switch_layout() is
    called.
  * page.py: Word._guess() takes a new `default=None` argument and returns it
    when _known_glyph_info() is empty; guess_bold()/guess_italic() forward
    *args/**kwargs to it, so calls without arguments still yield None in that
    case.
  * NOTE(review): _guess() calls self._known_glyph_info() a second time inside
    sum(...) although the result is already held in known_glyph_info;
    presumably the local could be reused - confirm the call is side-effect
    free.
  * NOTE(review): `avg >= 0.5` relies on `total / len(...)` being true
    division; if this module runs on Python 2, confirm it imports division
    from __future__.

 pixelocr/document.py |  4 +++-
 pixelocr/page.py     | 12 ++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/pixelocr/document.py b/pixelocr/document.py
index f64ac27..2cc507e 100644
--- a/pixelocr/document.py
+++ b/pixelocr/document.py
@@ -73,8 +73,10 @@ class Document(object):
         try:
             glyph_data = self.glyphdb[glyph]
         except KeyError:
+            guessed_bold = glyph.word.guess_bold(default=self.last_style.bold)
+            guessed_italic = glyph.word.guess_italic(default=self.last_style.italic)
             self.switch_layout(glyph.word.guess_language())
-            text, bold, italic = self.ui.ask_for_help(glyph, glyph.word.guess_bold(), glyph.word.guess_italic())
+            text, bold, italic = self.ui.ask_for_help(glyph, guessed_bold, guessed_italic)
             glyph_data = self.glyphdb.add_glyph(glyph, text, bold, italic)
         self.last_style = glyph_data.style
         return glyph_data
diff --git a/pixelocr/page.py b/pixelocr/page.py
index a53809e..1eeab47 100644
--- a/pixelocr/page.py
+++ b/pixelocr/page.py
@@ -280,16 +280,16 @@ class Word(PageObject):
         language, count = sorted(counts.items(), reverse=True)[0]
         return language
 
-    def guess_bold(self):
-        return self._guess('bold')
+    def guess_bold(self, *args, **kwargs):
+        return self._guess('bold', *args, **kwargs)
 
-    def guess_italic(self):
-        return self._guess('italic')
+    def guess_italic(self, *args, **kwargs):
+        return self._guess('italic', *args, **kwargs)
 
-    def _guess(self, attr):
+    def _guess(self, attr, default=None):
         known_glyph_info = self._known_glyph_info()
         if not known_glyph_info:
-            return None
+            return default
         total = sum(getattr(glyph_info.style, attr) for glyph_info in self._known_glyph_info())
         avg = total / len(known_glyph_info)
         return avg >= 0.5