From 807ab6291cbe8d4abc2157c325dade78931f7bdb Mon Sep 17 00:00:00 2001 From: Andrey Golovizin Date: Mon, 15 Sep 2014 23:37:06 +0200 Subject: [PATCH] Use the language of the last known glyph for totally unknown words. --- pixelocr/document.py | 4 +++- pixelocr/page.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pixelocr/document.py b/pixelocr/document.py index 2cc507e..85ff092 100644 --- a/pixelocr/document.py +++ b/pixelocr/document.py @@ -39,6 +39,7 @@ class Document(object): self.config = Configuration.load_file(path.join(self.dirname, 'config.yaml')) self.output_format = output_format self.last_style = Style(bold=False, italic=False, color=(255, 255, 255)) # FIXME get rid of hardcoded value + self.last_language = None def save_glyphdb(self): self.glyphdb.save() @@ -75,10 +76,11 @@ class Document(object): except KeyError: guessed_bold = glyph.word.guess_bold(default=self.last_style.bold) guessed_italic = glyph.word.guess_italic(default=self.last_style.italic) - self.switch_layout(glyph.word.guess_language()) + self.switch_layout(glyph.word.guess_language(self.last_language)) text, bold, italic = self.ui.ask_for_help(glyph, guessed_bold, guessed_italic) glyph_data = self.glyphdb.add_glyph(glyph, text, bold, italic) self.last_style = glyph_data.style + self.last_language = self.language_map.get(glyph_data.text) return glyph_data @cached_property diff --git a/pixelocr/page.py b/pixelocr/page.py index 1eeab47..c0cd38b 100644 --- a/pixelocr/page.py +++ b/pixelocr/page.py @@ -269,12 +269,12 @@ class Word(PageObject): image = line.image[:, beginning.left:end.right] super().__init__(line.document, image) - def guess_language(self): + def guess_language(self, default=None): counts = defaultdict(int) language_map = self.document.language_map known_glyph_info = self._known_glyph_info() if not known_glyph_info: - return None + return default for glyph_info in known_glyph_info: counts[language_map.get(glyph_info.text)] += 1 language, count = 
sorted(counts.items(), reverse=True)[0]