Reviewer note (text before the first "diff --git" line is not part of the patch):
  * This patch was re-flowed from a single collapsed line. Blank context lines
    and indentation were reconstructed from the original hunk sizes and are
    NOT verified against the target tree -- TODO confirm before applying.
  * Word.guess_language now selects the language by vote count. The original
    sorted the (language, count) pairs, which ranks by language name and
    raises TypeError on Python 3 when a None key meets a str key.
  * Docstrings were added to the three new methods; hunk sizes were updated to
    match. The "index" lines are kept from the original patch, so their
    post-image hashes no longer correspond to the patched content.
  * guess_language uses defaultdict; this patch does not add an import for it
    to pixelocr/page.py -- presumably it is already imported there; verify.

diff --git a/pixelocr/document.py b/pixelocr/document.py
index bc4eff3..f64ac27 100644
--- a/pixelocr/document.py
+++ b/pixelocr/document.py
@@ -15,6 +15,7 @@
 
 
 import itertools
+import subprocess
 from glob import glob
 from os import path
 
@@ -23,6 +24,7 @@ from .image import Image
 from .page import Page, Space
 from .glyphdb import GlyphDB, Style, SPACE, NEWLINE
 from .config import Configuration
+from .utils import cached_property
 
 
 class Document(object):
@@ -71,8 +73,37 @@ class Document(object):
         try:
             glyph_data = self.glyphdb[glyph]
         except KeyError:
+            self.switch_layout(glyph.word.guess_language())
             text, bold, italic = self.ui.ask_for_help(glyph, glyph.word.guess_bold(), glyph.word.guess_italic())
             glyph_data = self.glyphdb.add_glyph(glyph, text, bold, italic)
 
         self.last_style = glyph_data.style
         return glyph_data
+
+    @cached_property
+    def language_map(self):
+        """Map each configured alphabet element to its language name.
+
+        Built from the ``languages`` section of the configuration: every
+        element of a language's ``alphabet`` option becomes a key whose
+        value is that language's name. If an element appears under several
+        languages, the language iterated last wins.
+        """
+        languages = self.config.get('languages', {})
+        return {letter: lang_name
+                for lang_name, opts in languages.items()
+                for letter in opts.get('alphabet', ())}
+
+    def switch_layout(self, language):
+        """Run the command configured for *language*, if there is one.
+
+        Reads the ``command`` option of *language* from the ``languages``
+        section of the configuration and passes it to ``subprocess.call``
+        with stdout and stderr discarded.
+
+        Returns the command's exit status, or ``None`` when no command is
+        configured for *language*.
+        """
+        cmd = self.config.get('languages', {}).get(language, {}).get('command')
+        if cmd:
+            return subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
diff --git a/pixelocr/page.py b/pixelocr/page.py
index 7bbf0c8..cd19a5a 100644
--- a/pixelocr/page.py
+++ b/pixelocr/page.py
@@ -269,6 +269,28 @@ class Word(PageObject):
         image = line.image[:, beginning.left:end.right]
         super().__init__(line.document, image)
 
+    def guess_language(self):
+        """Guess the word's language from its known glyphs.
+
+        Every item returned by ``_known_glyph_info()`` votes for the language
+        that ``document.language_map`` assigns to its text; text missing from
+        the map votes for ``None``.
+
+        Returns the key with the most votes (which may be ``None``), or
+        ``None`` when there are no known glyphs.
+        """
+        counts = defaultdict(int)
+        language_map = self.document.language_map
+        known_glyph_info = self._known_glyph_info()
+        if not known_glyph_info:
+            return None
+        for glyph_info in known_glyph_info:
+            counts[language_map.get(glyph_info.text)] += 1
+        # Select by vote count. Sorting the (language, count) pairs instead
+        # would rank by language name and raise TypeError on Python 3 as soon
+        # as a None key is compared with a str key.
+        return max(counts, key=counts.get)
+
     def guess_bold(self):
         return self._guess('bold')
 