diff --git a/pixelocr/document.py b/pixelocr/document.py
index 85ff092..0a445c9 100644
--- a/pixelocr/document.py
+++ b/pixelocr/document.py
@@ -39,7 +39,7 @@ class Document(object):
         self.config = Configuration.load_file(path.join(self.dirname, 'config.yaml'))
         self.output_format = output_format
         self.last_style = Style(bold=False, italic=False, color=(255, 255, 255)) # FIXME get rid of hardcoded value
-        self.last_language = None
+        self.last_language = self.default_language
 
     def save_glyphdb(self):
         self.glyphdb.save()
@@ -88,7 +88,16 @@ class Document(object):
         languages = self.config.get('languages', {})
         return {letter: lang_name for lang_name, opts in languages.items() for letter in opts.get('alphabet', ())}
 
+    @cached_property
+    def default_language(self):
+        """Return the name of the language marked ``default`` in the config, if any."""
+        for lang_name, opts in self.config.get('languages', {}).items():
+            if opts.get('default'):
+                return lang_name
+
     def switch_layout(self, language):
+        if language is None:
+            language = self.default_language
         cmd = self.config.get('languages', {}).get(language, {}).get('command')
         if cmd:
             return subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
diff --git a/pixelocr/page.py b/pixelocr/page.py
index 5053a60..e34fa5b 100644
--- a/pixelocr/page.py
+++ b/pixelocr/page.py
@@ -274,12 +274,14 @@ class Word(PageObject):
         counts = defaultdict(int)
         language_map = self.document.language_map
         known_glyph_info = self._known_glyph_info()
-        if not known_glyph_info:
-            return default
         for glyph_info in known_glyph_info:
-            counts[language_map.get(glyph_info.text)] += 1
-        language = max(counts, key=lambda lang: counts[lang])
-        return language
+            language = language_map.get(glyph_info.text)
+            if language is not None:
+                counts[language] += 1
+        if not counts:
+            return default
+        else:
+            return max(counts, key=lambda lang: counts[lang])
 
     def guess_bold(self, *args, **kwargs):
         return self._guess('bold', *args, **kwargs)