Ignore glyphs from unknown alphabets when guessing language.

This commit is contained in:
Andrey Golovizin 2014-09-16 11:53:00 +02:00
parent 3f0837d7dd
commit 4275671c03
2 changed files with 16 additions and 6 deletions

View file

@ -39,7 +39,7 @@ class Document(object):
self.config = Configuration.load_file(path.join(self.dirname, 'config.yaml')) self.config = Configuration.load_file(path.join(self.dirname, 'config.yaml'))
self.output_format = output_format self.output_format = output_format
self.last_style = Style(bold=False, italic=False, color=(255, 255, 255)) # FIXME get rid of hardcoded value self.last_style = Style(bold=False, italic=False, color=(255, 255, 255)) # FIXME get rid of hardcoded value
self.last_language = None self.last_language = self.default_language
def save_glyphdb(self): def save_glyphdb(self):
self.glyphdb.save() self.glyphdb.save()
@ -88,7 +88,15 @@ class Document(object):
languages = self.config.get('languages', {}) languages = self.config.get('languages', {})
return {letter: lang_name for lang_name, opts in languages.items() for letter in opts.get('alphabet', ())} return {letter: lang_name for lang_name, opts in languages.items() for letter in opts.get('alphabet', ())}
@cached_property
def default_language(self):
    """Return the name of the language flagged as default in the config.

    Looks through the ``languages`` mapping of ``self.config`` and returns
    the name of the first entry whose options have a truthy ``default``
    value. Returns ``None`` when there is no ``languages`` section or no
    entry is flagged as default.
    """
    # The key must be 'languages' (same key the other config lookups use),
    # and the mapping has to be walked with .items() to get (name, options)
    # pairs; iterating the dict directly would only yield the names.
    for lang_name, opts in self.config.get('languages', {}).items():
        if opts.get('default'):
            return lang_name
    return None
def switch_layout(self, language): def switch_layout(self, language):
if language is None:
language = self.default_language
cmd = self.config.get('languages', {}).get(language, {}).get('command') cmd = self.config.get('languages', {}).get(language, {}).get('command')
if cmd: if cmd:
return subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) return subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

View file

@ -274,12 +274,14 @@ class Word(PageObject):
counts = defaultdict(int) counts = defaultdict(int)
language_map = self.document.language_map language_map = self.document.language_map
known_glyph_info = self._known_glyph_info() known_glyph_info = self._known_glyph_info()
if not known_glyph_info:
return default
for glyph_info in known_glyph_info: for glyph_info in known_glyph_info:
counts[language_map.get(glyph_info.text)] += 1 language = language_map.get(glyph_info.text)
language = max(counts, key=lambda lang: counts[lang]) if language is not None:
return language counts[language] += 1
if not counts:
return default
else:
return max(counts, key=lambda lang: counts[lang])
def guess_bold(self, *args, **kwargs): def guess_bold(self, *args, **kwargs):
return self._guess('bold', *args, **kwargs) return self._guess('bold', *args, **kwargs)