Ignore glyphs from unknown alphabets when guessing language.
This commit is contained in:
parent
3f0837d7dd
commit
4275671c03
2 changed files with 16 additions and 6 deletions
|
|
@ -39,7 +39,7 @@ class Document(object):
|
|||
self.config = Configuration.load_file(path.join(self.dirname, 'config.yaml'))
|
||||
self.output_format = output_format
|
||||
self.last_style = Style(bold=False, italic=False, color=(255, 255, 255)) # FIXME get rid of hardcoded value
|
||||
self.last_language = None
|
||||
self.last_language = self.default_language
|
||||
|
||||
def save_glyphdb(self):
    """Persist the document's glyph database by calling its ``save()``."""
    glyph_store = self.glyphdb
    glyph_store.save()
|
||||
|
|
@ -88,7 +88,15 @@ class Document(object):
|
|||
languages = self.config.get('languages', {})
|
||||
return {letter: lang_name for lang_name, opts in languages.items() for letter in opts.get('alphabet', ())}
|
||||
|
||||
@cached_property
def default_language(self):
    """Return the name of the language flagged as default in the config.

    Scans the ``languages`` mapping of ``self.config`` and returns the
    name of the first entry whose options contain a truthy ``default``
    value.  Returns ``None`` when there is no ``languages`` section or
    no entry is flagged as default.
    """
    # Same key and ``.items()`` iteration as ``language_map``: the section
    # is a mapping of language name -> options.
    languages = self.config.get('languages', {})
    for lang_name, opts in languages.items():
        if opts.get('default'):
            return lang_name
    return None
|
||||
|
||||
def switch_layout(self, language):
    """Run the command configured for ``language``, if there is one.

    When ``language`` is ``None`` the document's ``default_language`` is
    used instead.  The command is looked up under
    ``languages -> <language> -> command`` in ``self.config`` and run via
    ``subprocess.call`` with stdout and stderr discarded.

    Returns the value returned by ``subprocess.call``, or ``None`` when no
    command is configured for the language.
    """
    target = self.default_language if language is None else language
    lang_opts = self.config.get('languages', {}).get(target, {})
    command = lang_opts.get('command')
    if not command:
        return None
    return subprocess.call(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
||||
|
|
|
|||
|
|
@ -274,12 +274,14 @@ class Word(PageObject):
|
|||
counts = defaultdict(int)
|
||||
language_map = self.document.language_map
|
||||
known_glyph_info = self._known_glyph_info()
|
||||
if not known_glyph_info:
|
||||
return default
|
||||
for glyph_info in known_glyph_info:
|
||||
counts[language_map.get(glyph_info.text)] += 1
|
||||
language = max(counts, key=lambda lang: counts[lang])
|
||||
return language
|
||||
language = language_map.get(glyph_info.text)
|
||||
if language is not None:
|
||||
counts[language] += 1
|
||||
if not counts:
|
||||
return default
|
||||
else:
|
||||
return max(counts, key=lambda lang: counts[lang])
|
||||
|
||||
def guess_bold(self, *args, **kwargs):
    """Delegate to ``self._guess`` for the ``'bold'`` attribute.

    All positional and keyword arguments are forwarded unchanged and the
    result of ``_guess`` is returned as-is.
    """
    attribute = 'bold'
    return self._guess(attribute, *args, **kwargs)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue