# Copyright (C) 2014 Andrey Golovizin
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see https://www.gnu.org/licenses/.

import itertools
from glob import glob
from os import path

from . import formatting
from .image import Image
from .page import Page, Space
from .glyphdb import GlyphDB, SPACE, NEWLINE


class Document(object):
    """A directory of ``*.png`` page images recognized one page at a time.

    Known glyphs are looked up in a ``GlyphDB`` stored as ``glyphdb.pickle``
    inside the directory; unknown glyphs are resolved by asking the UI.
    """

    # Divisor turning a line's ``indent`` into a number of leading SPACE
    # items (presumably a width in pixels -- TODO confirm).
    SPACE_WIDTH = 15

    def __init__(self, dirname, ui, skip=0, limit=None, output_format='text'):
        """Collect the page files of *dirname* and open its glyph database.

        :param dirname: directory containing the ``*.png`` pages.
        :param ui: object providing ``turn_page``, ``process_events`` and
            ``ask_for_help``.
        :param skip: number of pages (in sorted filename order) to skip.
        :param limit: maximum number of pages to process; any falsy value
            (``None`` or ``0``) means "no limit".
        :param output_format: object used by this class through its
            ``suffix`` attribute and ``format(glyph_data_seq)`` method.
        """
        super().__init__()
        self.dirname = dirname
        self.ui = ui

        page_files = sorted(glob(path.join(dirname, '*.png')))
        end = skip + limit if limit else None
        self.filenames = page_files[skip:end]

        self.glyphdb = GlyphDB(path.join(self.dirname, 'glyphdb.pickle'))
        # NOTE(review): the default 'text' is a plain str, which has no
        # ``suffix`` attribute, and ``str.format`` would not format glyph
        # data.  The value probably needs to be resolved through the
        # ``formatting`` module -- confirm against the callers.
        self.output_format = output_format
        # Presumably (bold, italic, color) -- TODO confirm.
        self.last_style = (False, False, (255, 255, 255))  # FIXME get rid of hardcoded value

    def save_glyphdb(self):
        """Save the glyph database."""
        self.glyphdb.save()

    def load_page(self, filename):
        """Return a ``Page`` built from the image file *filename*."""
        return Page(Image.fromfile(filename), filename)

    def recognize(self):
        """Recognize every selected page, printing and saving its text.

        The text of each page is printed and written next to the image as
        ``<image filename><output_format.suffix>``.
        """
        for filename in self.filenames:
            page = self.load_page(filename)
            self.ui.turn_page(page)
            page_text = self.recognize_page(page)
            print(page_text)
            # Explicit encoding: recognized text may contain non-ASCII
            # characters, and the platform default is not always able to
            # represent them.
            output_name = filename + self.output_format.suffix
            with open(output_name, 'w', encoding='utf-8') as page_text_file:
                page_text_file.write(page_text)

    def recognize_page(self, page):
        """Return the formatted text of *page* as a single string."""
        glyph_data_seq = itertools.chain.from_iterable(
            self.recognize_line(line) for line in page.lines
        )
        return ''.join(self.output_format.format(glyph_data_seq))

    def recognize_line(self, line):
        """Yield the glyph data of *line*.

        The sequence starts with one SPACE per ``SPACE_WIDTH`` units of the
        line's indent and always ends with NEWLINE.
        """
        yield from [SPACE] * int(line.indent / self.SPACE_WIDTH)
        for glyph in line.glyphs:
            yield self.recognize_glyph(glyph)
        yield NEWLINE

    def recognize_glyph(self, glyph):
        """Return the glyph data for *glyph*, asking the UI if it is unknown.

        ``Space`` instances map directly to SPACE.  A glyph missing from the
        database is shown to the UI and the answer is added to the database.
        The style of the returned glyph data is remembered in ``last_style``.
        """
        # Give the UI a chance to handle pending events between glyphs.
        self.ui.process_events()
        if isinstance(glyph, Space):
            return SPACE
        try:
            glyph_data = self.glyphdb[glyph]
        except KeyError:
            text, bold, italic = self.ui.ask_for_help(glyph)
            glyph_data = self.glyphdb.add_glyph(glyph, text, bold, italic)
        self.last_style = glyph_data.style
        return glyph_data