From 08a6d004e76c669f3038e3ab7a30f48366936bc1 Mon Sep 17 00:00:00 2001 From: Andrey Golovizin Date: Fri, 15 Aug 2014 18:09:28 +0200 Subject: [PATCH] Add OCREngine class. --- pixelocr/gui/ocrengine.py | 78 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 pixelocr/gui/ocrengine.py diff --git a/pixelocr/gui/ocrengine.py b/pixelocr/gui/ocrengine.py new file mode 100644 index 0000000..d0dc64d --- /dev/null +++ b/pixelocr/gui/ocrengine.py @@ -0,0 +1,78 @@ +# Copyright (C) 2014 Andrey Golovizin +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>.
from queue import Queue

# PyQt4 exposes its new-style signal/slot factories as ``pyqtSignal`` and
# ``pyqtSlot``; there is no lowercase ``signal``/``slot`` in QtCore, so they
# are aliased here to the short names this module uses.
from PyQt4.QtCore import (
    pyqtSignal as signal,
    pyqtSlot as slot,
    QThread,
)

from ..image import Image
from ..page import Page, Letter, Space


class OCREngine(QThread):
    """Thread that converts page images to text, asking for help when stuck.

    Letters are looked up in ``chardb`` by their ``key``.  When a letter is
    not found there, ``unknownLetter`` is emitted and the engine blocks until
    an answer is supplied through ``give_help``; the answer is then stored in
    ``chardb`` so the same key is not asked about again.
    """

    # Emitted with the Letter whose key is missing from ``chardb``.
    unknownLetter = signal([Letter])
    # Emitted with each Page right after it has been loaded.
    pageChanged = signal([Page])

    def __init__(self, filenames):
        """Create an engine for the given image files.

        :param filenames: iterable of file names; each one is handed to
            ``Image.fromfile`` when its page is processed.
        """
        super().__init__()
        self.filenames = filenames
        # Maps ``letter.key`` to the text recognized for that letter.
        self.chardb = {}
        # Carries answers from ``give_help`` to ``receive_help``.
        self.help_queue = Queue()

    def load_page(self, filename):
        """Return a Page built from the image stored in *filename*."""
        # NOTE(review): ``unframe(10)`` presumably strips a border from the
        # image -- confirm against ``Image.unframe``.
        return Page(Image.fromfile(filename).unframe(10))

    def run(self):
        """Thread body: recognize every page and print its text to stdout."""
        for page_text in self.recognize():
            print(page_text)

    def recognize(self):
        """Yield the text of each file, with its lines joined by newlines.

        ``pageChanged`` is emitted for a page before its text is produced.
        """
        for filename in self.filenames:
            page = self.load_page(filename)
            self.pageChanged.emit(page)
            yield '\n'.join(self.recognize_page(page))

    def recognize_page(self, page):
        """Yield one string for every entry of ``page.lines``."""
        for line in page.lines:
            yield ''.join(self.recognize_line(line))

    def recognize_line(self, line):
        """Yield the recognized text of every entry of ``line.letters``."""
        for letter in line.letters:
            yield self.recognize_letter(letter)

    def recognize_letter(self, letter):
        """Return the text for *letter*.

        A ``Space`` is returned as ``' '``.  Any other letter is looked up in
        ``chardb``; on a miss the answer obtained from ``ask_for_help`` is
        cached under ``letter.key`` and returned.
        """
        if isinstance(letter, Space):
            return ' '
        try:
            return self.chardb[letter.key]
        except KeyError:
            text = self.ask_for_help(letter)
            self.chardb[letter.key] = text
            return text

    def ask_for_help(self, unknown_letter):
        """Emit ``unknownLetter`` and block until an answer is available."""
        self.unknownLetter.emit(unknown_letter)
        return self.receive_help()

    def give_help(self, text):
        """Supply *text* as the answer to a pending ``ask_for_help`` call."""
        # NOTE(review): presumably called from another thread by whatever
        # handles ``unknownLetter`` -- confirm against the caller.
        self.help_queue.put(text)

    def receive_help(self):
        """Return the next answer from the queue, blocking until one exists."""
        return self.help_queue.get()