Rename OCREngine to Document and move it to pixelocr.document.
This commit is contained in:
parent
4e35d56696
commit
f771722d0b
3 changed files with 20 additions and 20 deletions
|
|
@ -37,9 +37,9 @@ from PyQt4.QtGui import (
|
|||
QApplication,
|
||||
)
|
||||
|
||||
from ..document import Document
|
||||
from .guiproxy import GUIProxy
|
||||
from .window import MainWindow
|
||||
from .ocrengine import OCREngine
|
||||
|
||||
|
||||
parser = ArgumentParser(description='PixelOCR')
|
||||
|
|
@ -58,13 +58,13 @@ def load_entry_point(group, name):
|
|||
|
||||
|
||||
class WorkerThread(QThread):
|
||||
def __init__(self, ocr, quit=False):
|
||||
def __init__(self, document, quit=False):
|
||||
super().__init__()
|
||||
self.ocr = ocr
|
||||
self.document = document
|
||||
self.quit = quit
|
||||
|
||||
def run(self):
|
||||
self.ocr.recognize()
|
||||
self.document.recognize()
|
||||
if self.quit:
|
||||
qApp.quit()
|
||||
|
||||
|
|
@ -77,20 +77,20 @@ def main():
|
|||
|
||||
args = parser.parse_args()
|
||||
gui_proxy = GUIProxy()
|
||||
ocr = OCREngine(
|
||||
document = Document(
|
||||
args.filename,
|
||||
ui=gui_proxy,
|
||||
skip=args.skip,
|
||||
limit=args.limit,
|
||||
output_format=load_entry_point('pixelocr.formatting', args.output_format).load()(),
|
||||
)
|
||||
app.aboutToQuit.connect(ocr.save_glyphdb)
|
||||
ocr_thread = WorkerThread(ocr, quit=args.quit)
|
||||
app.aboutToQuit.connect(document.save_glyphdb)
|
||||
worker_thread = WorkerThread(document, quit=args.quit)
|
||||
|
||||
win = MainWindow(ocr)
|
||||
win = MainWindow(document)
|
||||
win.glyphEntered.connect(gui_proxy.give_help)
|
||||
win.show()
|
||||
ocr_thread.start()
|
||||
worker_thread.start()
|
||||
|
||||
signal.signal(signal.SIGINT, signal.SIG_DFL)
|
||||
sys.exit(app.exec_())
|
||||
|
|
|
|||
|
|
@ -1,76 +0,0 @@
|
|||
# Copyright (C) 2014 Andrey Golovizin
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
import itertools
|
||||
from glob import glob
|
||||
from os import path
|
||||
|
||||
from .. import formatting
|
||||
from ..image import Image
|
||||
from ..page import Page, Space
|
||||
from ..glyphdb import GlyphDB, SPACE, NEWLINE
|
||||
|
||||
|
||||
class OCREngine(object):
    """Recognize the text of every ``*.png`` page image found in a directory.

    Glyphs are looked up in a ``GlyphDB`` stored as ``glyphdb.pickle`` inside
    the same directory; glyphs missing from it are resolved by asking ``ui``
    and are then added to the database.
    """

    # Indent width (in the units of ``line.indent``) rendered as one SPACE.
    SPACE_WIDTH = 15

    def __init__(self, dirname, ui, skip=0, limit=None, output_format='text'):
        """Collect the page files and open the glyph database.

        :param dirname: directory holding the ``*.png`` pages and
            ``glyphdb.pickle``.
        :param ui: object providing ``turn_page``, ``process_events`` and
            ``ask_for_help``.
        :param skip: number of leading page files (in sorted order) to skip.
        :param limit: maximum number of pages to process; any falsy value
            (``None`` or ``0``) means "no limit".
        :param output_format: formatter object; ``recognize`` reads its
            ``suffix`` attribute and ``recognize_page`` calls its ``format``
            method.
            NOTE(review): the ``'text'`` default is a plain string, which has
            no ``suffix`` attribute -- callers appear to be expected to pass
            a formatter instance; confirm and fix the default.
        """
        super().__init__()
        self.dirname = dirname
        self.ui = ui
        # Parenthesised for readability only: the conditional applies to the
        # whole upper bound, not just to ``limit``.
        stop = (skip + limit) if limit else None
        self.filenames = sorted(glob(path.join(dirname, '*.png')))[skip:stop]
        self.glyphdb = GlyphDB(path.join(self.dirname, 'glyphdb.pickle'))
        self.output_format = output_format
        self.last_style = (False, False, (255, 255, 255))  # FIXME get rid of hardcoded value

    def save_glyphdb(self):
        """Save the glyph database via ``GlyphDB.save``."""
        self.glyphdb.save()

    def load_page(self, filename):
        """Return a ``Page`` built from the image file *filename*."""
        return Page(Image.fromfile(filename), filename)

    def recognize(self):
        """Recognize every selected page, printing and saving its text.

        For each page the UI is told about the page change, the formatted
        text is printed, and the text is written next to the image as
        ``<image filename><output_format.suffix>``.
        """
        for filename in self.filenames:
            page = self.load_page(filename)
            self.ui.turn_page(page)
            page_text = self.recognize_page(page)
            print(page_text)
            # Explicit UTF-8: recognized glyph text is arbitrary Unicode and
            # must not depend on the platform's default encoding.
            with open(filename + self.output_format.suffix, 'w', encoding='utf-8') as page_text_file:
                page_text_file.write(page_text)

    def recognize_page(self, page):
        """Return the formatted text of *page* as a single string."""
        # from_iterable keeps the per-line generators lazy instead of
        # unpacking them all into an argument list first.
        glyph_data_seq = itertools.chain.from_iterable(
            self.recognize_line(line) for line in page.lines
        )
        return ''.join(self.output_format.format(glyph_data_seq))

    def recognize_line(self, line):
        """Yield glyph data for *line*: indent spaces, glyphs, then NEWLINE."""
        # The indent is converted to a whole number of leading SPACE entries.
        yield from [SPACE] * int(line.indent / self.SPACE_WIDTH)
        for glyph in line.glyphs:
            yield self.recognize_glyph(glyph)
        yield NEWLINE

    def recognize_glyph(self, glyph):
        """Return the glyph data for *glyph*, asking the UI if it is unknown.

        ``Space`` instances map directly to ``SPACE``. Unknown glyphs are
        described by ``ui.ask_for_help`` and added to the glyph database.
        The style of the returned glyph data is remembered in ``last_style``.
        """
        self.ui.process_events()

        if isinstance(glyph, Space):
            return SPACE
        try:
            glyph_data = self.glyphdb[glyph]
        except KeyError:
            text, bold, italic = self.ui.ask_for_help(glyph)
            glyph_data = self.glyphdb.add_glyph(glyph, text, bold, italic)
        self.last_style = glyph_data.style
        return glyph_data
|
||||
|
||||
|
|
@ -42,11 +42,11 @@ class MainWindow(QMainWindow):
|
|||
dbedit = None
|
||||
glyphEntered = signal([str, bool, bool])
|
||||
|
||||
def __init__(self, ocr):
|
||||
def __init__(self, document):
|
||||
super().__init__()
|
||||
self.setDocumentTitle(None)
|
||||
|
||||
self.ocr = ocr
|
||||
self.document = document
|
||||
|
||||
centralWidget = QWidget(self)
|
||||
self.setCentralWidget(centralWidget)
|
||||
|
|
@ -54,14 +54,14 @@ class MainWindow(QMainWindow):
|
|||
self.page = PageView(self.pageScene, centralWidget)
|
||||
self.glyphEdit = GlyphEdit(centralWidget)
|
||||
self.glyphEdit.setEnabled(False)
|
||||
self.glyphDBEdit = GlyphDBEdit(self.ocr.glyphdb)
|
||||
self.glyphDBEdit = GlyphDBEdit(self.document.glyphdb)
|
||||
|
||||
self.glyphEdit.glyphEntered.connect(self.unknownGlyphEntered)
|
||||
self.glyphEdit.glyphEntered.connect(self.pageScene.clearHighlight)
|
||||
self.glyphEdit.glyphEntered.connect(self.glyphDBEdit.updateData)
|
||||
ocr.ui.pageChanged.connect(self.pageScene.setPage)
|
||||
ocr.ui.pageChanged.connect(self.showPageTitle)
|
||||
ocr.ui.unknownGlyph.connect(self.unknownGlyph)
|
||||
document.ui.pageChanged.connect(self.pageScene.setPage)
|
||||
document.ui.pageChanged.connect(self.showPageTitle)
|
||||
document.ui.unknownGlyph.connect(self.unknownGlyph)
|
||||
self.page.setFocusProxy(self.glyphEdit)
|
||||
|
||||
layout = QVBoxLayout(centralWidget)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue