Use entry points for output formats, add --output-format option.
This commit is contained in:
parent
a59d528253
commit
69c5b4ce0a
3 changed files with 25 additions and 5 deletions
|
|
@ -19,6 +19,8 @@ import signal
|
||||||
from threading import Thread
|
from threading import Thread
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
|
|
||||||
|
from pkg_resources import iter_entry_points
|
||||||
|
|
||||||
import sip
|
import sip
|
||||||
sip.setapi('QString', 2)
|
sip.setapi('QString', 2)
|
||||||
|
|
||||||
|
|
@ -38,9 +40,17 @@ parser = ArgumentParser(description='PixelOCR')
|
||||||
parser.add_argument('--skip', metavar='N', type=int, default=0)
|
parser.add_argument('--skip', metavar='N', type=int, default=0)
|
||||||
parser.add_argument('--limit', metavar='N', type=int, default=None)
|
parser.add_argument('--limit', metavar='N', type=int, default=None)
|
||||||
parser.add_argument('--quit', action='store_true')
|
parser.add_argument('--quit', action='store_true')
|
||||||
|
parser.add_argument('-f', '--output-format', type=str, default='text')
|
||||||
parser.add_argument('filename')
|
parser.add_argument('filename')
|
||||||
|
|
||||||
|
|
||||||
|
def load_entry_point(group, name):
    """Return the first entry point called *name* registered in *group*.

    The lookup is delegated to ``iter_entry_points``; the object it yields
    is returned as-is, so the caller still has to call ``.load()`` on it to
    obtain the object the entry point refers to.

    Raises:
        ValueError: if no entry point with that name exists in the group.
    """
    try:
        return next(iter_entry_points(group, name))
    except StopIteration:
        # The StopIteration only signals "iterator was empty"; suppress it
        # so the traceback shows just the meaningful ValueError.
        raise ValueError(
            'Entry point {} in group {} not found'.format(name, group)
        ) from None
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
app = QApplication(sys.argv)
|
app = QApplication(sys.argv)
|
||||||
|
|
||||||
|
|
@ -48,7 +58,13 @@ def main():
|
||||||
QApplication.setApplicationName("PixelOCR");
|
QApplication.setApplicationName("PixelOCR");
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
ocr = OCREngine(args.filename, skip=args.skip, limit=args.limit, quit=args.quit)
|
ocr = OCREngine(
|
||||||
|
args.filename,
|
||||||
|
skip=args.skip,
|
||||||
|
limit=args.limit,
|
||||||
|
quit=args.quit,
|
||||||
|
output_format=load_entry_point('pixelocr.formatting', args.output_format).load()(),
|
||||||
|
)
|
||||||
app.aboutToQuit.connect(ocr.save_glyphdb)
|
app.aboutToQuit.connect(ocr.save_glyphdb)
|
||||||
|
|
||||||
win = MainWindow(ocr)
|
win = MainWindow(ocr)
|
||||||
|
|
|
||||||
|
|
@ -39,13 +39,14 @@ class OCREngine(QThread):
|
||||||
unknownGlyph = signal([Glyph])
|
unknownGlyph = signal([Glyph])
|
||||||
pageChanged = signal([Page])
|
pageChanged = signal([Page])
|
||||||
|
|
||||||
def __init__(self, dirname, skip=0, limit=None, quit=False):
|
def __init__(self, dirname, skip=0, limit=None, quit=False, output_format='text'):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.dirname = dirname
|
self.dirname = dirname
|
||||||
self.filenames = sorted(glob(path.join(dirname, '*.png')))[skip:skip + limit if limit else None]
|
self.filenames = sorted(glob(path.join(dirname, '*.png')))[skip:skip + limit if limit else None]
|
||||||
self.glyphdb = GlyphDB(path.join(self.dirname, 'glyphdb.pickle'))
|
self.glyphdb = GlyphDB(path.join(self.dirname, 'glyphdb.pickle'))
|
||||||
self.help_queue = Queue()
|
self.help_queue = Queue()
|
||||||
self.quit = quit
|
self.quit = quit
|
||||||
|
self.output_format = output_format
|
||||||
|
|
||||||
def save_glyphdb(self):
|
def save_glyphdb(self):
|
||||||
self.glyphdb.save()
|
self.glyphdb.save()
|
||||||
|
|
@ -67,8 +68,7 @@ class OCREngine(QThread):
|
||||||
|
|
||||||
def recognize_page(self, page):
|
def recognize_page(self, page):
|
||||||
glyph_data_seq = itertools.chain(*(self.recognize_line(line) for line in page.lines))
|
glyph_data_seq = itertools.chain(*(self.recognize_line(line) for line in page.lines))
|
||||||
output_format = formatting.HTMLFormat()
|
return ''.join(self.output_format.format(glyph_data_seq))
|
||||||
return ''.join(output_format.format(glyph_data_seq))
|
|
||||||
|
|
||||||
def recognize_line(self, line):
|
def recognize_line(self, line):
|
||||||
yield from [SPACE] * int(line.indent / self.SPACE_WIDTH)
|
yield from [SPACE] * int(line.indent / self.SPACE_WIDTH)
|
||||||
|
|
|
||||||
6
setup.py
6
setup.py
|
|
@ -18,7 +18,11 @@ setup(
|
||||||
'Operating System :: OS Independent',
|
'Operating System :: OS Independent',
|
||||||
],
|
],
|
||||||
entry_points={
|
entry_points={
|
||||||
'console_scripts': ['pixelocr = pixelocr.gui:main']
|
'console_scripts': ['pixelocr = pixelocr.gui:main'],
|
||||||
|
'pixelocr.formatting': [
|
||||||
|
'text = pixelocr.formatting:TextFormat',
|
||||||
|
'html = pixelocr.formatting:HTMLFormat',
|
||||||
|
],
|
||||||
},
|
},
|
||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
)
|
)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue