# Patch summary (commentary only; placed before the first `diff --git` header).
#
# Files touched: pixelocr/gui/__init__.py, pixelocr/gui/ocrengine.py, setup.py.
#
# pixelocr/gui/__init__.py
#   * Imports iter_entry_points from pkg_resources.
#   * Adds the CLI option -f/--output-format (type=str, default='text').
#   * Adds load_entry_point(group, name): returns the first entry point yielded
#     by iter_entry_points(group, name) and raises ValueError when there is none.
#   * main() now passes
#     load_entry_point('pixelocr.formatting', args.output_format).load()()
#     to OCREngine as output_format, i.e. the loaded entry-point object is
#     called with no arguments and the result is handed to the engine.
#
# pixelocr/gui/ocrengine.py
#   * OCREngine.__init__ gains an output_format parameter (default 'text') and
#     stores it as self.output_format.
#   * recognize_page() calls self.output_format.format(glyph_data_seq) instead
#     of creating a formatting.HTMLFormat() on every call.
#
# setup.py
#   * Registers the entry-point group 'pixelocr.formatting' with
#     'text' -> pixelocr.formatting:TextFormat and
#     'html' -> pixelocr.formatting:HTMLFormat.
#
# NOTE(review): the default output_format='text' in OCREngine.__init__ is a
# str, while recognize_page() calls .format(glyph_data_seq) on it. With that
# default the call would be str.format and would yield 'text' rather than
# formatted output. main() always passes a loaded object, so only callers that
# rely on the default are affected. Presumably the default should be a
# formatter instance (or None resolved inside __init__) -- confirm.
# NOTE(review): load_entry_point raises ValueError inside an `except
# StopIteration` block without `from None`, so the traceback will also show
# the StopIteration context -- confirm whether that is intended.
# NOTE(review): the hunk line breaks appear to have been collapsed in this copy
# of the patch; it may need to be regenerated before it can be applied.
diff --git a/pixelocr/gui/__init__.py b/pixelocr/gui/__init__.py index 8758514..0e8e670 100644 --- a/pixelocr/gui/__init__.py +++ b/pixelocr/gui/__init__.py @@ -19,6 +19,8 @@ import signal from threading import Thread from argparse import ArgumentParser +from pkg_resources import iter_entry_points + import sip sip.setapi('QString', 2) @@ -38,9 +40,17 @@ parser = ArgumentParser(description='PixelOCR') parser.add_argument('--skip', metavar='N', type=int, default=0) parser.add_argument('--limit', metavar='N', type=int, default=None) parser.add_argument('--quit', action='store_true') +parser.add_argument('-f', '--output-format', type=str, default='text') parser.add_argument('filename') +def load_entry_point(group, name): + try: + return next(iter_entry_points(group, name)) + except StopIteration: + raise ValueError('Entry point {} in group {} not found'.format(name, group)) + + def main(): app = QApplication(sys.argv) @@ -48,7 +58,13 @@ def main(): QApplication.setApplicationName("PixelOCR"); args = parser.parse_args() - ocr = OCREngine(args.filename, skip=args.skip, limit=args.limit, quit=args.quit) + ocr = OCREngine( + args.filename, + skip=args.skip, + limit=args.limit, + quit=args.quit, + output_format=load_entry_point('pixelocr.formatting', args.output_format).load()(), + ) app.aboutToQuit.connect(ocr.save_glyphdb) win = MainWindow(ocr) diff --git a/pixelocr/gui/ocrengine.py b/pixelocr/gui/ocrengine.py index 13f3dcd..505a6fa 100644 --- a/pixelocr/gui/ocrengine.py +++ b/pixelocr/gui/ocrengine.py @@ -39,13 +39,14 @@ class OCREngine(QThread): unknownGlyph = signal([Glyph]) pageChanged = signal([Page]) - def __init__(self, dirname, skip=0, limit=None, quit=False): + def __init__(self, dirname, skip=0, limit=None, quit=False, output_format='text'): super().__init__() self.dirname = dirname self.filenames = sorted(glob(path.join(dirname, '*.png')))[skip:skip + limit if limit else None] self.glyphdb = GlyphDB(path.join(self.dirname, 'glyphdb.pickle')) self.help_queue = 
Queue() self.quit = quit + self.output_format = output_format def save_glyphdb(self): self.glyphdb.save() @@ -67,8 +68,7 @@ class OCREngine(QThread): def recognize_page(self, page): glyph_data_seq = itertools.chain(*(self.recognize_line(line) for line in page.lines)) - output_format = formatting.HTMLFormat() - return ''.join(output_format.format(glyph_data_seq)) + return ''.join(self.output_format.format(glyph_data_seq)) def recognize_line(self, line): yield from [SPACE] * int(line.indent / self.SPACE_WIDTH) diff --git a/setup.py b/setup.py index 63f97bd..e08a6b5 100755 --- a/setup.py +++ b/setup.py @@ -18,7 +18,11 @@ setup( 'Operating System :: OS Independent', ], entry_points={ - 'console_scripts': ['pixelocr = pixelocr.gui:main'] + 'console_scripts': ['pixelocr = pixelocr.gui:main'], + 'pixelocr.formatting': [ + 'text = pixelocr.formatting:TextFormat', + 'html = pixelocr.formatting:HTMLFormat', + ], }, packages=find_packages(), )