Add HTML output format.

This commit is contained in:
Andrey Golovizin 2014-09-04 17:47:07 +02:00
parent 296035c966
commit a59d528253
2 changed files with 19 additions and 1 deletion

View file

@ -70,3 +70,21 @@ class TextFormat(OutputFormat):
def format_tag(self, tag, text):
    """Return *text* unchanged; the plain-text format ignores *tag*."""
    return text
class HTMLFormat(OutputFormat):
    """Output format that renders text as HTML with inline style tags.

    NOTE(review): how the base class drives ``assign_glyph_tag`` and
    ``format_tag`` is not visible here; confirm against ``OutputFormat``.
    """

    def assign_glyph_tag(self, glyph_data):
        """Return the HTML tag name matching the glyph's style.

        Returns ``'b'`` when ``glyph_data.style.bold`` is truthy, ``'i'``
        when only ``italic`` is truthy, and ``None`` otherwise. Bold takes
        precedence, so a glyph that is both bold and italic is tagged
        ``'b'`` only.
        """
        style = glyph_data.style
        if style.bold:
            return 'b'
        if style.italic:
            return 'i'
        return None

    def format_tag(self, tag, text):
        """Return *text* as an HTML fragment, wrapped in *tag* if given.

        The text is always escaped (``&``, ``<``, ``>``), including when
        *tag* is falsy: untagged text lands in the same HTML output, so
        leaving it raw would let markup characters corrupt the result.
        """
        # Local import kept from the original so the block does not depend
        # on the module's top-level import list.
        from xml.sax.saxutils import escape

        escaped = escape(text)
        if tag:
            return '<{tag}>{text}</{tag}>'.format(tag=tag, text=escaped)
        return escaped

View file

@ -67,7 +67,7 @@ class OCREngine(QThread):
def recognize_page(self, page):
    """Recognize every line of *page* and return the result as one string.

    Glyph data from ``self.recognize_line`` is gathered for each entry of
    ``page.lines``, passed through ``formatting.HTMLFormat().format`` and
    the produced pieces are joined without a separator.
    """
    # chain.from_iterable consumes the per-line results lazily instead of
    # unpacking them all into an argument list first.
    glyph_data_seq = itertools.chain.from_iterable(
        self.recognize_line(line) for line in page.lines
    )
    # Only the HTML formatter is built: a TextFormat instance assigned here
    # would be overwritten before use.
    output_format = formatting.HTMLFormat()
    return ''.join(output_format.format(glyph_data_seq))
def recognize_line(self, line):