Add HTML output format.
This commit is contained in:
parent
296035c966
commit
a59d528253
2 changed files with 19 additions and 1 deletions
|
|
@ -70,3 +70,21 @@ class TextFormat(OutputFormat):
|
|||
|
||||
def format_tag(self, tag, text):
    """Return *text* unchanged.

    The *tag* argument is accepted but not used: this format emits the
    text as-is, without any markup around it.
    """
    return text
|
||||
|
||||
|
||||
class HTMLFormat(OutputFormat):
    """Output format that wraps styled text in simple HTML tags."""

    def assign_glyph_tag(self, glyph_data):
        """Pick the HTML tag name for a glyph based on its style.

        Returns ``'b'`` when ``glyph_data.style.bold`` is truthy, ``'i'``
        when only ``glyph_data.style.italic`` is truthy, and ``None``
        otherwise. Bold takes precedence when both flags are set.
        """
        style = glyph_data.style
        if style.bold:
            return 'b'
        if style.italic:
            return 'i'
        return None

    def format_tag(self, tag, text):
        """Return *text* as an HTML fragment, wrapped in *tag* if given.

        The text is always XML-escaped (``&``, ``<``, ``>``) so that it
        cannot be interpreted as markup, whether or not a tag is applied.
        When *tag* is falsy the escaped text is returned without a wrapper.
        """
        from xml.sax.saxutils import escape

        # Escape in both branches: untagged text ends up in the same HTML
        # document as tagged text and must not leak raw '<' or '&'.
        escaped = escape(text)
        if tag:
            return '<{tag}>{text}</{tag}>'.format(tag=tag, text=escaped)
        return escaped
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ class OCREngine(QThread):
|
|||
|
||||
def recognize_page(self, page):
    """Recognize every line of *page* and return the joined formatted output.

    Each line in ``page.lines`` is passed to ``self.recognize_line``; the
    results are chained into one sequence, run through
    ``formatting.HTMLFormat().format`` and concatenated into a single
    string.
    """
    # chain.from_iterable consumes the generator lazily instead of
    # unpacking every per-line result into call arguments up front.
    glyph_data_seq = itertools.chain.from_iterable(
        self.recognize_line(line) for line in page.lines
    )
    output_format = formatting.HTMLFormat()
    return ''.join(output_format.format(glyph_data_seq))
|
||||
|
||||
def recognize_line(self, line):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue