Add HTML output format.
This commit is contained in:
parent
296035c966
commit
a59d528253
2 changed files with 19 additions and 1 deletions
|
|
@ -70,3 +70,21 @@ class TextFormat(OutputFormat):
|
||||||
|
|
||||||
def format_tag(self, tag, text):
|
def format_tag(self, tag, text):
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
class HTMLFormat(OutputFormat):
    """Output format that marks up styled glyph runs with HTML tags."""

    def assign_glyph_tag(self, glyph_data):
        """Return the HTML tag name for a glyph, or None when it is unstyled.

        Bold is checked first, so a glyph whose style is both bold and
        italic is tagged 'b'.
        """
        style = glyph_data.style
        if style.bold:
            return 'b'
        if style.italic:
            return 'i'
        return None

    def format_tag(self, tag, text):
        """Return *text* as HTML, wrapped in ``<tag>...</tag>`` if *tag* is truthy.

        The text is escaped in both branches: untagged text is emitted into
        the same HTML document as tagged text, so a literal '<', '>' or '&'
        must not pass through raw.
        """
        from xml.sax.saxutils import escape

        escaped = escape(text)
        if tag:
            return '<{tag}>{text}</{tag}>'.format(tag=tag, text=escaped)
        return escaped
|
||||||
|
|
|
||||||
|
|
@ -67,7 +67,7 @@ class OCREngine(QThread):
|
||||||
|
|
||||||
def recognize_page(self, page):
    """Recognize all lines of *page* and return the formatted output string.

    The results of ``recognize_line`` for each entry of ``page.lines`` are
    chained into one sequence, passed through ``formatting.HTMLFormat``,
    and the pieces it produces are joined into a single string.
    """
    # chain.from_iterable pulls the per-line results one at a time instead
    # of unpacking them all into an argument list before chaining starts.
    glyph_data_seq = itertools.chain.from_iterable(
        self.recognize_line(line) for line in page.lines)
    output_format = formatting.HTMLFormat()
    return ''.join(output_format.format(glyph_data_seq))
|
||||||
|
|
||||||
def recognize_line(self, line):
|
def recognize_line(self, line):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue