95 lines
2.8 KiB
Python
95 lines
2.8 KiB
Python
# Copyright (C) 2014 Andrey Golovizin
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
import itertools
|
|
|
|
from .utils import pipe
|
|
from .glyphdb import WhitespaceData
|
|
|
|
|
|
class OutputFormat(object):
    """Base class for output formats that turn a glyph stream into tagged text.

    The pipeline built by ``format`` groups consecutive glyphs sharing a tag,
    moves trailing whitespace out of each tagged run, and renders every
    ``(tag, text)`` pair with ``format_tag``.

    Subclasses must implement ``assign_glyph_tag`` and ``format_tag``.
    """

    # Presumably the file-name extension for this format -- confirm with callers.
    suffix = ''
    # Tag given to the most recent non-whitespace glyph; whitespace reuses it.
    last_tag = None

    def __call__(self, glyph_data_stream):
        """Format ``glyph_data_stream``; equivalent to calling ``format``."""
        return self.format(glyph_data_stream)

    def format(self, glyph_data_seq):
        """Run ``glyph_data_seq`` through the grouping/spacing/formatting steps.

        Returns whatever ``pipe`` produces from passing the sequence through
        ``group_by_tag``, ``fix_spaces`` and ``format_tags``.
        """
        # Reset the state on the instance (a bare ``last_tag = None`` would only
        # bind an unused local), so a tag remembered from a previous run is not
        # applied to whitespace at the start of this one.
        self.last_tag = None
        return pipe(glyph_data_seq, self.group_by_tag, self.fix_spaces, self.format_tags)

    def group_by_tag(self, styled_glyphs):
        """Yield ``(tag, text)`` for each run of consecutive same-tag glyphs.

        Tags come from ``assign_tag``; the text of a run is the concatenation
        of the ``text`` attribute of its glyphs.
        """
        for tag, group in itertools.groupby(styled_glyphs, key=self.assign_tag):
            text = ''.join(glyph_data.text for glyph_data in group)
            yield tag, text

    def fix_spaces(self, tagged_text):
        """Split trailing whitespace off each run into an untagged run.

        A run that has visible content followed by whitespace is emitted as
        ``(tag, stripped_text)`` and then ``(None, trailing_whitespace)``.
        Runs without trailing whitespace, and runs consisting only of
        whitespace, are passed through unchanged.
        """
        for tag, text in tagged_text:
            stripped_text = text.rstrip()
            if stripped_text and len(stripped_text) < len(text):
                trailing_whitespace = text[len(stripped_text):]
                yield tag, stripped_text
                yield None, trailing_whitespace
            else:
                yield tag, text

    def format_tags(self, tagged_text):
        """Yield ``format_tag(tag, text)`` for every ``(tag, text)`` pair."""
        for tag, text in tagged_text:
            yield self.format_tag(tag, text)

    def assign_tag(self, glyph_data):
        """Return the tag for ``glyph_data`` and remember it for whitespace.

        ``WhitespaceData`` instances get the tag of the last non-whitespace
        glyph, so they never start a new run on their own. Any other glyph is
        tagged by ``assign_glyph_tag`` and that tag is stored in ``last_tag``.
        """
        if isinstance(glyph_data, WhitespaceData):
            return self.last_tag
        else:
            tag = self.assign_glyph_tag(glyph_data)
            self.last_tag = tag
            return tag

    def assign_glyph_tag(self, glyph_data):
        """Return the tag for a non-whitespace glyph; subclasses must override."""
        raise NotImplementedError

    def format_tag(self, tag, text):
        """Render ``text`` for the given ``tag``; subclasses must override."""
        raise NotImplementedError
|
|
|
|
|
|
class TextFormat(OutputFormat):
    """Plain-text output format: no markup, text is emitted unchanged."""

    suffix = '.txt'

    def assign_tag(self, glyph_data):
        """Tag every glyph, whitespace included, with ``None``.

        This overrides the base ``assign_tag`` directly, so no per-glyph tag
        lookup takes place and ``last_tag`` is never updated.
        """
        return None

    def format_tag(self, tag, text):
        """Return ``text`` as is; ``tag`` is ignored."""
        return text
|
|
|
|
|
|
class HTMLFormat(OutputFormat):
    """HTML output format: bold and italic runs are wrapped in elements."""

    suffix = '.html'

    def assign_glyph_tag(self, glyph_data):
        """Choose the element name for a glyph from its ``style`` attribute.

        Returns ``'b'`` for a bold style, ``'i'`` for an italic one (bold is
        checked first, so a style that is both gets ``'b'``), and ``None``
        otherwise.
        """
        glyph_style = glyph_data.style
        if glyph_style.bold:
            return 'b'
        if glyph_style.italic:
            return 'i'
        return None

    def format_tag(self, tag, text, escape=True):
        """Render ``text``, wrapped in ``<tag>...</tag>`` when ``tag`` is truthy.

        When ``escape`` is true the text is first passed through
        ``xml.sax.saxutils.escape``.
        """
        from xml.sax import saxutils

        body = saxutils.escape(text) if escape else text
        if not tag:
            return body
        return '<{tag}>{text}</{tag}>'.format(tag=tag, text=body)
|