class GlyphData(object):
    """Record stored in the glyph database for a single glyph.

    Attributes:
        image_data: Serialized glyph image, as returned by
            ``glyph.image.serialize()``.
        elevation: The ``elevation`` value copied from the glyph.
        text: Text associated with the glyph.
        bold: Bold flag stored with the glyph (defaults to False).
        italic: Italic flag stored with the glyph (defaults to False).
    """

    def __init__(self, image_data, elevation, text, bold=False, italic=False):
        self.image_data = image_data
        self.elevation = elevation
        self.text = text
        self.bold = bold
        self.italic = italic

    def __repr__(self):
        # image_data is deliberately left out: it is raw serialized image
        # content and would drown the useful fields.
        return '{}(elevation={!r}, text={!r}, bold={!r}, italic={!r})'.format(
            type(self).__name__,
            self.elevation,
            self.text,
            self.bold,
            self.italic,
        )

    @classmethod
    def from_glyph(cls, glyph, *args, **kwargs):
        """Build a record from ``glyph``.

        The image data and elevation are taken from the glyph; the remaining
        positional and keyword arguments (``text``, ``bold``, ``italic``)
        are forwarded to the constructor unchanged.
        """
        return cls(glyph.image.serialize(), glyph.elevation, *args, **kwargs)

    @property
    def color(self):
        """Return the ``color`` attribute of the deserialized image."""
        return self.image.color

    @cached_property
    def image(self):
        """Return the image rebuilt from ``image_data`` by ``Image.deserialize``."""
        return Image.deserialize(self.image_data)

    def serialize(self):
        """Return the record as a plain tuple in constructor-argument order."""
        return (
            self.image_data,
            self.elevation,
            self.text,
            self.bold,
            self.italic,
        )

    @classmethod
    def deserialize(cls, args):
        """Rebuild a record from a tuple produced by :meth:`serialize`."""
        return cls(*args)


class GlyphDB(object):
    """Ordered collection of :class:`GlyphData` records backed by a pickle file.

    Records are keyed internally by ``(image_data, elevation)``.  Lookups by
    subscription and ``in`` take a glyph object (anything exposing
    ``image.serialize()`` and ``elevation``); ``update`` and ``remove`` take a
    :class:`GlyphData` record.
    """

    def __init__(self, filename):
        """Create the database, loading ``filename`` if it is an existing file."""
        self.filename = filename
        self._dict = OrderedDict()
        if path.isfile(self.filename):
            self.load()

    def _key_from_glyph(self, glyph):
        """Return the internal key for a glyph object."""
        return (glyph.image.serialize(), glyph.elevation)

    def _key_from_data(self, data):
        """Return the internal key for a :class:`GlyphData` record."""
        return (data.image_data, data.elevation)

    def __getitem__(self, glyph):
        """Return the record stored for ``glyph``; raise KeyError if absent."""
        return self._dict[self._key_from_glyph(glyph)]

    def __contains__(self, glyph):
        """Return True if a record is stored for ``glyph``.

        Defined explicitly because, without it, ``glyph in db`` falls back to
        calling ``__getitem__`` with the integers 0, 1, ... and fails with an
        unrelated AttributeError.
        """
        return self._key_from_glyph(glyph) in self._dict

    def __iter__(self):
        """Iterate over the internal keys, in the same order as :meth:`keys`.

        Defined explicitly for the same reason as ``__contains__``: the
        implicit ``__getitem__``-based iteration cannot work here because
        ``__getitem__`` expects a glyph, not an index.
        """
        return iter(self._dict)

    def add_glyph(self, glyph, text, bold=False, italic=False):
        """Store a new record for ``glyph``, replacing any existing one."""
        record = GlyphData.from_glyph(glyph, text, bold=bold, italic=italic)
        self._dict[self._key_from_glyph(glyph)] = record

    def update(self, data):
        """Insert ``data``, overwriting the record that has the same key."""
        self._dict[self._key_from_data(data)] = data

    def remove(self, data):
        """Delete the record with the same key as ``data``.

        Raises:
            KeyError: If no such record is stored.
        """
        del self._dict[self._key_from_data(data)]

    def keys(self):
        """Return a view of the internal ``(image_data, elevation)`` keys."""
        return self._dict.keys()

    def items(self):
        """Return a view of ``(key, GlyphData)`` pairs."""
        return self._dict.items()

    def values(self):
        """Return a view of the stored :class:`GlyphData` records."""
        return self._dict.values()

    def load(self):
        """Read records from ``self.filename`` and add them to the database.

        Records are added through :meth:`update`, so an entry already present
        under the same key is overwritten; other existing entries are kept.
        """
        # SECURITY NOTE: pickle.load() can execute arbitrary code while
        # unpickling.  Only open glyph database files from a trusted source.
        with open(self.filename, 'rb') as fileobj:
            serialized = pickle.load(fileobj)
        for item in serialized:
            self.update(GlyphData.deserialize(item))

    def save(self):
        """Write all records to ``self.filename`` as a pickled list of tuples."""
        # Serialize before opening the file so a failure here does not leave
        # a truncated database behind.
        serialized = [record.serialize() for record in self.values()]
        with open(self.filename, 'wb') as fileobj:
            pickle.dump(serialized, fileobj)
def removeRows(self, row, count, parent=None):
    """Remove ``count`` rows starting at ``row`` from the model and the glyph DB.

    Args:
        row: Index of the first row to remove.
        count: Number of consecutive rows to remove.
        parent: Parent model index handed to ``beginRemoveRows``.  When
            omitted, an invalid (root) ``QModelIndex`` is used.

    Returns:
        True if the rows were removed, False if the requested range does not
        lie inside the current rows.
    """
    # Reject bad ranges before notifying views: beginRemoveRows() must only
    # be called for rows that actually exist.
    if count <= 0 or row < 0 or row + count > len(self.values):
        return False
    if parent is None:
        # beginRemoveRows() requires a QModelIndex; passing None raises a
        # TypeError in the Qt bindings.
        from PyQt4.QtCore import QModelIndex
        parent = QModelIndex()

    self.beginRemoveRows(parent, row, row + count - 1)
    for value in self.values[row:row + count]:
        self.glyphdb.remove(value)
    del self.values[row:row + count]
    self.endRemoveRows()
    return True


def sort(self, column, order):
    """Sort the rows by ``column`` in the given Qt sort ``order``.

    Column 0 sorts by each record's ``text``, column 1 by its ``elevation``.
    Any other column leaves the rows untouched and emits no signals.
    """
    key_func = None
    if column == 0:
        def key_func(value):
            return value.text
    elif column == 1:
        def key_func(value):
            return value.elevation

    if key_func:
        # Bracket the reordering with the layout signals so attached views
        # refresh after the underlying list has changed.
        self.layoutAboutToBeChanged.emit()
        self.values.sort(key=key_func, reverse=(order == Qt.DescendingOrder))
        self.layoutChanged.emit()
a/pixelocr/image.py b/pixelocr/image.py index 7f57984..57a9762 100644 --- a/pixelocr/image.py +++ b/pixelocr/image.py @@ -185,7 +185,8 @@ class Image(object): array = np.fromstring(data, dtype=np.uint8).reshape(shape) return cls(array) - def toqimage(self): + @cached_property + def qimage(self): from PyQt4.QtGui import QImage return QImage( self.data.astype(np.uint8).data, diff --git a/pixelocr/page.py b/pixelocr/page.py index 910cfa1..3e07de5 100644 --- a/pixelocr/page.py +++ b/pixelocr/page.py @@ -251,11 +251,6 @@ class Glyph(PageObject): self.elevation = elevation self.line = line - @property - def key(self): - """Return a dictionary key uniquely representing this glyph.""" - return self.elevation, self.image.serialize() - def is_body(self): """Return True if the glyph is definitely not diacritic.""" return self.height >= self.MIN_BODY_HEIGHT