diff --git a/pixelocr/gui/pageview.py b/pixelocr/gui/pageview.py
index 352a380..fa387e1 100644
--- a/pixelocr/gui/pageview.py
+++ b/pixelocr/gui/pageview.py
@@ -60,12 +60,13 @@ class PageScene(QGraphicsScene):
         self.pageItem.setGraphicsEffect(shadow)
 
         letterPen = QPen(QColor(50, 50, 50, 100))
-        letterBrush = QBrush(QColor(255, 255, 0, 60))
+        letterBrush = QBrush(QColor(255, 255, 0, 80))
         linePen = QPen(QColor(255, 150, 150, 100))
         for line in page:
-            for letter in line:
-                if not letter.image.isspace:
-                    self.addRect(letter.x1, letter.y1, letter.width, letter.height, letterPen, letterBrush)
+            for word in line:
+                for letter in word:
+                    if not letter.image.isspace:
+                        self.addRect(letter.x1, letter.y1, letter.width, letter.height, letterPen, letterBrush)
             self.addRect(line.x1, line.y1, line.width, line.height, Qt.red)
 
     def addPage(self, page):
diff --git a/pixelocr/image.py b/pixelocr/image.py
index d9a5316..2cbb984 100644
--- a/pixelocr/image.py
+++ b/pixelocr/image.py
@@ -151,22 +151,36 @@ class Image(object):
         bottom_margin = _get_margin_height(reversed(self.bitmap))
         return self[top_margin:self.height - bottom_margin, :]
 
-    def _iter_lines(self, min_space, T=False):
-        line_start = None
-        prev_line_end = 0
+    def _iter_lines(self, min_space):
+        def iter_lines():
+            line_start = None
+            prev_line_end = 0
 
-        for i, row in enumerate(self.bitmap):
-            if _is_nonblank(row):
-                if line_start is None:
-                    line_start = i
-                    height = line_start - prev_line_end
-                    if height >= min_space:
-                        yield self[prev_line_end:line_start]
-            else:
-                if line_start is not None:
-                    yield self[line_start:i,:]
-                    line_start = None
-                    prev_line_end = i
+            for i, row in enumerate(self.bitmap):
+                if _is_nonblank(row):
+                    if line_start is None:
+                        line_start = i
+                else:
+                    if line_start is not None:
+                        yield self[line_start:i,:]
+                        line_start = None
+                        prev_line_end = i
+
+        def merge_lines(lines):
+            prev_line = None
+            for line in lines:
+                if prev_line is None:
+                    prev_line = line
+                else:
+                    if line.y1 - prev_line.y2 < min_space:
+                        prev_line = self[prev_line.y1:line.y2]
+                    else:
+                        yield prev_line
+                        prev_line = line
+            if prev_line is not None:
+                yield prev_line
+
+        return merge_lines(iter_lines())
 
 
 class SubImage(Image):
diff --git a/pixelocr/page.py b/pixelocr/page.py
index 074c3b3..3d74acb 100644
--- a/pixelocr/page.py
+++ b/pixelocr/page.py
@@ -14,9 +14,14 @@
 # along with this program. If not, see .
 
 
+from scipy import ndimage
+
 from .image import Image
 
 
+CONNECTIVITY8 = ndimage.generate_binary_structure(2, 2)
+
+
 class PageObject(object):
     def __init__(self, image):
         self.image = image
@@ -55,14 +60,24 @@ class PageObject(object):
 
 class Page(PageObject):
     def __iter__(self):
-        for line_img in self.image._iter_lines(min_space=200):
+        for line_img in self.image._iter_lines(min_space=5):
             yield Line(line_img)
 
 
 class Line(PageObject):
     def __iter__(self):
-        for rotated_letter_img in self.image.T._iter_lines(min_space=10, T=True):
-            yield Letter(rotated_letter_img.T.strip())
+        for rotated_word_img in self.image.T._iter_lines(min_space=10):
+            yield Word(rotated_word_img.T)
+
+
+class Word(PageObject):
+    def __iter__(self):
+        labels, max_label = ndimage.label(self.image.bitmap, CONNECTIVITY8)
+        obj_indices = ndimage.find_objects(labels, max_label)
+        letter_images = (self.image[obj_index] for obj_index in obj_indices)
+        letters = (Letter(image) for image in letter_images)
+        sorted_letters = sorted(letters, key=lambda letter: (letter.x1, -letter.y1))
+        return iter(sorted_letters)
 
 
 class Letter(PageObject):