diff --git a/pixelocr/gui/pageview.py b/pixelocr/gui/pageview.py
index 764051a..53c4355 100644
--- a/pixelocr/gui/pageview.py
+++ b/pixelocr/gui/pageview.py
@@ -68,7 +68,8 @@ class PageScene(QGraphicsScene):
                 for letter in word:
                     if not letter.image.isspace:
                         self.addRect(letter.x1, letter.y1, letter.width, letter.height, letterPen, letterBrush)
-            self.addRect(line.x1, line.y1, line.width, line.height, Qt.red)
+            self.addLine(line.x1, line.y1 + line.baseline, line.x2, line.y1 + line.baseline, linePen)
+#            self.addRect(line.x1, line.y1, line.width, line.height, Qt.red)
 
     def addPage(self, page):
         qimage = ndimage2qimage(page.image.data)
diff --git a/pixelocr/page.py b/pixelocr/page.py
index 1d34fdc..1d215c2 100644
--- a/pixelocr/page.py
+++ b/pixelocr/page.py
@@ -14,7 +14,9 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 
+import numpy as np
 from scipy import ndimage
+from scipy.ndimage import filters
 
 from .utils import cached_property, collect_iterable
 from .image import Image
@@ -85,6 +87,20 @@ class Line(PageObject):
         for word in self.words:
             yield from word.letters
 
+    @cached_property
+    def baseline(self):
+        """Detect baseline height, relative to the top.
+
+        Sums the line bitmap along axis 1 and returns the index at which
+        that profile drops most sharply from one entry to the next.
+        """
+        # Builtin float replaces the np.float alias, removed in NumPy 1.24.
+        bitmap = self.image.bitmap.astype(float)
+        histogram = bitmap.sum(axis=1)
+        # Forward difference: gradient[i] == histogram[i + 1] - histogram[i].
+        gradient = ndimage.convolve1d(histogram, [1, -1], axis=0)
+        return gradient.argmin()
+
 
 class Word(PageObject):
     def __iter__(self):