Leptonica 1.83.1
Image processing and image analysis suite
Loading...
Searching...
No Matches
pageseg.c
Go to the documentation of this file.
1/*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
81
82#ifdef HAVE_CONFIG_H
83#include <config_auto.h>
84#endif /* HAVE_CONFIG_H */
85
86#include <math.h>
87#include "allheaders.h"
88#include "pix_internal.h"
89
90 /* Lower bounds, in pixels, on the dimensions of an input image.
    * The functions in this file that test against these limits return
    * an error for anything smaller, because they are not intended to
    * work on very low-res images. */
91static const l_int32 MinWidth = 100;
92static const l_int32 MinHeight = 100;
93
94/*------------------------------------------------------------------*
95 * Top level page segmentation *
96 *------------------------------------------------------------------*/
113l_ok
115 PIX **ppixhm,
116 PIX **ppixtm,
117 PIX **ppixtb,
118 PIXA *pixadb)
119{
120l_int32 w, h, htfound, tlfound;
121PIX *pixr, *pix1, *pix2;
122PIX *pixtext; /* text pixels only */
123PIX *pixhm2; /* halftone mask; 2x reduction */
124PIX *pixhm; /* halftone mask; */
125PIX *pixtm2; /* textline mask; 2x reduction */
126PIX *pixtm; /* textline mask */
127PIX *pixvws; /* vertical white space mask */
128PIX *pixtb2; /* textblock mask; 2x reduction */
129PIX *pixtbf2; /* textblock mask; 2x reduction; small comps filtered */
130PIX *pixtb; /* textblock mask */
131
132 if (ppixhm) *ppixhm = NULL;
133 if (ppixtm) *ppixtm = NULL;
134 if (ppixtb) *ppixtb = NULL;
135 if (!pixs || pixGetDepth(pixs) != 1)
136 return ERROR_INT("pixs undefined or not 1 bpp", __func__, 1);
137 pixGetDimensions(pixs, &w, &h, NULL);
138 if (w < MinWidth || h < MinHeight) {
139 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
140 return 1;
141 }
142
143 /* 2x reduce, to 150 -200 ppi */
144 pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
145 if (pixadb) pixaAddPix(pixadb, pixr, L_COPY);
146
147 /* Get the halftone mask */
148 pixhm2 = pixGenerateHalftoneMask(pixr, &pixtext, &htfound, pixadb);
149
150 /* Get the textline mask from the text pixels */
151 pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, pixadb);
152
153 /* Get the textblock mask from the textline mask */
154 pixtb2 = pixGenTextblockMask(pixtm2, pixvws, pixadb);
155 pixDestroy(&pixr);
156 pixDestroy(&pixtext);
157 pixDestroy(&pixvws);
158
159 /* Remove small components from the mask, where a small
160 * component is defined as one with both width and height < 60 */
161 pixtbf2 = NULL;
162 if (pixtb2) {
163 pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER,
164 L_SELECT_IF_GTE, NULL);
165 pixDestroy(&pixtb2);
166 if (pixadb) pixaAddPix(pixadb, pixtbf2, L_COPY);
167 }
168
169 /* Expand all masks to full resolution, and do filling or
170 * small dilations for better coverage. */
171 pixhm = pixExpandReplicate(pixhm2, 2);
172 pix1 = pixSeedfillBinary(NULL, pixhm, pixs, 8);
173 pixOr(pixhm, pixhm, pix1);
174 pixDestroy(&pixhm2);
175 pixDestroy(&pix1);
176 if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY);
177
178 pix1 = pixExpandReplicate(pixtm2, 2);
179 pixtm = pixDilateBrick(NULL, pix1, 3, 3);
180 pixDestroy(&pixtm2);
181 pixDestroy(&pix1);
182 if (pixadb) pixaAddPix(pixadb, pixtm, L_COPY);
183
184 if (pixtbf2) {
185 pix1 = pixExpandReplicate(pixtbf2, 2);
186 pixtb = pixDilateBrick(NULL, pix1, 3, 3);
187 pixDestroy(&pixtbf2);
188 pixDestroy(&pix1);
189 if (pixadb) pixaAddPix(pixadb, pixtb, L_COPY);
190 } else {
191 pixtb = pixCreateTemplate(pixs); /* empty mask */
192 }
193
194 /* Debug: identify objects that are neither text nor halftone image */
195 if (pixadb) {
196 pix1 = pixSubtract(NULL, pixs, pixtm); /* remove text pixels */
197 pix2 = pixSubtract(NULL, pix1, pixhm); /* remove halftone pixels */
198 pixaAddPix(pixadb, pix2, L_INSERT);
199 pixDestroy(&pix1);
200 }
201
202 /* Debug: display textline components with random colors */
203 if (pixadb) {
204 l_int32 w, h;
205 BOXA *boxa;
206 PIXA *pixa;
207 boxa = pixConnComp(pixtm, &pixa, 8);
208 pixGetDimensions(pixtm, &w, &h, NULL);
209 pix1 = pixaDisplayRandomCmap(pixa, w, h);
210 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
211 pixaAddPix(pixadb, pix1, L_INSERT);
212 pixaDestroy(&pixa);
213 boxaDestroy(&boxa);
214 }
215
216 /* Debug: identify the outlines of each textblock */
217 if (pixadb) {
218 PIXCMAP *cmap;
219 PTAA *ptaa;
220 ptaa = pixGetOuterBordersPtaa(pixtb);
221 lept_mkdir("lept/pageseg");
222 ptaaWriteDebug("/tmp/lept/pageseg/tb_outlines.ptaa", ptaa, 1);
223 pix1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1);
224 cmap = pixGetColormap(pix1);
225 pixcmapResetColor(cmap, 0, 130, 130, 130);
226 pixaAddPix(pixadb, pix1, L_INSERT);
227 ptaaDestroy(&ptaa);
228 }
229
230 /* Debug: get b.b. for all mask components */
231 if (pixadb) {
232 BOXA *bahm, *batm, *batb;
233 bahm = pixConnComp(pixhm, NULL, 4);
234 batm = pixConnComp(pixtm, NULL, 4);
235 batb = pixConnComp(pixtb, NULL, 4);
236 boxaWriteDebug("/tmp/lept/pageseg/htmask.boxa", bahm);
237 boxaWriteDebug("/tmp/lept/pageseg/textmask.boxa", batm);
238 boxaWriteDebug("/tmp/lept/pageseg/textblock.boxa", batb);
239 boxaDestroy(&bahm);
240 boxaDestroy(&batm);
241 boxaDestroy(&batb);
242 }
243 if (pixadb) {
244 pixaConvertToPdf(pixadb, 0, 1.0, 0, 0, "Debug page segmentation",
245 "/tmp/lept/pageseg/debug.pdf");
246 L_INFO("Writing debug pdf to /tmp/lept/pageseg/debug.pdf\n", __func__);
247 }
248
249 if (ppixhm)
250 *ppixhm = pixhm;
251 else
252 pixDestroy(&pixhm);
253 if (ppixtm)
254 *ppixtm = pixtm;
255 else
256 pixDestroy(&pixtm);
257 if (ppixtb)
258 *ppixtb = pixtb;
259 else
260 pixDestroy(&pixtb);
261
262 return 0;
263}
264
265
266/*------------------------------------------------------------------*
267 * Halftone region extraction *
268 *------------------------------------------------------------------*/
279PIX *
281 PIX **ppixtext,
282 l_int32 *phtfound,
283 l_int32 debug)
284{
285 return pixGenerateHalftoneMask(pixs, ppixtext, phtfound, NULL);
286}
287
288
304PIX *
306 PIX **ppixtext,
307 l_int32 *phtfound,
308 PIXA *pixadb)
309{
310l_int32 w, h, empty;
311PIX *pix1, *pix2, *pixhs, *pixhm, *pixd;
312
313 if (ppixtext) *ppixtext = NULL;
314 if (phtfound) *phtfound = 0;
315 if (!pixs || pixGetDepth(pixs) != 1)
316 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
317 pixGetDimensions(pixs, &w, &h, NULL);
318 if (w < MinWidth || h < MinHeight) {
319 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
320 return NULL;
321 }
322
323 /* Compute seed for halftone parts at 8x reduction */
324 pix1 = pixReduceRankBinaryCascade(pixs, 4, 4, 0, 0);
325 pix2 = pixOpenBrick(NULL, pix1, 5, 5);
326 pixhs = pixExpandReplicate(pix2, 4); /* back to 2x reduction */
327 pixDestroy(&pix1);
328 pixDestroy(&pix2);
329 if (pixadb) pixaAddPix(pixadb, pixhs, L_COPY);
330
331 /* Compute mask for connected regions */
332 pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4);
333 if (pixadb) pixaAddPix(pixadb, pixhm, L_COPY);
334
335 /* Fill seed into mask to get halftone mask */
336 pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4);
337 if (pixadb) pixaAddPix(pixadb, pixd, L_COPY);
338
339#if 0
340 pixOpenBrick(pixd, pixd, 9, 9);
341#endif
342
343 /* Check if mask is empty */
344 pixZero(pixd, &empty);
345 if (phtfound && !empty)
346 *phtfound = 1;
347
348 /* Optionally, get all pixels that are not under the halftone mask */
349 if (ppixtext) {
350 if (empty)
351 *ppixtext = pixCopy(NULL, pixs);
352 else
353 *ppixtext = pixSubtract(NULL, pixs, pixd);
354 if (pixadb) pixaAddPix(pixadb, *ppixtext, L_COPY);
355 }
356
357 pixDestroy(&pixhs);
358 pixDestroy(&pixhm);
359 return pixd;
360}
361
362
363/*------------------------------------------------------------------*
364 * Textline extraction *
365 *------------------------------------------------------------------*/
385PIX *
387 PIX **ppixvws,
388 l_int32 *ptlfound,
389 PIXA *pixadb)
390{
391l_int32 w, h, empty;
392PIX *pix1, *pix2, *pixvws, *pixd;
393
394 if (ptlfound) *ptlfound = 0;
395 if (!ppixvws)
396 return (PIX *)ERROR_PTR("&pixvws not defined", __func__, NULL);
397 *ppixvws = NULL;
398 if (!pixs || pixGetDepth(pixs) != 1)
399 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
400 pixGetDimensions(pixs, &w, &h, NULL);
401 if (w < MinWidth || h < MinHeight) {
402 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
403 return NULL;
404 }
405
406 /* First we need a vertical whitespace mask. Invert the image. */
407 pix1 = pixInvert(NULL, pixs);
408
409 /* The whitespace mask will break textlines where there
410 * is a large amount of white space below or above.
411 * This can be prevented by identifying regions of the
412 * inverted image that have large horizontal extent (bigger than
413 * the separation between columns) and significant
414 * vertical extent (bigger than the separation between
415 * textlines), and subtracting this from the bg. */
416 pix2 = pixMorphCompSequence(pix1, "o80.60", 0);
417 pixSubtract(pix1, pix1, pix2);
418 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
419 pixDestroy(&pix2);
420
421 /* Identify vertical whitespace by opening the remaining bg.
422 * o5.1 removes thin vertical bg lines and o1.200 extracts
423 * long vertical bg lines. */
424 pixvws = pixMorphCompSequence(pix1, "o5.1 + o1.200", 0);
425 *ppixvws = pixvws;
426 if (pixadb) pixaAddPix(pixadb, pixvws, L_COPY);
427 pixDestroy(&pix1);
428
429 /* Three steps to getting text line mask:
430 * (1) close the characters and words in the textlines
431 * (2) open the vertical whitespace corridors back up
432 * (3) small opening to remove noise */
433 pix1 = pixMorphSequence(pixs, "c30.1", 0);
434 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
435 pixd = pixSubtract(NULL, pix1, pixvws);
436 pixOpenBrick(pixd, pixd, 3, 3);
437 if (pixadb) pixaAddPix(pixadb, pixd, L_COPY);
438 pixDestroy(&pix1);
439
440 /* Check if text line mask is empty */
441 if (ptlfound) {
442 pixZero(pixd, &empty);
443 if (!empty)
444 *ptlfound = 1;
445 }
446
447 return pixd;
448}
449
450
451/*------------------------------------------------------------------*
452 * Textblock extraction *
453 *------------------------------------------------------------------*/
475PIX *
477 PIX *pixvws,
478 PIXA *pixadb)
479{
480l_int32 w, h, empty;
481PIX *pix1, *pix2, *pix3, *pixd;
482
483 if (!pixs || pixGetDepth(pixs) != 1)
484 return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
485 pixGetDimensions(pixs, &w, &h, NULL);
486 if (w < MinWidth || h < MinHeight) {
487 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
488 return NULL;
489 }
490 if (!pixvws)
491 return (PIX *)ERROR_PTR("pixvws not defined", __func__, NULL);
492
493 /* Join pixels vertically to make a textblock mask */
494 pix1 = pixMorphSequence(pixs, "c1.10 + o4.1", 0);
495 pixZero(pix1, &empty);
496 if (empty) {
497 pixDestroy(&pix1);
498 L_INFO("no fg pixels in textblock mask\n", __func__);
499 return NULL;
500 }
501 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
502
503 /* Solidify the textblock mask and remove noise:
504 * (1) For each cc, close the blocks and dilate slightly
505 * to form a solid mask.
506 * (2) Small horizontal closing between components.
507 * (3) Open the white space between columns, again.
508 * (4) Remove small components. */
509 pix2 = pixMorphSequenceByComponent(pix1, "c30.30 + d3.3", 8, 0, 0, NULL);
510 pixCloseSafeBrick(pix2, pix2, 10, 1);
511 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
512 pix3 = pixSubtract(NULL, pix2, pixvws);
513 if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
514 pixd = pixSelectBySize(pix3, 25, 5, 8, L_SELECT_IF_BOTH,
515 L_SELECT_IF_GTE, NULL);
516 if (pixadb) pixaAddPix(pixadb, pixd, L_COPY);
517
518 pixDestroy(&pix1);
519 pixDestroy(&pix2);
520 pixDestroy(&pix3);
521 return pixd;
522}
523
524
525/*------------------------------------------------------------------*
526 * Location of page foreground *
527 *------------------------------------------------------------------*/
563BOX *
565 l_int32 threshold,
566 l_int32 mindist,
567 l_int32 erasedist,
568 l_int32 showmorph,
569 PIXAC *pixac)
570{
571l_int32 flag, nbox, intersects;
572l_int32 w, h, bx, by, bw, bh, left, right, top, bottom;
573PIX *pixb, *pixb2, *pixseed, *pixsf, *pixm, *pix1, *pixg2;
574BOX *box, *boxfg, *boxin, *boxd;
575BOXA *ba1, *ba2;
576
577 if (!pixs)
578 return (BOX *)ERROR_PTR("pixs not defined", __func__, NULL);
579 pixGetDimensions(pixs, &w, &h, NULL);
580 if (w < MinWidth || h < MinHeight) {
581 L_ERROR("pix too small: w = %d, h = %d\n", __func__, w, h);
582 return NULL;
583 }
584
585 /* Binarize, downscale by 0.5, remove the noise to generate a seed,
586 * and do a seedfill back from the seed into those 8-connected
587 * components of the binarized image for which there was at least
588 * one seed pixel. Also clear out any components that are within
589 * 10 pixels of the edge at 2x reduction. */
590 flag = (showmorph) ? 100 : 0;
591 pixb = pixConvertTo1(pixs, threshold);
592 pixb2 = pixScale(pixb, 0.5, 0.5);
593 pixseed = pixMorphSequence(pixb2, "o1.2 + c9.9 + o3.3", flag);
594 pix1 = pixMorphSequence(pixb2, "o50.1", 0);
595 pixOr(pixseed, pixseed, pix1);
596 pixDestroy(&pix1);
597 pix1 = pixMorphSequence(pixb2, "o1.50", 0);
598 pixOr(pixseed, pixseed, pix1);
599 pixDestroy(&pix1);
600 pixsf = pixSeedfillBinary(NULL, pixseed, pixb2, 8);
601 pixSetOrClearBorder(pixsf, 10, 10, 10, 10, PIX_SET);
602 pixm = pixRemoveBorderConnComps(pixsf, 8);
603
604 /* Now, where is the main block of text? We want to remove noise near
605 * the edge of the image, but to do that, we have to be convinced that
606 * (1) there is noise and (2) it is far enough from the text block
607 * and close enough to the edge. For each edge, if the block
608 * is more than mindist from that edge, then clean 'erasedist'
609 * pixels from the edge. */
610 pix1 = pixMorphSequence(pixm, "c50.50", flag);
611 ba1 = pixConnComp(pix1, NULL, 8);
612 ba2 = boxaSort(ba1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL);
613 pixGetDimensions(pix1, &w, &h, NULL);
614 nbox = boxaGetCount(ba2);
615 if (nbox > 1) {
616 box = boxaGetBox(ba2, 0, L_CLONE);
617 boxGetGeometry(box, &bx, &by, &bw, &bh);
618 left = (bx > mindist) ? erasedist : 0;
619 right = (w - bx - bw > mindist) ? erasedist : 0;
620 top = (by > mindist) ? erasedist : 0;
621 bottom = (h - by - bh > mindist) ? erasedist : 0;
622 pixSetOrClearBorder(pixm, left, right, top, bottom, PIX_CLR);
623 boxDestroy(&box);
624 }
625 pixDestroy(&pix1);
626 boxaDestroy(&ba1);
627 boxaDestroy(&ba2);
628
629 /* Locate the foreground region; don't bother cropping */
630 pixClipToForeground(pixm, NULL, &boxfg);
631
632 /* Sanity check the fg region. Make sure it's not confined
633 * to a thin boundary on the left and right sides of the image,
634 * in which case it is likely to be noise. */
635 if (boxfg) {
636 boxin = boxCreate(0.1 * w, 0, 0.8 * w, h);
637 boxIntersects(boxfg, boxin, &intersects);
638 boxDestroy(&boxin);
639 if (!intersects) boxDestroy(&boxfg);
640 }
641
642 boxd = NULL;
643 if (boxfg) {
644 boxAdjustSides(boxfg, boxfg, -2, 2, -2, 2); /* tiny expansion */
645 boxd = boxTransform(boxfg, 0, 0, 2.0, 2.0);
646
647 /* Save the debug image showing the box for this page */
648 if (pixac) {
649 pixg2 = pixConvert1To4Cmap(pixb);
650 pixRenderBoxArb(pixg2, boxd, 3, 255, 0, 0);
651 pixacompAddPix(pixac, pixg2, IFF_DEFAULT);
652 pixDestroy(&pixg2);
653 }
654 }
655
656 pixDestroy(&pixb);
657 pixDestroy(&pixb2);
658 pixDestroy(&pixseed);
659 pixDestroy(&pixsf);
660 pixDestroy(&pixm);
661 boxDestroy(&boxfg);
662 return boxd;
663}
664
665
666/*------------------------------------------------------------------*
667 * Extraction of characters from image with only text *
668 *------------------------------------------------------------------*/
691l_ok
693 l_int32 minw,
694 l_int32 minh,
695 BOXA **pboxa,
696 PIXA **ppixa,
697 PIX **ppixdebug)
698{
699l_int32 ncomp, i, xoff, yoff;
700BOXA *boxa1, *boxa2, *boxat1, *boxat2, *boxad;
701BOXAA *baa;
702PIX *pix, *pix1, *pix2, *pixdb;
703PIXA *pixa1, *pixadb;
704
705 if (pboxa) *pboxa = NULL;
706 if (ppixa) *ppixa = NULL;
707 if (ppixdebug) *ppixdebug = NULL;
708 if (!pixs || pixGetDepth(pixs) != 1)
709 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
710
711 /* Remove the small stuff */
712 pix1 = pixSelectBySize(pixs, minw, minh, 8, L_SELECT_IF_BOTH,
713 L_SELECT_IF_GT, NULL);
714
715 /* Small vertical close for consolidation */
716 pix2 = pixMorphSequence(pix1, "c1.10", 0);
717 pixDestroy(&pix1);
718
719 /* Get the 8-connected components */
720 boxa1 = pixConnComp(pix2, &pixa1, 8);
721 pixDestroy(&pix2);
722 boxaDestroy(&boxa1);
723
724 /* Split the components if obvious */
725 ncomp = pixaGetCount(pixa1);
726 boxa2 = boxaCreate(ncomp);
727 pixadb = (ppixdebug) ? pixaCreate(ncomp) : NULL;
728 for (i = 0; i < ncomp; i++) {
729 pix = pixaGetPix(pixa1, i, L_CLONE);
730 if (ppixdebug) {
731 boxat1 = pixSplitComponentWithProfile(pix, 10, 7, &pixdb);
732 if (pixdb)
733 pixaAddPix(pixadb, pixdb, L_INSERT);
734 } else {
735 boxat1 = pixSplitComponentWithProfile(pix, 10, 7, NULL);
736 }
737 pixaGetBoxGeometry(pixa1, i, &xoff, &yoff, NULL, NULL);
738 boxat2 = boxaTransform(boxat1, xoff, yoff, 1.0, 1.0);
739 boxaJoin(boxa2, boxat2, 0, -1);
740 pixDestroy(&pix);
741 boxaDestroy(&boxat1);
742 boxaDestroy(&boxat2);
743 }
744 pixaDestroy(&pixa1);
745
746 /* Generate the debug image */
747 if (ppixdebug) {
748 if (pixaGetCount(pixadb) > 0) {
749 *ppixdebug = pixaDisplayTiledInRows(pixadb, 32, 1500,
750 1.0, 0, 20, 1);
751 }
752 pixaDestroy(&pixadb);
753 }
754
755 /* Do a 2D sort on the bounding boxes, and flatten the result to 1D */
756 baa = boxaSort2d(boxa2, NULL, 0, 0, 5);
757 boxad = boxaaFlattenToBoxa(baa, NULL, L_CLONE);
758 boxaaDestroy(&baa);
759 boxaDestroy(&boxa2);
760
761 /* Optionally extract the pieces from the input image */
762 if (ppixa)
763 *ppixa = pixClipRectangles(pixs, boxad);
764 if (pboxa)
765 *pboxa = boxad;
766 else
767 boxaDestroy(&boxad);
768 return 0;
769}
770
771
790BOXA *
792 l_int32 delta,
793 l_int32 mindel,
794 PIX **ppixdebug)
795{
796l_int32 w, h, n2, i, firstmin, xmin, xshift;
797l_int32 nmin, nleft, nright, nsplit, isplit, ncomp;
798l_int32 *array1, *array2;
799BOX *box;
800BOXA *boxad;
801NUMA *na1, *na2, *nasplit;
802PIX *pix1, *pixdb;
803
804 if (ppixdebug) *ppixdebug = NULL;
805 if (!pixs || pixGetDepth(pixs) != 1)
806 return (BOXA *)ERROR_PTR("pixa undefined or not 1 bpp", __func__, NULL);
807 pixGetDimensions(pixs, &w, &h, NULL);
808
809 /* Closing to consolidate characters vertically */
810 pix1 = pixCloseSafeBrick(NULL, pixs, 1, 100);
811
812 /* Get extrema of column projections */
813 boxad = boxaCreate(2);
814 na1 = pixCountPixelsByColumn(pix1); /* w elements */
815 pixDestroy(&pix1);
816 na2 = numaFindExtrema(na1, delta, NULL);
817 n2 = numaGetCount(na2);
818 if (n2 < 3) { /* no split possible */
819 box = boxCreate(0, 0, w, h);
820 boxaAddBox(boxad, box, L_INSERT);
821 numaDestroy(&na1);
822 numaDestroy(&na2);
823 return boxad;
824 }
825
826 /* Look for sufficiently deep and narrow minima.
827 * All minima of of interest must be surrounded by max on each
828 * side. firstmin is the index of first possible minimum. */
829 array1 = numaGetIArray(na1);
830 array2 = numaGetIArray(na2);
831 if (ppixdebug) numaWriteStderr(na2);
832 firstmin = (array1[array2[0]] > array1[array2[1]]) ? 1 : 2;
833 nasplit = numaCreate(n2); /* will hold split locations */
834 for (i = firstmin; i < n2 - 1; i+= 2) {
835 xmin = array2[i];
836 nmin = array1[xmin];
837 if (xmin + 2 >= w) break; /* no more splits possible */
838 nleft = array1[xmin - 2];
839 nright = array1[xmin + 2];
840 if (ppixdebug) {
841 lept_stderr(
842 "Splitting: xmin = %d, w = %d; nl = %d, nmin = %d, nr = %d\n",
843 xmin, w, nleft, nmin, nright);
844 }
845 if (nleft - nmin >= mindel && nright - nmin >= mindel) /* split */
846 numaAddNumber(nasplit, xmin);
847 }
848 nsplit = numaGetCount(nasplit);
849
850#if 0
851 if (ppixdebug && nsplit > 0) {
852 lept_mkdir("lept/split");
853 gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/split/split", NULL);
854 }
855#endif
856
857 numaDestroy(&na1);
858 numaDestroy(&na2);
859 LEPT_FREE(array1);
860 LEPT_FREE(array2);
861
862 if (nsplit == 0) { /* no splitting */
863 numaDestroy(&nasplit);
864 box = boxCreate(0, 0, w, h);
865 boxaAddBox(boxad, box, L_INSERT);
866 return boxad;
867 }
868
869 /* Use split points to generate b.b. after splitting */
870 for (i = 0, xshift = 0; i < nsplit; i++) {
871 numaGetIValue(nasplit, i, &isplit);
872 box = boxCreate(xshift, 0, isplit - xshift, h);
873 boxaAddBox(boxad, box, L_INSERT);
874 xshift = isplit + 1;
875 }
876 box = boxCreate(xshift, 0, w - xshift, h);
877 boxaAddBox(boxad, box, L_INSERT);
878 numaDestroy(&nasplit);
879
880 if (ppixdebug) {
881 pixdb = pixConvertTo32(pixs);
882 ncomp = boxaGetCount(boxad);
883 for (i = 0; i < ncomp; i++) {
884 box = boxaGetBox(boxad, i, L_CLONE);
885 pixRenderBoxBlend(pixdb, box, 1, 255, 0, 0, 0.5);
886 boxDestroy(&box);
887 }
888 *ppixdebug = pixdb;
889 }
890
891 return boxad;
892}
893
894
895/*------------------------------------------------------------------*
896 * Extraction of lines of text *
897 *------------------------------------------------------------------*/
944PIXA *
946 l_int32 maxw,
947 l_int32 maxh,
948 l_int32 minw,
949 l_int32 minh,
950 l_int32 adjw,
951 l_int32 adjh,
952 PIXA *pixadb)
953{
954char buf[64];
955l_int32 res, csize, empty;
956BOXA *boxa1, *boxa2, *boxa3;
957PIX *pix1, *pix2, *pix3;
958PIXA *pixa1, *pixa2, *pixa3;
959
960 if (!pixs)
961 return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL);
962
963 /* Binarize carefully, if necessary */
964 if (pixGetDepth(pixs) > 1) {
965 pix2 = pixConvertTo8(pixs, FALSE);
966 pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190);
967 pix1 = pixThresholdToBinary(pix3, 150);
968 pixDestroy(&pix2);
969 pixDestroy(&pix3);
970 } else {
971 pix1 = pixClone(pixs);
972 }
973 pixZero(pix1, &empty);
974 if (empty) {
975 pixDestroy(&pix1);
976 L_INFO("no fg pixels in input image\n", __func__);
977 return NULL;
978 }
979 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
980
981 /* Remove any very tall or very wide connected components */
982 pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH,
983 L_SELECT_IF_LT, NULL);
984 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
985 pixDestroy(&pix1);
986
987 /* Filter to solidify the text lines within the x-height region.
988 * The closing (csize) bridges gaps between words. The opening
989 * removes isolated bridges between textlines. */
990 if ((res = pixGetXRes(pixs)) == 0) {
991 L_INFO("Resolution is not set: setting to 300 ppi\n", __func__);
992 res = 300;
993 }
994 csize = L_MIN(120., 60.0 * res / 300.0);
995 snprintf(buf, sizeof(buf), "c%d.1 + o%d.1", csize, csize / 3);
996 pix3 = pixMorphCompSequence(pix2, buf, 0);
997 if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
998
999 /* Extract the connected components. These should be dilated lines */
1000 boxa1 = pixConnComp(pix3, &pixa1, 4);
1001 if (pixadb) {
1002 pix1 = pixaDisplayRandomCmap(pixa1, 0, 0);
1003 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1004 pixaAddPix(pixadb, pix1, L_INSERT);
1005 }
1006
1007 /* Set minw, minh if default is requested */
1008 minw = (minw != 0) ? minw : (l_int32)(0.12 * res);
1009 minh = (minh != 0) ? minh : (l_int32)(0.07 * res);
1010
1011 /* Remove line components that are too small */
1012 pixa2 = pixaSelectBySize(pixa1, minw, minh, L_SELECT_IF_BOTH,
1013 L_SELECT_IF_GTE, NULL);
1014 if (pixadb) {
1015 pix1 = pixaDisplayRandomCmap(pixa2, 0, 0);
1016 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1017 pixaAddPix(pixadb, pix1, L_INSERT);
1018 pix1 = pixConvertTo32(pix2);
1019 pixRenderBoxaArb(pix1, pixa2->boxa, 2, 255, 0, 0);
1020 pixaAddPix(pixadb, pix1, L_INSERT);
1021 }
1022
1023 /* Selectively AND with the version before dilation, and save */
1024 boxa2 = pixaGetBoxa(pixa2, L_CLONE);
1025 boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh);
1026 pixa3 = pixClipRectangles(pix2, boxa3);
1027 if (pixadb) {
1028 pix1 = pixaDisplayRandomCmap(pixa3, 0, 0);
1029 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1030 pixaAddPix(pixadb, pix1, L_INSERT);
1031 }
1032
1033 pixDestroy(&pix2);
1034 pixDestroy(&pix3);
1035 pixaDestroy(&pixa1);
1036 pixaDestroy(&pixa2);
1037 boxaDestroy(&boxa1);
1038 boxaDestroy(&boxa2);
1039 boxaDestroy(&boxa3);
1040 return pixa3;
1041}
1042
1043
1082PIXA *
1084 l_int32 maxw,
1085 l_int32 maxh,
1086 l_int32 adjw,
1087 l_int32 adjh,
1088 PIXA *pixadb)
1089{
1090char buf[64];
1091l_int32 res, csize, empty;
1092BOXA *boxa1, *boxa2, *boxa3;
1093BOXAA *baa1;
1094PIX *pix1, *pix2, *pix3;
1095PIXA *pixa1, *pixa2;
1096
1097 if (!pixs)
1098 return (PIXA *)ERROR_PTR("pixs not defined", __func__, NULL);
1099
1100 /* Set maxw, maxh if default is requested */
1101 if ((res = pixGetXRes(pixs)) == 0) {
1102 L_INFO("Resolution is not set: setting to 300 ppi\n", __func__);
1103 res = 300;
1104 }
1105 maxw = (maxw != 0) ? maxw : (l_int32)(0.5 * res);
1106 maxh = (maxh != 0) ? maxh : (l_int32)(0.5 * res);
1107
1108 /* Binarize carefully, if necessary */
1109 if (pixGetDepth(pixs) > 1) {
1110 pix2 = pixConvertTo8(pixs, FALSE);
1111 pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 190);
1112 pix1 = pixThresholdToBinary(pix3, 150);
1113 pixDestroy(&pix2);
1114 pixDestroy(&pix3);
1115 } else {
1116 pix1 = pixClone(pixs);
1117 }
1118 pixZero(pix1, &empty);
1119 if (empty) {
1120 pixDestroy(&pix1);
1121 L_INFO("no fg pixels in input image\n", __func__);
1122 return NULL;
1123 }
1124 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
1125
1126 /* Remove any very tall or very wide connected components */
1127 pix2 = pixSelectBySize(pix1, maxw, maxh, 8, L_SELECT_IF_BOTH,
1128 L_SELECT_IF_LT, NULL);
1129 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
1130 pixDestroy(&pix1);
1131
1132 /* Filter to solidify the text lines within the x-height region.
1133 * The closing (csize) bridges gaps between words. */
1134 csize = L_MIN(120., 60.0 * res / 300.0);
1135 snprintf(buf, sizeof(buf), "c%d.1", csize);
1136 pix3 = pixMorphCompSequence(pix2, buf, 0);
1137 if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
1138
1139 /* Extract the connected components. These should be dilated lines */
1140 boxa1 = pixConnComp(pix3, &pixa1, 4);
1141 if (pixadb) {
1142 pix1 = pixaDisplayRandomCmap(pixa1, 0, 0);
1143 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1144 pixaAddPix(pixadb, pix1, L_INSERT);
1145 }
1146
1147 /* Do a 2-d sort, and generate a bounding box for each set of text
1148 * line segments that is aligned horizontally (i.e., has vertical
1149 * overlap) into a box representing a single text line. */
1150 baa1 = boxaSort2d(boxa1, NULL, -1, -1, 5);
1151 boxaaGetExtent(baa1, NULL, NULL, NULL, &boxa2);
1152 if (pixadb) {
1153 pix1 = pixConvertTo32(pix2);
1154 pixRenderBoxaArb(pix1, boxa2, 2, 255, 0, 0);
1155 pixaAddPix(pixadb, pix1, L_INSERT);
1156 }
1157
1158 /* Optionally adjust the sides of each text line box, and then
1159 * use the boxes to generate a pixa of the text lines. */
1160 boxa3 = boxaAdjustSides(boxa2, -adjw, adjw, -adjh, adjh);
1161 pixa2 = pixClipRectangles(pix2, boxa3);
1162 if (pixadb) {
1163 pix1 = pixaDisplayRandomCmap(pixa2, 0, 0);
1164 pixcmapResetColor(pixGetColormap(pix1), 0, 255, 255, 255);
1165 pixaAddPix(pixadb, pix1, L_INSERT);
1166 }
1167
1168 pixDestroy(&pix2);
1169 pixDestroy(&pix3);
1170 pixaDestroy(&pixa1);
1171 boxaDestroy(&boxa1);
1172 boxaDestroy(&boxa2);
1173 boxaDestroy(&boxa3);
1174 boxaaDestroy(&baa1);
1175 return pixa2;
1176}
1177
1178
1179/*------------------------------------------------------------------*
1180 * How many text columns *
1181 *------------------------------------------------------------------*/
1208l_ok
1210 l_float32 deltafract,
1211 l_float32 peakfract,
1212 l_float32 clipfract,
1213 l_int32 *pncols,
1214 PIXA *pixadb)
1215{
1216l_int32 w, h, res, i, n, npeak;
1217l_float32 scalefact, redfact, minval, maxval, val4, val5, fract;
1218BOX *box;
1219NUMA *na1, *na2, *na3, *na4, *na5;
1220PIX *pix1, *pix2, *pix3, *pix4, *pix5;
1221
1222 if (!pncols)
1223 return ERROR_INT("&ncols not defined", __func__, 1);
1224 *pncols = -1; /* init */
1225 if (!pixs || pixGetDepth(pixs) != 1)
1226 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
1227 if (deltafract < 0.15 || deltafract > 0.75)
1228 L_WARNING("deltafract not in [0.15 ... 0.75]\n", __func__);
1229 if (peakfract < 0.25 || peakfract > 0.9)
1230 L_WARNING("peakfract not in [0.25 ... 0.9]\n", __func__);
1231 if (clipfract < 0.0 || clipfract >= 0.5)
1232 return ERROR_INT("clipfract not in [0.0 ... 0.5)\n", __func__, 1);
1233 if (pixadb) pixaAddPix(pixadb, pixs, L_COPY);
1234
1235 /* Scale to between 37.5 and 75 ppi */
1236 if ((res = pixGetXRes(pixs)) == 0) {
1237 L_WARNING("resolution undefined; set to 300\n", __func__);
1238 pixSetResolution(pixs, 300, 300);
1239 res = 300;
1240 }
1241 if (res < 37) {
1242 L_WARNING("resolution %d very low\n", __func__, res);
1243 scalefact = 37.5 / res;
1244 pix1 = pixScale(pixs, scalefact, scalefact);
1245 } else {
1246 redfact = (l_float32)res / 37.5;
1247 if (redfact < 2.0)
1248 pix1 = pixClone(pixs);
1249 else if (redfact < 4.0)
1250 pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0);
1251 else if (redfact < 8.0)
1252 pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 0, 0);
1253 else if (redfact < 16.0)
1254 pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 0);
1255 else
1256 pix1 = pixReduceRankBinaryCascade(pixs, 1, 2, 2, 2);
1257 }
1258 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
1259
1260 /* Crop inner 80% of image */
1261 pixGetDimensions(pix1, &w, &h, NULL);
1262 box = boxCreate(clipfract * w, clipfract * h,
1263 (1.0 - 2 * clipfract) * w, (1.0 - 2 * clipfract) * h);
1264 pix2 = pixClipRectangle(pix1, box, NULL);
1265 pixGetDimensions(pix2, &w, &h, NULL);
1266 boxDestroy(&box);
1267 if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
1268
1269 /* Deskew */
1270 pix3 = pixDeskew(pix2, 0);
1271 if (pixadb) pixaAddPix(pixadb, pix3, L_COPY);
1272
1273 /* Close to increase column counts for text */
1274 pix4 = pixCloseSafeBrick(NULL, pix3, 5, 21);
1275 if (pixadb) pixaAddPix(pixadb, pix4, L_COPY);
1276 pixInvert(pix4, pix4);
1277 na1 = pixCountByColumn(pix4, NULL);
1278
1279 if (pixadb) {
1280 gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/plot", NULL);
1281 pix5 = pixRead("/tmp/lept/plot.png");
1282 pixaAddPix(pixadb, pix5, L_INSERT);
1283 }
1284
1285 /* Analyze the column counts. na4 gives the locations of
1286 * the extrema in normalized units (0.0 to 1.0) across the
1287 * cropped image. na5 gives the magnitude of the
1288 * extrema, normalized to the dynamic range. The peaks
1289 * are values that are at least peakfract of (max - min). */
1290 numaGetMax(na1, &maxval, NULL);
1291 numaGetMin(na1, &minval, NULL);
1292 fract = (l_float32)(maxval - minval) / h; /* is there much at all? */
1293 if (fract < 0.05) {
1294 L_INFO("very little content on page; 0 text columns\n", __func__);
1295 *pncols = 0;
1296 } else {
1297 na2 = numaFindExtrema(na1, deltafract * (maxval - minval), &na3);
1298 na4 = numaTransform(na2, 0, 1.0 / w);
1299 na5 = numaTransform(na3, -minval, 1.0 / (maxval - minval));
1300 n = numaGetCount(na4);
1301 for (i = 0, npeak = 0; i < n; i++) {
1302 numaGetFValue(na4, i, &val4);
1303 numaGetFValue(na5, i, &val5);
1304 if (val4 > 0.3 && val4 < 0.7 && val5 >= peakfract) {
1305 npeak++;
1306 L_INFO("Peak(loc,val) = (%5.3f,%5.3f)\n", __func__, val4, val5);
1307 }
1308 }
1309 *pncols = npeak + 1;
1310 numaDestroy(&na2);
1311 numaDestroy(&na3);
1312 numaDestroy(&na4);
1313 numaDestroy(&na5);
1314 }
1315
1316 pixDestroy(&pix1);
1317 pixDestroy(&pix2);
1318 pixDestroy(&pix3);
1319 pixDestroy(&pix4);
1320 numaDestroy(&na1);
1321 return 0;
1322}
1323
1324
1325/*------------------------------------------------------------------*
1326 * Decision text vs photo *
1327 *------------------------------------------------------------------*/
1354l_ok
1356 BOX *box,
1357 l_int32 *pistext,
1358 PIXA *pixadb)
1359{
1360l_int32 i, empty, maxw, w, h, n1, n2, n3, minlines, big_comp;
1361l_float32 ratio1, ratio2;
1362L_BMF *bmf;
1363BOXA *boxa1, *boxa2, *boxa3, *boxa4, *boxa5;
1364PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7;
1365PIXA *pixa1;
1366SEL *sel1;
1367
1368 if (!pistext)
1369 return ERROR_INT("&istext not defined", __func__, 1);
1370 *pistext = -1;
1371 if (!pixs)
1372 return ERROR_INT("pixs not defined", __func__, 1);
1373
1374 /* Crop, convert to 1 bpp, 300 ppi */
1375 if ((pix1 = pixPrepare1bpp(pixs, box, 0.1, 300)) == NULL)
1376 return ERROR_INT("pix1 not made", __func__, 1);
1377
1378 pixZero(pix1, &empty);
1379 if (empty) {
1380 pixDestroy(&pix1);
1381 L_INFO("pix is empty\n", __func__);
1382 return 0;
1383 }
1384 w = pixGetWidth(pix1);
1385
1386 /* Identify and remove tall, thin vertical lines (as found in tables)
1387 * that are up to 9 pixels wide. Make a hit-miss sel with an
1388 * 81 pixel vertical set of hits and with 3 pairs of misses that
1389 * are 10 pixels apart horizontally. It is necessary to use a
1390 * hit-miss transform; if we only opened with a vertical line of
1391 * hits, we would remove solid regions of pixels that are not
1392 * text or vertical lines. */
1393 pix2 = pixCreate(11, 81, 1);
1394 for (i = 0; i < 81; i++)
1395 pixSetPixel(pix2, 5, i, 1);
1396 sel1 = selCreateFromPix(pix2, 40, 5, NULL);
1397 selSetElement(sel1, 20, 0, SEL_MISS);
1398 selSetElement(sel1, 20, 10, SEL_MISS);
1399 selSetElement(sel1, 40, 0, SEL_MISS);
1400 selSetElement(sel1, 40, 10, SEL_MISS);
1401 selSetElement(sel1, 60, 0, SEL_MISS);
1402 selSetElement(sel1, 60, 10, SEL_MISS);
1403 pix3 = pixHMT(NULL, pix1, sel1);
1404 pix4 = pixSeedfillBinaryRestricted(NULL, pix3, pix1, 8, 5, 1000);
1405 pix5 = pixXor(NULL, pix1, pix4);
1406 pixDestroy(&pix2);
1407 selDestroy(&sel1);
1408
1409 /* Convert the text lines to separate long horizontal components */
1410 pix6 = pixMorphCompSequence(pix5, "c30.1 + o15.1 + c60.1 + o2.2", 0);
1411
1412 /* Estimate the distance to the bottom of the significant region */
1413 if (box) { /* use full height */
1414 pixGetDimensions(pix6, NULL, &h, NULL);
1415 } else { /* use height of region that has text lines */
1416 pixFindThreshFgExtent(pix6, 400, NULL, &h);
1417 }
1418
1419 if (pixadb) {
1420 bmf = bmfCreate(NULL, 6);
1421 pixaAddPixWithText(pixadb, pix1, 1, bmf, "threshold/crop to binary",
1422 0x0000ff00, L_ADD_BELOW);
1423 pixaAddPixWithText(pixadb, pix3, 2, bmf, "hit-miss for vertical line",
1424 0x0000ff00, L_ADD_BELOW);
1425 pixaAddPixWithText(pixadb, pix4, 2, bmf, "restricted seed-fill",
1426 0x0000ff00, L_ADD_BELOW);
1427 pixaAddPixWithText(pixadb, pix5, 2, bmf, "remove using xor",
1428 0x0000ff00, L_ADD_BELOW);
1429 pixaAddPixWithText(pixadb, pix6, 2, bmf, "make long horiz components",
1430 0x0000ff00, L_ADD_BELOW);
1431 }
1432
1433 /* Extract the connected components */
1434 if (pixadb) {
1435 boxa1 = pixConnComp(pix6, &pixa1, 8);
1436 pix7 = pixaDisplayRandomCmap(pixa1, 0, 0);
1437 pixcmapResetColor(pixGetColormap(pix7), 0, 255, 255, 255);
1438 pixaAddPixWithText(pixadb, pix7, 2, bmf, "show connected components",
1439 0x0000ff00, L_ADD_BELOW);
1440 pixDestroy(&pix7);
1441 pixaDestroy(&pixa1);
1442 bmfDestroy(&bmf);
1443 } else {
1444 boxa1 = pixConnComp(pix6, NULL, 8);
1445 }
1446
1447 /* Analyze the connected components. The following conditions
1448 * at 300 ppi must be satisfied if the image is text:
1449 * (1) There are no components that are wider than 400 pixels and
1450 * taller than 175 pixels.
1451 * (2) The second longest component is at least 60% of the
1452 * (possibly cropped) image width. This catches images
1453 * that don't have any significant content.
1454 * (3) Of the components that are at least 40% of the length
1455 * of the longest (n2), at least 80% of them must not exceed
1456 * 60 pixels in height.
1457 * (4) The number of those long, thin components (n3) must
1458 * equal or exceed a minimum that scales linearly with the
1459 * image height.
1460 * Most images that are not text fail more than one of these
1461 * conditions. */
1462 boxa2 = boxaSort(boxa1, L_SORT_BY_WIDTH, L_SORT_DECREASING, NULL);
1463 boxaGetBoxGeometry(boxa2, 1, NULL, NULL, &maxw, NULL); /* 2nd longest */
1464 boxa3 = boxaSelectBySize(boxa1, 0.4 * maxw, 0, L_SELECT_WIDTH,
1465 L_SELECT_IF_GTE, NULL);
1466 boxa4 = boxaSelectBySize(boxa3, 0, 60, L_SELECT_HEIGHT,
1467 L_SELECT_IF_LTE, NULL);
1468 boxa5 = boxaSelectBySize(boxa1, 400, 175, L_SELECT_IF_BOTH,
1469 L_SELECT_IF_GT, NULL);
1470 big_comp = (boxaGetCount(boxa5) == 0) ? 0 : 1;
1471 n1 = boxaGetCount(boxa1);
1472 n2 = boxaGetCount(boxa3);
1473 n3 = boxaGetCount(boxa4);
1474 ratio1 = (l_float32)maxw / (l_float32)w;
1475 ratio2 = (l_float32)n3 / (l_float32)n2;
1476 minlines = L_MAX(2, h / 125);
1477 if (big_comp || ratio1 < 0.6 || ratio2 < 0.8 || n3 < minlines)
1478 *pistext = 0;
1479 else
1480 *pistext = 1;
1481 if (pixadb) {
1482 if (*pistext == 1) {
1483 L_INFO("This is text: \n n1 = %d, n2 = %d, n3 = %d, "
1484 "minlines = %d\n maxw = %d, ratio1 = %4.2f, h = %d, "
1485 "big_comp = %d\n", __func__, n1, n2, n3, minlines,
1486 maxw, ratio1, h, big_comp);
1487 } else {
1488 L_INFO("This is not text: \n n1 = %d, n2 = %d, n3 = %d, "
1489 "minlines = %d\n maxw = %d, ratio1 = %4.2f, h = %d, "
1490 "big_comp = %d\n", __func__, n1, n2, n3, minlines,
1491 maxw, ratio1, h, big_comp);
1492 }
1493 }
1494
1495 boxaDestroy(&boxa1);
1496 boxaDestroy(&boxa2);
1497 boxaDestroy(&boxa3);
1498 boxaDestroy(&boxa4);
1499 boxaDestroy(&boxa5);
1500 pixDestroy(&pix1);
1501 pixDestroy(&pix3);
1502 pixDestroy(&pix4);
1503 pixDestroy(&pix5);
1504 pixDestroy(&pix6);
1505 return 0;
1506}
1507
1508
1518l_ok
1520 l_int32 thresh,
1521 l_int32 *ptop,
1522 l_int32 *pbot)
1523{
1524l_int32 i, n;
1525l_int32 *array;
1526NUMA *na;
1527
1528 if (ptop) *ptop = 0;
1529 if (pbot) *pbot = 0;
1530 if (!ptop && !pbot)
1531 return ERROR_INT("nothing to determine", __func__, 1);
1532 if (!pixs || pixGetDepth(pixs) != 1)
1533 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
1534
1535 na = pixCountPixelsByRow(pixs, NULL);
1536 n = numaGetCount(na);
1537 array = numaGetIArray(na);
1538 if (ptop) {
1539 for (i = 0; i < n; i++) {
1540 if (array[i] >= thresh) {
1541 *ptop = i;
1542 break;
1543 }
1544 }
1545 }
1546 if (pbot) {
1547 for (i = n - 1; i >= 0; i--) {
1548 if (array[i] >= thresh) {
1549 *pbot = i;
1550 break;
1551 }
1552 }
1553 }
1554 LEPT_FREE(array);
1555 numaDestroy(&na);
1556 return 0;
1557}
1558
1559
1560/*------------------------------------------------------------------*
1561 * Decision: table vs text *
1562 *------------------------------------------------------------------*/
1606l_ok
1608 BOX *box,
1609 l_int32 orient,
1610 l_int32 *pscore,
1611 PIXA *pixadb)
1612{
1613l_int32 empty, nhb, nvb, nvw, score, htfound;
1614PIX *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9;
1615
1616 if (!pscore)
1617 return ERROR_INT("&score not defined", __func__, 1);
1618 *pscore = -1;
1619 if (!pixs)
1620 return ERROR_INT("pixs not defined", __func__, 1);
1621
1622 /* Check if there is an image region. First convert to 1 bpp
1623 * at 175 ppi. If an image is found, assume there is no table. */
1624 pix1 = pixPrepare1bpp(pixs, box, 0.1, 175);
1625 pix2 = pixGenerateHalftoneMask(pix1, NULL, &htfound, NULL);
1626 if (htfound && pixadb) pixaAddPix(pixadb, pix2, L_COPY);
1627 pixDestroy(&pix1);
1628 pixDestroy(&pix2);
1629 if (htfound) {
1630 *pscore = 0;
1631 L_INFO("pix has an image region\n", __func__);
1632 return 0;
1633 }
1634
1635 /* Crop, convert to 1 bpp, 75 ppi */
1636 if ((pix1 = pixPrepare1bpp(pixs, box, 0.05, 75)) == NULL)
1637 return ERROR_INT("pix1 not made", __func__, 1);
1638
1639 pixZero(pix1, &empty);
1640 if (empty) {
1641 *pscore = 0;
1642 pixDestroy(&pix1);
1643 L_INFO("pix is empty\n", __func__);
1644 return 0;
1645 }
1646
1647 /* The 2x2 dilation on 75 ppi makes these two approaches very similar:
1648 * (1) pix1 = pixPrepare1bpp(..., 300); // 300 ppi resolution
1649 * pix2 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
1650 * (2) pix1 = pixPrepare1bpp(..., 75); // 75 ppi resolution
1651 * pix2 = pixDilateBrick(NULL, pix1, 2, 2);
1652 * But (2) is more efficient if the input image to pixPrepare1bpp()
1653 * is not at 300 ppi. */
1654 pix2 = pixDilateBrick(NULL, pix1, 2, 2);
1655
1656 /* Deskew both horizontally and vertically; rotate by 90
1657 * degrees if in landscape mode. */
1658 pix3 = pixDeskewBoth(pix2, 1);
1659 if (pixadb) {
1660 pixaAddPix(pixadb, pix2, L_COPY);
1661 pixaAddPix(pixadb, pix3, L_COPY);
1662 }
1663 if (orient == L_LANDSCAPE_MODE)
1664 pix4 = pixRotate90(pix3, 1);
1665 else
1666 pix4 = pixClone(pix3);
1667 pixDestroy(&pix1);
1668 pixDestroy(&pix2);
1669 pixDestroy(&pix3);
1670 pix1 = pixClone(pix4);
1671 pixDestroy(&pix4);
1672
1673 /* Look for horizontal and vertical lines */
1674 pix2 = pixMorphSequence(pix1, "o100.1 + c1.4", 0);
1675 pix3 = pixSeedfillBinary(NULL, pix2, pix1, 8);
1676 pix4 = pixMorphSequence(pix1, "o1.100 + c4.1", 0);
1677 pix5 = pixSeedfillBinary(NULL, pix4, pix1, 8);
1678 pix6 = pixOr(NULL, pix3, pix5);
1679 if (pixadb) {
1680 pixaAddPix(pixadb, pix2, L_COPY);
1681 pixaAddPix(pixadb, pix4, L_COPY);
1682 pixaAddPix(pixadb, pix3, L_COPY);
1683 pixaAddPix(pixadb, pix5, L_COPY);
1684 pixaAddPix(pixadb, pix6, L_COPY);
1685 }
1686 pixCountConnComp(pix2, 8, &nhb); /* number of horizontal black lines */
1687 pixCountConnComp(pix4, 8, &nvb); /* number of vertical black lines */
1688
1689 /* Remove the lines */
1690 pixSubtract(pix1, pix1, pix6);
1691 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
1692
1693 /* Remove noise pixels */
1694 pix7 = pixMorphSequence(pix1, "c4.1 + o8.1", 0);
1695 if (pixadb) pixaAddPix(pixadb, pix7, L_COPY);
1696
1697 /* Look for vertical white space. Invert to convert white bg
1698 * to fg. Use a single rank-1 2x reduction, which closes small
1699 * fg holes, for the final processing at 37.5 ppi.
1700 * The vertical opening is then about 3 inches on a 300 ppi image.
1701 * We also remove vertical whitespace that is less than 5 pixels
1702 * wide at this resolution (about 0.1 inches) */
1703 pixInvert(pix7, pix7);
1704 pix8 = pixMorphSequence(pix7, "r1 + o1.100", 0);
1705 pix9 = pixSelectBySize(pix8, 5, 0, 8, L_SELECT_WIDTH,
1706 L_SELECT_IF_GTE, NULL);
1707 pixCountConnComp(pix9, 8, &nvw); /* number of vertical white lines */
1708 if (pixadb) {
1709 pixaAddPix(pixadb, pixScale(pix8, 2.0, 2.0), L_INSERT);
1710 pixaAddPix(pixadb, pixScale(pix9, 2.0, 2.0), L_INSERT);
1711 }
1712
1713 /* Require at least 2 of the following 4 conditions for a table.
1714 * Some tables do not have black (fg) lines, and for those we
1715 * require more than 6 long vertical whitespace (bg) lines. */
1716 score = 0;
1717 if (nhb > 1) score++;
1718 if (nvb > 2) score++;
1719 if (nvw > 3) score++;
1720 if (nvw > 6) score++;
1721 *pscore = score;
1722
1723 pixDestroy(&pix1);
1724 pixDestroy(&pix2);
1725 pixDestroy(&pix3);
1726 pixDestroy(&pix4);
1727 pixDestroy(&pix5);
1728 pixDestroy(&pix6);
1729 pixDestroy(&pix7);
1730 pixDestroy(&pix8);
1731 pixDestroy(&pix9);
1732 return 0;
1733}
1734
1735
1754PIX *
1756 BOX *box,
1757 l_float32 cropfract,
1758 l_int32 outres)
1759{
1760l_int32 w, h, res;
1761l_float32 factor;
1762BOX *box1;
1763PIX *pix1, *pix2, *pix3, *pix4, *pix5;
1764
1765 if (!pixs)
1766 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
1767
1768 /* Crop the image. If no box is given, use %cropfract to remove
1769 * pixels near the image boundary; this helps avoid false
1770 * negatives from noise that is often found there. */
1771 if (box) {
1772 pix1 = pixClipRectangle(pixs, box, NULL);
1773 } else {
1774 pixGetDimensions(pixs, &w, &h, NULL);
1775 box1 = boxCreate((l_int32)(cropfract * w), (l_int32)(cropfract * h),
1776 (l_int32)((1.0 - 2 * cropfract) * w),
1777 (l_int32)((1.0 - 2 * cropfract) * h));
1778 pix1 = pixClipRectangle(pixs, box1, NULL);
1779 boxDestroy(&box1);
1780 }
1781
1782 /* Convert to 1 bpp with adaptive background cleaning */
1783 if (pixGetDepth(pixs) > 1) {
1784 pix2 = pixConvertTo8(pix1, 0);
1785 pix3 = pixCleanBackgroundToWhite(pix2, NULL, NULL, 1.0, 70, 160);
1786 pixDestroy(&pix1);
1787 pixDestroy(&pix2);
1788 if (!pix3) {
1789 L_INFO("pix cleaning failed\n", __func__);
1790 return NULL;
1791 }
1792 pix4 = pixThresholdToBinary(pix3, 200);
1793 pixDestroy(&pix3);
1794 } else {
1795 pix4 = pixClone(pix1);
1796 pixDestroy(&pix1);
1797 }
1798
1799 /* Scale the image to the requested output resolution;
1800 do not scale if %outres <= 0 */
1801 if (outres <= 0)
1802 return pix4;
1803 if ((res = pixGetXRes(pixs)) == 0) {
1804 L_WARNING("Resolution is not set: using 300 ppi\n", __func__);
1805 res = 300;
1806 }
1807 if (res != outres) {
1808 factor = (l_float32)outres / (l_float32)res;
1809 pix5 = pixScale(pix4, factor, factor);
1810 } else {
1811 pix5 = pixClone(pix4);
1812 }
1813 pixDestroy(&pix4);
1814 return pix5;
1815}
1816
1817
1818/*------------------------------------------------------------------*
1819 * Estimate the grayscale background value *
1820 *------------------------------------------------------------------*/
1837l_ok
1839 l_int32 darkthresh,
1840 l_float32 edgecrop,
1841 l_int32 *pbg)
1842{
1843l_int32 w, h, sampling;
1844l_float32 fbg;
1845BOX *box;
1846PIX *pix1, *pix2, *pixm;
1847
1848 if (!pbg)
1849 return ERROR_INT("&bg not defined", __func__, 1);
1850 *pbg = 0;
1851 if (!pixs || pixGetDepth(pixs) != 8)
1852 return ERROR_INT("pixs not defined or not 8 bpp", __func__, 1);
1853 if (darkthresh > 128)
1854 L_WARNING("darkthresh unusually large\n", __func__);
1855 if (edgecrop < 0.0 || edgecrop >= 1.0)
1856 return ERROR_INT("edgecrop not in [0.0 ... 1.0)", __func__, 1);
1857
1858 pix1 = pixRemoveColormap(pixs, REMOVE_CMAP_TO_GRAYSCALE);
1859 pixGetDimensions(pix1, &w, &h, NULL);
1860
1861 /* Optionally crop inner part of image */
1862 if (edgecrop > 0.0) {
1863 box = boxCreate(0.5 * edgecrop * w, 0.5 * edgecrop * h,
1864 (1.0 - edgecrop) * w, (1.0 - edgecrop) * h);
1865 pix2 = pixClipRectangle(pix1, box, NULL);
1866 boxDestroy(&box);
1867 } else {
1868 pix2 = pixClone(pix1);
1869 }
1870
1871 /* We will use no more than 50K samples */
1872 sampling = L_MAX(1, (l_int32)sqrt((l_float64)(w * h) / 50000. + 0.5));
1873
1874 /* Optionally make a mask over all pixels lighter than %darkthresh */
1875 pixm = NULL;
1876 if (darkthresh > 0) {
1877 pixm = pixThresholdToBinary(pix2, darkthresh);
1878 pixInvert(pixm, pixm);
1879 }
1880
1881 pixGetRankValueMasked(pix2, pixm, 0, 0, sampling, 0.5, &fbg, NULL);
1882 *pbg = (l_int32)(fbg + 0.5);
1883 pixDestroy(&pix1);
1884 pixDestroy(&pix2);
1885 pixDestroy(&pixm);
1886 return 0;
1887}
1888
1889
1890/*---------------------------------------------------------------------*
1891 * Largest white or black rectangles in an image *
1892 *---------------------------------------------------------------------*/
1919l_ok
1921 l_int32 polarity,
1922 l_int32 nrect,
1923 BOXA **pboxa,
1924 PIX **ppixdb)
1925{
1926l_int32 i, op, bx, by, bw, bh;
1927BOX *box;
1928BOXA *boxa;
1929PIX *pix;
1930
1931 if (ppixdb) *ppixdb = NULL;
1932 if (!pboxa)
1933 return ERROR_INT("&boxa not defined", __func__, 1);
1934 *pboxa = NULL;
1935 if (!pixs || pixGetDepth(pixs) != 1)
1936 return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
1937 if (polarity != 0 && polarity != 1)
1938 return ERROR_INT("invalid polarity", __func__, 1);
1939 if (nrect > 1000) {
1940 L_WARNING("large num rectangles = %d requested; using 1000\n",
1941 __func__, nrect);
1942 nrect = 1000;
1943 }
1944
1945 pix = pixCopy(NULL, pixs);
1946 boxa = boxaCreate(nrect);
1947 *pboxa = boxa;
1948
1949 /* Sequentially find largest rectangle and fill with opposite color */
1950 for (i = 0; i < nrect; i++) {
1951 if (pixFindLargestRectangle(pix, polarity, &box, NULL) == 1) {
1952 boxDestroy(&box);
1953 L_ERROR("failure in pixFindLargestRectangle\n", __func__);
1954 break;
1955 }
1956 boxaAddBox(boxa, box, L_INSERT);
1957 op = (polarity == 0) ? PIX_SET : PIX_CLR;
1958 boxGetGeometry(box, &bx, &by, &bw, &bh);
1959 pixRasterop(pix, bx, by, bw, bh, op, NULL, 0, 0);
1960 }
1961
1962 if (ppixdb)
1963 *ppixdb = pixDrawBoxaRandom(pixs, boxa, 3);
1964
1965 pixDestroy(&pix);
1966 return 0;
1967}
1968
1969
2020l_ok
2022 l_int32 polarity,
2023 BOX **pbox,
2024 PIX **ppixdb)
2025{
2026l_int32 i, j, w, h, d, wpls, val;
2027l_int32 wp, hp, w1, w2, h1, h2, wmin, hmin, area1, area2;
2028l_int32 xmax, ymax; /* LR corner of the largest rectangle */
2029l_int32 maxarea, wmax, hmax, vertdist, horizdist, prevfg;
2030l_int32 *lowestfg;
2031l_uint32 *datas, *lines;
2032l_uint32 **linew, **lineh;
2033BOX *box;
2034PIX *pixw, *pixh; /* keeps the width and height for the largest */
2035 /* rectangles whose LR corner is located there. */
2036
2037 if (ppixdb) *ppixdb = NULL;
2038 if (!pbox)
2039 return ERROR_INT("&box not defined", __func__, 1);
2040 *pbox = NULL;
2041 if (!pixs)
2042 return ERROR_INT("pixs not defined", __func__, 1);
2043 pixGetDimensions(pixs, &w, &h, &d);
2044 if (d != 1)
2045 return ERROR_INT("pixs not 1 bpp", __func__, 1);
2046 if (polarity != 0 && polarity != 1)
2047 return ERROR_INT("invalid polarity", __func__, 1);
2048
2049 /* Initialize lowest "fg" seen so far for each column */
2050 lowestfg = (l_int32 *)LEPT_CALLOC(w, sizeof(l_int32));
2051 for (i = 0; i < w; i++)
2052 lowestfg[i] = -1;
2053
2054 /* The combination (val ^ polarity) is the color for which we
2055 * are searching for the maximum rectangle. For polarity == 0,
2056 * we search in the bg (white). */
2057 pixw = pixCreate(w, h, 32); /* stores width */
2058 pixh = pixCreate(w, h, 32); /* stores height */
2059 linew = (l_uint32 **)pixGetLinePtrs(pixw, NULL);
2060 lineh = (l_uint32 **)pixGetLinePtrs(pixh, NULL);
2061 datas = pixGetData(pixs);
2062 wpls = pixGetWpl(pixs);
2063 maxarea = xmax = ymax = wmax = hmax = 0;
2064 for (i = 0; i < h; i++) {
2065 lines = datas + i * wpls;
2066 prevfg = -1;
2067 for (j = 0; j < w; j++) {
2068 val = GET_DATA_BIT(lines, j);
2069 if ((val ^ polarity) == 0) { /* bg (0) if polarity == 0, etc. */
2070 if (i == 0 && j == 0) {
2071 wp = hp = 1;
2072 } else if (i == 0) {
2073 wp = linew[i][j - 1] + 1;
2074 hp = 1;
2075 } else if (j == 0) {
2076 wp = 1;
2077 hp = lineh[i - 1][j] + 1;
2078 } else {
2079 /* Expand #1 prev rectangle down */
2080 w1 = linew[i - 1][j];
2081 h1 = lineh[i - 1][j];
2082 horizdist = j - prevfg;
2083 wmin = L_MIN(w1, horizdist); /* width of new rectangle */
2084 area1 = wmin * (h1 + 1);
2085
2086 /* Expand #2 prev rectangle to right */
2087 w2 = linew[i][j - 1];
2088 h2 = lineh[i][j - 1];
2089 vertdist = i - lowestfg[j];
2090 hmin = L_MIN(h2, vertdist); /* height of new rectangle */
2091 area2 = hmin * (w2 + 1);
2092
2093 if (area1 > area2) {
2094 wp = wmin;
2095 hp = h1 + 1;
2096 } else {
2097 wp = w2 + 1;
2098 hp = hmin;
2099 }
2100 }
2101 } else { /* fg (1) if polarity == 0; bg (0) if polarity == 1 */
2102 prevfg = j;
2103 lowestfg[j] = i;
2104 wp = hp = 0;
2105 }
2106 linew[i][j] = wp;
2107 lineh[i][j] = hp;
2108 if (wp * hp > maxarea) {
2109 maxarea = wp * hp;
2110 xmax = j;
2111 ymax = i;
2112 wmax = wp;
2113 hmax = hp;
2114 }
2115 }
2116 }
2117
2118 /* Translate from LR corner to Box coords (UL corner, w, h) */
2119 box = boxCreate(xmax - wmax + 1, ymax - hmax + 1, wmax, hmax);
2120 *pbox = box;
2121
2122 if (ppixdb) {
2123 *ppixdb = pixConvertTo8(pixs, TRUE);
2124 pixRenderHashBoxArb(*ppixdb, box, 6, 2, L_NEG_SLOPE_LINE, 1, 255, 0, 0);
2125 }
2126
2127 LEPT_FREE(linew);
2128 LEPT_FREE(lineh);
2129 LEPT_FREE(lowestfg);
2130 pixDestroy(&pixw);
2131 pixDestroy(&pixh);
2132 return 0;
2133}
2134
2135
2136/*---------------------------------------------------------------------*
2137 * Generate rectangle inside connected component *
2138 *---------------------------------------------------------------------*/
2170BOX *
2171pixFindRectangleInCC(PIX *pixs,
2172 BOX *boxs,
2173 l_float32 fract,
2174 l_int32 dir,
2175 l_int32 select,
2176 l_int32 debug)
2177{
2178l_int32 x, y, i, w, h, w1, h1, w2, h2, found, res;
2179l_int32 xfirst, xlast, xstart, yfirst, ylast, length;
2180BOX *box1, *box2, *box3, *box4, *box5;
2181PIX *pix1, *pix2, *pixdb1, *pixdb2;
2182PIXA *pixadb;
2183
2184 if (!pixs || pixGetDepth(pixs) != 1)
2185 return (BOX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
2186 if (fract <= 0.0 || fract > 1.0)
2187 return (BOX *)ERROR_PTR("invalid fraction", __func__, NULL);
2188 if (dir != L_SCAN_VERTICAL && dir != L_SCAN_HORIZONTAL)
2189 return (BOX *)ERROR_PTR("invalid scan direction", __func__, NULL);
2190 if (select != L_GEOMETRIC_UNION && select != L_GEOMETRIC_INTERSECTION &&
2191 select != L_LARGEST_AREA && select != L_SMALLEST_AREA)
2192 return (BOX *)ERROR_PTR("invalid select", __func__, NULL);
2193
2194 /* Extract the c.c. if necessary */
2195 x = y = 0;
2196 if (boxs) {
2197 pix1 = pixClipRectangle(pixs, boxs, NULL);
2198 boxGetGeometry(boxs, &x, &y, NULL, NULL);
2199 } else {
2200 pix1 = pixClone(pixs);
2201 }
2202
2203 /* All fast scans are horizontal; rotate 90 deg cw if necessary */
2204 if (dir == L_SCAN_VERTICAL)
2205 pix2 = pixRotate90(pix1, 1);
2206 else /* L_SCAN_HORIZONTAL */
2207 pix2 = pixClone(pix1);
2208 pixGetDimensions(pix2, &w, &h, NULL);
2209
2210 pixadb = (debug) ? pixaCreate(0) : NULL;
2211 pixdb1 = NULL;
2212 if (pixadb) {
2213 lept_mkdir("lept/rect");
2214 pixaAddPix(pixadb, pix1, L_CLONE);
2215 pixdb1 = pixConvertTo32(pix2);
2216 }
2217 pixDestroy(&pix1);
2218
2219 /* Scanning down, find the first scanline with a long enough run.
2220 * That run goes from (xfirst, yfirst) to (xlast, yfirst). */
2221 found = FALSE;
2222 for (i = 0; i < h; i++) {
2223 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2224 if (length >= (l_int32)(fract * w + 0.5)) {
2225 yfirst = i;
2226 xfirst = xstart;
2227 xlast = xfirst + length - 1;
2228 found = TRUE;
2229 break;
2230 }
2231 }
2232 if (!found) {
2233 L_WARNING("no run of sufficient size was found\n", __func__);
2234 pixDestroy(&pix2);
2235 pixDestroy(&pixdb1);
2236 pixaDestroy(&pixadb);
2237 return NULL;
2238 }
2239
2240 /* Continue down until the condition fails */
2241 w1 = xlast - xfirst + 1;
2242 h1 = h - yfirst; /* init */
2243 ylast = h - 1; /* init */
2244 for (i = yfirst + 1; i < h; i++) {
2245 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2246 if (xstart > xfirst || (xstart + length - 1 < xlast) ||
2247 i == h - 1) {
2248 ylast = i - 1;
2249 h1 = ylast - yfirst + 1;
2250 break;
2251 }
2252 }
2253 box1 = boxCreate(xfirst, yfirst, w1, h1);
2254
2255 /* Scanning up, find the first scanline with a long enough run.
2256 * That run goes from (xfirst, ylast) to (xlast, ylast). */
2257 for (i = h - 1; i >= 0; i--) {
2258 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2259 if (length >= (l_int32)(fract * w + 0.5)) {
2260 ylast = i;
2261 xfirst = xstart;
2262 xlast = xfirst + length - 1;
2263 break;
2264 }
2265 }
2266
2267 /* Continue up until the condition fails */
2268 w2 = xlast - xfirst + 1;
2269 h2 = ylast + 1; /* initialize */
2270 for (i = ylast - 1; i >= 0; i--) {
2271 pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length);
2272 if (xstart > xfirst || (xstart + length - 1 < xlast) ||
2273 i == 0) {
2274 yfirst = i + 1;
2275 h2 = ylast - yfirst + 1;
2276 break;
2277 }
2278 }
2279 box2 = boxCreate(xfirst, yfirst, w2, h2);
2280 pixDestroy(&pix2);
2281
2282 if (pixadb) {
2283 pixRenderBoxArb(pixdb1, box1, 2, 255, 0, 0);
2284 pixRenderBoxArb(pixdb1, box2, 2, 0, 255, 0);
2285 pixaAddPix(pixadb, pixdb1, L_INSERT);
2286 }
2287
2288 /* Select the final result from the two boxes */
2289 if (select == L_GEOMETRIC_UNION)
2290 box3 = boxBoundingRegion(box1, box2);
2291 else if (select == L_GEOMETRIC_INTERSECTION)
2292 box3 = boxOverlapRegion(box1, box2);
2293 else if (select == L_LARGEST_AREA)
2294 box3 = (w1 * h1 >= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
2295 else /* select == L_SMALLEST_AREA) */
2296 box3 = (w1 * h1 <= w2 * h2) ? boxCopy(box1) : boxCopy(box2);
2297 boxDestroy(&box1);
2298 boxDestroy(&box2);
2299
2300 /* Rotate the box 90 degrees ccw if necessary */
2301 box4 = NULL;
2302 if (box3) {
2303 if (dir == L_SCAN_VERTICAL)
2304 box4 = boxRotateOrth(box3, w, h, 3);
2305 else
2306 box4 = boxCopy(box3);
2307 }
2308
2309 /* Transform back to global coordinates if %boxs exists */
2310 box5 = (box4) ? boxTransform(box4, x, y, 1.0, 1.0) : NULL;
2311 boxDestroy(&box3);
2312 boxDestroy(&box4);
2313
2314 /* Debug output */
2315 if (pixadb) {
2316 pixdb1 = pixConvertTo8(pixs, 0);
2317 pixAddConstantGray(pixdb1, 190);
2318 pixdb2 = pixConvertTo32(pixdb1);
2319 if (box5) pixRenderBoxArb(pixdb2, box5, 4, 0, 0, 255);
2320 pixaAddPix(pixadb, pixdb2, L_INSERT);
2321 res = pixGetXRes(pixs);
2322 L_INFO("Writing debug files to /tmp/lept/rect/\n", __func__);
2323 pixaConvertToPdf(pixadb, res, 1.0, L_DEFAULT_ENCODE, 75, NULL,
2324 "/tmp/lept/rect/fitrect.pdf");
2325 pix1 = pixaDisplayTiledAndScaled(pixadb, 32, 800, 1, 0, 40, 2);
2326 pixWrite("/tmp/lept/rect/fitrect.png", pix1, IFF_PNG);
2327 pixDestroy(&pix1);
2328 pixDestroy(&pixdb1);
2329 pixaDestroy(&pixadb);
2330 }
2331
2332 return box5;
2333}
2334
2335/*------------------------------------------------------------------*
2336 * Automatic photoinvert for OCR *
2337 *------------------------------------------------------------------*/
2357PIX *
2359 l_int32 thresh,
2360 PIX **ppixm,
2361 PIXA *pixadb)
2362{
2363l_int32 i, n, empty, x, y, w, h;
2364l_float32 fgfract;
2365BOX *box1;
2366BOXA *boxa1;
2367PIX *pix1, *pix2, *pix3, *pix4, *pix5;
2368
2369 if (ppixm) *ppixm = NULL;
2370 if (!pixs)
2371 return (PIX *)ERROR_PTR("pixs not defined", __func__, NULL);
2372 if (thresh == 0) thresh = 128;
2373
2374 if ((pix1 = pixConvertTo1(pixs, thresh)) == NULL)
2375 return (PIX *)ERROR_PTR("pix1 not made", __func__, NULL);
2376 if (pixadb) pixaAddPix(pixadb, pix1, L_COPY);
2377
2378 /* Identify regions for photo-inversion:
2379 * (1) Start with the halftone mask.
2380 * (2) Eliminate ordinary text and halftones in the mask.
2381 * (3) Some regions of inverted text may have been removed in
2382 * steps (1) and (2). Conditionally fill holes in the mask,
2383 * but do not fill out to the bounding rect. */
2384 pix2 = pixGenerateHalftoneMask(pix1, NULL, NULL, pixadb);
2385 pix3 = pixMorphSequence(pix2, "o15.15 + c25.25", 0); /* remove noise */
2386 pix4 = pixFillHolesToBoundingRect(pix3, 1, 0.5, 1.0);
2387 if (pixadb) {
2388 pixaAddPix(pixadb, pix2, L_CLONE);
2389 pixaAddPix(pixadb, pix3, L_CLONE);
2390 pixaAddPix(pixadb, pix4, L_COPY);
2391 }
2392 pixDestroy(&pix2);
2393 pixDestroy(&pix3);
2394 pixZero(pix4, &empty);
2395 if (empty) {
2396 pixDestroy(&pix4);
2397 return pix1;
2398 }
2399
2400 /* Examine each component and validate the inversion.
2401 * Require at least 60% of pixels under each component to be FG. */
2402 boxa1 = pixConnCompBB(pix4, 8);
2403 n = boxaGetCount(boxa1);
2404 for (i = 0; i < n; i++) {
2405 box1 = boxaGetBox(boxa1, i, L_COPY);
2406 pix5 = pixClipRectangle(pix1, box1, NULL);
2407 pixForegroundFraction(pix5, &fgfract);
2408 if (pixadb) lept_stderr("fg fraction: %5.3f\n", fgfract);
2409 boxGetGeometry(box1, &x, &y, &w, &h);
2410 if (fgfract < 0.6) /* erase from the mask */
2411 pixRasterop(pix4, x, y, w, h, PIX_CLR, NULL, 0, 0);
2412 pixDestroy(&pix5);
2413 boxDestroy(&box1);
2414 }
2415 boxaDestroy(&boxa1);
2416 pixZero(pix4, &empty);
2417 if (empty) {
2418 pixDestroy(&pix4);
2419 return pix1;
2420 }
2421
2422 /* Combine pixels of the photo-inverted pix with the binarized input */
2423 pix5 = pixInvert(NULL, pix1);
2424 pixCombineMasked(pix1, pix5, pix4);
2425
2426 if (pixadb) {
2427 pixaAddPix(pixadb, pix5, L_CLONE);
2428 pixaAddPix(pixadb, pix1, L_COPY);
2429 }
2430 pixDestroy(&pix5);
2431 if (ppixm)
2432 *ppixm = pix4;
2433 else
2434 pixDestroy(&pix4);
2435 return pix1;
2436}
PIX * pixCleanBackgroundToWhite(PIX *pixs, PIX *pixim, PIX *pixg, l_float32 gamma, l_int32 blackval, l_int32 whiteval)
pixCleanBackgroundToWhite()
Definition adaptmap.c:196
struct Numa NUMA
Definition array.h:66
#define GET_DATA_BIT(pdata, n)
@ L_DEFAULT_ENCODE
Definition imageio.h:158
l_ok pixFindThreshFgExtent(PIX *pixs, l_int32 thresh, l_int32 *ptop, l_int32 *pbot)
pixFindThreshFgExtent()
Definition pageseg.c:1519
PIX * pixGenTextblockMask(PIX *pixs, PIX *pixvws, PIXA *pixadb)
pixGenTextblockMask()
Definition pageseg.c:476
l_ok pixGetRegionsBinary(PIX *pixs, PIX **ppixhm, PIX **ppixtm, PIX **ppixtb, PIXA *pixadb)
pixGetRegionsBinary()
Definition pageseg.c:114
PIX * pixGenTextlineMask(PIX *pixs, PIX **ppixvws, l_int32 *ptlfound, PIXA *pixadb)
pixGenTextlineMask()
Definition pageseg.c:386
l_ok pixEstimateBackground(PIX *pixs, l_int32 darkthresh, l_float32 edgecrop, l_int32 *pbg)
pixEstimateBackground()
Definition pageseg.c:1838
BOX * pixFindPageForeground(PIX *pixs, l_int32 threshold, l_int32 mindist, l_int32 erasedist, l_int32 showmorph, PIXAC *pixac)
pixFindPageForeground()
Definition pageseg.c:564
PIXA * pixExtractRawTextlines(PIX *pixs, l_int32 maxw, l_int32 maxh, l_int32 adjw, l_int32 adjh, PIXA *pixadb)
pixExtractRawTextlines()
Definition pageseg.c:1083
PIX * pixAutoPhotoinvert(PIX *pixs, l_int32 thresh, PIX **ppixm, PIXA *pixadb)
pixFindRectangleInCC()
Definition pageseg.c:2358
PIX * pixPrepare1bpp(PIX *pixs, BOX *box, l_float32 cropfract, l_int32 outres)
pixPrepare1bpp()
Definition pageseg.c:1755
PIXA * pixExtractTextlines(PIX *pixs, l_int32 maxw, l_int32 maxh, l_int32 minw, l_int32 minh, l_int32 adjw, l_int32 adjh, PIXA *pixadb)
pixExtractTextlines()
Definition pageseg.c:945
l_ok pixFindLargeRectangles(PIX *pixs, l_int32 polarity, l_int32 nrect, BOXA **pboxa, PIX **ppixdb)
pixFindLargeRectangles()
Definition pageseg.c:1920
l_ok pixCountTextColumns(PIX *pixs, l_float32 deltafract, l_float32 peakfract, l_float32 clipfract, l_int32 *pncols, PIXA *pixadb)
pixCountTextColumns()
Definition pageseg.c:1209
l_ok pixSplitIntoCharacters(PIX *pixs, l_int32 minw, l_int32 minh, BOXA **pboxa, PIXA **ppixa, PIX **ppixdebug)
pixSplitIntoCharacters()
Definition pageseg.c:692
PIX * pixGenHalftoneMask(PIX *pixs, PIX **ppixtext, l_int32 *phtfound, l_int32 debug)
pixGenHalftoneMask()
Definition pageseg.c:280
l_ok pixFindLargestRectangle(PIX *pixs, l_int32 polarity, BOX **pbox, PIX **ppixdb)
pixFindLargestRectangle()
Definition pageseg.c:2021
PIX * pixGenerateHalftoneMask(PIX *pixs, PIX **ppixtext, l_int32 *phtfound, PIXA *pixadb)
pixGenerateHalftoneMask()
Definition pageseg.c:305
l_ok pixDecideIfTable(PIX *pixs, BOX *box, l_int32 orient, l_int32 *pscore, PIXA *pixadb)
pixDecideIfTable()
Definition pageseg.c:1607
BOXA * pixSplitComponentWithProfile(PIX *pixs, l_int32 delta, l_int32 mindel, PIX **ppixdebug)
pixSplitComponentWithProfile()
Definition pageseg.c:791
l_ok pixDecideIfText(PIX *pixs, BOX *box, l_int32 *pistext, PIXA *pixadb)
pixDecideIfText()
Definition pageseg.c:1355
@ REMOVE_CMAP_TO_GRAYSCALE
Definition pix.h:381
@ L_SELECT_IF_LTE
Definition pix.h:577
@ L_SELECT_IF_LT
Definition pix.h:575
@ L_SELECT_IF_GT
Definition pix.h:576
@ L_SELECT_IF_GTE
Definition pix.h:578
struct Ptaa PTAA
Definition pix.h:267
@ L_SORT_BY_AREA
Definition pix.h:537
@ L_SORT_BY_WIDTH
Definition pix.h:532
@ L_COPY
Definition pix.h:505
@ L_CLONE
Definition pix.h:506
@ L_INSERT
Definition pix.h:504
@ L_NEG_SLOPE_LINE
Definition pix.h:809
struct Pix PIX
Definition pix.h:228
struct Box BOX
Definition pix.h:252
#define PIX_CLR
Definition pix.h:447
@ L_SORT_DECREASING
Definition pix.h:523
struct PixColormap PIXCMAP
Definition pix.h:231
@ L_ADD_BELOW
Definition pix.h:1003
@ L_SCAN_VERTICAL
Definition pix.h:835
@ L_SCAN_HORIZONTAL
Definition pix.h:834
#define PIX_SET
Definition pix.h:448
@ L_SELECT_IF_BOTH
Definition pix.h:599
@ L_SELECT_IF_EITHER
Definition pix.h:597
@ L_SELECT_WIDTH
Definition pix.h:593
@ L_SELECT_HEIGHT
Definition pix.h:594
@ L_GEOMETRIC_UNION
Definition pix.h:889
@ L_SMALLEST_AREA
Definition pix.h:892
@ L_LARGEST_AREA
Definition pix.h:891
@ L_GEOMETRIC_INTERSECTION
Definition pix.h:890
struct PixaComp PIXAC
Definition pix.h:303
@ L_LANDSCAPE_MODE
Definition pix.h:819
struct Pixa PIXA
Definition pix.h:243
struct Boxa BOXA
Definition pix.h:255
struct Boxaa BOXAA
Definition pix.h:258
struct Boxa * boxa