Leptonica 1.83.1
Image processing and image analysis suite
Loading...
Searching...
No Matches
pdfio2.c
Go to the documentation of this file.
/*====================================================================*
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
 -
 -  Redistribution and use in source and binary forms, with or without
 -  modification, are permitted provided that the following conditions
 -  are met:
 -  1. Redistributions of source code must retain the above copyright
 -     notice, this list of conditions and the following disclaimer.
 -  2. Redistributions in binary form must reproduce the above
 -     copyright notice, this list of conditions and the following
 -     disclaimer in the documentation and/or other materials
 -     provided with the distribution.
 -
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *====================================================================*/
26
93
94#ifdef HAVE_CONFIG_H
95#include <config_auto.h>
96#endif /* HAVE_CONFIG_H */
97
98#include <string.h>
99#include <math.h>
100#include "allheaders.h"
101
102/* --------------------------------------------*/
103#if USE_PDFIO /* defined in environ.h */
104 /* --------------------------------------------*/
105
106 /* Typical scan resolution in ppi (pixels/inch) */
107static const l_int32 DefaultInputRes = 300;
108
109 /* Static helpers */
110static L_COMP_DATA *l_generateJp2kData(const char *fname);
111static L_COMP_DATA *pixGenerateFlateData(PIX *pixs, l_int32 ascii85flag);
112static L_COMP_DATA *pixGenerateJpegData(PIX *pixs, l_int32 ascii85flag,
113 l_int32 quality);
114static L_COMP_DATA *pixGenerateJp2kData(PIX *pixs, l_int32 quality);
115static L_COMP_DATA *pixGenerateG4Data(PIX *pixs, l_int32 ascii85flag);
116
117static l_int32 l_generatePdf(l_uint8 **pdata, size_t *pnbytes,
118 L_PDF_DATA *lpd);
119static void generateFixedStringsPdf(L_PDF_DATA *lpd);
120static char *generateEscapeString(const char *str);
121static void generateMediaboxPdf(L_PDF_DATA *lpd);
122static l_int32 generatePageStringPdf(L_PDF_DATA *lpd);
123static l_int32 generateContentStringPdf(L_PDF_DATA *lpd);
124static l_int32 generatePreXStringsPdf(L_PDF_DATA *lpd);
125static l_int32 generateColormapStringsPdf(L_PDF_DATA *lpd);
126static void generateTrailerPdf(L_PDF_DATA *lpd);
127static char *makeTrailerStringPdf(L_DNA *daloc);
128static l_int32 generateOutputDataPdf(l_uint8 **pdata, size_t *pnbytes,
129 L_PDF_DATA *lpd);
130
131static l_int32 parseTrailerPdf(L_BYTEA *bas, L_DNA **pda);
132static char *generatePagesObjStringPdf(NUMA *napage);
133static L_BYTEA *substituteObjectNumbers(L_BYTEA *bas, NUMA *na_objs);
134
135static L_PDF_DATA *pdfdataCreate(const char *title);
136static void pdfdataDestroy(L_PDF_DATA **plpd);
137static L_COMP_DATA *pdfdataGetCid(L_PDF_DATA *lpd, l_int32 index);
138
139
140/* ---------------- Defaults for rendering options ----------------- */
141 /* Output G4 as writing through image mask; this is the default */
142static l_int32 var_WRITE_G4_IMAGE_MASK = 1;
143 /* Write date/time and lib version into pdf; this is the default */
144static l_int32 var_WRITE_DATE_AND_VERSION = 1;
145
146#define L_SMALLBUF 256
147#define L_BIGBUF 2048 /* must be able to hold hex colormap */
148
149
150#ifndef NO_CONSOLE_IO
151#define DEBUG_MULTIPAGE 0
152#endif /* ~NO_CONSOLE_IO */
153
154
/*---------------------------------------------------------------------*
 *      Intermediate function for generating multipage pdf output      *
 *---------------------------------------------------------------------*/
189l_ok
191 l_int32 type,
192 l_int32 quality,
193 l_uint8 **pdata,
194 size_t *pnbytes,
195 l_int32 x,
196 l_int32 y,
197 l_int32 res,
198 const char *title,
199 L_PDF_DATA **plpd,
200 l_int32 position)
201{
202l_int32 pixres, w, h, ret;
203l_float32 xpt, ypt, wpt, hpt;
204L_COMP_DATA *cid = NULL;
205L_PDF_DATA *lpd = NULL;
206
207 if (!pdata)
208 return ERROR_INT("&data not defined", __func__, 1);
209 *pdata = NULL;
210 if (!pnbytes)
211 return ERROR_INT("&nbytes not defined", __func__, 1);
212 *pnbytes = 0;
213 if (!pix)
214 return ERROR_INT("pix not defined", __func__, 1);
215 if (type != L_JPEG_ENCODE && type != L_G4_ENCODE &&
216 type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) {
217 selectDefaultPdfEncoding(pix, &type);
218 }
219 if (quality < 0 || quality > 100)
220 return ERROR_INT("invalid quality", __func__, 1);
221
222 if (plpd) { /* part of multi-page invocation */
223 if (position == L_FIRST_IMAGE)
224 *plpd = NULL;
225 }
226
227 /* Generate the compressed image data. It must NOT
228 * be ascii85 encoded. */
229 pixGenerateCIData(pix, type, quality, 0, &cid);
230 if (!cid)
231 return ERROR_INT("cid not made", __func__, 1);
232
233 /* Get media box in pts. Guess the input image resolution
234 * based on the input parameter %res, the resolution data in
235 * the pix, and the size of the image. */
236 pixres = cid->res;
237 w = cid->w;
238 h = cid->h;
239 if (res <= 0.0)
240 res = (pixres > 0) ? pixres : DefaultInputRes;
241 xpt = x * 72. / res;
242 ypt = y * 72. / res;
243 wpt = w * 72. / res;
244 hpt = h * 72. / res;
245
246 /* Set up lpd */
247 if (!plpd) { /* single image */
248 if ((lpd = pdfdataCreate(title)) == NULL)
249 return ERROR_INT("lpd not made", __func__, 1);
250 } else if (position == L_FIRST_IMAGE) { /* first of multiple images */
251 if ((lpd = pdfdataCreate(title)) == NULL)
252 return ERROR_INT("lpd not made", __func__, 1);
253 *plpd = lpd;
254 } else { /* not the first of multiple images */
255 lpd = *plpd;
256 }
257
258 /* Add the data to the lpd */
259 ptraAdd(lpd->cida, cid);
260 lpd->n++;
261 ptaAddPt(lpd->xy, xpt, ypt);
262 ptaAddPt(lpd->wh, wpt, hpt);
263
264 /* If a single image or the last of multiple images,
265 * generate the pdf and destroy the lpd */
266 if (!plpd || (position == L_LAST_IMAGE)) {
267 ret = l_generatePdf(pdata, pnbytes, lpd);
268 pdfdataDestroy(&lpd);
269 if (plpd) *plpd = NULL;
270 if (ret)
271 return ERROR_INT("pdf output not made", __func__, 1);
272 }
273
274 return 0;
275}
276
277
/*---------------------------------------------------------------------*
 *      Intermediate function for generating multipage pdf output      *
 *---------------------------------------------------------------------*/
317l_ok
319 SARRAY *sa,
320 l_uint8 **pdata,
321 size_t *pnbytes)
322{
323char *fname, *str_pages, *str_trailer;
324l_uint8 *pdfdata, *data;
325l_int32 i, j, index, nobj, npages;
326l_int32 *sizes, *locs;
327size_t size;
328L_BYTEA *bas, *bad, *bat1, *bat2;
329L_DNA *da_locs, *da_sizes, *da_outlocs, *da;
330L_DNAA *daa_locs; /* object locations on each page */
331NUMA *na_objs, *napage;
332NUMAA *naa_objs; /* object mapping numbers to new values */
333
334 if (!pdata)
335 return ERROR_INT("&data not defined", __func__, 1);
336 *pdata = NULL;
337 if (!pnbytes)
338 return ERROR_INT("&nbytes not defined", __func__, 1);
339 *pnbytes = 0;
340 if (!pa_data)
341 return ERROR_INT("pa_data not defined", __func__, 1);
342
343 /* Parse the files and find the object locations.
344 * Remove file data that cannot be parsed. */
345 ptraGetActualCount(pa_data, &npages);
346 daa_locs = l_dnaaCreate(npages);
347 for (i = 0; i < npages; i++) {
348 bas = (L_BYTEA *)ptraGetPtrToItem(pa_data, i);
349 if (parseTrailerPdf(bas, &da_locs) != 0) {
350 bas = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
351 l_byteaDestroy(&bas);
352 if (sa) {
353 fname = sarrayGetString(sa, i, L_NOCOPY);
354 L_ERROR("can't parse file %s; skipping\n", __func__, fname);
355 } else {
356 L_ERROR("can't parse file %d; skipping\n", __func__, i);
357 }
358 } else {
359 l_dnaaAddDna(daa_locs, da_locs, L_INSERT);
360 }
361 }
362
363 /* Recompute npages in case some of the files were not pdf */
364 ptraCompactArray(pa_data);
365 ptraGetActualCount(pa_data, &npages);
366 if (npages == 0) {
367 l_dnaaDestroy(&daa_locs);
368 return ERROR_INT("no parsable pdf files found", __func__, 1);
369 }
370
371 /* Find the mapping from initial to final object numbers */
372 naa_objs = numaaCreate(npages); /* stores final object numbers */
373 napage = numaCreate(npages); /* stores "Page" object numbers */
374 index = 0;
375 for (i = 0; i < npages; i++) {
376 da = l_dnaaGetDna(daa_locs, i, L_CLONE);
377 nobj = l_dnaGetCount(da);
378 if (i == 0) {
379 numaAddNumber(napage, 4); /* object 4 on first page */
380 na_objs = numaMakeSequence(0.0, 1.0, nobj - 1);
381 index = nobj - 1;
382 } else { /* skip the first 3 objects in each file */
383 numaAddNumber(napage, index); /* Page object is first we add */
384 na_objs = numaMakeConstant(0.0, nobj - 1);
385 numaReplaceNumber(na_objs, 3, 3); /* refers to parent of all */
386 for (j = 4; j < nobj - 1; j++)
387 numaSetValue(na_objs, j, index++);
388 }
389 numaaAddNuma(naa_objs, na_objs, L_INSERT);
390 l_dnaDestroy(&da);
391 }
392
393 /* Make the Pages object (#3) */
394 str_pages = generatePagesObjStringPdf(napage);
395
396 /* Build the output */
397 bad = l_byteaCreate(5000);
398 da_outlocs = l_dnaCreate(0); /* locations of all output objects */
399 for (i = 0; i < npages; i++) {
400 bas = (L_BYTEA *)ptraGetPtrToItem(pa_data, i);
401 pdfdata = l_byteaGetData(bas, &size);
402 da_locs = l_dnaaGetDna(daa_locs, i, L_CLONE); /* locs on this page */
403 na_objs = numaaGetNuma(naa_objs, i, L_CLONE); /* obj # on this page */
404 nobj = l_dnaGetCount(da_locs) - 1;
405 da_sizes = l_dnaDiffAdjValues(da_locs); /* object sizes on this page */
406 sizes = l_dnaGetIArray(da_sizes);
407 locs = l_dnaGetIArray(da_locs);
408 if (i == 0) {
409 l_byteaAppendData(bad, pdfdata, sizes[0]);
410 l_byteaAppendData(bad, pdfdata + locs[1], sizes[1]);
411 l_byteaAppendData(bad, pdfdata + locs[2], sizes[2]);
412 l_byteaAppendString(bad, str_pages);
413 for (j = 0; j < 4; j++)
414 l_dnaAddNumber(da_outlocs, locs[j]);
415 }
416 for (j = 4; j < nobj; j++) {
417 l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad));
418 bat1 = l_byteaInitFromMem(pdfdata + locs[j], sizes[j]);
419 bat2 = substituteObjectNumbers(bat1, na_objs);
420 data = l_byteaGetData(bat2, &size);
421 l_byteaAppendData(bad, data, size);
422 l_byteaDestroy(&bat1);
423 l_byteaDestroy(&bat2);
424 }
425 if (i == npages - 1) /* last one */
426 l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad));
427 LEPT_FREE(sizes);
428 LEPT_FREE(locs);
429 l_dnaDestroy(&da_locs);
430 numaDestroy(&na_objs);
431 l_dnaDestroy(&da_sizes);
432 }
433
434 /* Add the trailer */
435 str_trailer = makeTrailerStringPdf(da_outlocs);
436 l_byteaAppendString(bad, str_trailer);
437
438 /* Transfer the output data */
439 *pdata = l_byteaCopyData(bad, pnbytes);
440 l_byteaDestroy(&bad);
441
442#if DEBUG_MULTIPAGE
443 lept_stderr("******** object mapper **********");
444 numaaWriteStream(stderr, naa_objs);
445
446 lept_stderr("******** Page object numbers ***********");
447 numaWriteStderr(napage);
448
449 lept_stderr("******** Pages object ***********\n");
450 lept_stderr("%s\n", str_pages);
451#endif /* DEBUG_MULTIPAGE */
452
453 numaDestroy(&napage);
454 numaaDestroy(&naa_objs);
455 l_dnaDestroy(&da_outlocs);
456 l_dnaaDestroy(&daa_locs);
457 LEPT_FREE(str_pages);
458 LEPT_FREE(str_trailer);
459 return 0;
460}
461
462
/*---------------------------------------------------------------------*
 *                  Convert tiff multipage to pdf file                 *
 *---------------------------------------------------------------------*/
479l_ok
480convertTiffMultipageToPdf(const char *filein,
481 const char *fileout)
482{
483l_int32 istiff;
484PIXA *pixa;
485FILE *fp;
486
487 if ((fp = fopenReadStream(filein)) == NULL)
488 return ERROR_INT("file not found", __func__, 1);
489 istiff = fileFormatIsTiff(fp);
490 fclose(fp);
491 if (!istiff)
492 return ERROR_INT("file not tiff format", __func__, 1);
493
494 pixa = pixaReadMultipageTiff(filein);
495 pixaConvertToPdf(pixa, 0, 1.0, 0, 0, "weasel2", fileout);
496 pixaDestroy(&pixa);
497 return 0;
498}
499
500
/*---------------------------------------------------------------------*
 *                         CID-based operations                        *
 *---------------------------------------------------------------------*/
531l_ok
532l_generateCIDataForPdf(const char *fname,
533 PIX *pix,
534 l_int32 quality,
535 L_COMP_DATA **pcid)
536{
537l_int32 format, type;
538L_COMP_DATA *cid;
539PIX *pixt;
540
541 if (!pcid)
542 return ERROR_INT("&cid not defined", __func__, 1);
543 *pcid = cid = NULL;
544 if (!fname && !pix)
545 return ERROR_INT("neither fname nor pix are defined", __func__, 1);
546
547 /* If a compressed file is given that is not 'stdin', see if we
548 * can generate the pdf output without transcoding. */
549 if (fname && strcmp(fname, "-") != 0 && strcmp(fname, "stdin") != 0) {
550 findFileFormat(fname, &format);
551 if (format == IFF_UNKNOWN)
552 L_WARNING("file %s format is unknown\n", __func__, fname);
553 if (format == IFF_PS || format == IFF_LPDF) {
554 L_ERROR("file %s is unsupported format %d\n",
555 __func__, fname, format);
556 return 1;
557 }
558 if (format == IFF_JFIF_JPEG) {
559 cid = l_generateJpegData(fname, 0);
560 } else if (format == IFF_JP2) {
561 cid = l_generateJp2kData(fname);
562 } else if (format == IFF_PNG) {
563 cid = l_generateFlateDataPdf(fname, pix);
564 }
565 }
566
567 /* Otherwise, use the pix to generate the pdf output */
568 if (!cid) {
569 if (!pix)
570 pixt = pixRead(fname);
571 else
572 pixt = pixClone(pix);
573 if (!pixt)
574 return ERROR_INT("pixt not made", __func__, 1);
575 if (selectDefaultPdfEncoding(pixt, &type)) {
576 pixDestroy(&pixt);
577 return 1;
578 }
579 pixGenerateCIData(pixt, type, quality, 0, &cid);
580 pixDestroy(&pixt);
581 if (!cid)
582 return ERROR_INT("cid not made from pix", __func__, 1);
583 }
584 *pcid = cid;
585 return 0;
586}
587
588
613l_ok
614l_generateCIData(const char *fname,
615 l_int32 type,
616 l_int32 quality,
617 l_int32 ascii85,
618 L_COMP_DATA **pcid)
619{
620l_int32 format, d, bps, spp, iscmap;
621L_COMP_DATA *cid;
622PIX *pix;
623
624 if (!pcid)
625 return ERROR_INT("&cid not defined", __func__, 1);
626 *pcid = NULL;
627 if (!fname)
628 return ERROR_INT("fname not defined", __func__, 1);
629 if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
630 type != L_FLATE_ENCODE && type != L_JP2K_ENCODE)
631 return ERROR_INT("invalid conversion type", __func__, 1);
632 if (ascii85 != 0 && ascii85 != 1)
633 return ERROR_INT("invalid ascii85", __func__, 1);
634
635 /* Sanity check on requested encoding */
636 pixReadHeader(fname, &format, NULL, NULL, &bps, &spp, &iscmap);
637 d = bps * spp;
638 if (d == 24) d = 32;
639 if (iscmap && type != L_FLATE_ENCODE) {
640 L_WARNING("pixs has cmap; using flate encoding\n", __func__);
641 type = L_FLATE_ENCODE;
642 } else if (d < 8 && type == L_JPEG_ENCODE) {
643 L_WARNING("pixs has < 8 bpp; using flate encoding\n", __func__);
644 type = L_FLATE_ENCODE;
645 } else if (d < 8 && type == L_JP2K_ENCODE) {
646 L_WARNING("pixs has < 8 bpp; using flate encoding\n", __func__);
647 type = L_FLATE_ENCODE;
648 } else if (d > 1 && type == L_G4_ENCODE) {
649 L_WARNING("pixs has > 1 bpp; using flate encoding\n", __func__);
650 type = L_FLATE_ENCODE;
651 }
652
653 if (type == L_JPEG_ENCODE) {
654 if (format == IFF_JFIF_JPEG) { /* do not transcode */
655 cid = l_generateJpegData(fname, ascii85);
656 } else {
657 if ((pix = pixRead(fname)) == NULL)
658 return ERROR_INT("pix not returned for JPEG", __func__, 1);
659 cid = pixGenerateJpegData(pix, ascii85, quality);
660 pixDestroy(&pix);
661 }
662 if (!cid)
663 return ERROR_INT("jpeg data not made", __func__, 1);
664 } else if (type == L_JP2K_ENCODE) {
665 if (format == IFF_JP2) { /* do not transcode */
666 cid = l_generateJp2kData(fname);
667 } else {
668 if ((pix = pixRead(fname)) == NULL)
669 return ERROR_INT("pix not returned for JP2K", __func__, 1);
670 cid = pixGenerateJp2kData(pix, quality);
671 pixDestroy(&pix);
672 }
673 if (!cid)
674 return ERROR_INT("jp2k data not made", __func__, 1);
675 } else if (type == L_G4_ENCODE) {
676 if ((pix = pixRead(fname)) == NULL)
677 return ERROR_INT("pix not returned for G4", __func__, 1);
678 cid = pixGenerateG4Data(pix, ascii85);
679 pixDestroy(&pix);
680 if (!cid)
681 return ERROR_INT("g4 data not made", __func__, 1);
682 } else if (type == L_FLATE_ENCODE) {
683 if ((cid = l_generateFlateData(fname, ascii85)) == NULL)
684 return ERROR_INT("flate data not made", __func__, 1);
685 } else {
686 return ERROR_INT("invalid conversion type", __func__, 1);
687 }
688 *pcid = cid;
689
690 return 0;
691}
692
693
/*---------------------------------------------------------------------*
 *                     Low-level CID-based operations                  *
 *---------------------------------------------------------------------*/
715L_COMP_DATA *
716l_generateFlateDataPdf(const char *fname,
717 PIX *pixs)
718{
719l_uint8 *pngcomp = NULL; /* entire PNG compressed file */
720l_uint8 *datacomp = NULL; /* gzipped raster data */
721l_uint8 *cmapdata = NULL; /* uncompressed colormap */
722char *cmapdatahex = NULL; /* hex ascii uncompressed colormap */
723l_uint32 i, j, n;
724l_int32 format, interlaced;
725l_int32 ncolors; /* in colormap */
726l_int32 bps; /* bits/sample: usually 8 */
727l_int32 spp; /* samples/pixel: 1-grayscale/cmap); 3-rgb; 4-rgba */
728l_int32 w, h, cmapflag;
729l_int32 xres, yres;
730size_t nbytescomp = 0, nbytespng = 0;
731FILE *fp;
732L_COMP_DATA *cid;
733PIX *pix;
734PIXCMAP *cmap = NULL;
735
736 if (!fname)
737 return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
738
739 findFileFormat(fname, &format);
740 spp = 0; /* init to spp != 4 if not png */
741 interlaced = 0; /* initialize to no interlacing */
742 bps = 0; /* initialize to a nonsense value */
743 if (format == IFF_PNG) {
744 isPngInterlaced(fname, &interlaced);
745 if (readHeaderPng(fname, NULL, NULL, &bps, &spp, NULL))
746 return (L_COMP_DATA *)ERROR_PTR("bad png input", __func__, NULL);
747 }
748
749 /* PDF is capable of inlining some types of PNG files, but not all
750 of them. We need to transcode anything with interlacing, an
751 alpha channel, or 1 bpp (which would otherwise be photo-inverted).
752
753 Note: any PNG image file with an alpha channel is converted on
754 reading to RGBA (spp == 4). This includes the (gray + alpha) format
755 with spp == 2. Because of the conversion, readHeaderPng() gives
756 spp = 2, whereas pixGetSpp() gives spp = 4 on the converted pix. */
757 if (format != IFF_PNG ||
758 (format == IFF_PNG && (interlaced || bps == 1 || spp == 4 || spp == 2)))
759 { /* lgtm+ analyzer needed the logic expanded */
760 if (!pixs)
761 pix = pixRead(fname);
762 else
763 pix = pixClone(pixs);
764 if (!pix)
765 return (L_COMP_DATA *)ERROR_PTR("pix not made", __func__, NULL);
766 cid = pixGenerateFlateData(pix, 0);
767 pixDestroy(&pix);
768 return cid;
769 }
770
771 /* It's png. Generate the pdf data without transcoding.
772 * Implementation by Jeff Breidenbach.
773 * First, read the metadata */
774 if ((fp = fopenReadStream(fname)) == NULL)
775 return (L_COMP_DATA *)ERROR_PTR("stream not opened", __func__, NULL);
776 freadHeaderPng(fp, &w, &h, &bps, &spp, &cmapflag);
777 fgetPngResolution(fp, &xres, &yres);
778 fclose(fp);
779
780 /* We get pdf corruption when inlining the data from 16 bpp png. */
781 if (bps == 16)
782 return l_generateFlateData(fname, 0);
783
784 /* Read the entire png file */
785 if ((pngcomp = l_binaryRead(fname, &nbytespng)) == NULL)
786 return (L_COMP_DATA *)ERROR_PTR("unable to read file",
787 __func__, NULL);
788
789 /* Extract flate data, copying portions of it to memory, including
790 * the predictor information in a byte at the beginning of each
791 * raster line. The flate data makes up the vast majority of
792 * the png file, so after extraction we expect datacomp to
793 * be nearly full (i.e., nbytescomp will be only slightly less
794 * than nbytespng). Also extract the colormap if present. */
795 if ((datacomp = (l_uint8 *)LEPT_CALLOC(1, nbytespng)) == NULL) {
796 LEPT_FREE(pngcomp);
797 return (L_COMP_DATA *)ERROR_PTR("unable to allocate memory",
798 __func__, NULL);
799 }
800
801 /* Parse the png file. Each chunk consists of:
802 * length: 4 bytes
803 * name: 4 bytes (e.g., "IDAT")
804 * data: n bytes
805 * CRC: 4 bytes
806 * Start at the beginning of the data section of the first chunk,
807 * byte 16, because the png file begins with 8 bytes of header,
808 * followed by the first 8 bytes of the first chunk
809 * (length and name). On each loop, increment by 12 bytes to
810 * skip over the CRC, length and name of the next chunk. */
811 for (i = 16; i < nbytespng; i += 12) { /* do each successive chunk */
812 /* Get the chunk length */
813 n = pngcomp[i - 8] << 24;
814 n += pngcomp[i - 7] << 16;
815 n += pngcomp[i - 6] << 8;
816 n += pngcomp[i - 5] << 0;
817 if (n >= nbytespng - i) { /* "n + i" can overflow */
818 LEPT_FREE(pngcomp);
819 LEPT_FREE(datacomp);
820 pixcmapDestroy(&cmap);
821 L_ERROR("invalid png: i = %d, n = %d, nbytes = %zu\n", __func__,
822 i, n, nbytespng);
823 return NULL;
824 }
825
826 /* Is it a data chunk? */
827 if (memcmp(pngcomp + i - 4, "IDAT", 4) == 0) {
828 memcpy(datacomp + nbytescomp, pngcomp + i, n);
829 nbytescomp += n;
830 }
831
832 /* Is it a palette chunk? */
833 if (cmapflag && !cmap &&
834 memcmp(pngcomp + i - 4, "PLTE", 4) == 0) {
835 if ((n / 3) > (1 << bps)) {
836 LEPT_FREE(pngcomp);
837 LEPT_FREE(datacomp);
838 pixcmapDestroy(&cmap);
839 L_ERROR("invalid png: i = %d, n = %d, cmapsize = %d\n",
840 __func__, i, n, (1 << bps));
841 return NULL;
842 }
843 cmap = pixcmapCreate(bps);
844 for (j = i; j < i + n; j += 3) {
845 pixcmapAddColor(cmap, pngcomp[j], pngcomp[j + 1],
846 pngcomp[j + 2]);
847 }
848 }
849 i += n; /* move to the end of the data chunk */
850 }
851 LEPT_FREE(pngcomp);
852
853 if (nbytescomp == 0) {
854 LEPT_FREE(datacomp);
855 pixcmapDestroy(&cmap);
856 return (L_COMP_DATA *)ERROR_PTR("invalid PNG file", __func__, NULL);
857 }
858
859 /* Extract and encode the colormap data as hexascii */
860 ncolors = 0;
861 if (cmap) {
862 pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata);
863 pixcmapDestroy(&cmap);
864 if (!cmapdata) {
865 LEPT_FREE(datacomp);
866 return (L_COMP_DATA *)ERROR_PTR("cmapdata not made",
867 __func__, NULL);
868 }
869 cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors);
870 LEPT_FREE(cmapdata);
871 }
872
873 /* Note that this is the only situation where the predictor
874 * field of the CID is set to 1. Adobe's predictor values on
875 * p. 76 of pdf_reference_1-7.pdf give 1 for no predictor and
876 * 10-14 for inline predictors, the specifics of which are
877 * ignored by the pdf interpreter, which just needs to know that
878 * the first byte on each compressed scanline is some predictor
879 * whose type can be inferred from the byte itself. */
880 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
881 cid->datacomp = datacomp;
882 cid->type = L_FLATE_ENCODE;
883 cid->cmapdatahex = cmapdatahex;
884 cid->nbytescomp = nbytescomp;
885 cid->ncolors = ncolors;
886 cid->predictor = TRUE;
887 cid->w = w;
888 cid->h = h;
889 cid->bps = bps;
890 cid->spp = spp;
891 cid->res = xres;
892 return cid;
893}
894
895
912L_COMP_DATA *
913l_generateJpegData(const char *fname,
914 l_int32 ascii85flag)
915{
916char *data85 = NULL; /* ascii85 encoded jpeg compressed file */
917l_uint8 *data = NULL;
918l_int32 w, h, xres, yres, bps, spp;
919size_t nbytes, nbytes85;
920L_COMP_DATA *cid;
921FILE *fp;
922
923 if (!fname)
924 return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
925
926 if (ascii85flag != 0 && ascii85flag != 1)
927 return (L_COMP_DATA *)ERROR_PTR("wrong ascii85flags", __func__, NULL);
928
929 /* Read the metadata */
930 if (readHeaderJpeg(fname, &w, &h, &spp, NULL, NULL))
931 return (L_COMP_DATA *)ERROR_PTR("bad jpeg metadata", __func__, NULL);
932 bps = 8;
933 if ((fp = fopenReadStream(fname)) == NULL)
934 return (L_COMP_DATA *)ERROR_PTR("stream not opened", __func__, NULL);
935 fgetJpegResolution(fp, &xres, &yres);
936 fclose(fp);
937
938 /* Read the entire jpeg file. The returned jpeg data in memory
939 * starts with ffd8 and ends with ffd9 */
940 if ((data = l_binaryRead(fname, &nbytes)) == NULL)
941 return (L_COMP_DATA *)ERROR_PTR("data not extracted", __func__, NULL);
942
943 /* Optionally, encode the compressed data */
944 if (ascii85flag == 1) {
945 data85 = encodeAscii85(data, nbytes, &nbytes85);
946 LEPT_FREE(data);
947 if (!data85)
948 return (L_COMP_DATA *)ERROR_PTR("data85 not made", __func__, NULL);
949 else
950 data85[nbytes85 - 1] = '\0'; /* remove the newline */
951 }
952
953 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
954 if (ascii85flag == 0) {
955 cid->datacomp = data;
956 } else { /* ascii85 */
957 cid->data85 = data85;
958 cid->nbytes85 = nbytes85;
959 }
960 cid->type = L_JPEG_ENCODE;
961 cid->nbytescomp = nbytes;
962 cid->w = w;
963 cid->h = h;
964 cid->bps = bps;
965 cid->spp = spp;
966 cid->res = xres;
967 return cid;
968}
969
970
986L_COMP_DATA *
988 size_t nbytes,
989 l_int32 ascii85flag)
990{
991char *data85 = NULL; /* ascii85 encoded jpeg compressed file */
992l_int32 w, h, xres, yres, bps, spp;
993size_t nbytes85;
994L_COMP_DATA *cid;
995
996 if (!data)
997 return (L_COMP_DATA *)ERROR_PTR("data not defined", __func__, NULL);
998
999 /* Read the metadata */
1000 if (readHeaderMemJpeg(data, nbytes, &w, &h, &spp, NULL, NULL)) {
1001 LEPT_FREE(data);
1002 return (L_COMP_DATA *)ERROR_PTR("bad jpeg metadata", __func__, NULL);
1003 }
1004 bps = 8;
1005 readResolutionMemJpeg(data, nbytes, &xres, &yres);
1006
1007 /* Optionally, encode the compressed data */
1008 if (ascii85flag == 1) {
1009 data85 = encodeAscii85(data, nbytes, &nbytes85);
1010 LEPT_FREE(data);
1011 if (!data85)
1012 return (L_COMP_DATA *)ERROR_PTR("data85 not made", __func__, NULL);
1013 else
1014 data85[nbytes85 - 1] = '\0'; /* remove the newline */
1015 }
1016
1017 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
1018 if (ascii85flag == 0) {
1019 cid->datacomp = data;
1020 } else { /* ascii85 */
1021 cid->data85 = data85;
1022 cid->nbytes85 = nbytes85;
1023 }
1024 cid->type = L_JPEG_ENCODE;
1025 cid->nbytescomp = nbytes;
1026 cid->w = w;
1027 cid->h = h;
1028 cid->bps = bps;
1029 cid->spp = spp;
1030 cid->res = xres;
1031 return cid;
1032}
1033
1034
1046static L_COMP_DATA *
1047l_generateJp2kData(const char *fname)
1048{
1049l_int32 w, h, bps, spp, xres, yres;
1050size_t nbytes;
1051L_COMP_DATA *cid;
1052FILE *fp;
1053
1054 if (!fname)
1055 return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
1056
1057 if (readHeaderJp2k(fname, &w, &h, &bps, &spp, NULL))
1058 return (L_COMP_DATA *)ERROR_PTR("bad jp2k metadata", __func__, NULL);
1059
1060 /* The returned jp2k data in memory is the entire jp2k file */
1061 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
1062 if ((cid->datacomp = l_binaryRead(fname, &nbytes)) == NULL) {
1063 l_CIDataDestroy(&cid);
1064 return (L_COMP_DATA *)ERROR_PTR("data not extracted", __func__, NULL);
1065 }
1066
1067 xres = yres = 0;
1068 if ((fp = fopenReadStream(fname)) != NULL) {
1069 fgetJp2kResolution(fp, &xres, &yres);
1070 fclose(fp);
1071 }
1072 cid->type = L_JP2K_ENCODE;
1073 cid->nbytescomp = nbytes;
1074 cid->w = w;
1075 cid->h = h;
1076 cid->bps = bps;
1077 cid->spp = spp;
1078 cid->res = xres;
1079 return cid;
1080}
1081
1082
1098L_COMP_DATA *
1099l_generateG4Data(const char *fname,
1100 l_int32 ascii85flag)
1101{
1102l_uint8 *datacomp = NULL; /* g4 compressed raster data */
1103char *data85 = NULL; /* ascii85 encoded g4 compressed data */
1104l_int32 w, h, xres, yres, npages;
1105l_int32 minisblack; /* TRUE or FALSE */
1106size_t nbytes85, nbytescomp;
1107L_COMP_DATA *cid;
1108FILE *fp;
1109
1110 if (!fname)
1111 return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
1112
1113 /* Make sure this is a single page tiff file */
1114 if ((fp = fopenReadStream(fname)) == NULL)
1115 return (L_COMP_DATA *)ERROR_PTR("stream not opened", __func__, NULL);
1116 tiffGetCount(fp, &npages);
1117 fclose(fp);
1118 if (npages != 1) {
1119 L_ERROR(" %d page tiff; only works with 1 page\n", __func__, npages);
1120 return NULL;
1121 }
1122
1123 /* Read the resolution */
1124 if ((fp = fopenReadStream(fname)) == NULL)
1125 return (L_COMP_DATA *)ERROR_PTR("stream not opened", __func__, NULL);
1126 getTiffResolution(fp, &xres, &yres);
1127 fclose(fp);
1128
1129 /* The returned ccitt g4 data in memory is the block of
1130 * bytes in the tiff file, starting after 8 bytes and
1131 * ending before the directory. */
1132 if (extractG4DataFromFile(fname, &datacomp, &nbytescomp,
1133 &w, &h, &minisblack)) {
1134 return (L_COMP_DATA *)ERROR_PTR("datacomp not extracted",
1135 __func__, NULL);
1136 }
1137
1138 /* Optionally, encode the compressed data */
1139 if (ascii85flag == 1) {
1140 data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85);
1141 LEPT_FREE(datacomp);
1142 if (!data85)
1143 return (L_COMP_DATA *)ERROR_PTR("data85 not made", __func__, NULL);
1144 else
1145 data85[nbytes85 - 1] = '\0'; /* remove the newline */
1146 }
1147
1148 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
1149 if (ascii85flag == 0) {
1150 cid->datacomp = datacomp;
1151 } else { /* ascii85 */
1152 cid->data85 = data85;
1153 cid->nbytes85 = nbytes85;
1154 }
1155 cid->type = L_G4_ENCODE;
1156 cid->nbytescomp = nbytescomp;
1157 cid->w = w;
1158 cid->h = h;
1159 cid->bps = 1;
1160 cid->spp = 1;
1161 cid->minisblack = minisblack;
1162 cid->res = xres;
1163 return cid;
1164}
1165
1166
1187l_ok
1189 l_int32 type,
1190 l_int32 quality,
1191 l_int32 ascii85,
1192 L_COMP_DATA **pcid)
1193{
1194l_int32 w, h, d, maxAsp;
1195PIXCMAP *cmap;
1196
1197 if (!pcid)
1198 return ERROR_INT("&cid not defined", __func__, 1);
1199 *pcid = NULL;
1200 if (!pixs)
1201 return ERROR_INT("pixs not defined", __func__, 1);
1202 if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1203 type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) {
1204 selectDefaultPdfEncoding(pixs, &type);
1205 }
1206 if (ascii85 != 0 && ascii85 != 1)
1207 return ERROR_INT("invalid ascii85", __func__, 1);
1208 pixGetDimensions(pixs, &w, &h, NULL);
1209 if (w == 0 || h == 0)
1210 return ERROR_INT("invalid w or h", __func__, 1);
1211 maxAsp = L_MAX(w / h, h / w);
1212 if (maxAsp > 10)
1213 return ERROR_INT("max asperity > 10", __func__, 1);
1214
1215 /* Conditionally modify the encoding type if libz is
1216 * available and the requested library is missing. */
1217#if defined(HAVE_LIBZ)
1218# if !defined(HAVE_LIBJPEG)
1219 if (type == L_JPEG_ENCODE) {
1220 L_WARNING("no libjpeg; using flate encoding\n", __func__);
1221 type = L_FLATE_ENCODE;
1222 }
1223# endif /* !defined(HAVE_LIBJPEG) */
1224# if !defined(HAVE_LIBJP2K)
1225 if (type == L_JP2K_ENCODE) {
1226 L_WARNING("no libjp2k; using flate encoding\n", __func__);
1227 type = L_FLATE_ENCODE;
1228 }
1229# endif /* !defined(HAVE_LIBJP2K) */
1230# if !defined(HAVE_LIBTIFF)
1231 if (type == L_G4_ENCODE) {
1232 L_WARNING("no libtiff; using flate encoding\n", __func__);
1233 type = L_FLATE_ENCODE;
1234 }
1235# endif /* !defined(HAVE_LIBTIFF) */
1236#endif /* defined(HAVE_LIBZ) */
1237
1238 /* Sanity check on requested encoding */
1239 d = pixGetDepth(pixs);
1240 cmap = pixGetColormap(pixs);
1241 if (cmap && type != L_FLATE_ENCODE) {
1242 L_WARNING("pixs has cmap; using flate encoding\n", __func__);
1243 type = L_FLATE_ENCODE;
1244 } else if (d < 8 && (type == L_JPEG_ENCODE || type == L_JP2K_ENCODE)) {
1245 L_WARNING("pixs has < 8 bpp; using flate encoding\n", __func__);
1246 type = L_FLATE_ENCODE;
1247 } else if (d > 1 && type == L_G4_ENCODE) {
1248 L_WARNING("pixs has > 1 bpp; using flate encoding\n", __func__);
1249 type = L_FLATE_ENCODE;
1250 }
1251
1252 if (type == L_JPEG_ENCODE) {
1253 if ((*pcid = pixGenerateJpegData(pixs, ascii85, quality)) == NULL)
1254 return ERROR_INT("jpeg data not made", __func__, 1);
1255 } else if (type == L_JP2K_ENCODE) {
1256 if ((*pcid = pixGenerateJp2kData(pixs, quality)) == NULL)
1257 return ERROR_INT("jp2k data not made", __func__, 1);
1258 } else if (type == L_G4_ENCODE) {
1259 if ((*pcid = pixGenerateG4Data(pixs, ascii85)) == NULL)
1260 return ERROR_INT("g4 data not made", __func__, 1);
1261 } else { /* type == L_FLATE_ENCODE */
1262 if ((*pcid = pixGenerateFlateData(pixs, ascii85)) == NULL)
1263 return ERROR_INT("flate data not made", __func__, 1);
1264 }
1265 return 0;
1266}
1267
1268
1289L_COMP_DATA *
1290l_generateFlateData(const char *fname,
1291 l_int32 ascii85flag)
1292{
1293L_COMP_DATA *cid;
1294PIX *pixs;
1295
1296 if (!fname)
1297 return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
1298
1299 if ((pixs = pixRead(fname)) == NULL)
1300 return (L_COMP_DATA *)ERROR_PTR("pixs not made", __func__, NULL);
1301 cid = pixGenerateFlateData(pixs, ascii85flag);
1302 pixDestroy(&pixs);
1303 return cid;
1304}
1305
1306
1324static L_COMP_DATA *
1326 l_int32 ascii85flag)
1327{
1328l_uint8 *data = NULL; /* uncompressed raster data in required format */
1329l_uint8 *datacomp = NULL; /* gzipped raster data */
1330char *data85 = NULL; /* ascii85 encoded gzipped raster data */
1331l_uint8 *cmapdata = NULL; /* uncompressed colormap */
1332char *cmapdata85 = NULL; /* ascii85 encoded uncompressed colormap */
1333char *cmapdatahex = NULL; /* hex ascii uncompressed colormap */
1334l_int32 ncolors; /* in colormap; not used if cmapdata85 is null */
1335l_int32 bps; /* bits/sample: usually 8 */
1336l_int32 spp; /* samples/pixel: 1-grayscale/cmap); 3-rgb */
1337l_int32 w, h, d, cmapflag;
1338size_t ncmapbytes85 = 0;
1339size_t nbytes85 = 0;
1340size_t nbytes, nbytescomp;
1341L_COMP_DATA *cid;
1342PIX *pixt;
1343PIXCMAP *cmap;
1344
1345 if (!pixs)
1346 return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL);
1347
1348 /* Convert the image to one of these 4 types:
1349 * 1 bpp
1350 * 8 bpp, no colormap
1351 * 8 bpp, colormap
1352 * 32 bpp rgb */
1353 pixGetDimensions(pixs, &w, &h, &d);
1354 cmap = pixGetColormap(pixs);
1355 cmapflag = (cmap) ? 1 : 0;
1356 if (d == 2 || d == 4 || d == 16) {
1357 pixt = pixConvertTo8(pixs, cmapflag);
1358 cmap = pixGetColormap(pixt);
1359 d = pixGetDepth(pixt);
1360 } else if (d == 32 && pixGetSpp(pixs) == 4) { /* remove alpha */
1361 pixt = pixAlphaBlendUniform(pixs, 0xffffff00);
1362 } else {
1363 pixt = pixClone(pixs);
1364 }
1365 if (!pixt)
1366 return (L_COMP_DATA *)ERROR_PTR("pixt not made", __func__, NULL);
1367 spp = (d == 32) ? 3 : 1;
1368 bps = (d == 32) ? 8 : d;
1369
1370 /* Extract and encode the colormap data as both ascii85 and hexascii */
1371 ncolors = 0;
1372 if (cmap) {
1373 pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata);
1374 if (!cmapdata) {
1375 pixDestroy(&pixt);
1376 return (L_COMP_DATA *)ERROR_PTR("cmapdata not made",
1377 __func__, NULL);
1378 }
1379
1380 cmapdata85 = encodeAscii85(cmapdata, 3 * ncolors, &ncmapbytes85);
1381 cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors);
1382 LEPT_FREE(cmapdata);
1383 }
1384
1385 /* Extract and compress the raster data */
1386 pixGetRasterData(pixt, &data, &nbytes);
1387 pixDestroy(&pixt);
1388 if (!data) {
1389 LEPT_FREE(cmapdata85);
1390 LEPT_FREE(cmapdatahex);
1391 return (L_COMP_DATA *)ERROR_PTR("data not returned", __func__, NULL);
1392 }
1393 datacomp = zlibCompress(data, nbytes, &nbytescomp);
1394 LEPT_FREE(data);
1395 if (!datacomp) {
1396 LEPT_FREE(cmapdata85);
1397 LEPT_FREE(cmapdatahex);
1398 return (L_COMP_DATA *)ERROR_PTR("datacomp not made", __func__, NULL);
1399 }
1400
1401 /* Optionally, encode the compressed data */
1402 if (ascii85flag == 1) {
1403 data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85);
1404 LEPT_FREE(datacomp);
1405 if (!data85) {
1406 LEPT_FREE(cmapdata85);
1407 LEPT_FREE(cmapdatahex);
1408 return (L_COMP_DATA *)ERROR_PTR("data85 not made", __func__, NULL);
1409 } else {
1410 data85[nbytes85 - 1] = '\0'; /* remove the newline */
1411 }
1412 }
1413
1414 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
1415 if (ascii85flag == 0) {
1416 cid->datacomp = datacomp;
1417 } else { /* ascii85 */
1418 cid->data85 = data85;
1419 cid->nbytes85 = nbytes85;
1420 }
1421 cid->type = L_FLATE_ENCODE;
1422 cid->cmapdatahex = cmapdatahex;
1423 cid->cmapdata85 = cmapdata85;
1424 cid->nbytescomp = nbytescomp;
1425 cid->ncolors = ncolors;
1426 cid->w = w;
1427 cid->h = h;
1428 cid->bps = bps;
1429 cid->spp = spp;
1430 cid->res = pixGetXRes(pixs);
1431 cid->nbytes = nbytes; /* only for debugging */
1432 return cid;
1433}
1434
1435
1452static L_COMP_DATA *
1454 l_int32 ascii85flag,
1455 l_int32 quality)
1456{
1457l_int32 d;
1458char *fname;
1459L_COMP_DATA *cid;
1460
1461 if (!pixs)
1462 return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL);
1463 if (pixGetColormap(pixs))
1464 return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", __func__, NULL);
1465 d = pixGetDepth(pixs);
1466 if (d != 8 && d != 16 && d != 32)
1467 return (L_COMP_DATA *)ERROR_PTR("pixs not 8, 16 or 32 bpp",
1468 __func__, NULL);
1469
1470 /* Compress to a temp jpeg file */
1471 fname = l_makeTempFilename();
1472 if (pixWriteJpeg(fname, pixs, quality, 0)) {
1473 LEPT_FREE(fname);
1474 return NULL;
1475 }
1476
1477 /* Generate the data */
1478 cid = l_generateJpegData(fname, ascii85flag);
1479 if (lept_rmfile(fname) != 0)
1480 L_ERROR("temp file %s was not deleted\n", __func__, fname);
1481 LEPT_FREE(fname);
1482 return cid;
1483}
1484
1485
1500static L_COMP_DATA *
1502 l_int32 quality)
1503{
1504l_int32 d;
1505char *fname;
1506L_COMP_DATA *cid;
1507
1508 if (!pixs)
1509 return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL);
1510 if (pixGetColormap(pixs))
1511 return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", __func__, NULL);
1512 d = pixGetDepth(pixs);
1513 if (d != 8 && d != 32)
1514 return (L_COMP_DATA *)ERROR_PTR("pixs not 8 or 32 bpp", __func__, NULL);
1515
1516 /* Compress to a temp jp2k file */
1517 fname = l_makeTempFilename();
1518 if (pixWriteJp2k(fname, pixs, quality, 5, 0, 0)) {
1519 LEPT_FREE(fname);
1520 return NULL;
1521 }
1522
1523 /* Generate the data */
1524 cid = l_generateJp2kData(fname);
1525 if (lept_rmfile(fname) != 0)
1526 L_ERROR("temp file %s was not deleted\n", __func__, fname);
1527 LEPT_FREE(fname);
1528 return cid;
1529}
1530
1531
1546static L_COMP_DATA *
1548 l_int32 ascii85flag)
1549{
1550char *fname;
1551L_COMP_DATA *cid;
1552
1553 if (!pixs)
1554 return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL);
1555 if (pixGetDepth(pixs) != 1)
1556 return (L_COMP_DATA *)ERROR_PTR("pixs not 1 bpp", __func__, NULL);
1557 if (pixGetColormap(pixs))
1558 return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", __func__, NULL);
1559
1560 /* Compress to a temp tiff g4 file */
1561 fname = l_makeTempFilename();
1562 if (pixWrite(fname, pixs, IFF_TIFF_G4)) {
1563 LEPT_FREE(fname);
1564 return NULL;
1565 }
1566
1567 cid = l_generateG4Data(fname, ascii85flag);
1568 if (lept_rmfile(fname) != 0)
1569 L_ERROR("temp file %s was not deleted\n", __func__, fname);
1570 LEPT_FREE(fname);
1571 return cid;
1572}
1573
1574
1590l_ok
1591cidConvertToPdfData(L_COMP_DATA *cid,
1592 const char *title,
1593 l_uint8 **pdata,
1594 size_t *pnbytes)
1595{
1596l_int32 res, ret;
1597l_float32 wpt, hpt;
1598L_PDF_DATA *lpd = NULL;
1599
1600 if (!pdata || !pnbytes)
1601 return ERROR_INT("&data and &nbytes not both defined", __func__, 1);
1602 *pdata = NULL;
1603 *pnbytes = 0;
1604 if (!cid)
1605 return ERROR_INT("cid not defined", __func__, 1);
1606
1607 /* Get media box parameters, in pts */
1608 res = cid->res;
1609 if (res <= 0)
1610 res = DefaultInputRes;
1611 wpt = cid->w * 72. / res;
1612 hpt = cid->h * 72. / res;
1613
1614 /* Set up the pdf data struct (lpd) */
1615 if ((lpd = pdfdataCreate(title)) == NULL)
1616 return ERROR_INT("lpd not made", __func__, 1);
1617 ptraAdd(lpd->cida, cid);
1618 lpd->n++;
1619 ptaAddPt(lpd->xy, 0, 0); /* xpt = ypt = 0 */
1620 ptaAddPt(lpd->wh, wpt, hpt);
1621
1622 /* Generate the pdf string and destroy the lpd */
1623 ret = l_generatePdf(pdata, pnbytes, lpd);
1624 pdfdataDestroy(&lpd);
1625 if (ret)
1626 return ERROR_INT("pdf output not made", __func__, 1);
1627 return 0;
1628}
1629
1630
1637void
1638l_CIDataDestroy(L_COMP_DATA **pcid)
1639{
1640L_COMP_DATA *cid;
1641
1642 if (pcid == NULL) {
1643 L_WARNING("ptr address is null!\n", __func__);
1644 return;
1645 }
1646 if ((cid = *pcid) == NULL)
1647 return;
1648
1649 if (cid->datacomp) LEPT_FREE(cid->datacomp);
1650 if (cid->data85) LEPT_FREE(cid->data85);
1651 if (cid->cmapdata85) LEPT_FREE(cid->cmapdata85);
1652 if (cid->cmapdatahex) LEPT_FREE(cid->cmapdatahex);
1653 LEPT_FREE(cid);
1654 *pcid = NULL;
1655}
1656
1657
1658/*---------------------------------------------------------------------*
1659 * Helper functions for generating the output pdf string *
1660 *---------------------------------------------------------------------*/
1682static l_int32
1683l_generatePdf(l_uint8 **pdata,
1684 size_t *pnbytes,
1685 L_PDF_DATA *lpd)
1686{
1687 if (!pdata)
1688 return ERROR_INT("&data not defined", __func__, 1);
1689 *pdata = NULL;
1690 if (!pnbytes)
1691 return ERROR_INT("&nbytes not defined", __func__, 1);
1692 *pnbytes = 0;
1693 if (!lpd)
1694 return ERROR_INT("lpd not defined", __func__, 1);
1695
1696 generateFixedStringsPdf(lpd);
1697 generateMediaboxPdf(lpd);
1698 generatePageStringPdf(lpd);
1699 generateContentStringPdf(lpd);
1700 generatePreXStringsPdf(lpd);
1701 generateColormapStringsPdf(lpd);
1702 generateTrailerPdf(lpd);
1703 return generateOutputDataPdf(pdata, pnbytes, lpd);
1704}
1705
1706
1707static void
1708generateFixedStringsPdf(L_PDF_DATA *lpd)
1709{
1710char buf[L_SMALLBUF];
1711char *version, *datestr;
1712SARRAY *sa;
1713
1714 /* Accumulate data for the header and objects 1-3 */
1715 lpd->id = stringNew("%PDF-1.5\n");
1716 l_dnaAddNumber(lpd->objsize, strlen(lpd->id));
1717
1718 lpd->obj1 = stringNew("1 0 obj\n"
1719 "<<\n"
1720 "/Type /Catalog\n"
1721 "/Pages 3 0 R\n"
1722 ">>\n"
1723 "endobj\n");
1724 l_dnaAddNumber(lpd->objsize, strlen(lpd->obj1));
1725
1726 sa = sarrayCreate(0);
1727 sarrayAddString(sa, "2 0 obj\n"
1728 "<<\n", L_COPY);
1729 if (var_WRITE_DATE_AND_VERSION) {
1730 datestr = l_getFormattedDate();
1731 snprintf(buf, sizeof(buf), "/CreationDate (D:%s)\n", datestr);
1732 sarrayAddString(sa, buf, L_COPY);
1733 LEPT_FREE(datestr);
1734 version = getLeptonicaVersion();
1735 snprintf(buf, sizeof(buf),
1736 "/Producer (leptonica: %s)\n", version);
1737 LEPT_FREE(version);
1738 } else {
1739 snprintf(buf, sizeof(buf), "/Producer (leptonica)\n");
1740 }
1741 sarrayAddString(sa, buf, L_COPY);
1742 if (lpd->title) {
1743 char *hexstr;
1744 if ((hexstr = generateEscapeString(lpd->title)) != NULL) {
1745 snprintf(buf, sizeof(buf), "/Title %s\n", hexstr);
1746 sarrayAddString(sa, buf, L_COPY);
1747 } else {
1748 L_ERROR("title string is not ascii\n", __func__);
1749 }
1750 LEPT_FREE(hexstr);
1751 }
1752 sarrayAddString(sa, ">>\n"
1753 "endobj\n", L_COPY);
1754 lpd->obj2 = sarrayToString(sa, 0);
1755 l_dnaAddNumber(lpd->objsize, strlen(lpd->obj2));
1756 sarrayDestroy(&sa);
1757
1758 lpd->obj3 = stringNew("3 0 obj\n"
1759 "<<\n"
1760 "/Type /Pages\n"
1761 "/Kids [ 4 0 R ]\n"
1762 "/Count 1\n"
1763 ">>\n");
1764 l_dnaAddNumber(lpd->objsize, strlen(lpd->obj3));
1765
1766 /* Do the post-datastream string */
1767 lpd->poststream = stringNew("\n"
1768 "endstream\n"
1769 "endobj\n");
1770}
1771
1772
/*!
 * \brief   generateEscapeString()
 *
 * \param[in]    str    input string; must be 7-bit ascii
 * \return  hex escape string, or NULL on error or if str contains
 *          a byte outside the ascii range
 *
 * Notes:
 *      (1) The output is "<feff", followed by each input character
 *          written as 4 lower-case hex digits, followed by ">".
 *          The caller owns the returned string.
 *      (2) The ascii test is done on the unsigned byte value, so the
 *          result does not depend on whether plain char is signed
 *          on the platform.
 *      (3) The output is written in a single pass.
 */
static char *
generateEscapeString(const char  *str)
{
char    *buffer;
size_t   i, nchar, buflen, pos;

    if (!str)
        return (char *)ERROR_PTR("str not defined", __func__, NULL);
    nchar = strlen(str);
    for (i = 0; i < nchar; i++) {
        if ((unsigned char)str[i] > 0x7f)
            return (char *)ERROR_PTR("str not all ascii", __func__, NULL);
    }

        /* Need 5 + 4 * nchar + 1 chars plus the terminating null */
    buflen = 4 * nchar + 10;
    if ((buffer = (char *)LEPT_CALLOC(buflen, sizeof(char))) == NULL)
        return (char *)ERROR_PTR("buffer not made", __func__, NULL);

    pos = (size_t)snprintf(buffer, buflen, "<feff");
    for (i = 0; i < nchar; i++) {
        pos += (size_t)snprintf(buffer + pos, buflen - pos, "%04x",
                                (unsigned int)(unsigned char)str[i]);
    }
    snprintf(buffer + pos, buflen - pos, ">");
    return buffer;
}
1815
1816
1817static void
1818generateMediaboxPdf(L_PDF_DATA *lpd)
1819{
1820l_int32 i;
1821l_float32 xpt, ypt, wpt, hpt, maxx, maxy;
1822
1823 /* First get the full extent of all the images.
1824 * This is the mediabox, in pts. */
1825 maxx = maxy = 0;
1826 for (i = 0; i < lpd->n; i++) {
1827 ptaGetPt(lpd->xy, i, &xpt, &ypt);
1828 ptaGetPt(lpd->wh, i, &wpt, &hpt);
1829 maxx = L_MAX(maxx, xpt + wpt);
1830 maxy = L_MAX(maxy, ypt + hpt);
1831 }
1832
1833 lpd->mediabox = boxCreate(0, 0, (l_int32)(maxx + 0.5),
1834 (l_int32)(maxy + 0.5));
1835
1836 /* ypt is in standard image coordinates: the location of
1837 * the UL image corner with respect to the UL media box corner.
1838 * Rewrite each ypt for PostScript coordinates: the location of
1839 * the LL image corner with respect to the LL media box corner. */
1840 for (i = 0; i < lpd->n; i++) {
1841 ptaGetPt(lpd->xy, i, &xpt, &ypt);
1842 ptaGetPt(lpd->wh, i, &wpt, &hpt);
1843 ptaSetPt(lpd->xy, i, xpt, maxy - ypt - hpt);
1844 }
1845}
1846
1847
1848static l_int32
1849generatePageStringPdf(L_PDF_DATA *lpd)
1850{
1851char *buf;
1852char *xstr;
1853l_int32 bufsize, i, wpt, hpt;
1854SARRAY *sa;
1855
1856 /* Allocate 1000 bytes for the boilerplate text, and
1857 * 50 bytes for each reference to an image in the
1858 * ProcSet array. */
1859 bufsize = 1000 + 50 * lpd->n;
1860 if ((buf = (char *)LEPT_CALLOC(bufsize, sizeof(char))) == NULL)
1861 return ERROR_INT("calloc fail for buf", __func__, 1);
1862
1863 boxGetGeometry(lpd->mediabox, NULL, NULL, &wpt, &hpt);
1864 sa = sarrayCreate(lpd->n);
1865 for (i = 0; i < lpd->n; i++) {
1866 snprintf(buf, bufsize, "/Im%d %d 0 R ", i + 1, 6 + i);
1867 sarrayAddString(sa, buf, L_COPY);
1868 }
1869 xstr = sarrayToString(sa, 0);
1870 sarrayDestroy(&sa);
1871 if (!xstr) {
1872 LEPT_FREE(buf);
1873 return ERROR_INT("xstr not made", __func__, 1);
1874 }
1875
1876 snprintf(buf, bufsize, "4 0 obj\n"
1877 "<<\n"
1878 "/Type /Page\n"
1879 "/Parent 3 0 R\n"
1880 "/MediaBox [%d %d %d %d]\n"
1881 "/Contents 5 0 R\n"
1882 "/Resources\n"
1883 "<<\n"
1884 "/XObject << %s >>\n"
1885 "/ProcSet [ /ImageB /ImageI /ImageC ]\n"
1886 ">>\n"
1887 ">>\n"
1888 "endobj\n",
1889 0, 0, wpt, hpt, xstr);
1890
1891 lpd->obj4 = stringNew(buf);
1892 l_dnaAddNumber(lpd->objsize, strlen(lpd->obj4));
1893 sarrayDestroy(&sa);
1894 LEPT_FREE(buf);
1895 LEPT_FREE(xstr);
1896 return 0;
1897}
1898
1899
1900static l_int32
1901generateContentStringPdf(L_PDF_DATA *lpd)
1902{
1903char *buf;
1904char *cstr;
1905l_int32 i, bufsize;
1906l_float32 xpt, ypt, wpt, hpt;
1907SARRAY *sa;
1908
1909 bufsize = 1000 + 200 * lpd->n;
1910 if ((buf = (char *)LEPT_CALLOC(bufsize, sizeof(char))) == NULL)
1911 return ERROR_INT("calloc fail for buf", __func__, 1);
1912
1913 sa = sarrayCreate(lpd->n);
1914 for (i = 0; i < lpd->n; i++) {
1915 ptaGetPt(lpd->xy, i, &xpt, &ypt);
1916 ptaGetPt(lpd->wh, i, &wpt, &hpt);
1917 snprintf(buf, bufsize,
1918 "q %.4f %.4f %.4f %.4f %.4f %.4f cm /Im%d Do Q\n",
1919 wpt, 0.0, 0.0, hpt, xpt, ypt, i + 1);
1920 sarrayAddString(sa, buf, L_COPY);
1921 }
1922 cstr = sarrayToString(sa, 0);
1923 sarrayDestroy(&sa);
1924 if (!cstr) {
1925 LEPT_FREE(buf);
1926 return ERROR_INT("cstr not made", __func__, 1);
1927 }
1928
1929 snprintf(buf, bufsize, "5 0 obj\n"
1930 "<< /Length %d >>\n"
1931 "stream\n"
1932 "%s"
1933 "endstream\n"
1934 "endobj\n",
1935 (l_int32)strlen(cstr), cstr);
1936
1937 lpd->obj5 = stringNew(buf);
1938 l_dnaAddNumber(lpd->objsize, strlen(lpd->obj5));
1939 sarrayDestroy(&sa);
1940 LEPT_FREE(buf);
1941 LEPT_FREE(cstr);
1942 return 0;
1943}
1944
1945
1946static l_int32
1947generatePreXStringsPdf(L_PDF_DATA *lpd)
1948{
1949char buff[256];
1950char buf[L_BIGBUF];
1951char *cstr, *bstr, *fstr, *pstr, *xstr, *photometry;
1952l_int32 i, cmindex;
1953L_COMP_DATA *cid;
1954SARRAY *sa;
1955
1956 sa = lpd->saprex;
1957 cmindex = 6 + lpd->n; /* starting value */
1958 for (i = 0; i < lpd->n; i++) {
1959 pstr = cstr = NULL;
1960 if ((cid = pdfdataGetCid(lpd, i)) == NULL)
1961 return ERROR_INT("cid not found", __func__, 1);
1962
1963 if (cid->type == L_G4_ENCODE) {
1964 if (var_WRITE_G4_IMAGE_MASK) {
1965 cstr = stringNew("/ImageMask true\n"
1966 "/ColorSpace /DeviceGray");
1967 } else {
1968 cstr = stringNew("/ColorSpace /DeviceGray");
1969 }
1970 bstr = stringNew("/BitsPerComponent 1\n"
1971 "/Interpolate true");
1972 /* Note: the reversal is deliberate. The BlackIs1 flag
1973 * is misleadingly named: it says whether to invert the
1974 * image on decoding because the black pixels are 0,
1975 * not whether the black pixels are 1! The default for
1976 * BlackIs1 is "false", which means "don't invert because
1977 * black is 1." Yikes. */
1978 photometry = (cid->minisblack) ? stringNew("true")
1979 : stringNew("false");
1980 snprintf(buff, sizeof(buff),
1981 "/Filter /CCITTFaxDecode\n"
1982 "/DecodeParms\n"
1983 "<<\n"
1984 "/BlackIs1 %s\n"
1985 "/K -1\n"
1986 "/Columns %d\n"
1987 ">>", photometry, cid->w);
1988 fstr = stringNew(buff);
1989 LEPT_FREE(photometry);
1990 } else if (cid->type == L_JPEG_ENCODE) {
1991 if (cid->spp == 1)
1992 cstr = stringNew("/ColorSpace /DeviceGray");
1993 else if (cid->spp == 3)
1994 cstr = stringNew("/ColorSpace /DeviceRGB");
1995 else if (cid->spp == 4) /* pdf supports cmyk */
1996 cstr = stringNew("/ColorSpace /DeviceCMYK");
1997 else
1998 L_ERROR("in jpeg: spp != 1, 3 or 4\n", __func__);
1999 bstr = stringNew("/BitsPerComponent 8");
2000 fstr = stringNew("/Filter /DCTDecode");
2001 } else if (cid->type == L_JP2K_ENCODE) {
2002 if (cid->spp == 1)
2003 cstr = stringNew("/ColorSpace /DeviceGray");
2004 else if (cid->spp == 3)
2005 cstr = stringNew("/ColorSpace /DeviceRGB");
2006 else
2007 L_ERROR("in jp2k: spp != 1 && spp != 3\n", __func__);
2008 bstr = stringNew("/BitsPerComponent 8");
2009 fstr = stringNew("/Filter /JPXDecode");
2010 } else { /* type == L_FLATE_ENCODE */
2011 if (cid->ncolors > 0) { /* cmapped */
2012 snprintf(buff, sizeof(buff), "/ColorSpace %d 0 R", cmindex++);
2013 cstr = stringNew(buff);
2014 } else {
2015 if (cid->spp == 1 && cid->bps == 1)
2016 cstr = stringNew("/ColorSpace /DeviceGray\n"
2017 "/Decode [1 0]");
2018 else if (cid->spp == 1) /* 8 bpp */
2019 cstr = stringNew("/ColorSpace /DeviceGray");
2020 else if (cid->spp == 3)
2021 cstr = stringNew("/ColorSpace /DeviceRGB");
2022 else
2023 L_ERROR("unknown colorspace: spp = %d\n",
2024 __func__, cid->spp);
2025 }
2026 snprintf(buff, sizeof(buff), "/BitsPerComponent %d", cid->bps);
2027 bstr = stringNew(buff);
2028 fstr = stringNew("/Filter /FlateDecode");
2029 if (cid->predictor == TRUE) {
2030 snprintf(buff, sizeof(buff),
2031 "/DecodeParms\n"
2032 "<<\n"
2033 " /Columns %d\n"
2034 " /Predictor 14\n"
2035 " /Colors %d\n"
2036 " /BitsPerComponent %d\n"
2037 ">>\n", cid->w, cid->spp, cid->bps);
2038 pstr = stringNew(buff);
2039 }
2040 }
2041 if (!pstr) /* no decode parameters */
2042 pstr = stringNew("");
2043
2044 snprintf(buf, sizeof(buf),
2045 "%d 0 obj\n"
2046 "<<\n"
2047 "/Length %zu\n"
2048 "/Subtype /Image\n"
2049 "%s\n" /* colorspace */
2050 "/Width %d\n"
2051 "/Height %d\n"
2052 "%s\n" /* bits/component */
2053 "%s\n" /* filter */
2054 "%s" /* decode parms; can be empty */
2055 ">>\n"
2056 "stream\n",
2057 6 + i, cid->nbytescomp, cstr,
2058 cid->w, cid->h, bstr, fstr, pstr);
2059 xstr = stringNew(buf);
2060 sarrayAddString(sa, xstr, L_INSERT);
2061 l_dnaAddNumber(lpd->objsize,
2062 strlen(xstr) + cid->nbytescomp + strlen(lpd->poststream));
2063 LEPT_FREE(cstr);
2064 LEPT_FREE(bstr);
2065 LEPT_FREE(fstr);
2066 LEPT_FREE(pstr);
2067 }
2068
2069 return 0;
2070}
2071
2072
2073static l_int32
2074generateColormapStringsPdf(L_PDF_DATA *lpd)
2075{
2076char buf[L_BIGBUF];
2077char *cmstr;
2078l_int32 i, cmindex, ncmap;
2079L_COMP_DATA *cid;
2080SARRAY *sa;
2081
2082 /* In our canonical format, we have 5 objects, followed
2083 * by n XObjects, followed by m colormaps, so the index of
2084 * the first colormap object is 6 + n. */
2085 sa = lpd->sacmap;
2086 cmindex = 6 + lpd->n; /* starting value */
2087 ncmap = 0;
2088 for (i = 0; i < lpd->n; i++) {
2089 if ((cid = pdfdataGetCid(lpd, i)) == NULL)
2090 return ERROR_INT("cid not found", __func__, 1);
2091 if (cid->ncolors == 0) continue;
2092
2093 ncmap++;
2094 snprintf(buf, sizeof(buf), "%d 0 obj\n"
2095 "[ /Indexed /DeviceRGB\n"
2096 "%d\n"
2097 "%s\n"
2098 "]\n"
2099 "endobj\n",
2100 cmindex, cid->ncolors - 1, cid->cmapdatahex);
2101 cmindex++;
2102 cmstr = stringNew(buf);
2103 l_dnaAddNumber(lpd->objsize, strlen(cmstr));
2104 sarrayAddString(sa, cmstr, L_INSERT);
2105 }
2106
2107 lpd->ncmap = ncmap;
2108 return 0;
2109}
2110
2111
2112static void
2113generateTrailerPdf(L_PDF_DATA *lpd)
2114{
2115l_int32 i, n, size, linestart;
2116L_DNA *daloc, *dasize;
2117
2118 /* Let nobj be the number of numbered objects. These numbered
2119 * objects are indexed by their pdf number in arrays naloc[]
2120 * and nasize[]. The 0th object is the 9 byte header. Then
2121 * the number of objects in nasize, which includes the header,
2122 * is n = nobj + 1. The array naloc[] has n + 1 elements,
2123 * because it includes as the last element the starting
2124 * location of xref. The indexing of these objects, their
2125 * starting locations and sizes are:
2126 *
2127 * Object number Starting location Size
2128 * ------------- ----------------- --------------
2129 * 0 daloc[0] = 0 dasize[0] = 9
2130 * 1 daloc[1] = 9 dasize[1] = 49
2131 * n daloc[n] dasize[n]
2132 * xref daloc[n+1]
2133 *
2134 * We first generate daloc.
2135 */
2136 dasize = lpd->objsize;
2137 daloc = lpd->objloc;
2138 linestart = 0;
2139 l_dnaAddNumber(daloc, linestart); /* header */
2140 n = l_dnaGetCount(dasize);
2141 for (i = 0; i < n; i++) {
2142 l_dnaGetIValue(dasize, i, &size);
2143 linestart += size;
2144 l_dnaAddNumber(daloc, linestart);
2145 }
2146 l_dnaGetIValue(daloc, n, &lpd->xrefloc); /* save it */
2147
2148 /* Now make the actual trailer string */
2149 lpd->trailer = makeTrailerStringPdf(daloc);
2150}
2151
2152
2153static char *
2154makeTrailerStringPdf(L_DNA *daloc)
2155{
2156char *outstr;
2157char buf[L_BIGBUF];
2158l_int32 i, n, linestart, xrefloc;
2159SARRAY *sa;
2160
2161 if (!daloc)
2162 return (char *)ERROR_PTR("daloc not defined", __func__, NULL);
2163 n = l_dnaGetCount(daloc) - 1; /* numbered objects + 1 (yes, +1) */
2164
2165 sa = sarrayCreate(0);
2166 snprintf(buf, sizeof(buf), "xref\n"
2167 "0 %d\n"
2168 "0000000000 65535 f \n", n);
2169 sarrayAddString(sa, buf, L_COPY);
2170 for (i = 1; i < n; i++) {
2171 l_dnaGetIValue(daloc, i, &linestart);
2172 snprintf(buf, sizeof(buf), "%010d 00000 n \n", linestart);
2173 sarrayAddString(sa, buf, L_COPY);
2174 }
2175
2176 l_dnaGetIValue(daloc, n, &xrefloc);
2177 snprintf(buf, sizeof(buf), "trailer\n"
2178 "<<\n"
2179 "/Size %d\n"
2180 "/Root 1 0 R\n"
2181 "/Info 2 0 R\n"
2182 ">>\n"
2183 "startxref\n"
2184 "%d\n"
2185 "%%%%EOF\n", n, xrefloc);
2186 sarrayAddString(sa, buf, L_COPY);
2187 outstr = sarrayToString(sa, 0);
2188 sarrayDestroy(&sa);
2189 return outstr;
2190}
2191
2192
2206static l_int32
2208 size_t *pnbytes,
2209 L_PDF_DATA *lpd)
2210{
2211char *str;
2212l_uint8 *data;
2213l_int32 nimages, i, len;
2214l_int32 *sizes, *locs;
2215size_t nbytes;
2216L_COMP_DATA *cid;
2217
2218 if (!pdata)
2219 return ERROR_INT("&data not defined", __func__, 1);
2220 *pdata = NULL;
2221 if (!pnbytes)
2222 return ERROR_INT("&nbytes not defined", __func__, 1);
2223 nbytes = lpd->xrefloc + strlen(lpd->trailer);
2224 *pnbytes = nbytes;
2225 if ((data = (l_uint8 *)LEPT_CALLOC(nbytes, sizeof(l_uint8))) == NULL)
2226 return ERROR_INT("calloc fail for data", __func__, 1);
2227 *pdata = data;
2228
2229 sizes = l_dnaGetIArray(lpd->objsize);
2230 locs = l_dnaGetIArray(lpd->objloc);
2231 memcpy(data, lpd->id, sizes[0]);
2232 memcpy(data + locs[1], lpd->obj1, sizes[1]);
2233 memcpy(data + locs[2], lpd->obj2, sizes[2]);
2234 memcpy(data + locs[3], lpd->obj3, sizes[3]);
2235 memcpy(data + locs[4], lpd->obj4, sizes[4]);
2236 memcpy(data + locs[5], lpd->obj5, sizes[5]);
2237
2238 /* Each image has 3 parts: variable preamble, the compressed
2239 * data stream, and the fixed poststream. */
2240 nimages = lpd->n;
2241 for (i = 0; i < nimages; i++) {
2242 if ((cid = pdfdataGetCid(lpd, i)) == NULL) { /* should not happen */
2243 LEPT_FREE(sizes);
2244 LEPT_FREE(locs);
2245 return ERROR_INT("cid not found", __func__, 1);
2246 }
2247 str = sarrayGetString(lpd->saprex, i, L_NOCOPY);
2248 len = strlen(str);
2249 memcpy(data + locs[6 + i], str, len);
2250 memcpy(data + locs[6 + i] + len,
2251 cid->datacomp, cid->nbytescomp);
2252 memcpy(data + locs[6 + i] + len + cid->nbytescomp,
2253 lpd->poststream, strlen(lpd->poststream));
2254 }
2255
2256 /* Each colormap is simply a stored string */
2257 for (i = 0; i < lpd->ncmap; i++) {
2258 str = sarrayGetString(lpd->sacmap, i, L_NOCOPY);
2259 memcpy(data + locs[6 + nimages + i], str, strlen(str));
2260 }
2261
2262 /* And finally the trailer */
2263 memcpy(data + lpd->xrefloc, lpd->trailer, strlen(lpd->trailer));
2264 LEPT_FREE(sizes);
2265 LEPT_FREE(locs);
2266 return 0;
2267}
2268
2269
2270/*---------------------------------------------------------------------*
2271 * Helper functions for generating multipage pdf output *
2272 *---------------------------------------------------------------------*/
2280static l_int32
2282 L_DNA **pda)
2283{
2284char *str;
2285l_uint8 nl = '\n';
2286l_uint8 *data;
2287l_int32 i, j, start, startloc, xrefloc, found, loc, nobj, objno, trailer_ok;
2288size_t size;
2289L_DNA *da, *daobj, *daxref;
2290SARRAY *sa;
2291
2292 if (!pda)
2293 return ERROR_INT("&da not defined", __func__, 1);
2294 *pda = NULL;
2295 if (!bas)
2296 return ERROR_INT("bas not defined", __func__, 1);
2297 data = l_byteaGetData(bas, &size);
2298 if (memcmp(data, "%PDF-1.", 7) != 0)
2299 return ERROR_INT("PDF header signature not found", __func__, 1);
2300
2301 /* Search for "startxref" starting 50 bytes from the EOF */
2302 start = 0;
2303 if (size > 50)
2304 start = size - 50;
2305 arrayFindSequence(data + start, size - start,
2306 (l_uint8 *)"startxref\n", 10, &loc, &found);
2307 if (!found)
2308 return ERROR_INT("startxref not found!", __func__, 1);
2309 if (sscanf((char *)(data + start + loc + 10), "%d\n", &xrefloc) != 1)
2310 return ERROR_INT("xrefloc not found!", __func__, 1);
2311 if (xrefloc < 0 || xrefloc >= size)
2312 return ERROR_INT("invalid xrefloc!", __func__, 1);
2313 sa = sarrayCreateLinesFromString((char *)(data + xrefloc), 0);
2314 str = sarrayGetString(sa, 1, L_NOCOPY);
2315 if ((sscanf(str, "0 %d", &nobj)) != 1) {
2316 sarrayDestroy(&sa);
2317 return ERROR_INT("nobj not found", __func__, 1);
2318 }
2319
2320 /* Get starting locations. The numa index is the
2321 * object number. loc[0] is the ID; loc[nobj + 1] is xrefloc. */
2322 da = l_dnaCreate(nobj + 1);
2323 *pda = da;
2324 for (i = 0; i < nobj; i++) {
2325 str = sarrayGetString(sa, i + 2, L_NOCOPY);
2326 sscanf(str, "%d", &startloc);
2327 l_dnaAddNumber(da, startloc);
2328 }
2329 l_dnaAddNumber(da, xrefloc);
2330
2331#if DEBUG_MULTIPAGE
2332 lept_stderr("************** Trailer string ************\n");
2333 lept_stderr("xrefloc = %d", xrefloc);
2334 sarrayWriteStderr(sa);
2335
2336 lept_stderr("************** Object locations ************");
2337 l_dnaWriteStderr(da);
2338#endif /* DEBUG_MULTIPAGE */
2339 sarrayDestroy(&sa);
2340
2341 /* Verify correct parsing */
2342 trailer_ok = TRUE;
2343 for (i = 1; i < nobj; i++) {
2344 l_dnaGetIValue(da, i, &startloc);
2345 if ((sscanf((char *)(data + startloc), "%d 0 obj", &objno)) != 1) {
2346 L_ERROR("bad trailer for object %d\n", __func__, i);
2347 trailer_ok = FALSE;
2348 break;
2349 }
2350 }
2351
2352 /* If the trailer is broken, reconstruct the correct obj locations */
2353 if (!trailer_ok) {
2354 L_INFO("rebuilding pdf trailer\n", __func__);
2355 l_dnaEmpty(da);
2356 l_dnaAddNumber(da, 0);
2357 l_byteaFindEachSequence(bas, (l_uint8 *)" 0 obj\n", 7, &daobj);
2358 nobj = l_dnaGetCount(daobj);
2359 for (i = 0; i < nobj; i++) {
2360 l_dnaGetIValue(daobj, i, &loc);
2361 for (j = loc - 1; j > 0; j--) {
2362 if (data[j] == nl)
2363 break;
2364 }
2365 l_dnaAddNumber(da, j + 1);
2366 }
2367 l_byteaFindEachSequence(bas, (l_uint8 *)"xref", 4, &daxref);
2368 l_dnaGetIValue(daxref, 0, &loc);
2369 l_dnaAddNumber(da, loc);
2370 l_dnaDestroy(&daobj);
2371 l_dnaDestroy(&daxref);
2372 }
2373
2374 return 0;
2375}
2376
2377
2378static char *
2379generatePagesObjStringPdf(NUMA *napage)
2380{
2381char *str;
2382char *buf;
2383l_int32 i, n, index, bufsize;
2384SARRAY *sa;
2385
2386 if (!napage)
2387 return (char *)ERROR_PTR("napage not defined", __func__, NULL);
2388
2389 n = numaGetCount(napage);
2390 bufsize = 100 + 16 * n; /* large enough to hold the output string */
2391 buf = (char *)LEPT_CALLOC(bufsize, sizeof(char));
2392 sa = sarrayCreate(n);
2393 for (i = 0; i < n; i++) {
2394 numaGetIValue(napage, i, &index);
2395 snprintf(buf, bufsize, " %d 0 R ", index);
2396 sarrayAddString(sa, buf, L_COPY);
2397 }
2398
2399 str = sarrayToString(sa, 0);
2400 snprintf(buf, bufsize - 1, "3 0 obj\n"
2401 "<<\n"
2402 "/Type /Pages\n"
2403 "/Kids [%s]\n"
2404 "/Count %d\n"
2405 ">>\n"
2406 "endobj\n",
2407 str, n);
2408 sarrayDestroy(&sa);
2409 LEPT_FREE(str);
2410 return buf;
2411}
2412
2413
2431static L_BYTEA *
2433 NUMA *na_objs)
2434{
2435l_uint8 space = ' ';
2436l_uint8 *datas;
2437l_uint8 buf[32]; /* only needs to hold one integer in ascii format */
2438l_int32 start, nrepl, i, j, nobjs, objin, objout, found;
2439l_int32 *objs, *matches;
2440size_t size;
2441L_BYTEA *bad;
2442L_DNA *da_match;
2443
2444 if (!bas)
2445 return (L_BYTEA *)ERROR_PTR("bas not defined", __func__, NULL);
2446 if (!na_objs)
2447 return (L_BYTEA *)ERROR_PTR("na_objs not defined", __func__, NULL);
2448
2449 datas = l_byteaGetData(bas, &size);
2450 bad = l_byteaCreate(100);
2451 objs = numaGetIArray(na_objs); /* object number mapper */
2452 nobjs = numaGetCount(na_objs); /* use for sanity checking */
2453
2454 /* Substitute the object number on the first line */
2455 sscanf((char *)datas, "%d", &objin);
2456 if (objin < 0 || objin >= nobjs) {
2457 L_ERROR("index %d into array of size %d\n", __func__, objin, nobjs);
2458 LEPT_FREE(objs);
2459 return bad;
2460 }
2461 objout = objs[objin];
2462 snprintf((char *)buf, 32, "%d", objout);
2463 l_byteaAppendString(bad, (char *)buf);
2464
2465 /* Find the set of matching locations for object references */
2466 arrayFindSequence(datas, size, &space, 1, &start, &found);
2467 da_match = arrayFindEachSequence(datas, size, (l_uint8 *)" 0 R", 4);
2468 if (!da_match) {
2469 l_byteaAppendData(bad, datas + start, size - start);
2470 LEPT_FREE(objs);
2471 return bad;
2472 }
2473
2474 /* Substitute all the object reference numbers */
2475 nrepl = l_dnaGetCount(da_match);
2476 matches = l_dnaGetIArray(da_match);
2477 for (i = 0; i < nrepl; i++) {
2478 /* Find the first space before the object number */
2479 for (j = matches[i] - 1; j > 0; j--) {
2480 if (datas[j] == space)
2481 break;
2482 }
2483 /* Copy bytes from 'start' up to the object number */
2484 l_byteaAppendData(bad, datas + start, j - start + 1);
2485 sscanf((char *)(datas + j + 1), "%d", &objin);
2486 if (objin < 0 || objin >= nobjs) {
2487 L_ERROR("index %d into array of size %d\n", __func__, objin, nobjs);
2488 LEPT_FREE(objs);
2489 LEPT_FREE(matches);
2490 l_dnaDestroy(&da_match);
2491 return bad;
2492 }
2493 objout = objs[objin];
2494 snprintf((char *)buf, 32, "%d", objout);
2495 l_byteaAppendString(bad, (char *)buf);
2496 start = matches[i];
2497 }
2498 l_byteaAppendData(bad, datas + start, size - start);
2499
2500 LEPT_FREE(objs);
2501 LEPT_FREE(matches);
2502 l_dnaDestroy(&da_match);
2503 return bad;
2504}
2505
2506
2507/*---------------------------------------------------------------------*
2508 * Create/destroy/access pdf data *
2509 *---------------------------------------------------------------------*/
2510static L_PDF_DATA *
2511pdfdataCreate(const char *title)
2512{
2513L_PDF_DATA *lpd;
2514
2515 lpd = (L_PDF_DATA *)LEPT_CALLOC(1, sizeof(L_PDF_DATA));
2516 if (title) lpd->title = stringNew(title);
2517 lpd->cida = ptraCreate(10);
2518 lpd->xy = ptaCreate(10);
2519 lpd->wh = ptaCreate(10);
2520 lpd->saprex = sarrayCreate(10);
2521 lpd->sacmap = sarrayCreate(10);
2522 lpd->objsize = l_dnaCreate(20);
2523 lpd->objloc = l_dnaCreate(20);
2524 return lpd;
2525}
2526
2527static void
2528pdfdataDestroy(L_PDF_DATA **plpd)
2529{
2530l_int32 i;
2531L_COMP_DATA *cid;
2532L_PDF_DATA *lpd;
2533
2534 if (plpd== NULL) {
2535 L_WARNING("ptr address is null!\n", __func__);
2536 return;
2537 }
2538 if ((lpd = *plpd) == NULL)
2539 return;
2540
2541 if (lpd->title) LEPT_FREE(lpd->title);
2542 for (i = 0; i < lpd->n; i++) {
2543 cid = (L_COMP_DATA *)ptraRemove(lpd->cida, i, L_NO_COMPACTION);
2544 l_CIDataDestroy(&cid);
2545 }
2546
2547 ptraDestroy(&lpd->cida, 0, 0);
2548 if (lpd->id) LEPT_FREE(lpd->id);
2549 if (lpd->obj1) LEPT_FREE(lpd->obj1);
2550 if (lpd->obj2) LEPT_FREE(lpd->obj2);
2551 if (lpd->obj3) LEPT_FREE(lpd->obj3);
2552 if (lpd->obj4) LEPT_FREE(lpd->obj4);
2553 if (lpd->obj5) LEPT_FREE(lpd->obj5);
2554 if (lpd->poststream) LEPT_FREE(lpd->poststream);
2555 if (lpd->trailer) LEPT_FREE(lpd->trailer);
2556 if (lpd->xy) ptaDestroy(&lpd->xy);
2557 if (lpd->wh) ptaDestroy(&lpd->wh);
2558 if (lpd->mediabox) boxDestroy(&lpd->mediabox);
2559 if (lpd->saprex) sarrayDestroy(&lpd->saprex);
2560 if (lpd->sacmap) sarrayDestroy(&lpd->sacmap);
2561 if (lpd->objsize) l_dnaDestroy(&lpd->objsize);
2562 if (lpd->objloc) l_dnaDestroy(&lpd->objloc);
2563 LEPT_FREE(lpd);
2564 *plpd = NULL;
2565}
2566
2567
2568static L_COMP_DATA *
2569pdfdataGetCid(L_PDF_DATA *lpd,
2570 l_int32 index)
2571{
2572 if (!lpd)
2573 return (L_COMP_DATA *)ERROR_PTR("lpd not defined", __func__, NULL);
2574 if (index < 0 || index >= lpd->n)
2575 return (L_COMP_DATA *)ERROR_PTR("invalid image index", __func__, NULL);
2576
2577 return (L_COMP_DATA *)ptraGetPtrToItem(lpd->cida, index);
2578}
2579
2580
2581/*---------------------------------------------------------------------*
2582 * Set flags for special modes *
2583 *---------------------------------------------------------------------*/
2598void
2600{
2601 var_WRITE_G4_IMAGE_MASK = flag;
2602}
2603
2604
2618void
2620{
2621 var_WRITE_DATE_AND_VERSION = flag;
2622}
2623
2624/* --------------------------------------------*/
2625#endif /* USE_PDFIO */
2626/* --------------------------------------------*/
struct Numaa NUMAA
Definition array.h:69
struct L_Dnaa L_DNAA
Definition array.h:75
struct L_Dna L_DNA
Definition array.h:72
struct Numa NUMA
Definition array.h:66
struct L_Bytea L_BYTEA
Definition array.h:84
struct Sarray SARRAY
Definition array.h:81
@ L_FIRST_IMAGE
Definition imageio.h:208
@ L_LAST_IMAGE
Definition imageio.h:210
@ L_FLATE_ENCODE
Definition imageio.h:161
@ L_G4_ENCODE
Definition imageio.h:160
@ L_JP2K_ENCODE
Definition imageio.h:162
@ L_JPEG_ENCODE
Definition imageio.h:159
void l_CIDataDestroy(L_COMP_DATA **pcid)
l_CIDataDestroy()
Definition pdfio2.c:1638
L_COMP_DATA * l_generateJpegDataMem(l_uint8 *data, size_t nbytes, l_int32 ascii85flag)
l_generateJpegDataMem()
Definition pdfio2.c:987
l_ok pixGenerateCIData(PIX *pixs, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid)
pixGenerateCIData()
Definition pdfio2.c:1188
static L_COMP_DATA * pixGenerateFlateData(PIX *pixs, l_int32 ascii85flag)
pixGenerateFlateData()
Definition pdfio2.c:1325
L_COMP_DATA * l_generateFlateData(const char *fname, l_int32 ascii85flag)
l_generateFlateData()
Definition pdfio2.c:1290
l_ok pixConvertToPdfData(PIX *pix, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
pixConvertToPdfData()
Definition pdfio2.c:190
L_COMP_DATA * l_generateJpegData(const char *fname, l_int32 ascii85flag)
l_generateJpegData()
Definition pdfio2.c:913
static char * generateEscapeString(const char *str)
generateEscapeString()
Definition pdfio2.c:1791
L_COMP_DATA * l_generateG4Data(const char *fname, l_int32 ascii85flag)
l_generateG4Data()
Definition pdfio2.c:1099
static L_COMP_DATA * pixGenerateJpegData(PIX *pixs, l_int32 ascii85flag, l_int32 quality)
pixGenerateJpegData()
Definition pdfio2.c:1453
void l_pdfSetDateAndVersion(l_int32 flag)
l_pdfSetDateAndVersion()
Definition pdfio2.c:2619
static L_BYTEA * substituteObjectNumbers(L_BYTEA *bas, NUMA *na_objs)
substituteObjectNumbers()
Definition pdfio2.c:2432
l_ok convertTiffMultipageToPdf(const char *filein, const char *fileout)
convertTiffMultipageToPdf()
Definition pdfio2.c:480
l_ok l_generateCIData(const char *fname, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid)
l_generateCIData()
Definition pdfio2.c:614
l_ok ptraConcatenatePdfToData(L_PTRA *pa_data, SARRAY *sa, l_uint8 **pdata, size_t *pnbytes)
ptraConcatenatePdfToData()
Definition pdfio2.c:318
static l_int32 parseTrailerPdf(L_BYTEA *bas, L_DNA **pda)
parseTrailerPdf()
Definition pdfio2.c:2281
static L_COMP_DATA * pixGenerateG4Data(PIX *pixs, l_int32 ascii85flag)
pixGenerateG4Data()
Definition pdfio2.c:1547
void l_pdfSetG4ImageMask(l_int32 flag)
l_pdfSetG4ImageMask()
Definition pdfio2.c:2599
l_ok cidConvertToPdfData(L_COMP_DATA *cid, const char *title, l_uint8 **pdata, size_t *pnbytes)
cidConvertToPdfData()
Definition pdfio2.c:1591
static L_COMP_DATA * l_generateJp2kData(const char *fname)
l_generateJp2kData()
Definition pdfio2.c:1047
L_COMP_DATA * l_generateFlateDataPdf(const char *fname, PIX *pixs)
l_generateFlateDataPdf()
Definition pdfio2.c:716
static l_int32 l_generatePdf(l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd)
l_generatePdf()
Definition pdfio2.c:1683
l_ok l_generateCIDataForPdf(const char *fname, PIX *pix, l_int32 quality, L_COMP_DATA **pcid)
l_generateCIDataForPdf()
Definition pdfio2.c:532
static l_int32 generateOutputDataPdf(l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd)
generateOutputDataPdf()
Definition pdfio2.c:2207
static L_COMP_DATA * pixGenerateJp2kData(PIX *pixs, l_int32 quality)
pixGenerateJp2kData()
Definition pdfio2.c:1501
@ L_COPY
Definition pix.h:505
@ L_CLONE
Definition pix.h:506
@ L_NOCOPY
Definition pix.h:503
@ L_INSERT
Definition pix.h:504
struct Pix PIX
Definition pix.h:228
struct PixColormap PIXCMAP
Definition pix.h:231
struct Pixa PIXA
Definition pix.h:243
@ L_NO_COMPACTION
Definition ptra.h:79
l_int32 ncolors
Definition imageio.h:190
l_int32 predictor
Definition imageio.h:196
char * cmapdatahex
Definition imageio.h:189
l_uint8 * datacomp
Definition imageio.h:184
size_t nbytescomp
Definition imageio.h:185
l_int32 minisblack
Definition imageio.h:195
char * cmapdata85
Definition imageio.h:188
l_int32 xrefloc
Definition imageio.h:246
char * poststream
Definition imageio.h:237
struct Sarray * saprex
Definition imageio.h:242
struct L_Ptra * cida
Definition imageio.h:230
struct Pta * xy
Definition imageio.h:239
l_int32 ncmap
Definition imageio.h:229
char * obj2
Definition imageio.h:233
char * trailer
Definition imageio.h:238
char * obj1
Definition imageio.h:232
struct Sarray * sacmap
Definition imageio.h:243
l_int32 n
Definition imageio.h:228
struct L_Dna * objsize
Definition imageio.h:244
struct L_Dna * objloc
Definition imageio.h:245
char * title
Definition imageio.h:227
char * id
Definition imageio.h:231
struct Pta * wh
Definition imageio.h:240
char * obj5
Definition imageio.h:236
char * obj4
Definition imageio.h:235
struct Box * mediabox
Definition imageio.h:241
char * obj3
Definition imageio.h:234