|
Leptonica
1.54
|
#include "alltypes.h" Macros | |
| #define | LIBLEPT_MAJOR_VERSION 1 |
| #define | LIBLEPT_MINOR_VERSION 73 |
| #define LIBLEPT_MAJOR_VERSION 1 |
| #define LIBLEPT_MINOR_VERSION 73 |
| LEPT_DLL l_int32 addColorizedGrayToCmap | ( | PIXCMAP * | cmap, |
| l_int32 | type, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| NUMA ** | pna | ||
| ) |
Input: cmap (from 2 or 4 bpp pix)
type (L_PAINT_LIGHT, L_PAINT_DARK)
rval, gval, bval (target color)
&na (<optional return> table for mapping new cmap entries)
Return: 0 if OK; 1 on error; 2 if new colors will not fit in cmap.
Notes: (1) If type == L_PAINT_LIGHT, it colorizes non-black pixels, preserving antialiasing. If type == L_PAINT_DARK, it colorizes non-white pixels, preserving antialiasing. (2) This increases the colormap size by the number of different gray (non-black or non-white) colors in the input colormap. If there is not enough room in the colormap for this expansion, it returns 2 (treated as a warning); the caller should check the return value. (3) This can be used to determine if the new colors will fit in the cmap, using null for &na. Returns 0 if they fit; 2 if they don't fit. (4) The mapping table contains, for each gray color found, the index of the corresponding colorized pixel. Non-gray pixels are assigned the invalid index 256. (5) See pixColorGrayCmap() for usage.
| LEPT_DLL l_int32 adjacentOnPixelInRaster | ( | PIX * | pixs, |
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 * | pxa, | ||
| l_int32 * | pya | ||
| ) |
Input: pixs (1 bpp)
x, y (current pixel)
xa, ya (adjacent ON pixel, found by simple CCW search)
Return: 1 if a pixel is found; 0 otherwise or on error
Notes: (1) Search is in 4-connected directions first; then on diagonals. This allows traversal along a 4-connected boundary.
Input: vc (vector of 6 coefficients)
*vci (<return> inverted transform)
Return: 0 if OK; 1 on error
Notes: (1) The 6 affine transform coefficients are the first two rows of a 3x3 matrix where the last row has only a 1 in the third column. We invert this using gaussjordan(), and select the first 2 rows as the coefficients of the inverse affine transform. (2) Alternatively, we can find the inverse transform coefficients by inverting the 2x2 submatrix, and treating the top 2 coefficients in the 3rd column as a RHS vector for that 2x2 submatrix. Then the 6 inverted transform coefficients are composed of the inverted 2x2 submatrix and the negative of the transformed RHS vector. Why is this so? We have Y = AX + R (2 equations in 6 unknowns) Then X = A'Y - A'R Gauss-jordan solves AF = R and puts the solution for F, which is A'R, into the input R vector.
| LEPT_DLL l_int32 affineXformPt | ( | l_float32 * | vc, |
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_float32 * | pxp, | ||
| l_float32 * | pyp | ||
| ) |
Input: vc (vector of 6 coefficients)
(x, y) (initial point)
(&xp, &yp) (<return> transformed point)
Return: 0 if OK; 1 on error
Notes: (1) This computes the floating point location of the transformed point. (2) It does not check ptrs for returned data!
| LEPT_DLL l_int32 affineXformSampledPt | ( | l_float32 * | vc, |
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 * | pxp, | ||
| l_int32 * | pyp | ||
| ) |
Input: vc (vector of 6 coefficients)
(x, y) (initial point)
(&xp, &yp) (<return> transformed point)
Return: 0 if OK; 1 on error
Notes: (1) This finds the nearest pixel coordinates of the transformed point. (2) It does not check ptrs for returned data!
| LEPT_DLL char* appendSubdirs | ( | const char * | basedir, |
| const char * | subdirs | ||
| ) |
Input: basedir
subdirs
Return: concatenated full directory path without trailing slash,
or null on error
Notes: (1) Use unix pathname separators (2) Allocates a new string: <basedir>/<subdirs>
| LEPT_DLL l_int32 applyCubicFit | ( | l_float32 | a, |
| l_float32 | b, | ||
| l_float32 | c, | ||
| l_float32 | d, | ||
| l_float32 | x, | ||
| l_float32 * | py | ||
| ) |
Input: a, b, c, d (cubic fit coefficients)
x
&y (<return> y = a * x^3 + b * x^2 + c * x + d)
Return: 0 if OK, 1 on error
Input: a, b (linear fit coefficients)
x
&y (<return> y = a * x + b)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 applyQuadraticFit | ( | l_float32 | a, |
| l_float32 | b, | ||
| l_float32 | c, | ||
| l_float32 | x, | ||
| l_float32 * | py | ||
| ) |
Input: a, b, c (quadratic fit coefficients)
x
&y (<return> y = a * x^2 + b * x + c)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 applyQuarticFit | ( | l_float32 | a, |
| l_float32 | b, | ||
| l_float32 | c, | ||
| l_float32 | d, | ||
| l_float32 | e, | ||
| l_float32 | x, | ||
| l_float32 * | py | ||
| ) |
Input: a, b, c, d, e (quartic fit coefficients)
x
&y (<return> y = a * x^4 + b * x^3 + c * x^2 + d * x + e)
Return: 0 if OK, 1 on error
| LEPT_DLL L_DNA* arrayFindEachSequence | ( | const l_uint8 * | data, |
| size_t | datalen, | ||
| const l_uint8 * | sequence, | ||
| size_t | seqlen | ||
| ) |
Input: data (byte array)
datalen (length of data, in bytes)
sequence (subarray of bytes to find in data)
seqlen (length of sequence, in bytes)
Return: dna of offsets where the sequence is found, or null if
none are found or on error
Notes: (1) The byte arrays @data and @sequence are not C strings, as they can contain null bytes. Therefore, for each we must give the length of the array. (2) This finds every occurrence in @data of @sequence.
| LEPT_DLL l_int32 arrayFindSequence | ( | const l_uint8 * | data, |
| size_t | datalen, | ||
| const l_uint8 * | sequence, | ||
| size_t | seqlen, | ||
| l_int32 * | poffset, | ||
| l_int32 * | pfound | ||
| ) |
Input: data (byte array)
datalen (length of data, in bytes)
sequence (subarray of bytes to find in data)
seqlen (length of sequence, in bytes)
&offset (<return> offset from beginning of
data where the sequence begins)
&found (<return> 1 if sequence is found; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) The byte arrays 'data' and 'sequence' are not C strings, as they can contain null bytes. Therefore, for each we must give the length of the array. (2) This searches for the first occurrence in @data of @sequence, which consists of @seqlen bytes. The parameter @seqlen must not exceed the actual length of the @sequence byte array. (3) If the sequence is not found, the offset will be 0, so you must check @found.
Input: barstr (string of integers in set {1,2,3,4} of bar widths)
format (L_BF_ANY, L_BF_CODEI2OF5, L_BF_CODE93, ...)
debugflag (use 1 to generate debug output)
Return: data (string of decoded barcode data), or null on error
Input: format Return: 1 if format is one of those supported; 0 otherwise
Input: buffer address in memory (<optional>)
size of byte array to be alloc'd (0 for default)
Return: bbuffer, or null on error
Notes: (1) If a buffer address is given, you should read all the data in. (2) Allocates a bbuffer with associated byte array of the given size. If a buffer address is given, it then reads the number of bytes into the byte array.
Input: &bbuffer (<to be nulled>) Return: void
Notes: (1) Destroys the byte array in the bbuffer and then the bbuffer; then nulls the contents of the input ptr.
Input: &bbuffer (<to be nulled>)
&nbytes (<return> number of bytes saved in array)
Return: barray (newly allocated array of data)
Notes: (1) Copies data to newly allocated array; then destroys the bbuffer.
Input: bbuffer
nbytes (number of bytes to extend array size)
Return: 0 if OK, 1 on error
Notes: (1) reallocNew() copies all bb->nalloc bytes, even though only bb->n are data.
Input: bbuffer
src (source memory buffer from which bytes are read)
nbytes (bytes to be read)
Return: 0 if OK, 1 on error
Notes: (1) For a read after write, first remove the written bytes by shifting the unwritten bytes in the array, then check if there is enough room to add the new bytes. If not, realloc with bbufferExpandArray(), resulting in a second writing of the unwritten bytes. While less efficient, this is simpler than making a special case of reallocNew().
Input: bbuffer
fp (source stream from which bytes are read)
nbytes (bytes to be read)
Return: 0 if OK, 1 on error
Input: bbuffer
dest (dest memory buffer to which bytes are written)
nbytes (bytes requested to be written)
&nout (<return> bytes actually written)
Return: 0 if OK, 1 on error
Input: bbuffer
fp (dest stream to which bytes are written)
nbytes (bytes requested to be written)
&nout (<return> bytes actually written)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 bilinearXformPt | ( | l_float32 * | vc, |
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_float32 * | pxp, | ||
| l_float32 * | pyp | ||
| ) |
Input: vc (vector of 8 coefficients)
(x, y) (initial point)
(&xp, &yp) (<return> transformed point)
Return: 0 if OK; 1 on error
Notes: (1) This computes the floating point location of the transformed point. (2) It does not check ptrs for returned data!
| LEPT_DLL l_int32 bilinearXformSampledPt | ( | l_float32 * | vc, |
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 * | pxp, | ||
| l_int32 * | pyp | ||
| ) |
Input: vc (vector of 8 coefficients)
(x, y) (initial point)
(&xp, &yp) (<return> transformed point)
Return: 0 if OK; 1 on error
Notes: (1) This finds the nearest pixel coordinates of the transformed point. (2) It does not check ptrs for returned data!
Input: &bmf (<set to null>) Return: void
Input: bmf
chr (should be one of the 95 supported bitmaps)
&baseline (<return>; distance below UL corner of bitmap char)
Return: 0 if OK, 1 on error
| LEPT_DLL SARRAY* bmfGetLineStrings | ( | L_BMF * | bmf, |
| const char * | textstr, | ||
| l_int32 | maxw, | ||
| l_int32 | firstindent, | ||
| l_int32 * | ph | ||
| ) |
Input: bmf
textstr
maxw (max width of a text line in pixels)
firstindent (indentation of first line, in x-widths)
&h (<return> height required to hold text bitmap)
Return: sarray of text strings for each line, or null on error
Notes: (1) Divides the input text string into an array of text strings, each of which will fit within maxw bits of width.
Input: bmf
chr (should be one of the 95 supported printable bitmaps)
Return: pix (clone of pix in bmf), or null on error
Input: bmf
textstr
&w (<return> width of text string, in pixels for the
font represented by the bmf)
Return: 0 if OK, 1 on error
Input: bmf
chr (should be one of the 95 supported bitmaps)
&w (<return> character width; -1 if not printable)
Return: 0 if OK, 1 on error
Input: bmf
textstr
sa (of individual words)
Return: numa (of word lengths in pixels for the font represented
by the bmf), or null on error
Input: boxaa
index (of boxa within boxaa)
box (to be added)
accessflag (L_INSERT, L_COPY or L_CLONE)
Return: 0 if OK, 1 on error
Notes: (1) Adds to an existing boxa only.
Input: boxaa
boxa (to be added)
copyflag (L_INSERT, L_COPY, L_CLONE)
Return: 0 if OK, 1 on error
Input: baa
box (to be aligned with the best boxa in the baa, if possible)
delta (amount by which consecutive components can miss
in overlap and still be included in the array)
&index (of boxa with best overlap, or if none match,
this is the index of the next boxa to be generated)
Return: 0 if OK, 1 on error
Notes: (1) This is not greedy. It finds the boxa whose vertical extent has the closest overlap with the input box.
Input: baas (input boxaa to be copied)
copyflag (L_COPY, L_CLONE)
Return: baad (new boxaa, composed of copies or clones of the boxa
in baas), or null on error
Notes: (1) L_COPY makes a copy of each boxa in baas. L_CLONE makes a clone of each boxa in baas.
Input: size of boxa ptr array to be alloc'd (0 for default) Return: baa, or null on error
Input: boxa
box (to be added)
copyflag (L_INSERT, L_COPY, L_CLONE)
Return: 0 if OK, 1 on error
Input: &boxaa (<will be set to null before returning>) Return: void
| LEPT_DLL PIX* boxaaDisplay | ( | BOXAA * | baa, |
| l_int32 | linewba, | ||
| l_int32 | linewb, | ||
| l_uint32 | colorba, | ||
| l_uint32 | colorb, | ||
| l_int32 | w, | ||
| l_int32 | h | ||
| ) |
Input: baa
linewba (line width to display boxa)
linewb (line width to display box)
colorba (color to display boxa)
colorb (color to display box)
w (of pix; use 0 if determined by baa)
h (of pix; use 0 if determined by baa)
Return: pix, or null on error
| LEPT_DLL BOXA* boxaAdjustHeightToTarget | ( | BOXA * | boxad, |
| BOXA * | boxas, | ||
| l_int32 | sides, | ||
| l_int32 | target, | ||
| l_int32 | thresh | ||
| ) |
Input: boxad (use null to get a new one)
boxas
sides (L_ADJUST_TOP, L_ADJUST_BOT, L_ADJUST_TOP_AND_BOT)
target (target height if differs by more than thresh)
thresh (min abs difference in height to cause adjustment)
Return: boxad, or null on error
Notes: (1) Conditionally adjusts the height of each box, by moving the indicated edges (top and/or bot) if the height differs by @thresh or more from @target. (2) Use boxad == NULL for a new boxa, and boxad == boxas for in-place. Use one of these: boxad = boxaAdjustHeightToTarget(NULL, boxas, ...); // new boxaAdjustHeightToTarget(boxas, boxas, ...); // in-place
| LEPT_DLL BOXA* boxaAdjustWidthToTarget | ( | BOXA * | boxad, |
| BOXA * | boxas, | ||
| l_int32 | sides, | ||
| l_int32 | target, | ||
| l_int32 | thresh | ||
| ) |
Input: boxad (use null to get a new one; same as boxas for in-place)
boxas
sides (L_ADJUST_LEFT, L_ADJUST_RIGHT, L_ADJUST_LEFT_AND_RIGHT)
target (target width if differs by more than thresh)
thresh (min abs difference in width to cause adjustment)
Return: boxad, or null on error
Notes: (1) Conditionally adjusts the width of each box, by moving the indicated edges (left and/or right) if the width differs by @thresh or more from @target. (2) Use boxad == NULL for a new boxa, and boxad == boxas for in-place. Use one of these: boxad = boxaAdjustWidthToTarget(NULL, boxas, ...); // new boxaAdjustWidthToTarget(boxas, boxas, ...); // in-place
Input: boxaa Return: 0 if OK, 1 on error
Input: boxaa
size (new size of boxa array)
Return: 0 if OK; 1 on error
Notes: (1) If necessary, reallocs the boxa ptr array to @size.
Input: boxaa
maxindex
boxa (to be replicated into the extended ptr array)
Return: 0 if OK, 1 on error
Notes: (1) This should be used on an existing boxaa that has been fully loaded with boxa. It then extends the boxaa, loading all the additional ptrs with copies of boxa. Typically, boxa will be empty.
Input: boxas
mat (3x3 transform matrix; canonical form)
Return: boxad (transformed boxas), or null on error
Input: baa
num (number extracted from each)
fillerbox (<optional> that fills if necessary)
copyflag (L_COPY or L_CLONE)
Return: boxa, or null on error
Notes: (1) This 'flattens' the baa to a boxa, taking the first @num boxes from each boxa. (2) In each boxa, if there are less than @num boxes, we preserve the alignment between the input baa and the output boxa by inserting one or more fillerbox(es) or, if @fillerbox == NULL, one or more invalid placeholder boxes.
Input: baa
&naindex (<optional return> the boxa index in the baa)
copyflag (L_COPY or L_CLONE)
Return: boxa, or null on error
Notes: (1) This 'flattens' the baa to a boxa, taking the boxes in order in the first boxa, then the second, etc. (2) If a boxa is empty, we generate an invalid, placeholder box of zero size. This is useful when converting from a baa where each boxa has either 0 or 1 boxes, and it is necessary to maintain a 1:1 correspondence between the initial boxa array and the resulting box array. (3) If &naindex is defined, we generate a Numa that gives, for each box in the baa, the index of the boxa to which it belongs.
Input: baa
iboxa (index into the boxa array in the boxaa)
ibox (index into the box array in the boxa)
accessflag (L_COPY or L_CLONE)
Return: box, or null on error
Input: boxaa
index (to the index-th boxa)
accessflag (L_COPY or L_CLONE)
Return: boxa, or null on error
Input: boxaa Return: count (number of boxes), or 0 if no boxes or on error
Input: boxaa Return: count (number of boxa), or 0 if no boxa or on error
| LEPT_DLL l_int32 boxaaGetExtent | ( | BOXAA * | baa, |
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| BOX ** | pbox, | ||
| BOXA ** | pboxa | ||
| ) |
Input: baa
&w (<optional return> width)
&h (<optional return> height)
&box (<optional return>, minimum box containing all boxa
in boxaa)
&boxa (<optional return>, boxa containing all boxes in each
boxa in the boxaa)
Return: 0 if OK, 1 on error
Notes: (1) The returned w and h are the minimum size image that would contain all boxes untranslated. (2) Each box in the returned boxa is the minimum box required to hold all the boxes in the respective boxa of baa. (3) If there are no valid boxes in a boxa, the box corresponding to its extent has all fields set to 0 (an invalid box).
Input: boxaa (typically empty)
boxa (to be replicated into the entire ptr array)
Return: 0 if OK, 1 on error
Notes: (1) This initializes a boxaa by filling up the entire boxa ptr array with copies of @boxa. Any existing boxa are destroyed. After this operation, the number of boxa is equal to the number of allocated ptrs. (2) Note that we use boxaaReplaceBoxa() instead of boxaaInsertBoxa(). They both have the same effect when inserting into a NULL ptr in the boxa ptr array. (3) Example usage. This function is useful to prepare for a random insertion (or replacement) of boxa into a boxaa. To randomly insert boxa into a boxaa, up to some index "max": Boxaa *baa = boxaaCreate(max); // initialize the boxa Boxa *boxa = boxaCreate(...); ... [optionally fill with boxes] boxaaInitFull(baa, boxa); A typical use is to initialize the array with empty boxa, and to replace only a subset that must be aligned with something else, such as a pixa.
Input: boxaa
index (location in boxaa to insert new boxa)
boxa (new boxa to be inserted)
Return: 0 if OK, 1 on error
Notes: (1) This shifts boxa[i] --> boxa[i + 1] for all i >= index, and then inserts boxa as boxa[index]. (2) To insert at the beginning of the array, set index = 0. (3) To append to the array, it's easier to use boxaaAddBoxa(). (4) This should not be used repeatedly to insert into large arrays, because the function is O(n).
Input: baad (dest boxaa; add to this one)
baas (source boxaa; add from this one)
istart (starting index in baas)
iend (ending index in baas; use -1 to cat all)
Return: 0 if OK, 1 on error
Notes: (1) This appends a clone of each indicated boxa in baas to baad (2) istart < 0 is taken to mean 'read from the start' (istart = 0) (3) iend < 0 means 'read to the end' (4) if baas == NULL, this is a no-op.
Input: w, h (of pix that is being quadtree-ized)
nlevels (in quadtree)
Return: baa (for quadtree regions at each level), or null on error
Notes: (1) The returned boxaa has @nlevels of boxa, each containing the set of rectangles at that level. The rectangle at level 0 is the entire region; at level 1 the region is divided into 4 rectangles, and at level n there are 4^n rectangles. (2) At each level, the rectangles in the boxa are in "raster" order, with LR (fast scan) and TB (slow scan).
Input: filename Return: boxaa, or null on error
| LEPT_DLL BOXAA* boxaaReadFromFiles | ( | const char * | dirname, |
| const char * | substr, | ||
| l_int32 | first, | ||
| l_int32 | nfiles | ||
| ) |
Input: dirname (directory)
substr (<optional> substring filter on filenames; can be NULL)
first (0-based)
nfiles (use 0 for everything from @first to the end)
Return: baa, or null on error or if no boxa files are found.
Notes: (1) The files must be serialized boxa files (e.g., *.ba). If some files cannot be read, warnings are issued. (2) Use @substr to filter filenames in the directory. If @substr == NULL, this takes all files. (3) After filtering, use @first and @nfiles to select a contiguous set of files, that have been lexically sorted in increasing order.
Input: stream Return: boxaa, or null on error
Input: boxaa
index (of the boxa to be removed)
Return: 0 if OK, 1 on error
Notes: (1) This removes boxa[index] and then shifts boxa[i] --> boxa[i - 1] for all i > index. (2) The removed boxa is destroyed. (3) This should not be used repeatedly on large arrays, because the function is O(n).
Input: boxaa
index (to the index-th boxa)
boxa (insert and replace any existing one)
Return: 0 if OK, 1 on error
Notes: (1) Any existing boxa is destroyed, and the input one is inserted in its place. (2) If the index is invalid, return 1 (error)
Input: baas
first (use 0 to select from the beginning)
last (use 0 to select to the end)
copyflag (L_COPY, L_CLONE)
Return: baad, or null on error
Notes: (1) The copyflag specifies what we do with each boxa from baas. Specifically, L_CLONE inserts a clone into baad of each selected boxa from baas.
| LEPT_DLL l_int32 boxaaSizeRange | ( | BOXAA * | baa, |
| l_int32 * | pminw, | ||
| l_int32 * | pminh, | ||
| l_int32 * | pmaxw, | ||
| l_int32 * | pmaxh | ||
| ) |
Input: baa
&minw, &minh, &maxw, &maxh (<optional return> range of
dimensions of all boxes)
Return: 0 if OK, 1 on error
Input: filename
boxaa
Return: 0 if OK, 1 on error
Input: stream
boxaa
Return: 0 if OK, 1 on error
Input: boxa
sorttype (L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH,
L_SORT_BY_HEIGHT, L_SORT_BY_PERIMETER)
sortorder (L_SORT_INCREASING, L_SORT_DECREASING)
&naindex (<optional return> index of sorted order into
original array)
Return: boxad (sorted version of boxas), or null on error
Notes: (1) For a large number of boxes (say, greater than 1000), this O(n) binsort is much faster than the O(nlogn) shellsort. For 5000 components, this is over 20x faster than boxaSort(). (2) Consequently, boxaSort() calls this function if it will likely go much faster.
Input: boxa Return: 0 if OK, 1 on error
Notes: (1) This destroys all boxes in the boxa, setting the ptrs to null. The number of allocated boxes, n, is set to 0.
Input: boxas
box (for clipping)
Return: boxad (boxa with boxes in boxas clipped to box),
or null on error
Notes: (1) All boxes in boxa not intersecting with box are removed, and the remaining boxes are clipped to box.
Input: boxas
Return: boxad (where each set of boxes in boxas that overlap are
combined into a single bounding box in boxad), or
null on error.
Notes: (1) If there are no overlapping boxes, it simply returns a copy of @boxas. (2) The alternative method of painting each rectangle and finding the 4-connected components gives the wrong result, because two non-overlapping rectangles, when rendered, can still be 4-connected, and hence they will be joined. (3) A bad case is to have n boxes, none of which overlap. Then you have one iteration with O(n^2) compares. This is still faster than painting each rectangle and finding the connected components, even for thousands of rectangles.
| LEPT_DLL l_int32 boxaCompareRegions | ( | BOXA * | boxa1, |
| BOXA * | boxa2, | ||
| l_int32 | areathresh, | ||
| l_int32 * | pnsame, | ||
| l_float32 * | pdiffarea, | ||
| l_float32 * | pdiffxor, | ||
| PIX ** | ppixdb | ||
| ) |
Input: boxa1, boxa2
areathresh (minimum area of boxes to be considered)
&pnsame (<return> true if same number of boxes)
&pdiffarea (<return> fractional difference in total area)
&pdiffxor (<optional return> fractional difference
in xor of regions)
&pixdb (<optional return> debug pix showing two boxa)
Return: 0 if OK, 1 on error
Notes: (1) This takes 2 boxa, removes all boxes smaller than a given area, and compares the remaining boxes between the boxa. (2) The area threshold is introduced to help remove noise from small components. Any box with a smaller value of w * h will be removed from consideration. (3) The xor difference is the most stringent test, requiring alignment of the corresponding boxes. It is also more computationally intensive and is optionally returned. Alignment is to the UL corner of each region containing all boxes, as given by boxaGetExtent(). (4) Both fractional differences are with respect to the total area in the two boxa. They range from 0.0 to 1.0. A perfect match has value 0.0. If both boxa are empty, we return 0.0; if one is empty we return 1.0. (5) An example input might be the rectangular regions of a segmentation mask for text or images from two pages.
| LEPT_DLL BOXA* boxaConstrainSize | ( | BOXA * | boxas, |
| l_int32 | width, | ||
| l_int32 | widthflag, | ||
| l_int32 | height, | ||
| l_int32 | heightflag | ||
| ) |
Input: boxas
width (force width of all boxes to this size;
input 0 to use the median width)
widthflag (L_ADJUST_SKIP, L_ADJUST_LEFT, L_ADJUST_RIGHT,
or L_ADJUST_LEFT_AND_RIGHT)
height (force height of all boxes to this size;
input 0 to use the median height)
heightflag (L_ADJUST_SKIP, L_ADJUST_TOP, L_ADJUST_BOT,
or L_ADJUST_TOP_AND_BOT)
Return: boxad (adjusted so all boxes are the same size)
Notes: (1) Forces either width or height (or both) of every box in the boxa to a specified size, by moving the indicated sides. (2) All input boxes should be valid. Median values will be used with invalid boxes. (3) Typical input might be the output of boxaLinearFit(), where each side has been fit. (4) Unlike boxaAdjustWidthToTarget() and boxaAdjustHeightToTarget(), this is not dependent on a difference threshold to change the size.
Input: boxas
box (for containment)
Return: boxad (boxa with all boxes in boxas that are
entirely contained in box), or null on error
Notes: (1) All boxes in boxas that are not entirely contained in box are removed.
Input: boxa
ncorners (2 or 4 for the representation of each box)
Return: pta (with @ncorners points for each box in the boxa),
or null on error
Notes: (1) If ncorners == 2, we select the UL and LR corners. Otherwise we save all 4 corners in this order: UL, UR, LL, LR.
Input: boxa
copyflag (L_COPY, L_CLONE, L_COPY_CLONE)
Return: new boxa, or null on error
Notes: (1) See pix.h for description of the copyflag. (2) The copy-clone makes a new boxa that holds clones of each box.
Input: n (initial number of ptrs) Return: boxa, or null on error
Input: &boxa (<will be set to null before returning>) Return: void
Note:
| LEPT_DLL PIX* boxaDisplayTiled | ( | BOXA * | boxas, |
| PIXA * | pixa, | ||
| l_int32 | maxwidth, | ||
| l_int32 | linewidth, | ||
| l_float32 | scalefactor, | ||
| l_int32 | background, | ||
| l_int32 | spacing, | ||
| l_int32 | border, | ||
| const char * | fontdir | ||
| ) |
Input: boxa
pixa (<optional> background for each box)
maxwidth (of output image)
linewidth (width of box outlines, before scaling)
scalefactor (applied to every box; use 1.0 for no scaling)
background (0 for white, 1 for black; this is the color
of the spacing between the images)
spacing (between images, and on outside)
border (width of black border added to each image;
use 0 for no border)
fontdir (<optional> can be NULL; use to number the boxes)
Return: pixd (of tiled images of boxes), or null on error
Notes: (1) Displays each box separately in a tiled 32 bpp image. (2) If pixa is defined, it must have the same count as the boxa, and it will be a background over which each box is rendered. If pixa is not defined, the boxes will be rendered over blank images of identical size. (3) See pixaDisplayTiledInRows() for other parameters.
| LEPT_DLL BOX* boxAdjustSides | ( | BOX * | boxd, |
| BOX * | boxs, | ||
| l_int32 | delleft, | ||
| l_int32 | delright, | ||
| l_int32 | deltop, | ||
| l_int32 | delbot | ||
| ) |
Input: boxd (<optional>; this can be null, equal to boxs,
or different from boxs)
boxs (starting box; to have sides adjusted)
delleft, delright, deltop, delbot (changes in location of
each side)
Return: boxd, or null on error or if the computed boxd has
width or height <= 0.
Notes: (1) Set boxd == NULL to get new box; boxd == boxs for in-place; or otherwise to resize existing boxd. (2) For usage, suggest one of these: boxd = boxAdjustSides(NULL, boxs, ...); // new boxAdjustSides(boxs, boxs, ...); // in-place boxAdjustSides(boxd, boxs, ...); // other (3) New box dimensions are cropped at left and top to x >= 0 and y >= 0. (4) For example, to expand in-place by 20 pixels on each side, use boxAdjustSides(box, box, -20, 20, -20, 20);
Input: boxa
num (number put into each boxa in the baa)
copyflag (L_COPY or L_CLONE)
Return: baa, or null on error
Notes: (1) This puts @num boxes from the input @boxa into each of a set of boxa within an output baa. (2) This assumes that the boxes in @boxa are in sets of @num each.
| LEPT_DLL l_int32 boxaEqual | ( | BOXA * | boxa1, |
| BOXA * | boxa2, | ||
| l_int32 | maxdist, | ||
| NUMA ** | pnaindex, | ||
| l_int32 * | psame | ||
| ) |
Input: boxa1
boxa2
maxdist
&naindex (<optional return> index array of correspondences)
&same (<return> 1 if equal; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) The two boxa are the "same" if they contain the same boxes and each box is within @maxdist of its counterpart in their positions within the boxa. This allows for small rearrangements. Use 0 for maxdist if the boxa must be identical. (2) This applies only to geometry and ordering; refcounts are not considered. (3) @maxdist allows some latitude in the ordering of the boxes. For the boxa to be the "same", corresponding boxes must be within @maxdist of each other. Note that for large @maxdist, we should use a hash function for efficiency. (4) naindex[i] gives the position of the box in boxa2 that corresponds to box i in boxa1. It is only returned if the boxa are equal.
Input: boxa Return: 0 if OK; 1 on error
Notes: (1) Reallocs with doubled size of ptr array.
Input: boxa
size (new size of boxa array)
Return: 0 if OK; 1 on error
Notes: (1) If necessary, reallocs new boxa ptr array to @size.
| LEPT_DLL l_int32 boxaExtractAsNuma | ( | BOXA * | boxa, |
| NUMA ** | pnal, | ||
| NUMA ** | pnat, | ||
| NUMA ** | pnar, | ||
| NUMA ** | pnab, | ||
| NUMA ** | pnaw, | ||
| NUMA ** | pnah, | ||
| l_int32 | keepinvalid | ||
| ) |
Input: boxa
&nal (<optional return> array of left locations)
&nat (<optional return> array of top locations)
&nar (<optional return> array of right locations)
&nab (<optional return> array of bottom locations)
&naw (<optional return> array of widths)
&nah (<optional return> array of heights)
keepinvalid (1 to keep invalid boxes; 0 to remove them)
Return: 0 if OK, 1 on error
Notes: (1) If you are counting or sorting values, such as determining rank order, you must remove invalid boxes. (2) If you are parametrizing the values, or doing an evaluation where the position in the boxa sequence is important, you must replace the invalid boxes with valid ones before doing the extraction. This is easily done with boxaFillSequence().
| LEPT_DLL l_int32 boxaExtractAsPta | ( | BOXA * | boxa, |
| PTA ** | pptal, | ||
| PTA ** | pptat, | ||
| PTA ** | pptar, | ||
| PTA ** | pptab, | ||
| PTA ** | pptaw, | ||
| PTA ** | pptah, | ||
| l_int32 | keepinvalid | ||
| ) |
Input: boxa
&ptal (<optional return> array of left locations vs. index)
&ptat (<optional return> array of top locations vs. index)
&ptar (<optional return> array of right locations vs. index)
&ptab (<optional return> array of bottom locations vs. index)
&ptaw (<optional return> array of widths vs. index)
&ptah (<optional return> array of heights vs. index)
keepinvalid (1 to keep invalid boxes; 0 to remove them)
Return: 0 if OK, 1 on error
Notes: (1) For most applications, such as counting, sorting, fitting to some parametrized form, plotting or filtering in general, you should remove the invalid boxes. Each pta saves the box index in the x array, so replacing invalid boxes by filling with boxaFillSequence(), which is required for boxaExtractAsNuma(), is not necessary. (2) If invalid boxes are retained, each one will result in entries (typically 0) in all selected output pta.
Input: boxa (typ. of word bounding boxes, in textline order)
numa (index of textline for each box in boxa)
Return: naa (numaa, where each numa represents one textline),
or null on error
Notes: (1) The input is expected to come from pixGetWordBoxesInTextlines(). (2) Each numa in the output consists of an average y coordinate of the first box in the textline, followed by pairs of x coordinates representing the left and right edges of each of the boxes in the textline.
Input: boxas (with at least 3 boxes)
useflag (L_USE_ALL_BOXES, L_USE_SAME_PARITY_BOXES)
debug (1 for debug output)
Return: boxad (filled boxa), or null on error
Notes: (1) This simple function replaces invalid boxes with a copy of the nearest valid box, selected from either the entire sequence (L_USE_ALL_BOXES) or from the boxes with the same parity (L_USE_SAME_PARITY_BOXES). It returns a new boxa. (2) This is useful if you expect boxes in the sequence to vary slowly with index.
Input: boxa
&area (<return> total area of all boxes)
Return: 0 if OK, 1 on error
Notes: (1) Measures the total area of the boxes, without regard to overlaps.
Input: boxa
&w (<optional return> average width)
&h (<optional return> average height)
Return: 0 if OK, 1 on error or if the boxa is empty
Input: boxa
index (to the index-th box)
accessflag (L_COPY or L_CLONE)
Return: box, or null on error
| LEPT_DLL l_int32 boxaGetBoxGeometry | ( | BOXA * | boxa, |
| l_int32 | index, | ||
| l_int32 * | px, | ||
| l_int32 * | py, | ||
| l_int32 * | pw, | ||
| l_int32 * | ph | ||
| ) |
Input: boxa
index (to the index-th box)
&x, &y, &w, &h (<optional return>; each can be null)
Return: 0 if OK, 1 on error
Input: boxa Return: count (of all boxes); 0 if no boxes or on error
| LEPT_DLL l_int32 boxaGetCoverage | ( | BOXA * | boxa, |
| l_int32 | wc, | ||
| l_int32 | hc, | ||
| l_int32 | exactflag, | ||
| l_float32 * | pfract | ||
| ) |
Input: boxa
wc, hc (dimensions of overall clipping rectangle with UL
corner at (0, 0) that is covered by the boxes)
exactflag (1 for guaranteeing an exact result; 0 for getting
an exact result only if the boxes do not overlap)
&fract (<return> sum of box area as fraction of w * h)
Return: 0 if OK, 1 on error
Notes: (1) The boxes in boxa are clipped to the input rectangle. (2) When @exactflag == 1, we generate a 1 bpp pix of size wc x hc, paint all the boxes black, and count the fg pixels. This can take 1 msec on a large page with many boxes.
Input: boxa
&w (<optional return> width)
&h (<optional return> height)
&box (<optional return>, minimum box containing all boxes
in boxa)
Return: 0 if OK, 1 on error
Notes: (1) The returned w and h are the minimum size image that would contain all boxes untranslated. (2) If there are no valid boxes, returned w and h are 0 and all parameters in the returned box are 0. This is not an error, because an empty boxa is valid and boxaGetExtent() is required for serialization.
Input: boxa
Return: box (with median values for x, y, w, h), or null on error
or if the boxa is empty.
Notes: (1) See boxaGetRankSize()
Input: boxa
x, y (point)
Return: box (box with centroid closest to the given point [x,y]),
or NULL if no boxes in boxa
Notes: (1) Uses euclidean distance between centroid and point.
Input: boxa
fract (use 0.0 for smallest, 1.0 for largest)
Return: box (with rank values for x, y, w, h), or null on error
or if the boxa is empty (has no valid boxes)
Notes: (1) This function does not assume that all boxes in the boxa are valid (2) The four box parameters are sorted independently. For rank order, the width and height are sorted in increasing order. But what does it mean to sort x and y in "rank order"? If the boxes are of comparable size and somewhat aligned (e.g., from multiple images), it makes some sense to give a "rank order" for x and y by sorting them in decreasing order. But in general, the interpretation of a rank order on x and y is highly application dependent. In summary:
Input: boxa
index (to the index-th box)
accessflag (L_COPY or L_CLONE)
Return: box, or null if box is not valid or on error
Notes: (1) This returns NULL for an invalid box in a boxa. For a box to be valid, both the width and height must be > 0. (2) We allow invalid boxes, with w = 0 or h = 0, as placeholders in boxa for which the index of the box in the boxa is important. This is an atypical situation; usually you want to put only valid boxes in a boxa.
Input: boxa Return: count (of valid boxes); 0 if no valid boxes or on error
| LEPT_DLL BOXA* boxaGetWhiteblocks | ( | BOXA * | boxas, |
| BOX * | box, | ||
| l_int32 | sortflag, | ||
| l_int32 | maxboxes, | ||
| l_float32 | maxoverlap, | ||
| l_int32 | maxperim, | ||
| l_float32 | fract, | ||
| l_int32 | maxpops | ||
| ) |
Input: boxas (typically, a set of bounding boxes of fg components)
box (initial region; typically including all boxes in boxas;
if null, it computes the region to include all boxes
in boxas)
sortflag (L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT,
L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION,
L_SORT_BY_PERIMETER, L_SORT_BY_AREA)
maxboxes (maximum number of output whitespace boxes; e.g., 100)
maxoverlap (maximum fractional overlap of a box by any
of the larger boxes; e.g., 0.2)
maxperim (maximum half-perimeter, in pixels, for which
pivot is selected by proximity to box centroid;
e.g., 200)
fract (fraction of box diagonal that is an acceptable
distance from the box centroid to select the pivot;
e.g., 0.2)
maxpops (maximum number of pops from the heap; use 0 as default)
Return: boxa (of sorted whitespace boxes), or null on error
Notes: (1) This uses the elegant Breuel algorithm, found in "Two Geometric Algorithms for Layout Analysis", 2002, url: "citeseer.ist.psu.edu/breuel02two.html". It starts with the bounding boxes (b.b.) of the connected components (c.c.) in a region, along with the rectangle representing that region. It repeatedly divides the rectangle into four maximal rectangles that exclude a pivot rectangle, sorting them in a priority queue according to one of the six sort flags. It returns a boxa of the "largest" set that have no intersection with boxes from the input boxas. (2) If box == NULL, the initial region is the minimal region that includes the origin and every box in boxas. (3) maxboxes is the maximum number of whitespace boxes that will be returned. The actual number will depend on the image and the values chosen for maxoverlap and maxpops. In many cases, the actual number will be 'maxboxes'. (4) maxoverlap allows pruning of whitespace boxes depending on the overlap. To avoid all pruning, use maxoverlap = 1.0. To select only boxes that have no overlap with each other (maximal pruning), choose maxoverlap = 0.0. Otherwise, no box can have more than the 'maxoverlap' fraction of its area overlapped by any larger (in the sense of the sortflag) box. (5) Choose maxperim (actually, maximum half-perimeter) to represent a c.c. that is small enough so that you don't care about the white space that could be inside of it. For all such c.c., the pivot for 'quadfurcation' of a rectangle is selected as having a reasonable proximity to the rectangle centroid. (6) Use fract in the range [0.0 ... 1.0]. Set fract = 0.0 to choose the small box nearest the centroid as the pivot. If you choose fract > 0.0, it is suggested that you call boxaPermuteRandom() first, to permute the boxes (see usage below). This should reduce the search time for each of the pivot boxes. (7) Choose maxpops to be the maximum number of rectangles that are popped from the heap. 
This is an indirect way to limit the execution time. Use 0 for default (a fairly large number). At any time, you can expect the heap to contain about 2.5 times as many boxes as have been popped off. (8) The output result is a sorted set of overlapping boxes, constrained by 'maxboxes', 'maxoverlap' and 'maxpops'. (9) The main defect of the method is that it abstracts out the actual components, retaining only the b.b. for analysis. Consider a component with a large b.b. If this is chosen as a pivot, all white space inside is immediately taken out of consideration. Furthermore, even if it is never chosen as a pivot, as the partitioning continues, at no time will any of the whitespace inside this component be part of a rectangle with zero overlapping boxes. Thus, the interiors of all boxes are necessarily excluded from the union of the returned whitespace boxes. (10) USAGE: One way to accommodate to this weakness is to remove such large b.b. before starting the computation. For example, if 'box' is an input image region containing 'boxa' b.b. of c.c.:
// Faster pivot choosing
boxaPermuteRandom(boxa, boxa);
// Remove anything either large width or height
boxat = boxaSelectBySize(boxa, maxwidth, maxheight,
L_SELECT_IF_BOTH, L_SELECT_IF_LT,
NULL);
boxad = boxaGetWhiteblocks(boxat, box, type, maxboxes,
maxoverlap, maxperim, fract,
maxpops);
The result will be rectangular regions of "white space" that extend into (and often through) the excluded components. (11) As a simple example, suppose you wish to find the columns on a page. First exclude large c.c. that may block the columns, and then call:
boxad = boxaGetWhiteblocks(boxa, box, L_SORT_BY_HEIGHT,
20, 0.15, 200, 0.2, 2000);
to get the 20 tallest boxes with no more than 0.15 overlap between a box and any of the taller ones, and avoiding the use of any c.c. with a b.b. half perimeter greater than 200 as a pivot.
| LEPT_DLL BOXA* boxaHandleOverlaps | ( | BOXA * | boxas, |
| l_int32 | op, | ||
| l_int32 | range, | ||
| l_float32 | min_overlap, | ||
| l_float32 | max_ratio, | ||
| NUMA ** | pnamap | ||
| ) |
Input: boxas
op (L_COMBINE, L_REMOVE_SMALL)
range (> 0, forward distance over which overlaps are checked)
min_overlap (minimum fraction of smaller box required for
overlap to count; 0.0 to ignore)
max_ratio (maximum fraction of small/large areas for
overlap to count; 1.0 to ignore)
&namap (<optional return> combining map)
Return: boxad, or null on error.
Notes: (1) For all n(n-1)/2 box pairings, if two boxes overlap, either: (a) op == L_COMBINE: get the bounding region for the two, replace the larger with the bounding region, and remove the smaller of the two, or (b) op == L_REMOVE_SMALL: just remove the smaller. (2) If boxas is 2D sorted, range can be small, but if it is not spatially sorted, range should be large to allow all pairwise comparisons to be made. (3) The @min_overlap parameter allows ignoring small overlaps. If @min_overlap == 1.0, only boxes fully contained in larger boxes can be considered for removal; if @min_overlap == 0.0, this constraint is ignored. (4) The @max_ratio parameter allows ignoring overlaps between boxes that are not too different in size. If @max_ratio == 0.0, no boxes can be removed; if @max_ratio == 1.0, this constraint is ignored.
Input: boxa (typically empty)
box (<optional> to be replicated into the entire ptr array)
Return: 0 if OK, 1 on error
Notes: (1) This initializes a boxa by filling up the entire box ptr array with copies of @box. If @box == NULL, use a placeholder box of zero size. Any existing boxes are destroyed. After this operation, the number of boxes is equal to the number of allocated ptrs. (2) Note that we use boxaReplaceBox() instead of boxaInsertBox(). They both have the same effect when inserting into a NULL ptr in the boxa ptr array: (3) Example usage. This function is useful to prepare for a random insertion (or replacement) of boxes into a boxa. To randomly insert boxes into a boxa, up to some index "max": Boxa *boxa = boxaCreate(max); boxaInitFull(boxa, NULL); If you want placeholder boxes of non-zero size: Boxa *boxa = boxaCreate(max); Box *box = boxCreate(...); boxaInitFull(boxa, box); boxDestroy(&box); If we have an existing boxa with a smaller ptr array, it can be reused for up to max boxes: boxaExtendArrayToSize(boxa, max); boxaInitFull(boxa, NULL); The initialization allows the boxa to always be properly filled, even if all the boxes are not later replaced. If you want to know which boxes have been replaced, and you initialized with invalid zero-sized boxes, use boxaGetValidBox() to return NULL for the invalid boxes.
Input: boxa
index (location in boxa to insert new value)
box (new box to be inserted)
Return: 0 if OK, 1 on error
Notes: (1) This shifts box[i] --> box[i + 1] for all i >= index, and then inserts box as box[index]. (2) To insert at the beginning of the array, set index = 0. (3) To append to the array, it's easier to use boxaAddBox(). (4) This should not be used repeatedly to insert into large arrays, because the function is O(n).
Input: boxas
box (for intersecting)
Return: boxad (boxa with all boxes in boxas that intersect box),
or null on error
Notes: (1) All boxes in boxa that intersect with box (i.e., are completely or partially contained in box) are retained.
Input: boxa
&full (<return> 1 if boxa is full)
Return: 0 if OK, 1 on error
Input: boxad (dest boxa; add to this one)
boxas (source boxa; add from this one)
istart (starting index in boxas)
iend (ending index in boxas; use -1 to cat all)
Return: 0 if OK, 1 on error
Notes: (1) This appends a clone of each indicated box in boxas to boxad (2) istart < 0 is taken to mean 'read from the start' (istart = 0) (3) iend < 0 means 'read to the end' (4) if boxas == NULL or has no boxes, this is a no-op.
Input: boxas (source boxa)
factor (reject outliers with widths and heights deviating
from the median by more than @factor times
the median deviation from the median; typically ~3)
debug (1 for debug output)
Return: boxad (fitted boxa), or null on error
Notes: (1) This finds a set of boxes (boxad) where each edge of each box is a linear least square fit (LSF) to the edges of the input set of boxes (boxas). Before fitting, outliers in the boxes in boxas are removed (see below). (2) This is useful when each of the box edges in boxas are expected to vary linearly with box index in the set. These could be, for example, noisy measurements of similar regions on successive scanned pages. (3) Method: there are 2 steps: (a) Find and remove outliers, separately based on the deviation from the median of the width and height of the box. Use @factor to specify tolerance to outliers; use a very large value of @factor to avoid rejecting any box sides in the linear LSF. (b) On the remaining boxes, do a linear LSF independently for each of the four sides. (4) Invalid input boxes are not used in computation of the LSF. (5) The returned boxad can then be used in boxaModifyWithBoxa() to selectively change the boxes in boxas.
| LEPT_DLL l_int32 boxaLocationRange | ( | BOXA * | boxa, |
| l_int32 * | pminx, | ||
| l_int32 * | pminy, | ||
| l_int32 * | pmaxx, | ||
| l_int32 * | pmaxy | ||
| ) |
Input: boxa
&minx, &miny, &maxx, &maxy (<optional return> range of
UL corner positions)
Return: 0 if OK, 1 on error
Input: boxa
area (threshold value of width * height)
relation (L_SELECT_IF_LT, L_SELECT_IF_GT,
L_SELECT_IF_LTE, L_SELECT_IF_GTE)
Return: na (indicator array), or null on error
Notes: (1) To keep small components, use relation = L_SELECT_IF_LT or L_SELECT_IF_LTE. To keep large components, use relation = L_SELECT_IF_GT or L_SELECT_IF_GTE.
| LEPT_DLL NUMA* boxaMakeSizeIndicator | ( | BOXA * | boxa, |
| l_int32 | width, | ||
| l_int32 | height, | ||
| l_int32 | type, | ||
| l_int32 | relation | ||
| ) |
Input: boxa
width, height (threshold dimensions)
type (L_SELECT_WIDTH, L_SELECT_HEIGHT,
L_SELECT_IF_EITHER, L_SELECT_IF_BOTH)
relation (L_SELECT_IF_LT, L_SELECT_IF_GT,
L_SELECT_IF_LTE, L_SELECT_IF_GTE)
Return: na (indicator array), or null on error
Notes: (1) The args specify constraints on the size of the components that are kept. (2) If the selection type is L_SELECT_WIDTH, the input height is ignored, and v.v. (3) To keep small components, use relation = L_SELECT_IF_LT or L_SELECT_IF_LTE. To keep large components, use relation = L_SELECT_IF_GT or L_SELECT_IF_GTE.
Input: boxae (boxes to go in even positions in merged boxa)
boxao (boxes to go in odd positions in merged boxa)
fillflag (1 if there are invalid boxes in placeholders)
Return: boxad (merged), or null on error
Notes: (1) This is essentially the inverse of boxaSplitEvenOdd(). Typically, boxae and boxao were generated by boxaSplitEvenOdd(), and the value of @fillflag needs to be the same in both calls. (2) If @fillflag == 1, both boxae and boxao are of the same size; otherwise boxae may have one more box than boxao.
Input: boxas
boxam (boxa with boxes used to modify those in boxas)
subflag (L_USE_MINSIZE, L_USE_MAXSIZE, L_SUB_ON_BIG_DIFF,
L_USE_CAPPED_MIN or L_USE_CAPPED_MAX)
maxdiff (parameter used with L_SUB_ON_BIG_DIFF,
L_USE_CAPPED_MIN and L_USE_CAPPED_MAX)
Return: boxad (result after adjusting boxes in boxas), or null
on error.
Notes: (1) This takes two input boxa (boxas, boxam) and constructs boxad, where each box in boxad is generated from the corresponding boxes in boxas and boxam. The rule for constructing each output box depends on @subflag and @maxdiff. Let boxs be a box from @boxas and boxm be a box from @boxam. If @subflag == L_USE_MINSIZE, the output box is the intersection of the two input boxes. If @subflag == L_USE_MAXSIZE, the output box is the union of the two input boxes; i.e., the minimum bounding rectangle for the two input boxes. For the last two flags, each side of the output box is found separately from the corresponding side of boxs and boxm, according to these rules, where "smaller"("bigger") mean in a direction that decreases(increases) the size of the output box: If @subflag == L_SUB_ON_BIG_DIFF, use boxs if within @maxdiff pixels of boxm; otherwise, use boxm. If @subflag == L_USE_CAPPED_MIN, use the Min of boxm with the Max of (boxs, boxm +- @maxdiff), where the sign is adjusted to make the box smaller (e.g., use "+" on left side). If @subflag == L_USE_CAPPED_MAX, use the Max of boxm with the Min of (boxs, boxm +- @maxdiff), where the sign is adjusted to make the box bigger (e.g., use "-" on left side). Use of the last 2 flags is further explained in (3) and (4). (2) boxas and boxam must be the same size. If boxam == NULL, this returns a copy of boxas with a warning. (3) If @subflag == L_SUB_ON_BIG_DIFF, use boxm for each side where the corresponding sides differ by more than @maxdiff. Two extreme cases: (a) set @maxdiff == 0 to use only values from boxam in boxad. (b) set @maxdiff == 10000 to ignore all values from boxam; then boxad will be the same as boxas. (4) If @subflag == L_USE_CAPPED_MAX: use boxm if boxs is smaller; use boxs if boxs is bigger than boxm by an amount up to @maxdiff; and use boxm +- @maxdiff (the 'capped' value) if boxs is bigger than boxm by an amount larger than @maxdiff. 
Similarly, with interchange of Min/Max and sign of @maxdiff, for @subflag == L_USE_CAPPED_MIN. (5) If either of corresponding boxes in boxas and boxam is invalid, an invalid box is copied to the result. (6) Typical input for boxam may be the output of boxaLinearFit(). where outliers have been removed and each side is LS fit to a line. (7) Unlike boxaAdjustWidthToTarget() and boxaAdjustHeightToTarget(), this is not dependent on a difference threshold to change the size. Additional constraints on the size of each box can be enforced by following this operation with boxaConstrainSize(), taking boxad as input.
Input: boxas (input boxa) Return: boxad (with boxes permuted), or null on error
Notes: (1) This does a pseudorandom in-place permutation of the boxes. (2) The result is guaranteed not to have any boxes in their original position, but it is not very random. If you need randomness, use boxaPermuteRandom().
Input: boxad (<optional> can be null or equal to boxas)
boxas (input boxa)
Return: boxad (with boxes permuted), or null on error
Notes: (1) If boxad is null, make a copy of boxas and permute the copy. Otherwise, boxad must be equal to boxas, and the operation is done in-place. (2) This does a random in-place permutation of the boxes, by swapping each box in turn with a random box. The result is almost guaranteed not to have any boxes in their original position. (3) MSVC rand() has RAND_MAX = 2^15 - 1, so it will not do a proper permutation if the number of boxes exceeds this.
| LEPT_DLL l_int32 boxaPlotSides | ( | BOXA * | boxa, |
| const char * | plotname, | ||
| NUMA ** | pnal, | ||
| NUMA ** | pnat, | ||
| NUMA ** | pnar, | ||
| NUMA ** | pnab, | ||
| l_int32 | outformat | ||
| ) |
Input: boxa (source boxa)
plotname (<optional>, can be NULL)
&nal (<optional return> na of left sides)
&nat (<optional return> na of top sides)
&nar (<optional return> na of right sides)
&nab (<optional return> na of bottom sides)
outformat (GPLOT_NONE for no output; GPLOT_PNG for png, etc)
Return: 0 if OK, 1 on error
Notes: (1) This is a debugging function to show the progression of the four sides in the boxes. There must be at least 2 boxes. (2) If there are invalid boxes (e.g., if only even or odd indices have valid boxes), this will fill them with the nearest valid box before plotting. (3) The plotfiles are put in /tmp/plotsides, and are named either with @plotname or, if NULL, a default name.
Input: boxas (sorted by size in decreasing order)
maxoverlap (maximum fractional overlap of a box by any
of the larger boxes)
Return: boxad (pruned), or null on error
Notes: (1) This selectively removes smaller boxes when they are overlapped by any larger box by more than the input 'maxoverlap' fraction. (2) To avoid all pruning, use maxoverlap = 1.0. To select only boxes that have no overlap with each other (maximal pruning), set maxoverlap = 0.0. (3) If there are no boxes in boxas, returns an empty boxa.
Input: filename Return: boxa, or null on error
Input: data (ascii)
size (of data; can use strlen to get it)
Return: boxa, or null on error
Input: stream Return: boxa, or null on error
| LEPT_DLL BOXA* boxaReconcileEvenOddHeight | ( | BOXA * | boxas, |
| l_int32 | sides, | ||
| l_int32 | delh, | ||
| l_int32 | op, | ||
| l_float32 | factor | ||
| ) |
Input: boxas (containing at least 3 valid boxes in even and odd)
sides (L_ADJUST_TOP, L_ADJUST_BOT, L_ADJUST_TOP_AND_BOT)
delh (threshold on median height difference)
op (L_ADJUST_CHOOSE_MIN, L_ADJUST_CHOOSE_MAX)
factor (> 0.0, typically near 1.0)
Return: boxad (adjusted), or a copy of boxas on error
Notes: (1) The basic idea is to reconcile differences in box height in the even and odd boxes, by moving the top and/or bottom edges in the even and odd boxes. Choose the edge or edges to be moved, whether to adjust the boxes with the min or the max of the medians, and the threshold on the median difference between even and odd box heights for the operations to take place. The same threshold is also used to determine if each individual box edge is to be adjusted. (2) Boxes are conditionally reset with either the same top (y) value or the same bottom value, or both. The value is determined by the greater or lesser of the medians of the even and odd boxes, with the choice depending on the value of @op, which selects for either min or max median height. If the median difference between even and odd boxes is greater than @delh, then any individual box edge that differs from the selected median by more than @delh is set to the selected median times a factor typically near 1.0. (3) Note that if selecting for minimum height, you will choose the largest y-value for the top and the smallest y-value for the bottom of the box. (4) Typical input might be the output of boxaSmoothSequence(), where even and odd boxa have been independently regulated. (5) Require at least 3 valid even boxes and 3 valid odd boxes. Median values will be used for invalid boxes.
| LEPT_DLL BOXA* boxaReconcilePairWidth | ( | BOXA * | boxas, |
| l_int32 | delw, | ||
| l_int32 | op, | ||
| l_float32 | factor, | ||
| NUMA * | na | ||
| ) |
Input: boxas
delw (threshold on adjacent width difference)
op (L_ADJUST_CHOOSE_MIN, L_ADJUST_CHOOSE_MAX)
factor (> 0.0, typically near 1.0)
na (<optional> indicator array allowing change)
Return: boxad (adjusted), or a copy of boxas on error
Notes: (1) This reconciles differences in the width of adjacent boxes, by moving one side of one of the boxes in each pair. If the widths in the pair differ by more than some threshold, move either the left side for even boxes or the right side for odd boxes, depending on if we're choosing the min or max. If choosing min, the width of the max is set to factor * (width of min). If choosing max, the width of the min is set to factor * (width of max). (2) If @na exists, it is an indicator array corresponding to the boxes in @boxas. If @na != NULL, only boxes with an indicator value of 1 are allowed to adjust; otherwise, all boxes can adjust. (3) Typical input might be the output of boxaSmoothSequence(), where even and odd boxa have been independently regulated.
Input: boxa
index (of box to be removed)
Return: 0 if OK, 1 on error
Notes: (1) This removes box[index] and then shifts box[i] --> box[i - 1] for all i > index. (2) It should not be used repeatedly to remove boxes from large arrays, because the function is O(n).
Input: boxa
index (of box to be removed)
&box (<optional return> removed box)
Return: 0 if OK, 1 on error
Notes: (1) This removes box[index] and then shifts box[i] --> box[i - 1] for all i > index. (2) It should not be used repeatedly to remove boxes from large arrays, because the function is O(n).
Input: boxa
index (to the index-th box)
box (insert to replace existing one)
Return: 0 if OK, 1 on error
Notes: (1) In-place replacement of one box. (2) The previous box at that location, if any, is destroyed.
Input: boxas
(xc, yc) (location of center of rotation)
angle (rotation in radians; clockwise is positive)
Return: boxad (rotated boxas), or null on error
Notes: (1) See createMatrix2dRotate() for details of transform.
Input: boxa
w, h (of image in which the boxa is embedded)
rotation (0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg;
all rotations are clockwise)
Return: boxad, or null on error
Notes: (1) See boxRotateOrth() for details.
Input: boxa
copyflag (L_COPY or L_CLONE)
Return: boxad if OK, null on error
Notes: (1) This makes a copy/clone of each valid box.
Input: boxas
scalex (horizontal scale factor)
scaley (vertical scale factor)
Return: boxad (scaled boxas), or null on error
Notes: (1) See createMatrix2dScale() for details of transform.
| LEPT_DLL BOXA* boxaSelectByArea | ( | BOXA * | boxas, |
| l_int32 | area, | ||
| l_int32 | relation, | ||
| l_int32 * | pchanged | ||
| ) |
Input: boxas
area (threshold value of width * height)
relation (L_SELECT_IF_LT, L_SELECT_IF_GT,
L_SELECT_IF_LTE, L_SELECT_IF_GTE)
&changed (<optional return> 1 if changed; 0 if clone returned)
Return: boxad (filtered set), or null on error
Notes: (1) Uses box clones in the new boxa. (2) To keep small components, use relation = L_SELECT_IF_LT or L_SELECT_IF_LTE. To keep large components, use relation = L_SELECT_IF_GT or L_SELECT_IF_GTE.
| LEPT_DLL BOXA* boxaSelectBySize | ( | BOXA * | boxas, |
| l_int32 | width, | ||
| l_int32 | height, | ||
| l_int32 | type, | ||
| l_int32 | relation, | ||
| l_int32 * | pchanged | ||
| ) |
Input: boxas
width, height (threshold dimensions)
type (L_SELECT_WIDTH, L_SELECT_HEIGHT,
L_SELECT_IF_EITHER, L_SELECT_IF_BOTH)
relation (L_SELECT_IF_LT, L_SELECT_IF_GT,
L_SELECT_IF_LTE, L_SELECT_IF_GTE)
&changed (<optional return> 1 if changed; 0 if clone returned)
Return: boxad (filtered set), or null on error
Notes: (1) The args specify constraints on the size of the components that are kept. (2) Uses box clones in the new boxa. (3) If the selection type is L_SELECT_WIDTH, the input height is ignored, and v.v. (4) To keep small components, use relation = L_SELECT_IF_LT or L_SELECT_IF_LTE. To keep large components, use relation = L_SELECT_IF_GT or L_SELECT_IF_GTE.
Input: boxa (1 bpp)
areaslop (fraction near but less than 1.0)
yslop (number of pixels in y direction)
connectivity (4 or 8)
Return: box, or null on error
Notes: (1) See usage notes in pixSelectLargeULComp().
Input: boxas
first (use 0 to select from the beginning)
last (use 0 to select to the end)
copyflag (L_COPY, L_CLONE)
Return: boxad, or null on error
Notes: (1) The copyflag specifies what we do with each box from boxas. Specifically, L_CLONE inserts a clone into boxad of each selected box from boxas.
Input: boxas
na (indicator numa)
&changed (<optional return> 1 if changed; 0 if clone returned)
Return: boxad, or null on error
Notes: (1) Returns a boxa clone if no components are removed. (2) Uses box clones in the new boxa. (3) The indicator numa has values 0 (ignore) and 1 (accept).
| LEPT_DLL BOXA* boxaSetSide | ( | BOXA * | boxad, |
| BOXA * | boxas, | ||
| l_int32 | side, | ||
| l_int32 | val, | ||
| l_int32 | thresh | ||
| ) |
Input: boxad (use null to get a new one; same as boxas for in-place)
boxas
side (L_SET_LEFT, L_SET_RIGHT, L_SET_TOP, L_SET_BOT)
val (location to set for given side, for each box)
thresh (min abs difference to cause resetting to @val)
Return: boxad, or null on error
Notes: (1) Sets the given side of each box. Use boxad == NULL for a new boxa, and boxad == boxas for in-place. (2) Use one of these: boxad = boxaSetSide(NULL, boxas, ...); // new boxaSetSide(boxas, boxas, ...); // in-place
| LEPT_DLL l_int32 boxaSimilar | ( | BOXA * | boxa1, |
| BOXA * | boxa2, | ||
| l_int32 | leftdiff, | ||
| l_int32 | rightdiff, | ||
| l_int32 | topdiff, | ||
| l_int32 | botdiff, | ||
| l_int32 | debug, | ||
| l_int32 * | psimilar, | ||
| NUMA ** | pnasim | ||
| ) |
Input: boxa1
boxa2
leftdiff, rightdiff, topdiff, botdiff
debug (output details of non-similar boxes)
&similar (<return> 1 if similar; 0 otherwise)
&nasim (<optional return> na containing 1 if similar; else 0)
Return: 0 if OK, 1 on error
Notes: (1) See boxSimilar() for parameter usage. (2) Corresponding boxes are taken in order in the two boxa. (3) @nasim is an indicator array with a (0/1) for each box pair. (4) With @nasim or debug == 1, boxes continue to be tested after failure.
| LEPT_DLL l_int32 boxaSizeRange | ( | BOXA * | boxa, |
| l_int32 * | pminw, | ||
| l_int32 * | pminh, | ||
| l_int32 * | pmaxw, | ||
| l_int32 * | pmaxh | ||
| ) |
Input: boxa
&minw, &minh, &maxw, &maxh (<optional return> range of
dimensions of box in the array)
Return: 0 if OK, 1 on error
| LEPT_DLL BOXA* boxaSmoothSequenceLS | ( | BOXA * | boxas, |
| l_float32 | factor, | ||
| l_int32 | subflag, | ||
| l_int32 | maxdiff, | ||
| l_int32 | debug | ||
| ) |
Input: boxas (source boxa)
factor (reject outliers with widths and heights deviating
from the median by more than @factor times
the median variation from the median; typically ~3)
subflag (L_USE_MINSIZE, L_USE_MAXSIZE, L_SUB_ON_BIG_DIFF,
L_USE_CAPPED_MIN or L_USE_CAPPED_MAX)
maxdiff (parameter used with L_SUB_ON_BIG_DIFF and
L_USE_CAPPED_MAX)
debug (1 for debug output)
Return: boxad (fitted boxa), or null on error
Notes: (1) This returns a modified version of @boxas by constructing for each input box a box that has been linear least square fit (LSF) to the entire set. The linear fitting is done to each of the box sides independently, after outliers are rejected, and it is computed separately for sequences of even and odd boxes. Once the linear LSF box is found, the output box (in @boxad) is constructed from the input box and the LSF box, depending on @subflag. See boxaModifyWithBoxa() for details on the use of @subflag and @maxdiff. (2) This is useful if, in both the even and odd sets, the box edges vary roughly linearly with its index in the set.
| LEPT_DLL BOXA* boxaSmoothSequenceMedian | ( | BOXA * | boxas, |
| l_int32 | halfwin, | ||
| l_int32 | subflag, | ||
| l_int32 | maxdiff, | ||
| l_int32 | debug | ||
| ) |
Input: boxas (source boxa)
halfwin (half-width of sliding window; used to find median)
subflag (L_USE_MINSIZE, L_USE_MAXSIZE, L_SUB_ON_BIG_DIFF,
L_USE_CAPPED_MIN or L_USE_CAPPED_MAX)
maxdiff (parameter used with L_SUB_ON_BIG_DIFF,
L_USE_CAPPED_MIN and L_USE_CAPPED_MAX)
debug (1 for debug output)
Return: boxad (fitted boxa), or null on error
Notes: (1) The target width of the sliding window is 2 * @halfwin + 1. If necessary, this will be reduced by boxaWindowedMedian(). (2) This returns a modified version of @boxas by constructing for each input box a box that has been smoothed with windowed median filtering. The filtering is done to each of the box sides independently, and it is computed separately for sequences of even and odd boxes. Each box in the output @boxad is constructed from the input box and the filtered box, depending on @subflag. See boxaModifyWithBoxa() for details on the use of @subflag and @maxdiff. (3) This is useful for removing noise separately in the even and odd sets, where the box edge locations can have discontinuities but otherwise vary roughly linearly within intervals of size @halfwin or larger. (4) If you don't need to handle even and odd sets separately, just do this: boxam = boxaWindowedMedian(boxas, halfwin, debug); boxad = boxaModifyWithBoxa(boxas, boxam, subflag, maxdiff); boxaDestroy(&boxam);
Input: boxa
sorttype (L_SORT_BY_X, L_SORT_BY_Y,
L_SORT_BY_RIGHT, L_SORT_BY_BOT,
L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT,
L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION,
L_SORT_BY_PERIMETER, L_SORT_BY_AREA,
L_SORT_BY_ASPECT_RATIO)
sortorder (L_SORT_INCREASING, L_SORT_DECREASING)
&naindex (<optional return> index of sorted order into
original array)
Return: boxad (sorted version of boxas), or null on error
| LEPT_DLL BOXAA* boxaSort2d | ( | BOXA * | boxas, |
| NUMAA ** | pnaad, | ||
| l_int32 | delta1, | ||
| l_int32 | delta2, | ||
| l_int32 | minh1 | ||
| ) |
Input: boxas
&naa (<optional return> numaa with sorted indices
whose values are the indices of the input array)
delta1 (min overlap that permits aggregation of a box
onto a boxa of horizontally-aligned boxes; pass 1)
delta2 (min overlap that permits aggregation of a box
onto a boxa of horizontally-aligned boxes; pass 2)
minh1 (components less than this height either join an
existing boxa or are set aside for pass 2)
Return: baa (2d sorted version of boxa), or null on error
Notes: (1) The final result is a sort where the 'fast scan' direction is left to right, and the 'slow scan' direction is from top to bottom. Each boxa in the baa represents a sorted set of boxes from left to right. (2) Three passes are used to aggregate the boxas, which can correspond to characters or words in a line of text. In pass 1, only taller components, which correspond to xheight or larger, are permitted to start a new boxa. In pass 2, the remaining vertically-challenged components are allowed to join an existing boxa or start a new one. In pass 3, boxa whose extent is overlapping are joined. After that, the boxes in each boxa are sorted horizontally, and finally the boxa are sorted vertically. (3) If delta1 < 0, the first pass allows aggregation when boxes in the same boxa do not overlap vertically. The distance by which they can miss and still be aggregated is the absolute value |delta1|. Similar for delta2 on the second pass. (4) On the first pass, any component of height less than minh1 cannot start a new boxa; it's put aside for later insertion. (5) On the second pass, any small component that doesn't align with an existing boxa can start a new one. (6) This can be used to identify lines of text from character or word bounding boxes.
Input: boxas
naa (numaa that maps from the new baa to the input boxa)
Return: baa (sorted boxaa), or null on error
Input: boxas
naindex (na that maps from the new boxa to the input boxa)
Return: boxad (sorted), or null on error
Input: boxa
fillflag (1 to put invalid boxes in place; 0 to omit)
&boxae, &boxao (<return> save even and odd boxes in their
separate boxa, setting the other type to invalid boxes.)
Return: 0 if OK, 1 on error
Notes: (1) If @fillflag == 1, boxae has copies of the even boxes in their original location, and invalid boxes are placed in the odd array locations. And v.v. (2) If @fillflag == 0, boxae has only copies of the even boxes.
Input: boxa
i, j (two indices of boxes, that are to be swapped)
Return: 0 if OK, 1 on error
| LEPT_DLL BOXA* boxaTransform | ( | BOXA * | boxas, |
| l_int32 | shiftx, | ||
| l_int32 | shifty, | ||
| l_float32 | scalex, | ||
| l_float32 | scaley | ||
| ) |
Input: boxa
shiftx, shifty
scalex, scaley
Return: boxad, or null on error
Notes: (1) This is a very simple function that first shifts, then scales.
| LEPT_DLL BOXA* boxaTransformOrdered | ( | BOXA * | boxas, |
| l_int32 | shiftx, | ||
| l_int32 | shifty, | ||
| l_float32 | scalex, | ||
| l_float32 | scaley, | ||
| l_int32 | xcen, | ||
| l_int32 | ycen, | ||
| l_float32 | angle, | ||
| l_int32 | order | ||
| ) |
Input: boxa
shiftx, shifty
scalex, scaley
xcen, ycen (center of rotation)
angle (in radians; clockwise is positive)
order (one of 6 combinations: L_TR_SC_RO, ...)
Return: boxd, or null on error
Notes: (1) This allows a sequence of linear transforms on each box. The transforms are from the affine set, composed of shift, scaling and rotation, and the order of the transforms is specified. (2) Although these operations appear to be on an infinite 2D plane, in practice the region of interest is clipped to a finite image. The center of rotation is usually taken with respect to the image (either the UL corner or the center). A translation can have two very different effects: (a) Moves the boxes across the fixed image region. (b) Moves the image origin, causing a change in the image region and an opposite effective translation of the boxes. This function should only be used for (a), where the image region is fixed on translation. If the image region is changed by the translation, use instead the functions in affinecompose.c, where the image region and rotation center can be computed from the actual clipping due to translation of the image origin. (3) See boxTransformOrdered() for usage and implementation details.
Input: boxas
transx (x component of translation wrt. the origin)
transy (y component of translation wrt. the origin)
Return: boxad (translated boxas), or null on error
Notes: (1) See createMatrix2dTranslate() for details of transform.
Input: boxas (source boxa)
halfwin (half width of window over which the median is found)
debug (1 for debug output)
Return: boxad (smoothed boxa), or null on error
Notes: (1) This finds a set of boxes (boxad) where each edge of each box is a windowed median smoothed value to the edges of the input set of boxes (boxas). (2) Invalid input boxes are filled from nearby ones. (3) The returned boxad can then be used in boxaModifyWithBoxa() to selectively change the boxes in the source boxa.
Input: filename
boxa
Return: 0 if OK, 1 on error
Input: &data (<return> data of serialized boxa; ascii)
&size (<return> size of returned data)
boxa
Return: 0 if OK, 1 on error
Input: stream
boxa
Return: 0 if OK, 1 on error
Input: box1, box2 (two boxes)
Return: box (of bounding region containing the input boxes),
or null on error
Notes: (1) This is the geometric union of the two rectangles.
Input: box
wi, hi (rectangle representing image)
Return: part of box within given rectangle, or NULL on error
or if box is entirely outside the rectangle
Notes: (1) This can be used to clip a rectangle to an image. The clipping rectangle is assumed to have a UL corner at (0, 0), and a LR corner at (wi - 1, hi - 1).
| LEPT_DLL l_int32 boxClipToRectangleParams | ( | BOX * | box, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 * | pxstart, | ||
| l_int32 * | pystart, | ||
| l_int32 * | pxend, | ||
| l_int32 * | pyend, | ||
| l_int32 * | pbw, | ||
| l_int32 * | pbh | ||
| ) |
Input: box (<optional> requested box; can be null)
w, h (clipping box size; typ. the size of an image)
&xstart (<return>)
&ystart (<return>)
&xend (<return> one pixel beyond clipping box)
&yend (<return> one pixel beyond clipping box)
&bw (<optional return> clipped width)
&bh (<optional return> clipped height)
Return: 0 if OK; 1 on error
Notes: (1) The return value should be checked. If it is 1, the returned parameter values are bogus. (2) This simplifies the selection of pixel locations within a given rectangle: for (i = ystart; i < yend; i++) { ... for (j = xstart; j < xend; j++) { ....
Input: box Return: ptr to same box, or null on error
Input: box1, box2
&result (<return> 1 if box2 is entirely contained within
box1, and 0 otherwise)
Return: 0 if OK, 1 on error
Input: box
x, y (a point)
&contains (<return> 1 if box contains point; 0 otherwise)
Return: 0 if OK, 1 on error.
Input: box
ncorners (2 or 4 for the representation of the box)
Return: pta (with @ncorners points), or null on error
Notes: (1) If ncorners == 2, we select the UL and LR corners. Otherwise we save all 4 corners in this order: UL, UR, LL, LR.
Input: x, y, w, h Return: box, or null on error
Notes: (1) This clips the box to the +quad. If no part of the box is in the +quad, this returns NULL. (2) We allow you to make a box with w = 0 and/or h = 0. This does not represent a valid region, but it is useful as a placeholder in a boxa for which the index of the box in the boxa is important. This is an atypical situation; usually you want to put only valid boxes with nonzero width and height in a boxa. If you have a boxa with invalid boxes, the accessor boxaGetValidBox() will return NULL on each invalid box. (3) If you want to create only valid boxes, use boxCreateValid(), which returns NULL if either w or h is 0.
Input: x, y, w, h Return: box, or null on error
Notes: (1) This returns NULL if either w = 0 or h = 0.
Input: &box (<will be set to null before returning>) Return: void
Notes: (1) Decrements the ref count and, if 0, destroys the box. (2) Always nulls the input ptr.
Input: box1
box2
&same (<return> 1 if equal; 0 otherwise)
Return: 0 if OK, 1 on error
Input: box
&cx, &cy (<return> location of center of box)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 boxGetGeometry | ( | BOX * | box, |
| l_int32 * | px, | ||
| l_int32 * | py, | ||
| l_int32 * | pw, | ||
| l_int32 * | ph | ||
| ) |
Input: box
&x, &y, &w, &h (<optional return>; each can be null)
Return: 0 if OK, 1 on error
Input: box
side (L_GET_LEFT, L_GET_RIGHT, L_GET_TOP, L_GET_BOT)
&loc (<return> location)
Return: 0 if OK, 1 on error
Notes: (1) All returned values are within the box. In particular: right = left + width - 1 bottom = top + height - 1
| LEPT_DLL l_int32 boxIntersectByLine | ( | BOX * | box, |
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_float32 | slope, | ||
| l_int32 * | px1, | ||
| l_int32 * | py1, | ||
| l_int32 * | px2, | ||
| l_int32 * | py2, | ||
| l_int32 * | pn | ||
| ) |
Input: box
x, y (point that line goes through)
slope (of line)
(&x1, &y1) (<return> 1st point of intersection with box)
(&x2, &y2) (<return> 2nd point of intersection with box)
&n (<return> number of points of intersection)
Return: 0 if OK, 1 on error
Notes: (1) If the intersection is at only one point (a corner), the coordinates are returned in (x1, y1). (2) Represent a vertical line by one with a large but finite slope.
Input: box1, box2
&result (<return> 1 if any part of box2 is contained
in box1, and 0 otherwise)
Return: 0 if OK, 1 on error
Input: box
&valid (<return> 1 if valid; 0 otherwise)
Return: 0 if OK, 1 on error
Input: box1, box2 (two boxes)
&area (<return> the number of pixels in the overlap)
Return: 0 if OK, 1 on error.
Input: box1, box2 (two boxes)
&fract (<return> the fraction of box2 overlapped by box1)
Return: 0 if OK, 1 on error.
Notes: (1) The result depends on the order of the input boxes, because the overlap is taken as a fraction of box2.
Input: box1, box2 (two boxes)
Return: box (of overlap region between input boxes),
or null if no overlap or on error
Notes: (1) This is the geometric intersection of the two rectangles.
Input: stream
box
Return: 0 if OK, 1 on error
Notes: (1) This outputs debug info. Use serialization functions to write to file if you want to read the data back.
Input: boxd (<optional>; this can be null, equal to boxs,
or different from boxs);
boxs (starting box; to have one side relocated)
loc (new location of the side that is changing)
sideflag (L_FROM_LEFT, etc., indicating the side that moves)
Return: boxd, or null on error or if the computed boxd has
width or height <= 0.
Notes: (1) Set boxd == NULL to get new box; boxd == boxs for in-place; or otherwise to resize existing boxd. (2) For usage, suggest one of these: boxd = boxRelocateOneSide(NULL, boxs, ...); // new boxRelocateOneSide(boxs, boxs, ...); // in-place boxRelocateOneSide(boxd, boxs, ...); // other
Input: box
w, h (of image in which the box is embedded)
rotation (0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg;
all rotations are clockwise)
Return: boxd, or null on error
Notes: (1) Rotate the image with the embedded box by the specified amount. (2) After rotation, the rotated box is always measured with respect to the UL corner of the image.
| LEPT_DLL l_int32 boxSeparationDistance | ( | BOX * | box1, |
| BOX * | box2, | ||
| l_int32 * | ph_sep, | ||
| l_int32 * | pv_sep | ||
| ) |
Input: box1, box2 (two boxes, in any order)
&h_sep (<optional return> horizontal separation)
&v_sep (<optional return> vertical separation)
Return: 0 if OK, 1 on error
Notes: (1) This measures horizontal and vertical separation of the two boxes. If the boxes are touching but have no pixels in common, the separation is 0. If the boxes overlap by a distance d, the returned separation is -d.
Input: box
x, y, w, h (use -1 to leave unchanged)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 boxSimilar | ( | BOX * | box1, |
| BOX * | box2, | ||
| l_int32 | leftdiff, | ||
| l_int32 | rightdiff, | ||
| l_int32 | topdiff, | ||
| l_int32 | botdiff, | ||
| l_int32 * | psimilar | ||
| ) |
Input: box1
box2
leftdiff, rightdiff, topdiff, botdiff
&similar (<return> 1 if similar; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) The values of leftdiff (etc) are the maximum allowed deviations between the locations of the left (etc) sides. If any side pairs differ by more than this amount, the boxes are not similar.
| LEPT_DLL BOX* boxTransform | ( | BOX * | box, |
| l_int32 | shiftx, | ||
| l_int32 | shifty, | ||
| l_float32 | scalex, | ||
| l_float32 | scaley | ||
| ) |
Input: box
shiftx, shifty
scalex, scaley
Return: boxd, or null on error
Notes: (1) This is a very simple function that first shifts, then scales. (2) If the box is invalid, a new invalid box is returned.
| LEPT_DLL BOX* boxTransformOrdered | ( | BOX * | boxs, |
| l_int32 | shiftx, | ||
| l_int32 | shifty, | ||
| l_float32 | scalex, | ||
| l_float32 | scaley, | ||
| l_int32 | xcen, | ||
| l_int32 | ycen, | ||
| l_float32 | angle, | ||
| l_int32 | order | ||
| ) |
Input: boxs
shiftx, shifty
scalex, scaley
xcen, ycen (center of rotation)
angle (in radians; clockwise is positive)
order (one of 6 combinations: L_TR_SC_RO, ...)
Return: boxd, or null on error
Notes: (1) This allows a sequence of linear transforms, composed of shift, scaling and rotation, where the order of the transforms is specified. (2) The rotation is taken about a point specified by (xcen, ycen). Let the components of the vector from the center of rotation to the box center be (xdif, ydif): xdif = (bx + 0.5 * bw) - xcen ydif = (by + 0.5 * bh) - ycen Then the box center after rotation has new components: bxcen = xcen + xdif * cosa + ydif * sina bycen = ycen + ydif * cosa - xdif * sina where cosa and sina are the cos and sin of the angle, and the enclosing box for the rotated box has size: rw = |bw * cosa| + |bh * sina| rh = |bh * cosa| + |bw * sina| where bw and bh are the unrotated width and height. Then the box UL corner (rx, ry) is rx = bxcen - 0.5 * rw ry = bycen - 0.5 * rh (3) The center of rotation specified by args @xcen and @ycen is the point BEFORE any translation or scaling. If the rotation is not the first operation, this function finds the actual center at the time of rotation. It does this by making the following assumptions: (1) Any scaling is with respect to the UL corner, so that the center location scales accordingly. (2) A translation does not affect the center of the image; it just moves the boxes. We always use assumption (1). However, assumption (2) will be incorrect if the apparent translation is due to a clipping operation that, in effect, moves the origin of the image. In that case, you should NOT use these simple functions. Instead, use the functions in affinecompose.c, where the rotation center can be computed from the actual clipping due to translation of the image origin.
Input: ccba
ccb (to be added by insertion)
Return: 0 if OK; 1 on error
Input: pixs (binary image; can be null)
n (initial number of ptrs)
Return: ccba, or null on error
Input: &ccba (<to be nulled>) Return: void
Input: ccba Return: pix of border pixels, or null on error
Notes: (1) Uses global ptaa, which gives each border pixel in global coordinates, and must be computed in advance by calling ccbaGenerateGlobalLocs().
Input: ccborda Return: pix of image, or null on error
Notes: (1) Uses local ptaa, which gives each border pixel in local coordinates, so the actual pixel positions must be computed using all offsets. (2) For the holes, use coordinates relative to the c.c. (3) This is slower than Method 2. (4) This uses topological properties (Method 1) to do scan conversion to raster
This algorithm deserves some commentary.
I first tried the following:
inner borders: 4-fill from outside, stopping again at the border, XOR with the border, and invert to get the hole. This did not work, because if you have a hole border that looks like:
x x x x x x
x x
x x x x x
x x o x x
x x
x x
x x x
if you 4-fill from the outside, the pixel 'o' will not be filled! XORing with the border leaves it OFF. Inverting then gives a single bad ON pixel that is not actually part of the hole.
So what you must do instead is 4-fill the holes from inside. You can do this from a seedfill, using a pix with the hole border as the filling mask. But you need to start with a pixel inside the hole. How is this determined? The best way is from the contour. We have a right-hand shoulder rule for inside (i.e., the filled region). Take the first 2 pixels of the hole border, and compute dx and dy (second coord minus first coord: dx = sx - fx, dy = sy - fy). There are 8 possibilities, depending on the values of dx and dy (which can each be -1, 0, and +1, but not both 0). These 8 cases can be broken into 4; see the simple algorithm below. Once you have an interior seed pixel, you fill from the seed, clipping with the hole border pix by filling into its invert.
You then successively XOR these interior filled components, in any order.
Input: ccborda Return: pix of image, or null on error
Notes: (1) Uses local chain ptaa, which gives each border pixel in local coordinates, so the actual pixel positions must be computed using all offsets. (2) Treats exterior and hole borders on equivalent footing, and does all calculations on a pix that spans the c.c. with a 1 pixel added boundary. (3) This uses topological properties (Method 2) to do scan conversion to raster (4) The algorithm is described at the top of this file (Method 2). It is preferred to Method 1 because it is between 1.2x and 2x faster than Method 1.
Input: ccba Return: pix of border pixels, or null on error
Notes: (1) Uses spglobal pta, which gives each border pixel in global coordinates, one path per c.c., and must be computed in advance by calling ccbaGenerateSPGlobalLocs().
Input: ccba (with local chain ptaa of borders computed) Return: 0 if OK, 1 on error
Action: this uses the pixel locs in the local ptaa, which are all relative to each c.c., to find the global pixel locations, and stores them in the global ptaa.
Input: ccba Return: 0 if OK, 1 on error
Notes: (1) Generates a single border in local pixel coordinates. For each c.c., if there is just an outer border, copy it. If there are also hole borders, for each hole border, determine the smallest horizontal or vertical distance from the border to the outside of the c.c., and find a path through the c.c. for this cut. We do this in a way that guarantees a pixel from the hole border is the starting point of the path, and we must verify that the path intersects the outer border (if it intersects it, then it ends on it). One can imagine pathological cases, but they may not occur in images of text characters and un-textured line graphics. (2) Once it is verified that the path through the c.c. intersects both the hole and outer borders, we generate the full single path for all borders in the c.c. Starting at the start point on the outer border, when we hit a line on a cut, we take the cut, do the hole border, and return on the cut to the outer border. We compose a pta of the outer border pts that are on cut paths, and for every point on the outer border (as we go around), we check against this pta. When we find a matching point in the pta, we do its cut path and hole border. The single path is saved in the ccb.
Input: ccba
ptsflag (CCB_SAVE_ALL_PTS or CCB_SAVE_TURNING_PTS)
Return: 0 if OK, 1 on error
Notes: (1) This calculates the splocal rep if not yet made. (2) It uses the local pixel values in splocal, the single path pta, which are all relative to each c.c., to find the corresponding global pixel locations, and stores them in the spglobal pta. (3) This lists only the turning points: it both makes a valid svg file and is typically about half the size when all border points are listed.
Input: ccba (with local chain ptaa of borders computed) Return: 0 if OK, 1 on error
Notes: (1) This uses the pixel locs in the local ptaa, which are all relative to each c.c., to find the step directions for successive pixels in the chain, and stores them in the step numaa. (2) To get the step direction, use the 3x3 layout whose rows are [1 2 3], [0 P 4], [7 6 5], where P is the previous pixel at (px, py). The step direction is the number (from 0 through 7) for each relative location of the current pixel at (cx, cy). It is easily found by indexing into a 2-d 3x3 array (dirtab).
Input: ccba Return: ccb, or null on error
Input: ccba Return: count, with 0 on error
Input: filename Return: ccba, or null on error
Input: stream Return: ccba, or null on error
Format: ccba: %7d cc
(num. c.c.) (ascii) (17B) pix width (4B) pix height (4B) [for i = 1, ncc] ulx (4B) uly (4B) w (4B) – not req'd for reconstruction h (4B) – not req'd for reconstruction number of borders (4B) [for j = 1, nb] startx (4B) starty (4B) [for k = 1, nb] 2 steps (1B) end in z8 or 88 (1B)
Input: ccba (with step chains numaa of borders)
coordtype (CCB_GLOBAL_COORDS or CCB_LOCAL_COORDS)
Return: 0 if OK, 1 on error
Notes: (1) This uses the step chain data in each ccb to determine the pixel locations, either global or local, and stores them in the appropriate ptaa, either global or local. For the latter, the pixel locations are relative to the c.c.
Input: filename
ccba
Return: 0 if OK, 1 on error
Input: stream
ccba
Return: 0 if OK; 1 on error
Format: ccba: %7d cc
(num. c.c.) (ascii) (18B) pix width (4B) pix height (4B) [for i = 1, ncc] ulx (4B) uly (4B) w (4B) – not req'd for reconstruction h (4B) – not req'd for reconstruction number of borders (4B) [for j = 1, nb] startx (4B) starty (4B) [for k = 1, nb] 2 steps (1B) end in z8 or 88 (1B)
Input: filename
ccba
Return: 0 if OK, 1 on error
Input: filename
ccba
Return: svg-formatted string that can be written to file,
or null on error.
Input: pixs (<optional>) Return: ccb or null on error
Input: &ccb (<to be nulled>) Return: void
| LEPT_DLL l_int32 cidConvertToPdfData | ( | L_COMP_DATA * | cid, |
| const char * | title, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
Input: cid (compressed image data -- of jp2k image)
title (<optional> pdf title; can be NULL)
&data (<return> output pdf data for image)
&nbytes (<return> size of output pdf data)
Return: 0 if OK, 1 on error
Notes: (1) Caller must not destroy the cid. It is absorbed in the lpd and destroyed by this function.
| LEPT_DLL l_int32 compareTilesByHisto | ( | NUMAA * | naa1, |
| NUMAA * | naa2, | ||
| l_float32 | minratio, | ||
| l_int32 | w1, | ||
| l_int32 | h1, | ||
| l_int32 | w2, | ||
| l_int32 | h2, | ||
| l_float32 * | pscore, | ||
| PIXA * | pixadebug | ||
| ) |
Input: naa1, naa2 (each is a set of 256 entry histograms)
minratio (requiring image sizes be compatible; < 1.0)
w1, h1, w2, h2 (image sizes from which histograms were made)
&score (<return> similarity score of histograms)
pixadebug (<optional> use only for debug output)
Return: 0 if OK, 1 on error
Notes: (1) naa1 and naa2 must be generated using pixGenPhotoHistos(), using the same tile sizes. (2) The image dimensions must be similar. The score is 0.0 if the ratio of widths and heights (smallest / largest) falls below a threshold @minratio, which must be between 0.5 and 1.0. If set at 1.0, both images must be exactly the same size. A typical value for @minratio is 0.9. (3) The input pixadebug is null unless debug output is requested.
| LEPT_DLL l_int32 composeRGBAPixel | ( | l_int32 | rval, |
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_int32 | aval, | ||
| l_uint32 * | ppixel | ||
| ) |
Input: rval, gval, bval, aval
&pixel (<return> 32-bit pixel)
Return: 0 if OK; 1 on error
Notes: (1) All channels are 8 bits: the input values must be between 0 and 255. For speed, this is not enforced by masking with 0xff before shifting.
Input: rval, gval, bval
&pixel (<return> 32-bit pixel)
Return: 0 if OK; 1 on error
Notes: (1) All channels are 8 bits: the input values must be between 0 and 255. For speed, this is not enforced by masking with 0xff before shifting. (2) A slower implementation uses macros: SET_DATA_BYTE(ppixel, COLOR_RED, rval); SET_DATA_BYTE(ppixel, COLOR_GREEN, gval); SET_DATA_BYTE(ppixel, COLOR_BLUE, bval);
Input: directory name (containing single-page pdf files)
substr (<optional> substring filter on filenames; can be NULL)
fileout (concatenated pdf file)
Return: 0 if OK, 1 on error
Notes: (1) This only works with leptonica-formatted single-page pdf files. (2) If @substr is not NULL, only filenames that contain the substring can be returned. If @substr == NULL, none of the filenames are filtered out. (3) The files in the directory, after optional filtering by the substring, are lexically sorted in increasing order before concatenation.
| LEPT_DLL l_int32 concatenatePdfToData | ( | const char * | dirname, |
| const char * | substr, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
Input: directory name (containing single-page pdf files)
substr (<optional> substring filter on filenames; can be NULL)
&data (<return> concatenated pdf data in memory)
&nbytes (<return> number of bytes in pdf data)
Return: 0 if OK, 1 on error
Notes: (1) This only works with leptonica-formatted single-page pdf files. (2) If @substr is not NULL, only filenames that contain the substring can be returned. If @substr == NULL, none of the filenames are filtered out. (3) The files in the directory, after optional filtering by the substring, are lexically sorted in increasing order before concatenation.
Input: val Return: gray code value
Notes: (1) Gray code values corresponding to integers differ by only one bit transition between successive integers.
Input: byteval (input byte)
&nib1, &nib2 (<return> two hex ascii characters)
Return: void
| LEPT_DLL l_int32 convertFilesFittedToPS | ( | const char * | dirin, |
| const char * | substr, | ||
| l_float32 | xpts, | ||
| l_float32 | ypts, | ||
| const char * | fileout | ||
| ) |
| LEPT_DLL l_int32 convertFilesTo1bpp | ( | const char * | dirin, |
| const char * | substr, | ||
| l_int32 | upscaling, | ||
| l_int32 | thresh, | ||
| l_int32 | firstpage, | ||
| l_int32 | npages, | ||
| const char * | dirout, | ||
| l_int32 | outformat | ||
| ) |
Input: dirin
substr (<optional> substring filter on filenames; can be NULL)
upscaling (1, 2 or 4; only for input color or grayscale)
thresh (global threshold for binarization; use 0 for default)
firstpage
npages (use 0 to do all from @firstpage to the end)
dirout
outformat (IFF_PNG, IFF_TIFF_G4)
Return: 0 if OK, 1 on error
Notes: (1) Images are sorted lexicographically, and the names in the output directory are retained except for the extension.
| LEPT_DLL l_int32 convertFilesToPdf | ( | const char * | dirname, |
| const char * | substr, | ||
| l_int32 | res, | ||
| l_float32 | scalefactor, | ||
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | title, | ||
| const char * | fileout | ||
| ) |
Input: directory name (containing images)
substr (<optional> substring filter on filenames; can be NULL)
res (input resolution of all images)
scalefactor (scaling factor applied to each image; > 0.0)
type (encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
L_FLATE_ENCODE, or 0 for default))
quality (used for JPEG only; 0 for default (75))
title (<optional> pdf title; if null, taken from the first
image filename)
fileout (pdf file of all images)
Return: 0 if OK, 1 on error
Notes: (1) If @substr is not NULL, only image filenames that contain the substring can be used. If @substr == NULL, all files in the directory are used. (2) The files in the directory, after optional filtering by the substring, are lexically sorted in increasing order before concatenation. (3) The scalefactor is applied to each image before encoding. If you enter a value <= 0.0, it will be set to 1.0. (4) Specifying one of the three encoding types for @type forces all images to be compressed with that type. Use 0 to have the type determined for each image based on depth and whether or not it has a colormap.
| LEPT_DLL l_int32 convertFilesToPS | ( | const char * | dirin, |
| const char * | substr, | ||
| l_int32 | res, | ||
| const char * | fileout | ||
| ) |
| LEPT_DLL l_int32 convertFlateToPS | ( | const char * | filein, |
| const char * | fileout, | ||
| const char * | operation, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | res, | ||
| l_float32 | scale, | ||
| l_int32 | pageno, | ||
| l_int32 | endpage | ||
| ) |
Input: filein (input file -- any format)
fileout (output ps file)
operation ("w" for write; "a" for append)
x, y (location of LL corner of image, in pixels, relative
to the PostScript origin (0,0) at the LL corner
of the page)
res (resolution of the input image, in ppi; use 0 for default)
scale (scaling by printer; use 0.0 or 1.0 for no scaling)
pageno (page number; must start with 1; you can use 0
if there is only one page.)
endpage (boolean: use TRUE if this is the last image to be
added to the page; FALSE otherwise)
Return: 0 if OK, 1 on error
Notes: (1) This outputs level 3 PS as flate compressed (overlaid with ascii85 encoding). (2) An output file can contain multiple pages, each with multiple images. The arguments to convertFlateToPS() allow you to control placement of png images on multiple pages within a PostScript file. (3) For the first image written to a file, use "w", which opens for write and clears the file. For all subsequent images written to that file, use "a". (4) The (x, y) parameters give the LL corner of the image relative to the LL corner of the page. They are in units of pixels if scale = 1.0. If you use (e.g.) scale = 2.0, the image is placed at (2x, 2y) on the page, and the image dimensions are also doubled. (5) Display vs printed resolution:
Input: filein (input file -- any format)
fileout (output ps file)
Return: 0 if OK, 1 on error
Notes: (1) This function takes any image file as input and generates a flate-compressed, ascii85 encoded PS file, with a bounding box. (2) The bounding box is required when a program such as TeX (through epsf) places and rescales the image. (3) The bounding box is sized for fitting the image to an 8.5 x 11.0 inch page.
| LEPT_DLL l_int32 convertFlateToPSString | ( | const char * | filein, |
| char ** | poutstr, | ||
| l_int32 * | pnbytes, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | res, | ||
| l_float32 | scale, | ||
| l_int32 | pageno, | ||
| l_int32 | endpage | ||
| ) |
Generates level 3 PS string in flate compressed format.
Input: filein (input image file)
&poutstr (<return> PS string)
&nbytes (<return> number of bytes in PS string)
x, y (location of LL corner of image, in pixels, relative
to the PostScript origin (0,0) at the LL corner
of the page)
res (resolution of the input image, in ppi; use 0 for default)
scale (scaling by printer; use 0.0 or 1.0 for no scaling)
pageno (page number; must start with 1; you can use 0
if there is only one page.)
endpage (boolean: use TRUE if this is the last image to be
added to the page; FALSE otherwise)
Return: 0 if OK, 1 on error
Notes: (1) The returned PS character array is a null-terminated ascii string. All the raster data is ascii85 encoded, so there are no null bytes embedded in it. (2) The raster encoding is made with gzip, the same as that in a png file that is compressed without prediction. The raster data itself is 25% larger than that in the binary form, due to the ascii85 encoding.
Usage: See convertFlateToPS()
| LEPT_DLL l_int32 convertG4ToPS | ( | const char * | filein, |
| const char * | fileout, | ||
| const char * | operation, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | res, | ||
| l_float32 | scale, | ||
| l_int32 | pageno, | ||
| l_int32 | maskflag, | ||
| l_int32 | endpage | ||
| ) |
Input: filein (input tiff g4 file)
fileout (output ps file)
operation ("w" for write; "a" for append)
x, y (location of LL corner of image, in pixels, relative
to the PostScript origin (0,0) at the LL corner
of the page)
res (resolution of the input image, in ppi; typ. values
are 300 and 600; use 0 for automatic determination
based on image size)
scale (scaling by printer; use 0.0 or 1.0 for no scaling)
pageno (page number; must start with 1; you can use 0
if there is only one page.)
maskflag (boolean: use TRUE if just painting through fg;
FALSE if painting both fg and bg)
endpage (boolean: use TRUE if this is the last image to be
added to the page; FALSE otherwise)
Return: 0 if OK, 1 on error
Notes: (1) See the usage comments in convertJpegToPS(), some of which are repeated here. (2) This is a wrapper for tiff g4. The PostScript that is generated is expanded by about 5/4 (due to the ascii85 encoding). If you convert to pdf (ps2pdf), the ascii85 decoder is automatically invoked, so that the pdf wrapped g4 file is essentially the same size as the original g4 file. It's useful to have the PS file ascii85 encoded, because many printers will not print binary PS files. (3) For the first image written to a file, use "w", which opens for write and clears the file. For all subsequent images written to that file, use "a". (4) To render multiple images on the same page, set endpage = FALSE for each image until you get to the last, for which you set endpage = TRUE. This causes the "showpage" command to be invoked. Showpage outputs the entire page and clears the raster buffer for the next page to be added. Without a "showpage", subsequent images from the next page will overlay those previously put down. (5) For multiple images to the same page, where you are writing both jpeg and tiff-g4, you have two options: (a) write the g4 first, as either image (maskflag == FALSE) or imagemask (maskflag == TRUE), and then write the jpeg over it. (b) write the jpeg first and as the last item, write the g4 as an imagemask (maskflag == TRUE), to paint through the foreground only. We have this flexibility with the tiff-g4 because it is 1 bpp. (6) For multiple pages, increment the page number, starting with page 1. This allows PostScript (and PDF) to build a page directory, which viewers use for navigation.
Input: filein (input tiff file)
fileout (output ps file)
Return: 0 if OK, 1 on error
Notes: (1) This function takes a g4 compressed tif file as input and generates a g4 compressed, ascii85 encoded PS file, with a bounding box. (2) The bounding box is required when a program such as TeX (through epsf) places and rescales the image. (3) The bounding box is sized for fitting the image to an 8.5 x 11.0 inch page. (4) We paint this through a mask, over whatever is below.
| LEPT_DLL l_int32 convertG4ToPSString | ( | const char * | filein, |
| char ** | poutstr, | ||
| l_int32 * | pnbytes, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | res, | ||
| l_float32 | scale, | ||
| l_int32 | pageno, | ||
| l_int32 | maskflag, | ||
| l_int32 | endpage | ||
| ) |
Input: filein (input tiff g4 file)
&poutstr (<return> PS string)
&nbytes (<return> number of bytes in PS string)
x, y (location of LL corner of image, in pixels, relative
to the PostScript origin (0,0) at the LL corner
of the page)
res (resolution of the input image, in ppi; typ. values
are 300 and 600; use 0 for automatic determination
based on image size)
scale (scaling by printer; use 0.0 or 1.0 for no scaling)
pageno (page number; must start with 1; you can use 0
if there is only one page.)
maskflag (boolean: use TRUE if just painting through fg;
FALSE if painting both fg and bg)
endpage (boolean: use TRUE if this is the last image to be
added to the page; FALSE otherwise)
Return: 0 if OK, 1 on error
Notes: (1) Generates PS string in G4 compressed tiff format from G4 tiff file. (2) For usage, see convertG4ToPS().
Input: gray code value
Return: binary value
| LEPT_DLL l_int32 convertHSVToRGB | ( | l_int32 | hval, |
| l_int32 | sval, | ||
| l_int32 | vval, | ||
| l_int32 * | prval, | ||
| l_int32 * | pgval, | ||
| l_int32 * | pbval | ||
| ) |
Input: hval, sval, vval
&rval, &gval, &bval (<return> RGB values)
Return: 0 if OK, 1 on error
Notes: (1) See convertRGBToHSV() for valid input range of HSV values and their interpretation in color space.
| LEPT_DLL l_int32 convertImageDataToPdf | ( | l_uint8 * | imdata, |
| size_t | size, | ||
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | fileout, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | res, | ||
| const char * | title, | ||
| L_PDF_DATA ** | plpd, | ||
| l_int32 | position | ||
| ) |
Input: imdata (array of formatted image data; e.g., png, jpeg)
size (size of image data)
type (L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE)
quality (used for JPEG only; 0 for default (75))
fileout (output pdf file; only required on last image on page)
x, y (location of lower-left corner of image, in pixels,
relative to the PostScript origin (0,0) at
the lower-left corner of the page)
res (override the resolution of the input image, in ppi;
use 0 to respect the resolution embedded in the input)
title (<optional> pdf title)
&lpd (ptr to lpd, which is created on the first invocation
and returned until last image is processed, at which
time it is destroyed)
position (in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE,
L_LAST_IMAGE)
Return: 0 if OK, 1 on error
Notes: (1) If @res == 0 and the input resolution field is 0, this will use DEFAULT_INPUT_RES. (2) See comments in convertToPdf().
| LEPT_DLL l_int32 convertImageDataToPdfData | ( | l_uint8 * | imdata, |
| size_t | size, | ||
| l_int32 | type, | ||
| l_int32 | quality, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | res, | ||
| const char * | title, | ||
| L_PDF_DATA ** | plpd, | ||
| l_int32 | position | ||
| ) |
Input: imdata (array of formatted image data; e.g., png, jpeg)
size (size of image data)
type (L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE)
quality (used for JPEG only; 0 for default (75))
&data (<return> pdf data in memory)
&nbytes (<return> number of bytes in pdf data)
x, y (location of lower-left corner of image, in pixels,
relative to the PostScript origin (0,0) at
the lower-left corner of the page)
res (override the resolution of the input image, in ppi;
use 0 to respect the resolution embedded in the input)
title (<optional> pdf title)
&lpd (ptr to lpd, which is created on the first invocation
and returned until last image is processed, at which
time it is destroyed)
position (in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE,
L_LAST_IMAGE)
Return: 0 if OK, 1 on error
Notes: (1) If @res == 0 and the input resolution field is 0, this will use DEFAULT_INPUT_RES. (2) See comments in convertToPdf().
| LEPT_DLL l_int32 convertJpegToPS | ( | const char * | filein, |
| const char * | fileout, | ||
| const char * | operation, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | res, | ||
| l_float32 | scale, | ||
| l_int32 | pageno, | ||
| l_int32 | endpage | ||
| ) |
Input: filein (input jpeg file)
fileout (output ps file)
operation ("w" for write; "a" for append)
x, y (location of LL corner of image, in pixels, relative
to the PostScript origin (0,0) at the LL corner
of the page)
res (resolution of the input image, in ppi; use 0 for default)
scale (scaling by printer; use 0.0 or 1.0 for no scaling)
pageno (page number; must start with 1; you can use 0
if there is only one page)
endpage (boolean: use TRUE if this is the last image to be
added to the page; FALSE otherwise)
Return: 0 if OK, 1 on error
Notes: (1) This is simpler to use than pixWriteStringPS(), and it outputs in level 2 PS as compressed DCT (overlaid with ascii85 encoding). (2) An output file can contain multiple pages, each with multiple images. The arguments to convertJpegToPS() allow you to control placement of jpeg images on multiple pages within a PostScript file. (3) For the first image written to a file, use "w", which opens for write and clears the file. For all subsequent images written to that file, use "a". (4) The (x, y) parameters give the LL corner of the image relative to the LL corner of the page. They are in units of pixels if scale = 1.0. If you use (e.g.) scale = 2.0, the image is placed at (2x, 2y) on the page, and the image dimensions are also doubled. (5) Display vs printed resolution:
Input: filein (input jpeg file)
fileout (output ps file)
Return: 0 if OK, 1 on error
Notes: (1) This function takes a jpeg file as input and generates a DCT compressed, ascii85 encoded PS file, with a bounding box. (2) The bounding box is required when a program such as TeX (through epsf) places and rescales the image. (3) The bounding box is sized for fitting the image to an 8.5 x 11.0 inch page.
| LEPT_DLL l_int32 convertJpegToPSString | ( | const char * | filein, |
| char ** | poutstr, | ||
| l_int32 * | pnbytes, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | res, | ||
| l_float32 | scale, | ||
| l_int32 | pageno, | ||
| l_int32 | endpage | ||
| ) |
Generates PS string in jpeg format from jpeg file
Input: filein (input jpeg file)
&poutstr (<return> PS string)
&nbytes (<return> number of bytes in PS string)
x, y (location of LL corner of image, in pixels, relative
to the PostScript origin (0,0) at the LL corner
of the page)
res (resolution of the input image, in ppi; use 0 for default)
scale (scaling by printer; use 0.0 or 1.0 for no scaling)
pageno (page number; must start with 1; you can use 0
if there is only one page)
endpage (boolean: use TRUE if this is the last image to be
added to the page; FALSE otherwise)
Return: 0 if OK, 1 on error
Notes: (1) For usage, see convertJpegToPS()
| LEPT_DLL l_int32 convertLABToRGB | ( | l_float32 | flval, |
| l_float32 | faval, | ||
| l_float32 | fbval, | ||
| l_int32 * | prval, | ||
| l_int32 * | pgval, | ||
| l_int32 * | pbval | ||
| ) |
Input: flval, faval, fbval
&rval, &gval, &bval (<return> rgb values)
Return: 0 if OK, 1 on error
Notes: (1) For values of lab that are out of gamut for rgb, the rgb components are set to the closest valid color.
| LEPT_DLL l_int32 convertLABToXYZ | ( | l_float32 | lval, |
| l_float32 | aval, | ||
| l_float32 | bval, | ||
| l_float32 * | pxval, | ||
| l_float32 * | pyval, | ||
| l_float32 * | pzval | ||
| ) |
Input: lval, aval, bval
&xval, &yval, &zval (<return> xyz values)
Return: 0 if OK, 1 on error
| LEPT_DLL BOXAA* convertNumberedMasksToBoxaa | ( | const char * | dirname, |
| const char * | substr, | ||
| l_int32 | numpre, | ||
| l_int32 | numpost | ||
| ) |
Input: directory name (containing mask images)
substr (<optional> substring filter on filenames; can be NULL)
numpre (number of characters in name before number)
numpost (number of characters in name after number, up
to a dot before an extension)
Return: boxaa of mask regions, or null on error
Notes: (1) This is conveniently used to generate the input boxaa for convertSegmentedFilesToPdf(). It guarantees that the boxa will be aligned with the page images, even if some of the boxa are empty.
Input: ptas (8-connected line of points)
Return: ptad (4-connected line), or null on error
Notes: (1) When a polyline is generated with width = 1, the resulting line is not 4-connected in general. This function adds points as necessary to convert the line to 4-connected. It is useful when rendering 1 bpp on a pix. (2) Do not use this for lines generated with width > 1.
| LEPT_DLL l_int32 convertRGBToHSV | ( | l_int32 | rval, |
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_int32 * | phval, | ||
| l_int32 * | psval, | ||
| l_int32 * | pvval | ||
| ) |
Input: rval, gval, bval (RGB input)
&hval, &sval, &vval (<return> HSV values)
Return: 0 if OK, 1 on error
Notes: (1) The range of returned values is: h [0 ... 239], s [0 ... 255], v [0 ... 255]. (2) If r = g = b, the pixel is gray (s = 0), and we define h = 0. (3) h wraps around, so that h = 0 and h = 240 are equivalent in hue space. (4) h has the following correspondence to color: h = 0 (magenta), h = 40 (red), h = 80 (yellow), h = 120 (green), h = 160 (cyan), h = 200 (blue).
| LEPT_DLL l_int32 convertRGBToLAB | ( | l_int32 | rval, |
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_float32 * | pflval, | ||
| l_float32 * | pfaval, | ||
| l_float32 * | pfbval | ||
| ) |
Input: rval, gval, bval (rgb input)
&flval, &faval, &fbval (<return> lab values)
Return: 0 if OK, 1 on error
Notes: (1) These conversions are for illuminant D65 acting on linear sRGB values.
| LEPT_DLL l_int32 convertRGBToXYZ | ( | l_int32 | rval, |
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_float32 * | pfxval, | ||
| l_float32 * | pfyval, | ||
| l_float32 * | pfzval | ||
| ) |
Input: rval, gval, bval (rgb input)
&fxval, &fyval, &fzval (<return> xyz values)
Return: 0 if OK, 1 on error
Notes: (1) These conversions are for illuminant D65 acting on linear sRGB values.
| LEPT_DLL l_int32 convertRGBToYUV | ( | l_int32 | rval, |
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_int32 * | pyval, | ||
| l_int32 * | puval, | ||
| l_int32 * | pvval | ||
| ) |
Input: rval, gval, bval (RGB input)
&yval, &uval, &vval (<return> YUV values)
Return: 0 if OK, 1 on error
Notes: (1) The range of returned values is: Y [16 ... 235] U [16 ... 240] V [16 ... 240]
| LEPT_DLL l_int32 convertSegmentedFilesToPdf | ( | const char * | dirname, |
| const char * | substr, | ||
| l_int32 | res, | ||
| l_int32 | type, | ||
| l_int32 | thresh, | ||
| BOXAA * | baa, | ||
| l_int32 | quality, | ||
| l_float32 | scalefactor, | ||
| const char * | title, | ||
| const char * | fileout | ||
| ) |
Input: directory name (containing images)
substr (<optional> substring filter on filenames; can be NULL)
res (input resolution of all images)
type (compression type for non-image regions; the
image regions are always compressed with L_JPEG_ENCODE)
thresh (used for converting gray --> 1 bpp with L_G4_ENCODE)
boxaa (<optional> of image regions)
quality (used for JPEG only; 0 for default (75))
scalefactor (scaling factor applied to each image region)
title (<optional> pdf title; if null, taken from the first
image filename)
fileout (pdf file of all images)
Return: 0 if OK, 1 on error
Notes: (1) If @substr is not NULL, only image filenames that contain the substring can be used. If @substr == NULL, all files in the directory are used. (2) The files in the directory, after optional filtering by the substring, are lexically sorted in increasing order before concatenation. (3) The images are encoded with G4 if 1 bpp; JPEG if 8 bpp without colormap and many colors, or 32 bpp; FLATE for anything else. (4) The boxaa, if it exists, contains one boxa of "image regions" for each image file. The boxa must be aligned with the sorted set of images. (5) The scalefactor is applied to each image region. It is typically < 1.0, to save bytes in the final pdf, because the resolution is often not critical in non-text regions. (6) If the non-image regions have pixel depth > 1 and the encoding type is G4, they are automatically scaled up by 2x and thresholded. Otherwise, no scaling is performed on them. (7) Note that this function can be used to generate multipage G4 compressed pdf from any input, by using @boxaa == NULL and @type == L_G4_ENCODE.
| LEPT_DLL l_int32 convertSegmentedPagesToPS | ( | const char * | pagedir, |
| const char * | pagestr, | ||
| l_int32 | page_numpre, | ||
| const char * | maskdir, | ||
| const char * | maskstr, | ||
| l_int32 | mask_numpre, | ||
| l_int32 | numpost, | ||
| l_int32 | maxnum, | ||
| l_float32 | textscale, | ||
| l_float32 | imagescale, | ||
| l_int32 | threshold, | ||
| const char * | fileout | ||
| ) |
Input: path
type (UNIX_PATH_SEPCHAR, WIN_PATH_SEPCHAR)
Return: 0 if OK, 1 on error
Notes: (1) In-place conversion. (2) Type is the resulting type:
| LEPT_DLL SARRAY* convertSortedToNumberedPathnames | ( | SARRAY * | sa, |
| l_int32 | numpre, | ||
| l_int32 | numpost, | ||
| l_int32 | maxnum | ||
| ) |
convertSortedToNumberedPathnames()
Input: sorted pathnames (including zero-padded integers)
numpre (number of characters in name before number)
numpost (number of characters in name after the number,
up to a dot before an extension)
maxnum (only consider page numbers up to this value)
Return: sarray of numbered pathnames, or NULL on error
Notes: (1) Typically, numpre = numpost = 0; e.g., when the filename just has a number followed by an optional extension.
| LEPT_DLL l_int32 convertTiffMultipageToPS | ( | const char * | filein, |
| const char * | fileout, | ||
| const char * | tempfile, | ||
| l_float32 | fillfract | ||
| ) |
Input: filein (input tiff multipage file)
fileout (output ps file)
tempfile (<optional> for temporary g4 tiffs;
use NULL for default)
fillfract (for filling 8.5 x 11 inch page;
use 0.0 for DEFAULT_FILL_FRACTION)
Return: 0 if OK, 1 on error
Notes: (1) This converts a multipage tiff file of binary page images into a ccitt g4 compressed PS file. (2) If the images are generated from a standard resolution fax, the vertical resolution is doubled to give a normal-looking aspect ratio.
| LEPT_DLL l_int32 convertToNUpFiles | ( | const char * | dir, |
| const char * | substr, | ||
| l_int32 | nx, | ||
| l_int32 | ny, | ||
| l_int32 | tw, | ||
| l_int32 | spacing, | ||
| l_int32 | border, | ||
| l_int32 | fontsize, | ||
| const char * | outdir | ||
| ) |
Input: dir (full path to directory of images)
substr (<optional> can be null)
nx, ny (in [1, ... 50], tiling factors in each direction)
tw (target width, in pixels; must be >= 20)
spacing (between images, and on outside)
border (width of additional black border on each image;
use 0 for no border)
fontsize (to print tail of filename with image. Valid set is
{4,6,8,10,12,14,16,18,20}. Use 0 to disable.)
outdir (subdirectory of /tmp to put N-up tiled images)
Return: 0 if OK, 1 on error
Notes: (1) Each set of nx*ny images is scaled and tiled into a single image, that is written out to @outdir. (2) All images in each nx*ny set are scaled to the same width. This is typically used when all images are roughly the same size. (3) Typical values for nx and ny are in [2 ... 5]. (4) All images are scaled to a width @tw. They are not rescaled when placed in the (nx,ny) mosaic.
| LEPT_DLL PIXA* convertToNUpPixa | ( | const char * | dir, |
| const char * | substr, | ||
| l_int32 | nx, | ||
| l_int32 | ny, | ||
| l_int32 | tw, | ||
| l_int32 | spacing, | ||
| l_int32 | border, | ||
| l_int32 | fontsize | ||
| ) |
Input: dir (full path to directory of images)
substr (<optional> can be null)
nx, ny (in [1, ... 50], tiling factors in each direction)
tw (target width, in pixels; must be >= 20)
spacing (between images, and on outside)
border (width of additional black border on each image;
use 0 for no border)
fontsize (to print tail of filename with image. Valid set is
{4,6,8,10,12,14,16,18,20}. Use 0 to disable.)
Return: pixad, or null on error
Notes: (1) See notes for convertToNUpFiles()
| LEPT_DLL l_int32 convertToPdf | ( | const char * | filein, |
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | fileout, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | res, | ||
| const char * | title, | ||
| L_PDF_DATA ** | plpd, | ||
| l_int32 | position | ||
| ) |
Input: filein (input image file -- any format)
type (L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE)
quality (used for JPEG only; 0 for default (75))
fileout (output pdf file; only required on last image on page)
x, y (location of lower-left corner of image, in pixels,
relative to the PostScript origin (0,0) at
the lower-left corner of the page)
res (override the resolution of the input image, in ppi;
use 0 to respect the resolution embedded in the input)
title (<optional> pdf title; if null, taken from filein)
&lpd (ptr to lpd, which is created on the first invocation
and returned until last image is processed, at which
time it is destroyed)
position (in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE,
L_LAST_IMAGE)
Return: 0 if OK, 1 on error
Notes: (1) To wrap only one image in pdf, input @plpd = NULL, and the value of @position will be ignored: convertToPdf(... type, quality, x, y, res, NULL, 0); (2) To wrap multiple images on a single pdf page, this is called once for each successive image. Do it this way: L_PDF_DATA *lpd; convertToPdf(... type, quality, x, y, res, &lpd, L_FIRST_IMAGE); convertToPdf(... type, quality, x, y, res, &lpd, L_NEXT_IMAGE); ... convertToPdf(... type, quality, x, y, res, &lpd, L_LAST_IMAGE); This will write the result to the value of @fileout specified in the first call; succeeding values of @fileout are ignored. On the last call: the pdf data bytes are computed and written to @fileout, lpd is destroyed internally, and the returned value of lpd is null. So the client has nothing to clean up. (3) (a) Set @res == 0 to respect the resolution embedded in the image file. If no resolution is embedded, it will be set to the default value. (b) Set @res to some other value to override the file resolution. (4) (a) If the input @res and the resolution of the output device are equal, the image will be "displayed" at the same size as the original. (b) If the input @res is 72, the output device will render the image at 1 pt/pixel. (c) Some possible choices for the default input pix resolution are: 72 ppi Render pix on any output device at one pt/pixel 96 ppi Windows default for generated display images 300 ppi Typical default for scanned images. We choose 300, which is sensible for rendering page images. However, images come from a variety of sources, and some are explicitly created for viewing on a display.
| LEPT_DLL l_int32 convertToPdfData | ( | const char * | filein, |
| l_int32 | type, | ||
| l_int32 | quality, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | res, | ||
| const char * | title, | ||
| L_PDF_DATA ** | plpd, | ||
| l_int32 | position | ||
| ) |
Input: filein (input image file -- any format)
type (L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE)
quality (used for JPEG only; 0 for default (75))
&data (<return> pdf data in memory)
&nbytes (<return> number of bytes in pdf data)
x, y (location of lower-left corner of image, in pixels,
relative to the PostScript origin (0,0) at
the lower-left corner of the page)
res (override the resolution of the input image, in ppi;
use 0 to respect the resolution embedded in the input)
title (<optional> pdf title; if null, use filein)
&lpd (ptr to lpd, which is created on the first invocation
and returned until last image is processed, at which
time it is destroyed)
position (in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE,
L_LAST_IMAGE)
Return: 0 if OK, 1 on error
Notes: (1) If @res == 0 and the input resolution field is 0, this will use DEFAULT_INPUT_RES. (2) See comments in convertToPdf().
| LEPT_DLL l_int32 convertToPdfDataSegmented | ( | const char * | filein, |
| l_int32 | res, | ||
| l_int32 | type, | ||
| l_int32 | thresh, | ||
| BOXA * | boxa, | ||
| l_int32 | quality, | ||
| l_float32 | scalefactor, | ||
| const char * | title, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
Input: filein (input image file -- any format)
res (input image resolution; typ. 300 ppi; use 0 for default)
type (compression type for non-image regions; the
image regions are always compressed with L_JPEG_ENCODE)
thresh (used for converting gray --> 1 bpp with L_G4_ENCODE)
boxa (<optional> image regions; can be null)
quality (used for jpeg image regions; 0 for default)
scalefactor (used for jpeg regions; must be <= 1.0)
title (<optional> pdf title; if null, uses filein)
&data (<return> pdf data in memory)
&nbytes (<return> number of bytes in pdf data)
Return: 0 if OK, 1 on error
Notes: (1) If there are no image regions, set @boxa == NULL; @quality and @scalefactor are ignored. (2) Typically, @scalefactor is < 1.0, because the image regions can be rendered at a lower resolution (for better compression) than the text regions.
| LEPT_DLL l_int32 convertToPdfSegmented | ( | const char * | filein, |
| l_int32 | res, | ||
| l_int32 | type, | ||
| l_int32 | thresh, | ||
| BOXA * | boxa, | ||
| l_int32 | quality, | ||
| l_float32 | scalefactor, | ||
| const char * | title, | ||
| const char * | fileout | ||
| ) |
Input: filein (input image file -- any format)
res (input image resolution; typ. 300 ppi; use 0 for default)
type (compression type for non-image regions; the
image regions are always compressed with L_JPEG_ENCODE)
thresh (used for converting gray --> 1 bpp with L_G4_ENCODE)
boxa (<optional> image regions; can be null)
quality (used for jpeg image regions; 0 for default)
scalefactor (used for jpeg regions; must be <= 1.0)
title (<optional> pdf title; typically taken from the
input file for the pix)
fileout (output pdf file)
Return: 0 if OK, 1 on error
Notes: (1) If there are no image regions, set @boxa == NULL; @quality and @scalefactor are ignored. (2) Typically, @scalefactor is < 1.0, because the image regions can be rendered at a lower resolution (for better compression) than the text regions. If @scalefactor == 0, we use 1.0. If the input image is 1 bpp and scalefactor < 1.0, we use scaleToGray() to downsample the image regions to gray before compressing them. (3) If the compression type for non-image regions is L_G4_ENCODE and bpp > 1, the image is upscaled 2x and thresholded to 1 bpp. That is the only situation where @thresh is used. (4) The parameter @quality is only used for image regions. If @type == L_JPEG_ENCODE, default jpeg quality (75) is used for the non-image regions. (5) Processing matrix for non-image regions.
Input G4 JPEG FLATE
----------|---------------------------------------------------
1 bpp | 1x, 1 bpp 1x flate, 1 bpp 1x, 1 bpp
|
cmap | 2x, 1 bpp 1x flate, cmap 1x, cmap
|
2,4 bpp | 2x, 1 bpp 1x flate 1x, 2,4 bpp
no cmap | 2,4 bpp
|
8,32 bpp | 2x, 1 bpp 1x (jpeg) 1x, 8,32 bpp
no cmap | 8,32 bpp
Summary:
(a) if G4 is requested, G4 is used, with 2x upscaling
for all cases except 1 bpp.
(b) if JPEG is requested, use flate encoding for all cases
except 8 bpp without cmap and 32 bpp (rgb).
(c) if FLATE is requested, use flate with no transformation
of the raster data.
(6) Calling options/sequence for these functions:
        file  -->  file      (convertToPdfSegmented)
            pix  -->  file      (pixConvertToPdfSegmented)
                pix  -->  data      (pixConvertToPdfDataSegmented)
        file  -->  data      (convertToPdfDataSegmented)
            pix  -->  data      (pixConvertToPdfDataSegmented)
| LEPT_DLL l_int32 convertUnscaledFilesToPdf | ( | const char * | dirname, |
| const char * | substr, | ||
| const char * | title, | ||
| const char * | fileout | ||
| ) |
Input: directory name (containing images)
substr (<optional> substring filter on filenames; can be NULL)
title (<optional> pdf title; if null, taken from the first
image filename)
fileout (pdf file of all images)
Return: 0 if OK, 1 on error
Notes: (1) If @substr is not NULL, only image filenames that contain the substring can be used. If @substr == NULL, all files in the directory are used. (2) The files in the directory, after optional filtering by the substring, are lexically sorted in increasing order before concatenation. (3) For jpeg and jp2k, this is very fast because the compressed data is wrapped up and concatenated. For png and tiffg4, the images must be read and recompressed.
| LEPT_DLL l_int32 convertUnscaledToPdfData | ( | const char * | fname, |
| const char * | title, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
Input: fname (of image file)
title (<optional> pdf title; can be NULL)
&data (<return> output pdf data for image)
&nbytes (<return> size of output pdf data)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 convertXYZToLAB | ( | l_float32 | xval, |
| l_float32 | yval, | ||
| l_float32 | zval, | ||
| l_float32 * | plval, | ||
| l_float32 * | paval, | ||
| l_float32 * | pbval | ||
| ) |
Input: xval, yval, zval (xyz input)
&lval, &aval, &bval (<return> lab values)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 convertXYZToRGB | ( | l_float32 | fxval, |
| l_float32 | fyval, | ||
| l_float32 | fzval, | ||
| l_int32 | blackout, | ||
| l_int32 * | prval, | ||
| l_int32 * | pgval, | ||
| l_int32 * | pbval | ||
| ) |
Input: fxval, fyval, fzval
blackout (0 to output nearest color if out of gamut;
1 to output black)
&rval, &gval, &bval (<return> rgb values)
Return: 0 if OK, 1 on error
Notes: (1) For values of xyz that are out of gamut for rgb, at least one of the r, g or b components will be either less than 0 or greater than 255. For that situation:
| LEPT_DLL l_int32 convertYUVToRGB | ( | l_int32 | yval, |
| l_int32 | uval, | ||
| l_int32 | vval, | ||
| l_int32 * | prval, | ||
| l_int32 * | pgval, | ||
| l_int32 * | pbval | ||
| ) |
Input: yval, uval, vval
&rval, &gval, &bval (<return> RGB values)
Return: 0 if OK, 1 on error
Notes: (1) The range of valid input values is: Y [16 ... 235] U [16 ... 240] V [16 ... 240] (2) Conversion of RGB --> YUV --> RGB leaves the image unchanged. (3) The YUV gamut is larger than the RGB gamut; many YUV values will result in an invalid RGB value. We clip individual r,g,b components to the range [0, 255], and do not test input.
Input: sy (rows == height)
sx (columns == width)
Return: doubly indexed array (i.e., an array of sy row pointers,
each of which points to an array of sx floats)
Notes: (1) The array[sy][sx] is indexed in standard "matrix notation", with the row index first.
Input: sy (rows == height)
sx (columns == width)
Return: doubly indexed array (i.e., an array of sy row pointers,
each of which points to an array of sx ints)
Notes: (1) The array[sy][sx] is indexed in standard "matrix notation", with the row index first.
Input: xc, yc (location of center of rotation)
angle (rotation in radians; clockwise is positive)
Return: 3x3 transform matrix, or null on error
Notes: (1) The rotation is equivalent to: v' = Av where v and v' are 1x3 column vectors in the form v = [x, y, 1]^ (^ denotes transpose) and the affine rotation matrix is
    A = [ cosa   -sina    xc*(1-cosa) + yc*sina
          sina    cosa    yc*(1-cosa) - xc*sina
           0       0                 1          ]
If the rotation is about the origin, (xc, yc) = (0, 0) and this simplifies to
    A = [ cosa   -sina    0
          sina    cosa    0
           0       0      1 ]
These relations follow from the following equations, which you can convince yourself are correct as follows. Draw a circle centered on (xc,yc) and passing through (x,y), with (x',y') on the arc at an angle 'a' clockwise from (x,y). [ Hint: cos(a + b) = cosa * cosb - sina * sinb sin(a + b) = sina * cosb + cosa * sinb ]
    x' - xc = (x - xc) * cosa - (y - yc) * sina
    y' - yc = (x - xc) * sina + (y - yc) * cosa
Input: scalex (horizontal scale factor)
scaley (vertical scale factor)
Return: 3x3 transform matrix, or null on error
Notes: (1) The scaling is equivalent to: v' = Av where v and v' are 1x3 column vectors in the form v = [x, y, 1]^ (^ denotes transpose) and the affine scaling matrix is
    A = [ sx   0    0
          0    sy   0
          0    0    1 ]
(2) We consider scaling as with respect to a fixed origin. In other words, the origin is the only point that doesn't move in the scaling transform.
Input: transx (x component of translation wrt. the origin)
transy (y component of translation wrt. the origin)
Return: 3x3 transform matrix, or null on error
Notes: (1) The translation is equivalent to: v' = Av where v and v' are 1x3 column vectors in the form v = [x, y, 1]^ (^ denotes transpose) and the affine translation matrix is
    A = [ 1    0    tx
          0    1    ty
          0    0    1  ]
(2) We consider translation as with respect to a fixed origin. In a clipping operation, the origin moves and the points are fixed, and you use (-tx, -ty) where (tx, ty) is the translation vector of the origin.
Input: inarray (ascii85 input data)
insize (number of bytes in input array)
&outsize (<return> number of bytes in output l_uint8 array)
Return: outarray (binary)
Notes: (1) We assume the data is properly encoded, so we do not check for invalid characters or the final '>' character. (2) We permit whitespace to be added to the encoding in an arbitrary way.
Input: inarray (input encoded char data, with 72 chars/line)
insize (number of bytes in input array)
&outsize (<return> number of bytes in output byte array)
Return: bytea (decoded byte data), or null on error
Notes: (1) The input character data should have only 66 different characters: The 64 character set for base64 encoding, plus the pad character '=' and newlines for formatting with fixed line lengths. If there are any other characters, the decoder will declare the input data to be invalid and return NULL. (2) The decoder ignores newlines and, for a valid input string, stops reading input when a pad byte is found.
| LEPT_DLL l_int32 dewarpaApplyDisparity | ( | L_DEWARPA * | dewa, |
| l_int32 | pageno, | ||
| PIX * | pixs, | ||
| l_int32 | grayin, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| PIX ** | ppixd, | ||
| const char * | debugfile | ||
| ) |
Input: dewa
pageno (of page model to be used; may be a ref model)
pixs (image to be modified; can be 1, 8 or 32 bpp)
grayin (gray value, from 0 to 255, for pixels brought in;
use -1 to use pixels on the boundary of pixs)
x, y (origin for generation of disparity arrays)
&pixd (<return> disparity corrected image)
debugfile (use null to skip writing this)
Return: 0 if OK, 1 on error (no models or ref models available)
Notes: (1) This applies the disparity arrays to the specified image. (2) Specify gray color for pixels brought in from the outside: 0 is black, 255 is white. Use -1 to select pixels from the boundary of the source image. (3) If the models and ref models have not been validated, this will do so by calling dewarpaInsertRefModels(). (4) This works with both stripped and full resolution page models. If the full res disparity array(s) are missing, they are remade. (5) The caller must handle errors that are returned because there are no valid models or ref models for the page – typically by using the input pixs. (6) If there is no model for @pageno, this will use the model for 'refpage' and put the result in the dew for @pageno. (7) This populates the full resolution disparity arrays if necessary. If x and/or y are positive, they are used, in conjunction with pixs, to determine the required slope-based extension of the full resolution disparity arrays in each direction. When (x,y) == (0,0), all extension is to the right and down. Nonzero values of (x,y) are useful for dewarping when pixs is deliberately undercropped. (8) Important: when applying disparity to a number of images, after calling this function and saving the resulting pixd, you should call dewarpMinimize(dew) on the dew for @pageno. This will remove pixs and pixd (or their clones) stored in dew, as well as the full resolution disparity arrays. Together, these hold approximately 16 bytes for each pixel in pixs.
| LEPT_DLL l_int32 dewarpaApplyDisparityBoxa | ( | L_DEWARPA * | dewa, |
| l_int32 | pageno, | ||
| PIX * | pixs, | ||
| BOXA * | boxas, | ||
| l_int32 | mapdir, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| BOXA ** | pboxad, | ||
| const char * | debugfile | ||
| ) |
Input: dewa
pageno (of page model to be used; may be a ref model)
pixs (initial pix reference; for alignment and debugging)
boxas (boxa to be mapped)
mapdir (1 if mapping forward from original to dewarped;
0 if backward)
x, y (origin for generation of disparity arrays with
respect to the source region)
&boxad (<return> disparity corrected boxa)
debugfile (use null to skip writing this)
Return: 0 if OK, 1 on error (no models or ref models available)
Notes: (1) This applies the disparity arrays in one of two mapping directions to the specified boxa. It can be used in the backward direction to locate a box in the original coordinates that would have been dewarped to the specified image. (2) If there is no model for @pageno, this will use the model for 'refpage' and put the result in the dew for @pageno. (3) This works with both stripped and full resolution page models. If the full res disparity array(s) are missing, they are remade. (4) If an error occurs, a copy of the input boxa is returned.
| LEPT_DLL L_DEWARPA* dewarpaCreate | ( | l_int32 | nptrs, |
| l_int32 | sampling, | ||
| l_int32 | redfactor, | ||
| l_int32 | minlines, | ||
| l_int32 | maxdist | ||
| ) |
Input: nptrs (number of dewarp page ptrs; typically the number of pages) sampling (use 0 for default value; the minimum allowed is 8) redfactor (of input images: 1 is full resolution; 2 is 2x reduced) minlines (minimum number of lines to accept; use 0 for default) maxdist (for locating reference disparity; use -1 for default) Return: dewa (or null on error)
Notes: (1) The sampling, minlines and maxdist parameters will be applied to all images. (2) The sampling factor is used for generating the disparity arrays from the input image. For 2x reduced input, use a sampling factor that is half the sampling you want on the full resolution images. (3) Use @redfactor = 1 for full resolution; 2 for 2x reduction. All input images must be at one of these two resolutions. (4) @minlines is the minimum number of nearly full-length lines required to generate a vertical disparity array. The default number is 15. Use a smaller number to accept a questionable array, but not smaller than 4. (5) When a model can't be built for a page, it looks up to @maxdist in either direction for a valid model with the same page parity. Use -1 for the default value of @maxdist; use 0 to avoid using a ref model. (6) The ptr array is expanded as necessary to accommodate page images.
| LEPT_DLL L_DEWARPA* dewarpaCreateFromPixacomp | ( | PIXAC * | pixac, |
| l_int32 | useboth, | ||
| l_int32 | sampling, | ||
| l_int32 | minlines, | ||
| l_int32 | maxdist | ||
| ) |
Input: pixac (pixacomp of G4, 1 bpp images; with 1x1x1 placeholders) useboth (0 for vert disparity; 1 for both vert and horiz) sampling (use -1 or 0 for default value; otherwise minimum of 5) minlines (minimum number of lines to accept; e.g., 10) maxdist (for locating reference disparity; use -1 for default) Return: dewa (or null on error)
Notes: (1) The returned dewa has disparity arrays calculated and is ready for serialization or for use in dewarping. (2) The sampling, minlines and maxdist parameters are applied to all images. See notes in dewarpaCreate() for details. (3) The pixac is full. Placeholders, if any, are w=h=d=1 images, and the real input images are 1 bpp at full resolution. They are assumed to be cropped to the actual page regions, and may be arbitrarily sparse in the array. (4) The output dewarpa is indexed by the page number. The offset in the pixac gives the mapping between the array index in the pixac and the page number. (5) This adds the ref page models. (6) This can be used to make models for any desired set of pages. The direct models are only made for pages with images in the pixacomp; the ref models are made for pages of the same parity within @maxdist of the nearest direct model.
Input: &dewa (<will be set to null before returning>) Return: void
Input: dewa
pageno (of dew to be destroyed)
Return: 0 if OK, 1 on error
Input: dewa (populated with dewarp structs for pages)
index (into dewa: this is the pageno)
Return: dew (handle; still owned by dewa), or null on error
Input: fp
dewa
Return: 0 if OK, 1 on error
Input: dewarpa
dewarp (to be added)
Return: 0 if OK, 1 on error
Notes: (1) This inserts the dewarp into the array, which now owns it. It also keeps track of the largest page number stored. It must be done before the disparity model is built. (2) Note that this differs from the usual method of filling out arrays in leptonica, where the arrays are compact and new elements are typically added to the end. Here, the dewarp can be added anywhere, even beyond the initial allocation.
Input: dewa
notests (if 1, ignore curvature constraints on model)
debug (1 to output information on invalid page models)
Return: 0 if OK, 1 on error
Notes: (1) This destroys all dewarp models that are invalid, and then inserts reference models where possible. (2) If @notests == 1, this ignores the curvature constraints and assumes that all successfully built models are valid. (3) If useboth == 0, it uses the closest valid model within the distance and parity constraints. If useboth == 1, it tries to use the closest allowed hvalid model; if it doesn't find an hvalid model, it uses the closest valid model. (4) For all pages without a model, this clears out any existing invalid and reference dewarps, finds the nearest valid model with the same parity, and inserts an empty dewarp with the reference page. (5) Then if it is requested to use both vertical and horizontal disparity arrays (useboth == 1), it tries to replace any hvalid == 0 model or reference with an hvalid == 1 reference. (6) The distance constraint is that any reference model must be within maxdist. Note that with the parity constraint, no reference models will be used if maxdist < 2. (7) This function must be called, even if reference models will not be used. It should be called after building models on all available pages, and after setting the rendering parameters. (8) If the dewa has been serialized, this function is called by dewarpaRead() when it is read back. It is also called any time the rendering parameters are changed. (9) Note: if this has been called with useboth == 1, and useboth is reset to 0, you should first call dewarpaRestoreModels() to bring real models from the cache back to the primary array.
Input: dewa (populated with dewarp structs for pages) Return: 0 if OK, 1 on error
Notes: (1) This generates two numas, stored in the dewarpa, that give: (a) the page number for each dew that has a page model. (b) the page number for each dew that has either a page model or a reference model. It can be called at any time. (2) It is called by the dewarpa serializer before writing.
| LEPT_DLL l_int32 dewarpaModelStats | ( | L_DEWARPA * | dewa, |
| l_int32 * | pnnone, | ||
| l_int32 * | pnvsuccess, | ||
| l_int32 * | pnvvalid, | ||
| l_int32 * | pnhsuccess, | ||
| l_int32 * | pnhvalid, | ||
| l_int32 * | pnref | ||
| ) |
Input: dewa
&nnone (<optional return> number without any model)
&nvsuccess (<optional return> number with a vert model)
&nvvalid (<optional return> number with a valid vert model)
&nhsuccess (<optional return> number with both models)
&nhvalid (<optional return> number with both models valid)
&nref (<optional return> number with a reference model)
Return: 0 if OK, 1 on error
Notes: (1) A page without a model has no dew. It most likely failed to generate a vertical model, and has not been assigned a ref model from a neighboring page with a valid vertical model. (2) A page has vsuccess == 1 if there is at least a model of the vertical disparity. The model may be invalid, in which case dewarpaInsertRefModels() will stash it in the cache and attempt to replace it by a valid ref model. (3) A vvalid model is a vertical disparity model whose parameters satisfy the constraints given in dewarpaSetValidModels(). (4) A page has hsuccess == 1 if both the vertical and horizontal disparity arrays have been constructed. (5) An hvalid model has vertical and horizontal disparity models whose parameters satisfy the constraints given in dewarpaSetValidModels(). (6) A page has a ref model if it failed to generate a valid model but was assigned a vvalid or hvalid model on another page (within maxdist) by dewarpaInsertRefModels(). (7) This calls dewarpaTestForValidModel(); it ignores the vvalid and hvalid fields.
| LEPT_DLL l_int32 dewarpaModelStatus | ( | L_DEWARPA * | dewa, |
| l_int32 | pageno, | ||
| l_int32 * | pvsuccess, | ||
| l_int32 * | phsuccess | ||
| ) |
Input: dewa
pageno
&vsuccess (<optional return> 1 on success)
&hsuccess (<optional return> 1 on success)
Return: 0 if OK, 1 on error
Notes: (1) This tests if a model has been built, not if it is valid.
Input: filename Return: dewa, or null on error
Input: stream Return: dewa, or null on error
Notes: (1) The serialized dewarp contains a Numa that gives the (increasing) page number of the dewarp structs that are contained. (2) Reference pages are added in after readback.
Input: dewa (populated with dewarp structs for pages) Return: 0 if OK, 1 on error
Notes: (1) This puts all real models (and only real models) in the primary dewarp array. First remove all dewarps that are only references to other page models. Then move all models that had been cached back into the primary dewarp array. (2) After this is done, we still need to recompute and insert the reference models before dewa->modelsready is true.
| LEPT_DLL l_int32 dewarpaSetCurvatures | ( | L_DEWARPA * | dewa, |
| l_int32 | max_linecurv, | ||
| l_int32 | min_diff_linecurv, | ||
| l_int32 | max_diff_linecurv, | ||
| l_int32 | max_edgecurv, | ||
| l_int32 | max_diff_edgecurv, | ||
| l_int32 | max_edgeslope | ||
| ) |
Input: dewa
max_linecurv (-1 for default)
min_diff_linecurv (-1 for default; 0 to accept all models)
max_diff_linecurv (-1 for default)
max_edgecurv (-1 for default)
max_diff_edgecurv (-1 for default)
max_edgeslope (-1 for default)
Return: 0 if OK, 1 on error
Notes: (1) Approximating the line by a quadratic, the coefficient of the quadratic term is the curvature, and distance units are in pixels (of course). The curvature is very small, so we multiply by 10^6 and express the constraints on the model curvatures in micro-units. (2) This sets five curvature thresholds and a slope threshold:
Input: dewa
maxdist (for using ref models)
Return: 0 if OK, 1 on error
Notes: (1) This sets the maxdist field.
Input: dewa
notests
debug (1 to output information on invalid page models)
Return: 0 if OK, 1 on error
Notes: (1) A valid model must meet the rendering requirements, which include whether or not a vertical disparity model exists and conditions on curvatures for vertical and horizontal disparity models. (2) If @notests == 1, this ignores the curvature constraints and assumes that all successfully built models are valid. (3) This function does not need to be called by the application. It is called by dewarpaInsertRefModels(), which will destroy all invalid dewarps. Consequently, to inspect an invalid dewarp model, it must be done before calling dewarpaInsertRefModels().
| LEPT_DLL l_int32 dewarpaShowArrays | ( | L_DEWARPA * | dewa, |
| l_float32 | scalefact, | ||
| l_int32 | first, | ||
| l_int32 | last | ||
| ) |
Input: dewa
scalefact (on contour images; typ. 0.5)
first (first page model to render)
last (last page model to render; use 0 to go to end)
Return: 0 if OK, 1 on error
Notes: (1) Generates a pdf of contour plots of the disparity arrays. (2) This only shows actual models; not ref models
Input: dewa (populated with dewarp structs for pages) Return: 0 if OK, 1 on error
Notes: (1) This examines each dew in a dewarpa, and removes all that don't have their own page model (i.e., all that have "references" to nearby pages with valid models). These references were generated by dewarpaInsertRefModels(dewa).
Input: dewa
useboth (0 for false, 1 for true)
Return: 0 if OK, 1 on error
Notes: (1) This sets the useboth field. If set, this will attempt to apply both vertical and horizontal disparity arrays. Note that a model with only a vertical disparity array will always be valid.
Input: filename
dewa
Return: 0 if OK, 1 on error
Input: stream (opened for "wb")
dewa
Return: 0 if OK, 1 on error
Input: dew
opensize (size of opening to remove perpendicular lines)
debugfile (use null to skip writing this)
Return: 0 if OK, 1 if unable to build the model or on error
Notes: (1) This builds the horizontal and vertical disparity arrays for an input of ruled lines, typically for calibration. In book scanning, you could lay the ruled paper over a page. Then for that page and several below it, you can use the disparity correction of the line model to dewarp the pages. (2) The dew has been initialized with the image of ruled lines. These lines must be continuous, but we do a small amount of pre-processing here to ensure that. (3) @opensize is typically about 8. It must be larger than the thickness of the lines to be extracted. This is the default value, which is applied if @opensize < 3. (4) Sets vsuccess = 1 and hsuccess = 1 if the vertical and/or horizontal disparity arrays build. (5) Similar to dewarpBuildPageModel(), except here the vertical and horizontal disparity arrays are both built from ruled lines. See notes there.
Input: dew
debugfile (use null to skip writing this)
Return: 0 if OK, 1 if unable to build the model or on error
Notes: (1) This is the basic function that builds the horizontal and vertical disparity arrays, which allow determination of the src pixel in the input image corresponding to each dest pixel in the dewarped image. (2) Sets vsuccess = 1 if the vertical disparity array builds. Always attempts to build the horizontal disparity array, even if it will not be requested (useboth == 0). Sets hsuccess = 1 if horizontal disparity builds. (3) The method is as follows: (a) Estimate the points along the centers of all the long textlines. If there are too few lines, no disparity models are built. (b) From the vertical deviation of the lines, estimate the vertical disparity. (c) From the ends of the lines, estimate the horizontal disparity, assuming that the text is made of lines that are left and right justified. (d) One can also compute an additional contribution to the horizontal disparity, inferred from slopes of the top and bottom lines. We do not do this. (4) In more detail for the vertical disparity: (a) Fit a LS quadratic to center locations along each line. This smooths the curves. (b) Sample each curve at a regular interval, find the y-value of the mid-point on each curve, and subtract the sampled curve value from this value. This is the vertical disparity at sampled points along each curve. (c) Fit a LS quadratic to each set of vertically aligned disparity samples. This smooths the disparity values in the vertical direction. Then resample at the same regular interval. We now have a regular grid of smoothed vertical disparity values. (5) Once the sampled vertical disparity array is found, it can be interpolated to get a full resolution vertical disparity map. This can be applied directly to the src image pixels to dewarp the image in the vertical direction, making all textlines horizontal. Likewise, the horizontal disparity array is used to left- and right-align the longest textlines.
Input: pixs (1 bpp) pageno (page number) Return: dew (or null on error)
Notes: (1) The input pixs is either full resolution or 2x reduced. (2) The page number is typically 0-based. If scanned from a book, the even pages are usually on the left. Disparity arrays built for even pages should only be applied to even pages.
Input: pageno (this page number) refpage (page number of dewarp disparity arrays to be used) Return: dew (or null on error)
Notes: (1) This specifies which dewarp struct should be used for the given page. It is placed in dewarpa for pages for which no model can be built. (2) This page and the reference page have the same parity and the reference page is the closest page with a disparity model to this page.
Input: dew
subdirs (one or more subdirectories of /tmp; e.g., "dew1")
index (to help label output images; e.g., the page number)
Return: 0 if OK, 1 on error
Notes: (1) Prints dewarp fields and generates disparity array contour images. The contour images are written to file: /tmp/[subdirs]/pixv_[index].png
Input: &dew (<will be set to null before returning>) Return: void
Input: dew
ptaa (unsmoothed lines, not vertically ordered)
Return: 0 if OK, 1 if vertical disparity array is not built or on error
(1) This is not required for a successful model; only the vertical
disparity is required. This will not be called if the
function to build the vertical disparity fails.
(2) Debug output goes to /tmp/lept/dewmod/ for collection into a pdf.
Input: dew
ptaa (unsmoothed lines, not vertically ordered)
rotflag (0 if using dew->pixs; 1 if rotated by 90 degrees cw)
Return: 0 if OK, 1 on error
Notes: (1) This starts with points along the centers of textlines. It does quadratic fitting (and smoothing), first along the lines and then in the vertical direction, to generate the sampled vertical disparity map. This can then be interpolated to full resolution and used to remove the vertical line warping. (2) Use @rotflag == 1 if you are dewarping vertical lines, as is done in dewarpBuildLineModel(). The usual case is for @rotflag == 0. (3) The model fails to build if the vertical disparity fails. This sets the vsuccess flag to 1 on success. (4) Pix debug output goes to /tmp/dewvert/ for collection into a pdf. Non-pix debug output goes to /tmp.
Input: pixs (1 bpp)
debugflag (1 for debug output)
Return: ptaa (of center values of textlines)
Notes: (1) This in general does not have a point for each value of x, because there will be gaps between words. It doesn't matter because we will fit a quadratic to the points that we do have.
Input: dew Return: 0 if OK, 1 on error
Notes: (1) This removes all data that is not needed for serialization. It keeps the subsampled disparity array(s), so the full resolution arrays can be reconstructed.
Input: dew
pix (<optional>, to give size of actual image)
x, y (origin for generation of disparity arrays)
Return: 0 if OK, 1 on error
Notes: (1) If the full resolution vertical and horizontal disparity arrays do not exist, they are built from the subsampled ones. (2) If pixs is not given, the size of the arrays is determined by the original image from which the sampled version was generated. Any values of (x,y) are ignored. (3) If pixs is given, the full resolution disparity arrays must be large enough to accommodate it. (a) If the arrays do not exist, the value of (x,y) determines the origin of the full resolution arrays without extension, relative to pixs. Thus, (x,y) gives the amount of slope extension in (left, top). The (right, bottom) extension is then determined by the size of pixs and (x,y); the values should never be < 0. (b) If the arrays exist and pixs is too large, the existing full res arrays are destroyed and new ones are made, again using (x,y) to determine the extension in the four directions.
Input: filename Return: dew, or null on error
Input: stream Return: dew, or null on error
Notes: (1) The dewarp struct is stored in minimized format, with only subsampled disparity arrays. (2) The sampling and extra horizontal disparity parameters are stored here. During generation of the dewarp struct, they are passed in from the dewarpa. In readback, it is assumed that they are (a) the same for each page and (b) the same as the values used to create the dewarpa.
| LEPT_DLL PTAA* dewarpRemoveShortLines | ( | PIX * | pixs, |
| PTAA * | ptaas, | ||
| l_float32 | fract, | ||
| l_int32 | debugflag | ||
| ) |
Input: pixs (1 bpp)
ptaas (input lines)
fract (minimum fraction of longest line to keep)
debugflag
Return: ptaad (containing only lines of sufficient length),
or null on error
| LEPT_DLL l_int32 dewarpShowResults | ( | L_DEWARPA * | dewa, |
| SARRAY * | sa, | ||
| BOXA * | boxa, | ||
| l_int32 | firstpage, | ||
| l_int32 | lastpage, | ||
| const char * | pdfout | ||
| ) |
Input: dewa
sarray (of indexed input images)
boxa (crop boxes for input images; can be null)
firstpage, lastpage
pdfout (filename)
Return: 0 if OK, 1 on error
Notes: (1) This generates a pdf of image pairs (before, after) for the designated set of input pages. (2) If the boxa exists, its elements are aligned with numbers in the filenames in @sa.
| LEPT_DLL l_int32 dewarpSinglePage | ( | PIX * | pixs, |
| l_int32 | thresh, | ||
| l_int32 | adaptive, | ||
| l_int32 | use_both, | ||
| PIX ** | ppixd, | ||
| L_DEWARPA ** | pdewa, | ||
| l_int32 | debug | ||
| ) |
Input: pixs (with text, any depth)
thresh (for global thresholding to 1 bpp; ignored otherwise)
adaptive (1 for adaptive thresholding; 0 for global threshold)
use_both (1 for horizontal and vertical; 0 for vertical only)
&pixd (<return> dewarped result)
&dewa (<optional return> dewa with single page; NULL to skip)
debug (1 for debugging output, 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) Dewarps pixs and returns the result in &pixd. (2) This uses default values for all model parameters. (3) If pixs is 1 bpp, the parameters @adaptive and @thresh are ignored. (4) If it can't build a model, returns a copy of pixs in &pixd.
| LEPT_DLL l_int32 dewarpSinglePageInit | ( | PIX * | pixs, |
| l_int32 | thresh, | ||
| l_int32 | adaptive, | ||
| l_int32 | use_both, | ||
| PIX ** | ppixb, | ||
| L_DEWARPA ** | pdewa | ||
| ) |
Input: pixs (with text, any depth)
thresh (for global thresholding to 1 bpp; ignored otherwise)
adaptive (1 for adaptive thresholding; 0 for global threshold)
use_both (1 for horizontal and vertical; 0 for vertical only)
&pixb (<return> 1 bpp image)
&dewa (<return> initialized dewa)
Return: 0 if OK, 1 on error
Notes: (1) This binarizes the input pixs if necessary, returning the binarized image. It also initializes the dewa to default values for the model parameters. (2) If pixs is 1 bpp, the parameters @adaptive and @thresh are ignored. (3) To change the model parameters, call dewarpaSetCurvatures() before running dewarpSinglePageRun(). For example: dewarpSinglePageInit(pixs, 0, 1, 1, &pixb, &dewa); dewarpaSetCurvatures(dewa, 250, -1, -1, 80, 70, 150); dewarpSinglePageRun(pixs, pixb, dewa, &pixd, 0); dewarpaDestroy(&dewa); pixDestroy(&pixb);
| LEPT_DLL l_int32 dewarpSinglePageRun | ( | PIX * | pixs, |
| PIX * | pixb, | ||
| L_DEWARPA * | dewa, | ||
| PIX ** | ppixd, | ||
| l_int32 | debug | ||
| ) |
Input: pixs (any depth)
pixb (1 bpp)
dewa (initialized)
&pixd (<return> dewarped result)
debug (1 for debugging output, 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) Dewarps pixs and returns the result in &pixd. (2) The model parameters must be set before calling this. (3) If a model cannot be built, this returns a copy of pixs in &pixd.
Input: filename
dew
Return: 0 if OK, 1 on error
Input: stream (opened for "wb")
dew
Return: 0 if OK, 1 on error
Notes: (1) This should not be written if there is no sampled vertical disparity array, which means that no model has been built for this page.
| LEPT_DLL PIX* displayHSVColorRange | ( | l_int32 | hval, |
| l_int32 | sval, | ||
| l_int32 | vval, | ||
| l_int32 | huehw, | ||
| l_int32 | sathw, | ||
| l_int32 | nsamp, | ||
| l_int32 | factor | ||
| ) |
Input: hval (hue center value; in range [0 ... 240])
sval (saturation center value; in range [0 ... 255])
vval (max intensity value; in range [0 ... 255])
huehw (half-width of hue range; > 0)
sathw (half-width of saturation range; > 0)
nsamp (number of samplings in each half-width in hue and sat)
factor (linear size of each color square, in pixels; > 3)
Return: pixd (32 bpp set of color squares over input range),
or null on error
Notes: (1) The total number of color samplings in each of the hue and saturation directions is 2 * nsamp + 1.
| LEPT_DLL void ditherTo2bppLineLow | ( | l_uint32 * | lined, |
| l_int32 | w, | ||
| l_uint32 * | bufs1, | ||
| l_uint32 * | bufs2, | ||
| l_int32 * | tabval, | ||
| l_int32 * | tab38, | ||
| l_int32 * | tab14, | ||
| l_int32 | lastlineflag | ||
| ) |
| LEPT_DLL void ditherTo2bppLow | ( | l_uint32 * | datad, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_uint32 * | bufs1, | ||
| l_uint32 * | bufs2, | ||
| l_int32 * | tabval, | ||
| l_int32 * | tab38, | ||
| l_int32 * | tab14 | ||
| ) |
| LEPT_DLL void ditherToBinaryLineLow | ( | l_uint32 * | lined, |
| l_int32 | w, | ||
| l_uint32 * | bufs1, | ||
| l_uint32 * | bufs2, | ||
| l_int32 | lowerclip, | ||
| l_int32 | upperclip, | ||
| l_int32 | lastlineflag | ||
| ) |
| LEPT_DLL void ditherToBinaryLineLUTLow | ( | l_uint32 * | lined, |
| l_int32 | w, | ||
| l_uint32 * | bufs1, | ||
| l_uint32 * | bufs2, | ||
| l_int32 * | tabval, | ||
| l_int32 * | tab38, | ||
| l_int32 * | tab14, | ||
| l_int32 | lastlineflag | ||
| ) |
Input: lined (ptr to beginning of dest line)
w (width of image in pixels)
bufs1 (buffer of current source line)
bufs2 (buffer of next source line)
tabval (value to assign for current pixel)
tab38 (excess value to give to neighboring 3/8 pixels)
tab14 (excess value to give to neighboring 1/4 pixel)
lastlineflag (0 if not last dest line, 1 if last dest line)
Return: void
| LEPT_DLL void ditherToBinaryLow | ( | l_uint32 * | datad, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_uint32 * | bufs1, | ||
| l_uint32 * | bufs2, | ||
| l_int32 | lowerclip, | ||
| l_int32 | upperclip | ||
| ) |
| LEPT_DLL void ditherToBinaryLUTLow | ( | l_uint32 * | datad, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_uint32 * | bufs1, | ||
| l_uint32 * | bufs2, | ||
| l_int32 * | tabval, | ||
| l_int32 * | tab38, | ||
| l_int32 * | tab14 | ||
| ) |
Low-level function for doing Floyd-Steinberg error diffusion dithering from 8 bpp (datas) to 1 bpp (datad). Two source line buffers, bufs1 and bufs2, are provided, along with three 256-entry lookup tables: tabval gives the output pixel value, tab38 gives the extra (plus or minus) transferred to the pixels directly to the left and below, and tab14 gives the extra transferred to the diagonal below. The choice of 3/8 and 1/4 is traditional but arbitrary when you use a lookup table; the only constraint is that the sum is 1. See other comments below.
Input: dpix
addc (use 0.0 to skip the operation)
multc (use 1.0 to skip the operation)
Return: 0 if OK, 1 on error
Notes: (1) This is an in-place operation. (2) It can be used to multiply each pixel by a constant, and also to add a constant to each pixel. Multiplication is done first.
Input: dpix Return: same dpix (ptr), or null on error
Notes: (1) See pixClone() for definition and usage.
Input: dpix Return: fpix, or null on error
| LEPT_DLL PIX* dpixConvertToPix | ( | DPIX * | dpixs, |
| l_int32 | outdepth, | ||
| l_int32 | negvals, | ||
| l_int32 | errorflag | ||
| ) |
Input: dpixs
outdepth (0, 8, 16 or 32 bpp)
negvals (L_CLIP_TO_ZERO, L_TAKE_ABSVAL)
errorflag (1 to output error stats; 0 otherwise)
Return: pixd, or null on error
Notes: (1) Use @outdepth = 0 to programmatically determine the output depth. If no values are greater than 255, it will set outdepth = 8; otherwise to 16 or 32. (2) Because we are converting a float to an unsigned int with a specified dynamic range (8, 16 or 32 bits), errors can occur. If errorflag == TRUE, output the number of values out of range, both negative and positive. (3) If a pixel value is positive and out of range, clip to the maximum value represented at the outdepth of 8, 16 or 32 bits.
Input: dpixd (<optional>; can be null, or equal to dpixs,
or different from dpixs)
dpixs
Return: dpixd, or null on error
Notes: (1) There are three cases: (a) dpixd == null (makes a new dpix; refcount = 1) (b) dpixd == dpixs (no-op) (c) dpixd != dpixs (data copy; no change in refcount) If the refcount of dpixd > 1, case (c) will side-effect these handles. (2) The general pattern of use is: dpixd = dpixCopy(dpixd, dpixs); This will work for all three cases. For clarity when the case is known, you can use: (a) dpixd = dpixCopy(NULL, dpixs); (c) dpixCopy(dpixd, dpixs); (3) For case (c), we check if dpixs and dpixd are the same size. If so, the data is copied directly. Otherwise, the data is reallocated to the correct size and the copy proceeds. The refcount of dpixd is unchanged. (4) This operation, like all others that may involve a pre-existing dpixd, will side-effect any existing clones of dpixd.
Input: width, height
Return: dpix (with data allocated and initialized to 0),
or null on error
Notes: (1) Makes a DPix of specified size, with the data array allocated and initialized to 0.
Input: &dpix <will be nulled> Return: void
Notes: (1) Decrements the ref count and, if 0, destroys the dpix. (2) Always nulls the input ptr.
Input: dpixd (can be equal to dpixs or NULL)
dpixs
Return: dpixd always
Notes: (1) On big-endian hardware, this does byte-swapping on each of the 4-byte words in the dpix data. On little-endians, the data is unchanged. This is used for serialization of dpix; the data is serialized in little-endian byte order because most hardware is little-endian. (2) The operation can be either in-place or, if dpixd == NULL, a new dpix is made. If not in-place, caller must catch the returned pointer.
Input: dpix
&w, &h (<optional return>; each can be null)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 dpixGetMax | ( | DPIX * | dpix, |
| l_float64 * | pmaxval, | ||
| l_int32 * | pxmaxloc, | ||
| l_int32 * | pymaxloc | ||
| ) |
Input: dpix
&maxval (<optional return> max value)
&xmaxloc (<optional return> x location of max)
&ymaxloc (<optional return> y location of max)
Return: 0 if OK; 1 on error
| LEPT_DLL l_int32 dpixGetMin | ( | DPIX * | dpix, |
| l_float64 * | pminval, | ||
| l_int32 * | pxminloc, | ||
| l_int32 * | pyminloc | ||
| ) |
Input: dpix
&minval (<optional return> min value)
&xminloc (<optional return> x location of min)
&yminloc (<optional return> y location of min)
Return: 0 if OK; 1 on error
Input: dpix
(x,y) pixel coords
&val (<return> pixel value)
Return: 0 if OK; 1 on error
| LEPT_DLL DPIX* dpixLinearCombination | ( | DPIX * | dpixd, |
| DPIX * | dpixs1, | ||
| DPIX * | dpixs2, | ||
| l_float32 | a, | ||
| l_float32 | b | ||
| ) |
Input: dpixd (<optional>; this can be null, equal to dpixs1, or
different from dpixs1)
dpixs1 (can be == to dpixd)
dpixs2
a, b (multiplication factors on dpixs1 and dpixs2, rsp.)
Return: dpixd always
Notes: (1) Computes pixelwise linear combination: a * src1 + b * src2 (2) Alignment is to UL corner. (3) There are 3 cases. The result can go to a new dest, in-place to dpixs1, or to an existing input dest:
Input: filename Return: dpix, or null on error
Input: stream Return: dpix, or null on error
Input: dpixd, dpixs Return: 0 if OK, 1 on error
Input: dpixs (low resolution, subsampled)
factor (scaling factor)
Return: dpixd (interpolated result), or null on error
Notes: (1) The width wd of dpixd is related to ws of dpixs by: wd = factor * (ws - 1) + 1 (and ditto for the height) We avoid special-casing boundary pixels in the interpolation by constructing dpixd by inserting (factor - 1) interpolated pixels between each pixel in dpixs. Then wd = ws + (ws - 1) * (factor - 1) (same as above) This also has the advantage that if we subsample by @factor, throwing out all the interpolated pixels, we regain the original low resolution dpix.
Input: dpix
val (to set at each pixel)
Return: 0 if OK, 1 on error
Input: dpix
w, h
Return: 0 if OK, 1 on error
Input: dpix
(x,y) pixel coords
val (pixel value)
Return: 0 if OK; 1 on error
Input: filename
dpix
Return: 0 if OK, 1 on error
Input: stream (opened for "wb")
dpix
Return: 0 if OK, 1 on error
Input: inarray (input data)
insize (number of bytes in input array)
&outsize (<return> number of bytes in output char array)
Return: chara (with 64 characters + \n in each line)
Notes: (1) Ghostscript has a stack break if the last line of data only has a '>', so we avoid the problem by always putting '~>' on the last line.
Input: inarray (input binary data)
insize (number of bytes in input array)
&outsize (<return> number of bytes in output char array)
Return: chara (with MAX_BASE64_LINE characters + \n in each line)
Notes: (1) The input character data is unrestricted binary. The output encoded data consists of the 64 characters in the base64 set, plus newlines and the pad character '='.
| LEPT_DLL l_int32 extractG4DataFromFile | ( | const char * | filein, |
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes, | ||
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pminisblack | ||
| ) |
Input: filein
&data (<return> binary data of ccitt g4 encoded stream)
&nbytes (<return> size of binary data)
&w (<return optional> image width)
&h (<return optional> image height)
&minisblack (<return optional> boolean)
Return: 0 if OK, 1 on error
Input: pixel (32 bpp RGB)
type (L_CHOOSE_MIN or L_CHOOSE_MAX)
Return: component (in range [0 ... 255]), or null on error
Input: fname
numpre (number of characters before the digits to be found)
numpost (number of characters after the digits to be found)
Return: num (number embedded in the filename); -1 on error or if
not found
Notes: (1) The number is to be found in the basename, which is the filename without either the directory or the last extension. (2) When a number is found, it is non-negative. If no number is found, this returns -1, without an error message. The caller needs to check.
| LEPT_DLL void extractRGBAValues | ( | l_uint32 | pixel, |
| l_int32 * | prval, | ||
| l_int32 * | pgval, | ||
| l_int32 * | pbval, | ||
| l_int32 * | paval | ||
| ) |
Input: pixel (32 bit)
&rval (<optional return> red component)
&gval (<optional return> green component)
&bval (<optional return> blue component)
&aval (<optional return> alpha component)
Return: void
| LEPT_DLL void extractRGBValues | ( | l_uint32 | pixel, |
| l_int32 * | prval, | ||
| l_int32 * | pgval, | ||
| l_int32 * | pbval | ||
| ) |
Input: pixel (32 bit)
&rval (<optional return> red component)
&gval (<optional return> green component)
&bval (<optional return> blue component)
Return: void
Notes: (1) A slower implementation uses macros: *prval = GET_DATA_BYTE(&pixel, COLOR_RED); *pgval = GET_DATA_BYTE(&pixel, COLOR_GREEN); *pbval = GET_DATA_BYTE(&pixel, COLOR_BLUE);
Input: sela
fileindex
filename (<optional>; can be null)
Return: 0 if OK; 1 on error
Notes: (1) This function generates all the code for implementing dwa morphological operations using all the sels in the sela. (2) See fhmtautogen1() and fhmtautogen2() for details.
Input: sel array
fileindex
filename (<optional>; can be null)
Return: 0 if OK; 1 on error
Notes: (1) This function uses hmttemplate1.txt to create a top-level file that contains two functions that carry out the hit-miss transform for any of the sels in the input sela. (2) The fileindex parameter is inserted into the output filename, as described below. (3) If filename == NULL, the output file is fhmtgen.<n>.c, where <n> is equal to the 'fileindex' parameter. (4) If filename != NULL, the output file is <filename>.<n>.c. (5) Each sel must have at least one hit. A sel with only misses generates code that will abort the operation if it is called.
Input: sel array
fileindex
filename (<optional>; can be null)
Return: 0 if OK; 1 on error
Notes: (1) This function uses hmttemplate2.txt to create a low-level file that contains the low-level functions for implementing the hit-miss transform for every sel in the input sela. (2) The fileindex parameter is inserted into the output filename, as described below. (3) If filename == NULL, the output file is fhmtgenlow.<n>.c, where <n> is equal to the 'fileindex' parameter. (4) If filename != NULL, the output file is <filename>low.<n>.c.
| LEPT_DLL l_int32 fhmtgen_low_1 | ( | l_uint32 * | datad, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_int32 | index | ||
| ) |
a dispatcher to appropriate low-level code
Input: filename
str (string to append to file)
Return: 0 if OK, 1 on error
Input: srcfile (file to append)
destfile (file to add to)
Return: 0 if OK, 1 on error
Input: srcfile (copy this file)
newfile (to this file)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 fileCorruptByDeletion | ( | const char * | filein, |
| l_float32 | loc, | ||
| l_float32 | size, | ||
| const char * | fileout | ||
| ) |
Input: filein
loc (fractional location of start of deletion)
size (fractional size of deletion)
fileout (corrupted file)
Return: 0 if OK, 1 on error
Notes: (1) @loc and @size are expressed as a fraction of the file size. (2) This makes a copy of the data in @filein, where bytes in the specified region have been deleted. (3) If (@loc + @size) >= 1.0, this deletes from the position represented by @loc to the end of the file. (4) It is useful for testing robustness of I/O wrappers when the data is corrupted, by simulating data corruption by deletion.
| LEPT_DLL l_int32 fileCorruptByMutation | ( | const char * | filein, |
| l_float32 | loc, | ||
| l_float32 | size, | ||
| const char * | fileout | ||
| ) |
Input: filein
loc (fractional location of start of randomization)
size (fractional size of randomization)
fileout (corrupted file)
Return: 0 if OK, 1 on error
Notes: (1) @loc and @size are expressed as a fraction of the file size. (2) This makes a copy of the data in @filein, where bytes in the specified region have been replaced by random data. (3) If (@loc + @size) >= 1.0, this modifies data from the position represented by @loc to the end of the file. (4) It is useful for testing robustness of I/O wrappers when the data is corrupted, by simulating data corruption.
Input: fp (file stream) Return: 1 if file is tiff; 0 otherwise or on error
Input: fname1
fname2
&same (<return> 1 if identical; 0 if different)
Return: 0 if OK, 1 on error
Input: filename
&format (<return>)
Return: 0 if OK, 1 on error or if format is not recognized
Input: byte buffer (at least 12 bytes in size; we can't check)
&format (<return>)
Return: 0 if OK, 1 on error or if format is not recognized
Notes: (1) This determines the file format from the first 12 bytes in the compressed data stream, which are stored in memory. (2) For tiff files, this returns IFF_TIFF. The specific tiff compression is then determined using findTiffCompression().
Input: fp (file stream)
&format (<return>)
Return: 0 if OK, 1 on error or if format is not recognized
Notes: (1) Important: Side effect – this resets fp to BOF.
| LEPT_DLL l_int32 findNextBorderPixel | ( | l_int32 | w, |
| l_int32 | h, | ||
| l_uint32 * | data, | ||
| l_int32 | wpl, | ||
| l_int32 | px, | ||
| l_int32 | py, | ||
| l_int32 * | pqpos, | ||
| l_int32 * | pnpx, | ||
| l_int32 * | pnpy | ||
| ) |
Input: w, h, data, wpl
(px, py), (current P)
&qpos (input current Q; <return> new Q)
(&npx, &npy) (<return> new P)
Return: 0 if next pixel found; 1 otherwise
Notes: (1) qpos increases clockwise from 0 to 7, with 0 at location with Q to left of P: Q P (2) this is a low-level function that does not check input parameters. All calling functions should check them.
Input: start
&prime (<return> first prime larger than @start)
Return: 0 if OK, 1 on error
Input: stream (must be rewound to BOF)
&comptype (<return> compression type)
Return: 0 if OK, 1 on error
Notes: (1) The returned compression type is that defined in the enum in imageio.h. It is not the tiff flag value. (2) The compression type is initialized to IFF_UNKNOWN. If it is not one of the specified types, the returned type is IFF_TIFF, which indicates no compression. (3) When this function is called, the stream must be at BOF. If the opened stream is to be used again to read the file, it must be rewound to BOF after calling this function.
Input: sela
fileindex
filename (<optional>; can be null)
Return: 0 if OK; 1 on error
Notes: (1) This function generates all the code for implementing dwa morphological operations using all the sels in the sela. (2) See fmorphautogen1() and fmorphautogen2() for details.
Input: sela
fileindex
filename (<optional>; can be null)
Return: 0 if OK; 1 on error
Notes: (1) This function uses morphtemplate1.txt to create a top-level file that contains two functions. These functions will carry out dilation, erosion, opening or closing for any of the sels in the input sela. (2) The fileindex parameter is inserted into the output filename, as described below. (3) If filename == NULL, the output file is fmorphgen.<n>.c, where <n> is equal to the 'fileindex' parameter. (4) If filename != NULL, the output file is <filename>.<n>.c.
| LEPT_DLL l_int32 fmorphopgen_low_1 | ( | l_uint32 * | datad, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_int32 | index | ||
| ) |
a dispatcher to appropriate low-level code
| LEPT_DLL l_int32 fmorphopgen_low_2 | ( | l_uint32 * | datad, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_int32 | index | ||
| ) |
a dispatcher to appropriate low-level code
| LEPT_DLL size_t fnbytesInFile | ( | FILE * | fp | ) |
Input: file stream Return: nbytes in file; 0 on error
| LEPT_DLL FILE* fopenReadStream | ( | const char * | filename | ) |
Input: filename Return: stream, or null on error
Notes: (1) This should be used whenever you want to run fopen() to read from a stream. Never call fopen() directly. (2) This also handles pathname conversions, if necessary: ==> /tmp (unix) [default] ==> /tmp/leptonica (unix) [if ADD_LEPTONICA_SUBDIR == 1] ==> <Temp>/leptonica (windows)
| LEPT_DLL FILE* fopenWriteStream | ( | const char * | filename, |
| const char * | modestring | ||
| ) |
Input: filename
modestring
Return: stream, or null on error
Notes: (1) This should be used whenever you want to run fopen() to write or append to a stream. Never call fopen() directly. (2) This also handles pathname conversions, if necessary: ==> /tmp (unix) [default] ==> /tmp/leptonica (unix) [if ADD_LEPTONICA_SUBDIR == 1] ==> <Temp>/leptonica (windows)
Input: fpixa
fpix (to be added)
copyflag (L_INSERT, L_COPY, L_CLONE)
Return: 0 if OK; 1 on error
Input: fpixa Return: 0 if OK, 1 on error
Input: fpixa (three fpix: l,a,b) Return: pixd (rgb)
Notes: (1) The lab image is stored in three fpix.
Input: fpixa (lab) Return: fpixa (xyz)
Notes: (1) The input [l,a,b] and output [x,y,z] values are stored as float values, each set in three fpix.
Input: fpixa (xyz) Return: fpixa (lab)
Notes: (1) The input [x,y,z] and output [l,a,b] values are stored as float values, each set in three fpix. (2) The CIE LAB color space was invented in 1976, as an absolute reference for specifying colors that we can perceive, independently of the rendering device. It was invented to align color display and print images. For information, see: http://www.brucelindbloom.com/ http://en.wikipedia.org/wiki/Lab_color_space
Input: fpixa (three fpix: x,y,z) Return: pixd (rgb)
Notes: (1) The xyz image is stored in three fpix. (2) For values of xyz that are out of gamut for rgb, the rgb components are set to the closest valid color.
Input: fpixas
copyflag:
L_COPY makes a new fpixa and copies each fpix
L_CLONE gives a new ref-counted handle to the input fpixa
L_COPY_CLONE makes a new fpixa with clones of all fpix
Return: new fpixa, or null on error
Input: n (initial number of ptrs) Return: fpixa, or null on error
| LEPT_DLL FPIX* fpixAddBorder | ( | FPIX * | fpixs, |
| l_int32 | left, | ||
| l_int32 | right, | ||
| l_int32 | top, | ||
| l_int32 | bot | ||
| ) |
Input: fpixs
left, right, top, bot (pixels on each side to be added)
Return: fpixd, or null on error
Notes: (1) Adds border of '0' 32-bit pixels
| LEPT_DLL FPIX* fpixAddContinuedBorder | ( | FPIX * | fpixs, |
| l_int32 | left, | ||
| l_int32 | right, | ||
| l_int32 | top, | ||
| l_int32 | bot | ||
| ) |
Input: fpixs
left, right, top, bot (pixels on each side to be added)
Return: fpixd, or null on error
Notes: (1) This adds pixels on each side whose values are equal to the value on the closest boundary pixel.
| LEPT_DLL FPIX* fpixAddMirroredBorder | ( | FPIX * | fpixs, |
| l_int32 | left, | ||
| l_int32 | right, | ||
| l_int32 | top, | ||
| l_int32 | bot | ||
| ) |
Input: fpixs
left, right, top, bot (pixels on each side to be added)
Return: fpixd, or null on error
Notes: (1) See pixAddMirroredBorder() for situations of usage.
Input: fpix
addc (use 0.0 to skip the operation)
multc (use 1.0 to skip the operation)
Return: 0 if OK, 1 on error
Notes: (1) This is an in-place operation. (2) It can be used to multiply each pixel by a constant, and also to add a constant to each pixel. Multiplication is done first.
| LEPT_DLL FPIX* fpixAddSlopeBorder | ( | FPIX * | fpixs, |
| l_int32 | left, | ||
| l_int32 | right, | ||
| l_int32 | top, | ||
| l_int32 | bot | ||
| ) |
Input: fpixs
left, right, top, bot (pixels on each side to be added)
Return: fpixd, or null on error
Notes: (1) This adds pixels on each side whose values have a normal derivative equal to the normal derivative at the boundary of fpixs.
Input: &fpixa (<can be nulled>) Return: void
Notes: (1) Decrements the ref count and, if 0, destroys the fpixa. (2) Always nulls the input ptr.
Input: fpixa (mean, variance or root variance)
factor (replication factor at lowest level)
fontdir (directory for text fonts; e.g., ./fonts)
Return: pixd (8 bpp, mosaic of quadtree images), or null on error
Notes: (1) The mean and root variance fall naturally in the 8 bpp range, but the variance is typically outside the range. This function displays 8 bpp pix clipped to 255, so the image pixels will mostly be 255 (white).
Input: fpixs (8 bpp)
vc (vector of 6 coefficients for affine transformation)
inval (value brought in; typ. 0)
Return: fpixd, or null on error
| LEPT_DLL FPIX* fpixAffinePta | ( | FPIX * | fpixs, |
| PTA * | ptad, | ||
| PTA * | ptas, | ||
| l_int32 | border, | ||
| l_float32 | inval | ||
| ) |
Input: fpixs (8 bpp)
ptad (3 pts of final coordinate space)
ptas (3 pts of initial coordinate space)
border (size of extension with constant normal derivative)
inval (value brought in; typ. 0)
Return: fpixd, or null on error
Notes: (1) If @border > 0, all four sides are extended by that distance, and removed after the transformation is finished. Pixels that would be brought in to the trimmed result from outside the extended region are assigned @inval. The purpose of extending the image is to avoid such assignments. (2) On the other hand, you may want to give all pixels that are brought in from outside fpixs a specific value. In that case, set @border == 0.
Input: fpixa Return: count, or 0 if no fpixa
Input: fpixa
index (into fpixa array)
Return: data (not a copy), or null on error
Input: fpixa
index (to the index-th fpix)
accesstype (L_COPY or L_CLONE)
Return: fpix, or null on error
| LEPT_DLL l_int32 fpixaGetFPixDimensions | ( | FPIXA * | fpixa, |
| l_int32 | index, | ||
| l_int32 * | pw, | ||
| l_int32 * | ph | ||
| ) |
Input: fpixa
index (to the index-th fpix)
&w, &h (<optional return>; each can be null)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 fpixaGetPixel | ( | FPIXA * | fpixa, |
| l_int32 | index, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_float32 * | pval | ||
| ) |
Input: fpixa
index (into fpixa array)
(x,y) pixel coords
&val (<return> pixel value)
Return: 0 if OK; 1 on error
| LEPT_DLL l_int32 fpixaSetPixel | ( | FPIXA * | fpixa, |
| l_int32 | index, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_float32 | val | ||
| ) |
Input: fpixa
index (into fpixa array)
(x,y) pixel coords
val (pixel value)
Return: 0 if OK; 1 on error
Input: fpix
ncontours (> 1, < 500, typ. about 50)
Return: pixd (8 bpp), or null on error
Notes: (1) The increment is set to get approximately @ncontours. (2) The proximity to the target value for contour display is set to 0.15. (3) Negative values are rendered in red; positive values as black.
Input: fpix Return: same fpix (ptr), or null on error
Notes: (1) See pixClone() for definition and usage.
Input: fpix Return: dpix, or null on error
| LEPT_DLL PIX* fpixConvertToPix | ( | FPIX * | fpixs, |
| l_int32 | outdepth, | ||
| l_int32 | negvals, | ||
| l_int32 | errorflag | ||
| ) |
Input: fpixs
outdepth (0, 8, 16 or 32 bpp)
negvals (L_CLIP_TO_ZERO, L_TAKE_ABSVAL)
errorflag (1 to output error stats; 0 otherwise)
Return: pixd, or null on error
Notes: (1) Use @outdepth = 0 to programmatically determine the output depth. If no values are greater than 255, it will set outdepth = 8; otherwise to 16 or 32. (2) Because we are converting a float to an unsigned int with a specified dynamic range (8, 16 or 32 bits), errors can occur. If errorflag == TRUE, output the number of values out of range, both negative and positive. (3) If a pixel value is positive and out of range, clip to the maximum value represented at the outdepth of 8, 16 or 32 bits.
Input: fpixs (32 bit float array)
kernel
normflag (1 to normalize kernel to unit sum; 0 otherwise)
Return: fpixd (32 bit float array)
Notes: (1) This gives a float convolution with an arbitrary kernel. (2) If normflag == 1, the result is normalized by scaling all kernel values for a unit sum. If the sum of kernel values is very close to zero, the kernel can not be normalized and the convolution will not be performed. A warning is issued. (3) With the FPix, there are no issues about negative array or kernel values. The convolution is performed with single precision arithmetic. (4) To get a subsampled output, call l_setConvolveSampling(). The time to make a subsampled output is reduced by the product of the sampling factors. (5) This uses a mirrored border to avoid special casing on the boundaries.
Input: fpixs (32 bit float array)
kelx (x-dependent kernel)
kely (y-dependent kernel)
normflag (1 to normalize kernel to unit sum; 0 otherwise)
Return: fpixd (32 bit float array)
Notes: (1) This does a convolution with a separable kernel that is a sequence of convolutions in x and y. The two one-dimensional kernel components must be input separately; the full kernel is the product of these components. The support for the full kernel is thus a rectangular region. (2) The normflag parameter is used as in fpixConvolve(). (3) Warning: if you use l_setConvolveSampling() to get a subsampled output, and the sampling factor is larger than the kernel half-width, it is faster to use the non-separable version pixConvolve(). This is because the first convolution here must be done on every raster line, regardless of the vertical sampling factor. If the sampling factor is smaller than kernel half-width, it's faster to use the separable convolution. (4) This uses mirrored borders to avoid special casing on the boundaries.
Input: fpixd (<optional>; can be null, or equal to fpixs,
or different from fpixs)
fpixs
Return: fpixd, or null on error
Notes: (1) There are three cases: (a) fpixd == null (makes a new fpix; refcount = 1) (b) fpixd == fpixs (no-op) (c) fpixd != fpixs (data copy; no change in refcount) If the refcount of fpixd > 1, case (c) will side-effect these handles. (2) The general pattern of use is: fpixd = fpixCopy(fpixd, fpixs); This will work for all three cases. For clarity when the case is known, you can use: (a) fpixd = fpixCopy(NULL, fpixs); (c) fpixCopy(fpixd, fpixs); (3) For case (c), we check if fpixs and fpixd are the same size. If so, the data is copied directly. Otherwise, the data is reallocated to the correct size and the copy proceeds. The refcount of fpixd is unchanged. (4) This operation, like all others that may involve a pre-existing fpixd, will side-effect any existing clones of fpixd.
Input: width, height
Return: fpixd (with data allocated and initialized to 0),
or null on error
Notes: (1) Makes a FPix of specified size, with the data array allocated and initialized to 0.
Input: &fpix <will be nulled> Return: void
Notes: (1) Decrements the ref count and, if 0, destroys the fpix. (2) Always nulls the input ptr.
Input: fpixs Return: pixd (8 bpp), or null on error
Input: fpixd (can be equal to fpixs or NULL)
fpixs
Return: fpixd always
Notes: (1) On big-endian hardware, this does byte-swapping on each of the 4-byte floats in the fpix data. On little-endians, the data is unchanged. This is used for serialization of fpix; the data is serialized in little-endian byte order because most hardware is little-endian. (2) The operation can be either in-place or, if fpixd == NULL, a new fpix is made. If not in-place, caller must catch the returned pointer.
Input: fpixd (<optional>; can be null, equal to fpixs,
or different from fpixs)
fpixs
Return: fpixd, or null on error
Notes: (1) This does a left-right flip of the image, which is equivalent to a rotation out of the plane about a vertical line through the image center. (2) There are 3 cases for input: (a) fpixd == null (creates a new fpixd) (b) fpixd == fpixs (in-place operation) (c) fpixd != fpixs (existing fpixd) (3) For clarity, use these three patterns, respectively: (a) fpixd = fpixFlipLR(NULL, fpixs); (b) fpixFlipLR(fpixs, fpixs); (c) fpixFlipLR(fpixd, fpixs); (4) If an existing fpixd is not the same size as fpixs, the image data will be reallocated.
Input: fpixd (<optional>; can be null, equal to fpixs,
or different from fpixs)
fpixs
Return: fpixd, or null on error
Notes: (1) This does a top-bottom flip of the image, which is equivalent to a rotation out of the plane about a horizontal line through the image center. (2) There are 3 cases for input: (a) fpixd == null (creates a new fpixd) (b) fpixd == fpixs (in-place operation) (c) fpixd != fpixs (existing fpixd) (3) For clarity, use these three patterns, respectively: (a) fpixd = fpixFlipTB(NULL, fpixs); (b) fpixFlipTB(fpixs, fpixs); (c) fpixFlipTB(fpixd, fpixs); (4) If an existing fpixd is not the same size as fpixs, the image data will be reallocated.
Input: fpix
&w, &h (<optional return>; each can be null)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 fpixGetMax | ( | FPIX * | fpix, |
| l_float32 * | pmaxval, | ||
| l_int32 * | pxmaxloc, | ||
| l_int32 * | pymaxloc | ||
| ) |
Input: fpix
&maxval (<optional return> max value)
&xmaxloc (<optional return> x location of max)
&ymaxloc (<optional return> y location of max)
Return: 0 if OK; 1 on error
| LEPT_DLL l_int32 fpixGetMin | ( | FPIX * | fpix, |
| l_float32 * | pminval, | ||
| l_int32 * | pxminloc, | ||
| l_int32 * | pyminloc | ||
| ) |
Input: fpix
&minval (<optional return> min value)
&xminloc (<optional return> x location of min)
&yminloc (<optional return> y location of min)
Return: 0 if OK; 1 on error
Input: fpix
(x,y) pixel coords
&val (<return> pixel value)
Return: 0 if OK; 1 on error
| LEPT_DLL FPIX* fpixLinearCombination | ( | FPIX * | fpixd, |
| FPIX * | fpixs1, | ||
| FPIX * | fpixs2, | ||
| l_float32 | a, | ||
| l_float32 | b | ||
| ) |
Input: fpixd (<optional>; this can be null, equal to fpixs1, or
different from fpixs1)
fpixs1 (can be == to fpixd)
fpixs2
a, b (multiplication factors on fpixs1 and fpixs2, rsp.)
Return: fpixd always
Notes: (1) Computes pixelwise linear combination: a * src1 + b * src2 (2) Alignment is to UL corner. (3) There are 3 cases. The result can go to a new dest, in-place to fpixs1, or to an existing input dest:
Input: stream
fpix
factor (subsampled)
Return: 0 if OK, 1 on error
Notes: (1) Subsampled printout of fpix for debugging.
Input: fpixs (8 bpp)
vc (vector of 8 coefficients for projective transformation)
inval (value brought in; typ. 0)
Return: fpixd, or null on error
| LEPT_DLL FPIX* fpixProjectivePta | ( | FPIX * | fpixs, |
| PTA * | ptad, | ||
| PTA * | ptas, | ||
| l_int32 | border, | ||
| l_float32 | inval | ||
| ) |
Input: fpixs (8 bpp)
ptad (4 pts of final coordinate space)
ptas (4 pts of initial coordinate space)
border (size of extension with constant normal derivative)
inval (value brought in; typ. 0)
Return: fpixd, or null on error
Notes: (1) If @border > 0, all four sides are extended by that distance, and removed after the transformation is finished. Pixels that would be brought in to the trimmed result from outside the extended region are assigned @inval. The purpose of extending the image is to avoid such assignments. (2) On the other hand, you may want to give all pixels that are brought in from outside fpixs a specific value. In that case, set @border == 0.
| LEPT_DLL l_int32 fpixRasterop | ( | FPIX * | fpixd, |
| l_int32 | dx, | ||
| l_int32 | dy, | ||
| l_int32 | dw, | ||
| l_int32 | dh, | ||
| FPIX * | fpixs, | ||
| l_int32 | sx, | ||
| l_int32 | sy | ||
| ) |
Input: fpixd (dest fpix)
dx (x val of UL corner of dest rectangle)
dy (y val of UL corner of dest rectangle)
dw (width of dest rectangle)
dh (height of dest rectangle)
fpixs (src fpix)
sx (x val of UL corner of src rectangle)
sy (y val of UL corner of src rectangle)
Return: 0 if OK; 1 on error.
Notes: (1) This is similar in structure to pixRasterop(), except it only allows copying from the source into the destination. For that reason, no op code is necessary. Additionally, all pixels are 32 bit words (float values), which makes the copy very simple. (2) Clipping of both src and dest fpix are done automatically. (3) This allows in-place copying, without checking to see if the result is valid: use for in-place with caution!
Input: filename Return: fpix, or null on error
Input: stream Return: fpix, or null on error
| LEPT_DLL FPIX* fpixRemoveBorder | ( | FPIX * | fpixs, |
| l_int32 | left, | ||
| l_int32 | right, | ||
| l_int32 | top, | ||
| l_int32 | bot | ||
| ) |
Input: fpixs
left, right, top, bot (pixels on each side to be removed)
Return: fpixd, or null on error
Input: fpixs
incr (increment between contours; must be > 0.0)
proxim (required proximity to target value; default 0.15)
Return: pixd (8 bpp), or null on error
Notes: (1) Values are displayed when val/incr is within +-proxim to an integer. The default value is 0.15; smaller values result in thinner contour lines. (2) Negative values are rendered in red; positive values as black.
Input: fpixd, fpixs Return: 0 if OK, 1 on error
Notes: (1) If the data sizes differ, this destroys the existing data in fpixd and allocates a new, uninitialized, data array of the same size as the data in fpixs. Otherwise, this doesn't do anything.
Input: fpixd (<optional>; can be null, equal to fpixs,
or different from fpixs)
fpixs
Return: fpixd, or null on error
Notes: (1) This does a 180 rotation of the image about the center, which is equivalent to a left-right flip about a vertical line through the image center, followed by a top-bottom flip about a horizontal line through the image center. (2) There are 3 cases for input: (a) fpixd == null (creates a new fpixd) (b) fpixd == fpixs (in-place operation) (c) fpixd != fpixs (existing fpixd) (3) For clarity, use these three patterns, respectively: (a) fpixd = fpixRotate180(NULL, fpixs); (b) fpixRotate180(fpixs, fpixs); (c) fpixRotate180(fpixd, fpixs);
Input: fpixs
direction (1 = clockwise, -1 = counter-clockwise)
Return: fpixd, or null on error
Notes: (1) This does a 90 degree rotation of the image about the center, either cw or ccw, returning a new fpix. (2) The direction must be either 1 (cw) or -1 (ccw).
Input: fpixs
quads (0-3; number of 90 degree cw rotations)
Return: fpixd, or null on error
Input: fpixs (low resolution, subsampled)
factor (scaling factor)
Return: fpixd (interpolated result), or null on error
Notes: (1) The width wd of fpixd is related to ws of fpixs by: wd = factor * (ws - 1) + 1 (and ditto for the height) We avoid special-casing boundary pixels in the interpolation by constructing fpixd by inserting (factor - 1) interpolated pixels between each pixel in fpixs. Then wd = ws + (ws - 1) * (factor - 1) (same as above) This also has the advantage that if we subsample by @factor, throwing out all the interpolated pixels, we regain the original low resolution fpix.
Input: fpix
val (to set at each pixel)
Return: 0 if OK, 1 on error
Input: fpix
w, h
Return: 0 if OK, 1 on error
Input: fpix
(x,y) pixel coords
val (pixel value)
Return: 0 if OK; 1 on error
Input: fpix
thresh
Return: pixd (1 bpp), or null on error
Notes: (1) For all values of fpix that are <= thresh, sets the pixel in pixd to 1.
Input: filename
fpix
Return: 0 if OK, 1 on error
Input: stream (opened for "wb")
fpix
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 freadHeaderJp2k | ( | FILE * | fp, |
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pbps, | ||
| l_int32 * | pspp | ||
| ) |
Input: stream opened for read
&w (<optional return>)
&h (<optional return>)
&bps (<optional return>, bits/sample)
&spp (<optional return>, samples/pixel)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 freadHeaderJpeg | ( | FILE * | fp, |
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pspp, | ||
| l_int32 * | pycck, | ||
| l_int32 * | pcmyk | ||
| ) |
Input: stream
&w (<optional return>)
&h (<optional return>)
&spp (<optional return>, samples/pixel)
&ycck (<optional return>, 1 if ycck color space; 0 otherwise)
&cmyk (<optional return>, 1 if cmyk color space; 0 otherwise)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 freadHeaderPng | ( | FILE * | fp, |
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pbps, | ||
| l_int32 * | pspp, | ||
| l_int32 * | piscmap | ||
| ) |
Input: stream
&w (<optional return>)
&h (<optional return>)
&bps (<optional return>, bits/sample)
&spp (<optional return>, samples/pixel)
&iscmap (<optional return>)
Return: 0 if OK, 1 on error
Notes: (1) See readHeaderPng(). We only need the first 40 bytes in the file.
| LEPT_DLL l_int32 freadHeaderPnm | ( | FILE * | fp, |
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pd, | ||
| l_int32 * | ptype, | ||
| l_int32 * | pbps, | ||
| l_int32 * | pspp | ||
| ) |
Input: stream opened for read
&w (<optional return>)
&h (<optional return>)
&d (<optional return>)
&type (<optional return> pnm type)
&bps (<optional return>, bits/sample)
&spp (<optional return>, samples/pixel)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 freadHeaderSpix | ( | FILE * | fp, |
| l_int32 * | pwidth, | ||
| l_int32 * | pheight, | ||
| l_int32 * | pbps, | ||
| l_int32 * | pspp, | ||
| l_int32 * | piscmap | ||
| ) |
Input: stream
&width (<return>)
&height (<return>)
&bps (<return>, bits/sample)
&spp (<return>, samples/pixel)
&iscmap (<optional return>; input NULL to ignore)
Return: 0 if OK, 1 on error
Notes: (1) If there is a colormap, iscmap is returned as 1; else 0.
| LEPT_DLL l_int32 freadHeaderTiff | ( | FILE * | fp, |
| l_int32 | n, | ||
| l_int32 * | pwidth, | ||
| l_int32 * | pheight, | ||
| l_int32 * | pbps, | ||
| l_int32 * | pspp, | ||
| l_int32 * | pres, | ||
| l_int32 * | pcmap, | ||
| l_int32 * | pformat | ||
| ) |
Input: stream
n (page image number: 0-based)
&width (<return>)
&height (<return>)
&bps (<return> bits per sample -- 1, 2, 4 or 8)
&spp (<return>; samples per pixel -- 1 or 3)
&res (<optional return>; resolution in x dir; NULL to ignore)
&cmap (<optional return>; colormap exists; input NULL to ignore)
&format (<optional return>; tiff format; input NULL to ignore)
Return: 0 if OK, 1 on error
Notes: (1) If there is a colormap, cmap is returned as 1; else 0. (2) If @n
is equal to or greater than the number of images, returns 1.
Return: gaussian distributed variable with zero mean and unit stdev
Notes: (1) For an explanation of the Box-Muller method for generating a normally distributed random variable with zero mean and unit standard deviation, see Numerical Recipes in C, 2nd edition, p. 288ff. (2) This can be called sequentially to get samples that can be used for adding noise to each pixel of an image, for example.
Input: a (n x n matrix)
b (n x 1 right-hand side column vector)
n (dimension)
Return: 0 if ok, 1 on error
Notes: (1) There are two side-effects:
| LEPT_DLL NUMA* genConstrainedNumaInRange | ( | l_int32 | first, |
| l_int32 | last, | ||
| l_int32 | nmax, | ||
| l_int32 | use_pairs | ||
| ) |
Input: first (first number to choose; >= 0)
last (biggest possible number to reach; >= first)
nmax (maximum number of numbers to select; > 0)
use_pairs (1 = select pairs of adjacent numbers;
0 = select individual numbers)
Return: na, or null on error
Note: (1) Selection is made uniformly in the range. This can be used to select pages distributed as uniformly as possible through a book, where you are constrained to:
| LEPT_DLL PIX* generateBinaryMaze | ( | l_int32 | w, |
| l_int32 | h, | ||
| l_int32 | xi, | ||
| l_int32 | yi, | ||
| l_float32 | wallps, | ||
| l_float32 | ranis | ||
| ) |
Input: w, h (size of maze)
xi, yi (initial location)
wallps (probability that a pixel to the side is ON)
ranis (ratio of prob that pixel in forward direction
is a wall to the probability that pixel in
side directions is a wall)
Return: pix, or null on error
Notes: (1) We have two input probability factors that determine the density of walls and average length of straight passages. When ranis < 1.0, you are more likely to generate a wall to the side than going forward. Enter 0.0 for either if you want to use the default values. (2) This is a type of percolation problem, and exhibits different phases for different parameters wallps and ranis. For larger values of these parameters, regions in the maze are not explored because the maze generator walls them off and cannot get through. The boundary between the two phases in this two-dimensional parameter space goes near these values: wallps ranis 0.35 1.00 0.40 0.85 0.45 0.70 0.50 0.50 0.55 0.40 0.60 0.30 0.65 0.25 0.70 0.19 0.75 0.15 0.80 0.11 (3) Because there is a considerable amount of overhead in calling pixGetPixel() and pixSetPixel(), this function can be sped up with little effort using raster line pointers and the GET_DATA* and SET_DATA* macros.
| LEPT_DLL char* generateFlatePS | ( | const char * | filein, |
| L_COMP_DATA * | cid, | ||
| l_float32 | xpt, | ||
| l_float32 | ypt, | ||
| l_float32 | wpt, | ||
| l_float32 | hpt, | ||
| l_int32 | pageno, | ||
| l_int32 | endpage | ||
| ) |
Input: filein (<optional> input filename; can be null)
cid (flate compressed image data)
xpt, ypt (location of LL corner of image, in pts, relative
to the PostScript origin (0,0) at the LL corner
of the page)
wpt, hpt (rendered image size in pts)
pageno (page number; must start with 1; you can use 0
if there is only one page)
endpage (boolean: use TRUE if this is the last image to be
added to the page; FALSE otherwise)
Return: PS string, or null on error
| LEPT_DLL char* generateG4PS | ( | const char * | filein, |
| L_COMP_DATA * | cid, | ||
| l_float32 | xpt, | ||
| l_float32 | ypt, | ||
| l_float32 | wpt, | ||
| l_float32 | hpt, | ||
| l_int32 | maskflag, | ||
| l_int32 | pageno, | ||
| l_int32 | endpage | ||
| ) |
Input: filein (<optional> input tiff g4 file; can be null)
cid (g4 compressed image data)
xpt, ypt (location of LL corner of image, in pts, relative
to the PostScript origin (0,0) at the LL corner
of the page)
wpt, hpt (rendered image size in pts)
maskflag (boolean: use TRUE if just painting through fg;
FALSE if painting both fg and bg.)
pageno (page number; must start with 1; you can use 0
if there is only one page.)
endpage (boolean: use TRUE if this is the last image to be
added to the page; FALSE otherwise)
Return: PS string, or null on error
Notes: (1) Low-level function.
| LEPT_DLL char* generateJpegPS | ( | const char * | filein, |
| L_COMP_DATA * | cid, | ||
| l_float32 | xpt, | ||
| l_float32 | ypt, | ||
| l_float32 | wpt, | ||
| l_float32 | hpt, | ||
| l_int32 | pageno, | ||
| l_int32 | endpage | ||
| ) |
Input: filein (<optional> input jpeg filename; can be null)
cid (jpeg compressed image data)
xpt, ypt (location of LL corner of image, in pts, relative
to the PostScript origin (0,0) at the LL corner
of the page)
wpt, hpt (rendered image size in pts)
pageno (page number; must start with 1; you can use 0
if there is only one page.)
endpage (boolean: use TRUE if this is the last image to be
added to the page; FALSE otherwise)
Return: PS string, or null on error
Notes: (1) Low-level function.
Input: boxa Return: ptaa, or null on error
Notes: (1) This generates a pta of the four corners for each box in the boxa. (2) Each of these pta can be rendered onto a pix with random colors, by using pixRenderRandomCmapPtaa() with closeflag = 1.
| LEPT_DLL PTAA* generatePtaaHashBoxa | ( | BOXA * | boxa, |
| l_int32 | spacing, | ||
| l_int32 | width, | ||
| l_int32 | orient, | ||
| l_int32 | outline | ||
| ) |
Input: boxa
spacing (spacing between hash lines; must be > 1)
width (hash line width)
orient (orientation of lines: L_HORIZONTAL_LINE, ...)
outline (0 to skip drawing box outline)
Return: ptaa, or null on error
Notes: (1) The orientation takes on one of 4 orientations (horiz, vertical, slope +1, slope -1). (2) The full outline is also drawn if @outline = 1. (3) Each of these pta can be rendered onto a pix with random colors, by using pixRenderRandomCmapPtaa() with closeflag = 1.
Input: box
width (of line)
Return: ptad, or null on error
Notes: (1) Because the box is constructed so that we don't have any overlapping lines, there is no need to remove duplicates.
Input: boxa
width
removedups (1 to remove, 0 to leave)
Return: ptad, or null on error
Notes: (1) If the boxa has overlapping boxes, and if blending will be used to give a transparent effect, transparency artifacts at line intersections can be removed using removedups = 1.
Input: radius Return: pta, or null on error
Notes: (1) The circle has diameter = 2 * radius + 1. (2) It is located with the center of the circle at the point (radius, radius). (3) Consequently, it typically must be translated if it is to represent a set of pixels in an image.
Input: side Return: pta, or null on error
Notes: (1) The center of the square can be chosen to be at (side / 2, side / 2). It must be translated by this amount when used for replication.
Input: w, h (of region where grid will be displayed)
nx, ny (number of rectangles in each direction in grid)
width (of rendered lines)
Return: ptad, or null on error
| LEPT_DLL PTA* generatePtaHashBox | ( | BOX * | box, |
| l_int32 | spacing, | ||
| l_int32 | width, | ||
| l_int32 | orient, | ||
| l_int32 | outline | ||
| ) |
Input: box
spacing (spacing between lines; must be > 1)
width (of line)
orient (orientation of lines: L_HORIZONTAL_LINE, ...)
outline (0 to skip drawing box outline)
Return: ptad, or null on error
Notes: (1) The orientation takes on one of 4 orientations (horiz, vertical, slope +1, slope -1). (2) The full outline is also drawn if @outline = 1.
| LEPT_DLL PTA* generatePtaHashBoxa | ( | BOXA * | boxa, |
| l_int32 | spacing, | ||
| l_int32 | width, | ||
| l_int32 | orient, | ||
| l_int32 | outline, | ||
| l_int32 | removedups | ||
| ) |
Input: boxa
spacing (spacing between lines; must be > 1)
width (of line)
orient (orientation of lines: L_HORIZONTAL_LINE, ...)
outline (0 to skip drawing box outline)
removedups (1 to remove, 0 to leave)
Return: ptad, or null on error
Notes: (1) The orientation takes on one of 4 orientations (horiz, vertical, slope +1, slope -1). (2) The full outline is also drawn if @outline = 1. (3) If the boxa has overlapping boxes, and if blending will be used to give a transparent effect, transparency artifacts at line intersections can be removed using removedups = 1.
Input: x1, y1 (end point 1)
x2, y2 (end point 2)
Return: pta, or null on error
Notes: (1) Uses Bresenham line drawing, which results in an 8-connected line.
Input: x, y (point of origination)
length (of line, including starting point)
radang (angle in radians, CW from horizontal)
Return: pta, or null on error
Notes: (1) The @length of the line is 1 greater than the distance used in locatePtRadially(). Example: a distance of 1 gives rise to a length of 2.
| LEPT_DLL PTA* generatePtaPolyline | ( | PTA * | ptas, |
| l_int32 | width, | ||
| l_int32 | closeflag, | ||
| l_int32 | removedups | ||
| ) |
Input: pta (vertices of polyline)
width
closeflag (1 to close the contour; 0 otherwise)
removedups (1 to remove, 0 to leave)
Return: ptad, or null on error
Input: x1, y1 (end point 1)
x2, y2 (end point 2)
width
Return: ptaj, or null on error
| LEPT_DLL char* generateUncompressedPS | ( | char * | hexdata, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | d, | ||
| l_int32 | psbpl, | ||
| l_int32 | bps, | ||
| l_float32 | xpt, | ||
| l_float32 | ypt, | ||
| l_float32 | wpt, | ||
| l_float32 | hpt, | ||
| l_int32 | boxflag | ||
| ) |
Input: hexdata
w, h (raster image size in pixels)
d (image depth in bpp; rgb is 32)
psbpl (raster bytes/line, when packed to the byte boundary)
bps (bits/sample: either 1 or 8)
xpt, ypt (location of LL corner of image, in pts, relative
to the PostScript origin (0,0) at the LL corner
of the page)
wpt, hpt (rendered image size in pts)
boxflag (1 to print out bounding box hint; 0 to skip)
Return: PS string, or null on error
Notes: (1) Low-level function.
| LEPT_DLL char* genPathname | ( | const char * | dir, |
| const char * | fname | ||
| ) |
Input: range (size of range; must be >= 2)
seed (use 0 to skip; otherwise call srand)
val (<return> random integer in range {0 ... range-1})
Return: 0 if OK, 1 on error
Notes: (1) For example, to choose a rand integer between 0 and 99, use @range = 100.
| LEPT_DLL char* genTempFilename | ( | const char * | dir, |
| const char * | tail, | ||
| l_int32 | usetime, | ||
| l_int32 | usepid | ||
| ) |
Input: ptas (source 3 points; unprimed)
ptad (transformed 3 points; primed)
&vc (<return> vector of coefficients of transform)
Return: 0 if OK; 1 on error
We have a set of six equations, describing the affine transformation that takes 3 points (ptas) into 3 other points (ptad). These equations are:
x1' = c[0]*x1 + c[1]*y1 + c[2]
y1' = c[3]*x1 + c[4]*y1 + c[5]
x2' = c[0]*x2 + c[1]*y2 + c[2]
y2' = c[3]*x2 + c[4]*y2 + c[5]
x3' = c[0]*x3 + c[1]*y3 + c[2]
y3' = c[3]*x3 + c[4]*y3 + c[5]
This can be represented as
AC = B
where B and C are column vectors
B = [ x1' y1' x2' y2' x3' y3' ]
C = [ c[0] c[1] c[2] c[3] c[4] c[5] ]
and A is the 6x6 matrix
x1 y1 1 0 0 0
0 0 0 x1 y1 1
x2 y2 1 0 0 0
0 0 0 x2 y2 1
x3 y3 1 0 0 0
0 0 0 x3 y3 1
These six equations are solved here for the coefficients C.
These six coefficients can then be used to find the dest point (x',y') corresponding to any src point (x,y), according to the equations
x' = c[0]x + c[1]y + c[2]
y' = c[3]x + c[4]y + c[5]
that are implemented in affineXformPt().
!!!!!!!!!!!!!!!!!! Very important !!!!!!!!!!!!!!!!!!!!!!
When the affine transform is composed from a set of simple operations such as translation, scaling and rotation, it is built in a form to convert from the un-transformed src point to the transformed dest point. However, when an affine transform is used on images, it is used in an inverted way: it converts from the transformed dest point to the un-transformed src point. So, for example, if you transform a boxa using transform A, to transform an image in the same way you must use the inverse of A.
For example, if you transform a boxa with a 3x3 affine matrix 'mat', the analogous image transformation must use 'matinv':
boxad = boxaAffineTransform(boxas, mat); affineInvertXform(mat, &matinv); pixd = pixAffine(pixs, matinv, L_BRING_IN_WHITE);
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Input: ptas (source 4 points; unprimed)
ptad (transformed 4 points; primed)
&vc (<return> vector of coefficients of transform)
Return: 0 if OK; 1 on error
We have a set of 8 equations, describing the bilinear transformation that takes 4 points (ptas) into 4 other points (ptad). These equations are:
x1' = c[0]*x1 + c[1]*y1 + c[2]*x1*y1 + c[3]
y1' = c[4]*x1 + c[5]*y1 + c[6]*x1*y1 + c[7]
x2' = c[0]*x2 + c[1]*y2 + c[2]*x2*y2 + c[3]
y2' = c[4]*x2 + c[5]*y2 + c[6]*x2*y2 + c[7]
x3' = c[0]*x3 + c[1]*y3 + c[2]*x3*y3 + c[3]
y3' = c[4]*x3 + c[5]*y3 + c[6]*x3*y3 + c[7]
x4' = c[0]*x4 + c[1]*y4 + c[2]*x4*y4 + c[3]
y4' = c[4]*x4 + c[5]*y4 + c[6]*x4*y4 + c[7]
This can be represented as
AC = B
where B and C are column vectors
B = [ x1' y1' x2' y2' x3' y3' x4' y4' ] C = [ c[0] c[1] c[2] c[3] c[4] c[5] c[6] c[7] ]
and A is the 8x8 matrix
x1 y1 x1*y1 1 0 0 0 0
0 0 0 0 x1 y1 x1*y1 1
x2 y2 x2*y2 1 0 0 0 0
0 0 0 0 x2 y2 x2*y2 1
x3 y3 x3*y3 1 0 0 0 0
0 0 0 0 x3 y3 x3*y3 1
x4 y4 x4*y4 1 0 0 0 0
0 0 0 0 x4 y4 x4*y4 1
These eight equations are solved here for the coefficients C.
These eight coefficients can then be used to find the mapping (x,y) --> (x',y'):
x' = c[0]x + c[1]y + c[2]xy + c[3]
y' = c[4]x + c[5]y + c[6]xy + c[7]
that are implemented in bilinearXformSampledPt() and bilinearXformPt().
| LEPT_DLL l_int32 getCompositeParameters | ( | l_int32 | size, |
| l_int32 * | psize1, | ||
| l_int32 * | psize2, | ||
| char ** | pnameh1, | ||
| char ** | pnameh2, | ||
| char ** | pnamev1, | ||
| char ** | pnamev2 | ||
| ) |
Input: size
&size1 (<optional return> brick factor size)
&size2 (<optional return> comb factor size)
&nameh1 (<optional return> name of horiz brick)
&nameh2 (<optional return> name of horiz comb)
&namev1 (<optional return> name of vert brick)
&namev2 (<optional return> name of vert comb)
Return: 0 if OK, 1 on error
Notes: (1) This uses the big lookup table at the top of this file. (2) All returned strings are copies that must be freed.
| LEPT_DLL PTA* getCutPathForHole | ( | PIX * | pix, |
| PTA * | pta, | ||
| BOX * | boxinner, | ||
| l_int32 * | pdir, | ||
| l_int32 * | plen | ||
| ) |
Input: pix (of c.c.)
pta (of outer border)
boxinner (b.b. of hole path)
&dir (direction (0-3), returned; only needed for debug)
&len (length of path, returned)
Return: pta of pts on cut path from the hole border
to the outer border, including end points on
both borders; or null on error
Notes: (1) If we don't find a path, we return a pta with no pts in it and len = 0. (2) The goal is to get a reasonably short path between the inner and outer borders, that goes entirely within the fg of the pix. This function is cheap-and-dirty, may fail for some holes in complex topologies such as those you might find in a moderately dark scanned halftone. If it fails to find a path to any particular hole, it gives a warning, and because that hole path is not included, the hole will not be rendered.
| LEPT_DLL l_int32 getExtendedCompositeParameters | ( | l_int32 | size, |
| l_int32 * | pn, | ||
| l_int32 * | pextra, | ||
| l_int32 * | pactualsize | ||
| ) |
getExtendedCompositeParameters()
Input: size (of linear Sel)
&pn (<return> number of 63 wide convolutions)
&pextra (<return> size of extra Sel)
&actualsize (<optional return> actual size used in operation)
Return: 0 if OK, 1 on error
Notes: (1) The DWA implementation allows Sels to be used with hits up to 31 pixels from the origin, either horizontally or vertically. Larger Sels can be used if decomposed into a set of operations with Sels not exceeding 63 pixels in either width or height (and with the origin as close to the center of the Sel as possible). (2) This returns the decomposition of a linear Sel of length @size into a set of @n
Sels of length 63 plus an extra Sel of length @extra. (3) For notation, let w == @size, n == @n,
and e == @extra. We have 1 < e < 63.
Then if w < 64, we have n = 0 and e = w. The general formula for w > 63 is: w = 63 + (n - 1) * 62 + (e - 1)
Where did this come from? Each successive convolution with a Sel of length L adds a total length (L - 1) to w. This accounts for using 62 for each additional Sel of size 63, and using (e - 1) for the additional Sel of size e.
Solving for n and e for w > 63: n = 1 + Int((w - 63) / 62) e = w - 63 - (n - 1) * 62 + 1
The extra part is decomposed into two factors f1 and f2, and the actual size of the extra part is e' = f1 * f2 Then the actual width is: w' = 63 + (n - 1) * 62 + f1 * f2 - 1
Input: directory name Return: sarray of file names, or NULL on error
Notes: (1) The versions compiled under unix and cygwin use the POSIX C library commands for handling directories. For windows, there is a separate implementation. (2) It returns an array of filename tails; i.e., only the part of the path after the last slash. (3) Use of the d_type field of dirent is not portable: "According to POSIX, the dirent structure contains a field char d_name[] of unspecified size, with at most NAME_MAX characters preceding the terminating null character. Use of other fields will harm the portability of your programs." (4) As a consequence of (3), we note several things:
Input: format (integer) Return: extension (string), or null if format is out of range
Notes: (1) This string is NOT owned by the caller; it is just a pointer to a global string. Do not free it.
| LEPT_DLL char* getImagelibVersions | ( | ) |
Return: string of version numbers; e.g.,
libgif 5.0.3
libjpeg 8b (libjpeg-turbo 1.3.0)
libpng 1.4.3
libtiff 3.9.5
zlib 1.2.5
libwebp 0.3.0
libopenjp2 2.1.0
Notes: (1) The caller must free the memory.
Input: filename Return: output format, or IFF_UNKNOWN on error or invalid extension.
Notes: (1) This determines the output file format from the extension of the input filename.
| LEPT_DLL char* getLeptonicaVersion | ( | ) |
Return: string of version number (e.g., 'leptonica-1.68')
Notes: (1) The caller has responsibility to free the memory.
Input: type (L_MORPH_DILATE, L_MORPH_ERODE)
depth (of pix)
Return: color of border pixels for this operation
| LEPT_DLL SARRAY* getNumberedPathnamesInDirectory | ( | const char * | dirname, |
| const char * | substr, | ||
| l_int32 | numpre, | ||
| l_int32 | numpost, | ||
| l_int32 | maxnum | ||
| ) |
getNumberedPathnamesInDirectory()
Input: directory name
substr (<optional> substring filter on filenames; can be NULL)
numpre (number of characters in name before number)
numpost (number of characters in name after the number,
up to a dot before an extension)
maxnum (only consider page numbers up to this value)
Return: sarray of numbered pathnames, or NULL on error
Notes: (1) Returns the full pathnames of the numbered filenames in the directory. The number in the filename is the index into the sarray. For indices for which there are no filenames, an empty string ("") is placed into the sarray. This makes reading numbered files very simple. For example, the image whose filename includes number N can be retrieved using pixReadIndexed(sa, N); (2) If @substr is not NULL, only filenames that contain the substring can be included. If @substr is NULL, all matching filenames are used. (3) If no numbered files are found, it returns an empty sarray, with no initialized strings. (4) It is assumed that the page number is contained within the basename (the filename without directory or extension). @numpre is the number of characters in the basename preceding the actual page number; @numpost is the number following the page number, up to either the end of the basename or a ".", whichever comes first. (5) This is useful when all filenames contain numbers that are not necessarily consecutive. 0-padding is not required. (6) To use an O(n) matching algorithm, the largest page number is found and two internal arrays of this size are created. This maximum is constrained not to exceed @maxnum, to make sure that an unrealistically large number is not accidentally used to determine the array sizes.
| LEPT_DLL void getOctcubeIndexFromRGB | ( | l_int32 | rval, |
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_uint32 * | rtab, | ||
| l_uint32 * | gtab, | ||
| l_uint32 * | btab, | ||
| l_uint32 * | pindex | ||
| ) |
Input: rval, gval, bval
rtab, gtab, btab (generated with makeRGBToIndexTables())
&index (<return>)
Return: void
Note: no error checking!
Input: ptas (source 4 points; unprimed)
ptad (transformed 4 points; primed)
&vc (<return> vector of coefficients of transform)
Return: 0 if OK; 1 on error
We have a set of 8 equations, describing the projective transformation that takes 4 points (ptas) into 4 other points (ptad). These equations are:
x1' = (c[0]*x1 + c[1]*y1 + c[2]) / (c[6]*x1 + c[7]*y1 + 1)
y1' = (c[3]*x1 + c[4]*y1 + c[5]) / (c[6]*x1 + c[7]*y1 + 1)
x2' = (c[0]*x2 + c[1]*y2 + c[2]) / (c[6]*x2 + c[7]*y2 + 1)
y2' = (c[3]*x2 + c[4]*y2 + c[5]) / (c[6]*x2 + c[7]*y2 + 1)
x3' = (c[0]*x3 + c[1]*y3 + c[2]) / (c[6]*x3 + c[7]*y3 + 1)
y3' = (c[3]*x3 + c[4]*y3 + c[5]) / (c[6]*x3 + c[7]*y3 + 1)
x4' = (c[0]*x4 + c[1]*y4 + c[2]) / (c[6]*x4 + c[7]*y4 + 1)
y4' = (c[3]*x4 + c[4]*y4 + c[5]) / (c[6]*x4 + c[7]*y4 + 1)
Multiplying both sides of each eqn by the denominator, we get
AC = B
where B and C are column vectors
B = [ x1' y1' x2' y2' x3' y3' x4' y4' ] C = [ c[0] c[1] c[2] c[3] c[4] c[5] c[6] c[7] ]
and A is the 8x8 matrix
x1 y1 1 0 0 0 -x1*x1' -y1*x1'
0 0 0 x1 y1 1 -x1*y1' -y1*y1'
x2 y2 1 0 0 0 -x2*x2' -y2*x2'
0 0 0 x2 y2 1 -x2*y2' -y2*y2'
x3 y3 1 0 0 0 -x3*x3' -y3*x3'
0 0 0 x3 y3 1 -x3*y3' -y3*y3'
x4 y4 1 0 0 0 -x4*x4' -y4*x4'
0 0 0 x4 y4 1 -x4*y4' -y4*y4'
These eight equations are solved here for the coefficients C.
These eight coefficients can then be used to find the mapping (x,y) --> (x',y'):
x' = (c[0]x + c[1]y + c[2]) / (c[6]x + c[7]y + 1)
y' = (c[3]x + c[4]y + c[5]) / (c[6]x + c[7]y + 1)
that is implemented in projectiveXformSampled() and projectiveXFormInterpolated().
Input: w (image width, pixels)
h (image height, pixels)
fillfract (fraction in linear dimension of full page, not
to be exceeded; use 0 for default)
Return: 0 if OK, 1 on error
Input: w (image width, pixels)
h (image height, pixels)
fillfract (fraction in linear dimension of full page, not
to be exceeded; use 0 for default)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 getRGBFromIndex | ( | l_uint32 | index, |
| l_int32 | sigbits, | ||
| l_int32 * | prval, | ||
| l_int32 * | pgval, | ||
| l_int32 * | pbval | ||
| ) |
Input: index (rgbindex)
sigbits (2-6, significant bits retained in the quantizer
for each component of the input image)
&rval, &gval, &bval (<return> rgb values)
Return: 0 if OK, 1 on error
Notes: (1) The @index is expressed in bits, based on the @sigbits of the r, g and b components, as r7 r6 ... g7 g6 ... b7 b6 ... (2) The computed rgb values are in the center of the quantized cube. The extra bit that is OR'd accomplishes this.
| LEPT_DLL void getScaledParametersPS | ( | BOX * | box, |
| l_int32 | wpix, | ||
| l_int32 | hpix, | ||
| l_int32 | res, | ||
| l_float32 | scale, | ||
| l_float32 * | pxpt, | ||
| l_float32 * | pypt, | ||
| l_float32 * | pwpt, | ||
| l_float32 * | phpt | ||
| ) |
Input: box (<optional> location of image in mils; with
(x,y) being the LL corner)
wpix (pix width in pixels)
hpix (pix height in pixels)
res (of printer; use 0 for default)
scale (use 1.0 or 0.0 for no scaling)
&xpt (location of llx in pts)
&ypt (location of lly in pts)
&wpt (image width in pts)
&hpt (image height in pts)
Return: void (no arg checking)
Notes: (1) The image is always scaled, depending on res and scale. (2) If no box, the image is centered on the page. (3) If there is a box, the image is placed within it.
| LEPT_DLL SARRAY* getSortedPathnamesInDirectory | ( | const char * | dirname, |
| const char * | substr, | ||
| l_int32 | first, | ||
| l_int32 | nfiles | ||
| ) |
getSortedPathnamesInDirectory()
Input: directory name
substr (<optional> substring filter on filenames; can be NULL)
first (0-based)
nfiles (use 0 for all to the end)
Return: sarray of sorted pathnames, or NULL on error
Notes: (1) Use @substr to filter filenames in the directory. If @substr == NULL, this takes all files. (2) The files in the directory, after optional filtering by the substring, are lexically sorted in increasing order. Use @first and @nfiles to select a contiguous set of files. (3) The full pathnames are returned for the requested sequence. If no files are found after filtering, returns an empty sarray.
| LEPT_DLL l_int32 gplotAddPlot | ( | GPLOT * | gplot, |
| NUMA * | nax, | ||
| NUMA * | nay, | ||
| l_int32 | plotstyle, | ||
| const char * | plottitle | ||
| ) |
Input: gplot
nax (<optional> numa: set to null for Y_VS_I;
required for Y_VS_X)
nay (numa: required for both Y_VS_I and Y_VS_X)
plotstyle (GPLOT_LINES, GPLOT_POINTS, GPLOT_IMPULSES,
GPLOT_LINESPOINTS, GPLOT_DOTS)
plottitle (<optional> title for individual plot)
Return: 0 if OK, 1 on error
Notes: (1) There are 2 options for (x,y) values: o To plot an array vs a linear function of the index, set nax = NULL. o To plot one array vs another, use both nax and nay. (2) If nax is NULL, the x value corresponding to the i-th value of nay is found from the startx and delx fields in nay: x = startx + i * delx These are set with numaSetParameters(). Their default values are startx = 0.0, delx = 1.0. (3) If nax is defined, it must be the same size as nay. (4) The 'plottitle' string can have spaces, double quotes and backquotes, but not single quotes.
| LEPT_DLL GPLOT* gplotCreate | ( | const char * | rootname, |
| l_int32 | outformat, | ||
| const char * | title, | ||
| const char * | xlabel, | ||
| const char * | ylabel | ||
| ) |
Input: rootname (root for all output files)
outformat (GPLOT_PNG, GPLOT_PS, GPLOT_EPS, GPLOT_X11,
GPLOT_LATEX)
title (<optional> overall title)
xlabel (<optional> x axis label)
ylabel (<optional> y axis label)
Return: gplot, or null on error
Notes: (1) This initializes the plot. (2) The 'title', 'xlabel' and 'ylabel' strings can have spaces, double quotes and backquotes, but not single quotes.
Input: &gplot (<to be nulled>) Return: void
Input: gplot Return: 0 if OK, 1 on error
Input: gplot Return: 0 if OK, 1 on error
Input: gplot Return: 0 if OK; 1 on error
Notes: (1) This uses gplot and the new arrays to add a plot to the output, by writing a new data file and appending the appropriate plot commands to the command file. (2) This is the only function in this file that requires the gnuplot executable, to actually generate the plot. (3) The gnuplot program for windows is wgnuplot.exe. The standard gp426win32 distribution does not have a X11 terminal.
Input: filename Return: gplot, or NULL on error
Input: gplot
scaling (GPLOT_LINEAR_SCALE, GPLOT_LOG_SCALE_X,
GPLOT_LOG_SCALE_Y, GPLOT_LOG_SCALE_X_Y)
Return: 0 if OK; 1 on error
Notes: (1) By default, the x and y axis scaling is linear. (2) Call this function to set semi-log or log-log scaling.
| LEPT_DLL l_int32 gplotSimple1 | ( | NUMA * | na, |
| l_int32 | outformat, | ||
| const char * | outroot, | ||
| const char * | title | ||
| ) |
Input: na (numa; plot Y_VS_I)
outformat (GPLOT_PNG, GPLOT_PS, GPLOT_EPS, GPLOT_X11,
GPLOT_LATEX)
outroot (root of output files)
title (<optional>, can be NULL)
Return: 0 if OK, 1 on error
Notes: (1) This gives a line plot of a numa, where the array value is plotted vs the array index. The plot is generated in the specified output format; the title is optional. (2) When calling these simple plot functions more than once, use different @outroot to avoid overwriting the output files.
| LEPT_DLL l_int32 gplotSimple2 | ( | NUMA * | na1, |
| NUMA * | na2, | ||
| l_int32 | outformat, | ||
| const char * | outroot, | ||
| const char * | title | ||
| ) |
Input: na1 (numa; plotted with Y_VS_I)
na2 (ditto)
outformat (GPLOT_PNG, GPLOT_PS, GPLOT_EPS, GPLOT_X11,
GPLOT_LATEX)
outroot (root of output files)
title (<optional>)
Return: 0 if OK, 1 on error
Notes: (1) This gives a line plot of two numa, where the array values are each plotted vs the array index. The plot is generated in the specified output format; the title is optional. (2) When calling these simple plot functions more than once, use different @outroot to avoid overwriting the output files.
| LEPT_DLL l_int32 gplotSimpleN | ( | NUMAA * | naa, |
| l_int32 | outformat, | ||
| const char * | outroot, | ||
| const char * | title | ||
| ) |
Input: naa (numaa; each numa is plotted with Y_VS_I)
outformat (GPLOT_PNG, GPLOT_PS, GPLOT_EPS, GPLOT_X11,
GPLOT_LATEX)
outroot (root of output files)
title (<optional>)
Return: 0 if OK, 1 on error
Notes: (1) This gives a line plot of all numas in a numaa (array of numa), where the array values are each plotted vs the array index. The plot is generated in the specified output format; the title is optional. (2) When calling these simple plot functions more than once, use different @outroot to avoid overwriting the output files.
| LEPT_DLL l_int32 gplotSimpleXY1 | ( | NUMA * | nax, |
| NUMA * | nay, | ||
| l_int32 | plotstyle, | ||
| l_int32 | outformat, | ||
| const char * | outroot, | ||
| const char * | title | ||
| ) |
Input: nax (<optional>)
nay
plotstyle (GPLOT_LINES, GPLOT_POINTS, GPLOT_IMPULSES,
GPLOT_LINESPOINTS, GPLOT_DOTS)
outformat (GPLOT_PNG, GPLOT_PS, GPLOT_EPS, GPLOT_X11,
GPLOT_LATEX)
outroot (root of output files)
title (<optional>, can be NULL)
Return: 0 if OK, 1 on error
Notes: (1) This gives a plot of @nay vs @nax, generated in the specified output format. The title is optional. (2) Use 0 for default plotstyle (lines). (3) @nax is optional. If NULL, @nay is plotted against the array index. (4) When calling these simple plot functions more than once, use different @outroot to avoid overwriting the output files.
| LEPT_DLL l_int32 gplotSimpleXY2 | ( | NUMA * | nax, |
| NUMA * | nay1, | ||
| NUMA * | nay2, | ||
| l_int32 | plotstyle, | ||
| l_int32 | outformat, | ||
| const char * | outroot, | ||
| const char * | title | ||
| ) |
Input: nax (<optional>; can be NULL)
nay1
nay2
plotstyle (GPLOT_LINES, GPLOT_POINTS, GPLOT_IMPULSES,
GPLOT_LINESPOINTS, GPLOT_DOTS)
outformat (GPLOT_PNG, GPLOT_PS, GPLOT_EPS, GPLOT_X11,
GPLOT_LATEX)
outroot (root of output files)
title (<optional>)
Return: 0 if OK, 1 on error
Notes: (1) This gives plots of @nay1 and @nay2 against nax, generated in the specified output format. The title is optional. (2) Use 0 for default plotstyle (lines). (3) @nax is optional. If NULL, @nay1 and @nay2 are plotted against the array index. (4) When calling these simple plot functions more than once, use different @outroot to avoid overwriting the output files.
| LEPT_DLL l_int32 gplotSimpleXYN | ( | NUMA * | nax, |
| NUMAA * | naay, | ||
| l_int32 | plotstyle, | ||
| l_int32 | outformat, | ||
| const char * | outroot, | ||
| const char * | title | ||
| ) |
Input: nax (<optional>; can be NULL)
naay (numaa of arrays to plot against @nax)
plotstyle (GPLOT_LINES, GPLOT_POINTS, GPLOT_IMPULSES,
GPLOT_LINESPOINTS, GPLOT_DOTS)
outformat (GPLOT_PNG, GPLOT_PS, GPLOT_EPS, GPLOT_X11,
GPLOT_LATEX)
outroot (root of output files)
title (<optional>)
Return: 0 if OK, 1 on error
Notes: (1) This gives plots of each Numa in @naay against @nax, generated in the specified output format. The title is optional. (2) Use 0 for default plotstyle (lines). (3) @nax is optional. If NULL, each Numa array is plotted against the array index. (4) When calling these simple plot functions more than once, use different @outroot to avoid overwriting the output files.
Input: filename
gplot
Return: 0 if OK; 1 on error
Input: naa1, naa2 (two numaa, each with one or more 256-element
histograms)
&nad (<return> nad of EM distances for each histogram)
Return: 0 if OK, 1 on error
Notes: (1) The two numaas must be the same size and have corresponding 256-element histograms. Pairs do not need to be normalized to the same sum. (2) This is typically used on two sets of histograms from corresponding tiles of two images. The similarity of two images can be found with the scoring function used in pixCompareGrayByHisto(): score S = 1.0 - k * D, where k is a constant, say in the range 5-10 D = EMD for each tile; for multiple tiles, take the Min(S) over the set of tiles to be the final score.
| LEPT_DLL l_int32 grayInterHistogramStats | ( | NUMAA * | naa, |
| l_int32 | wc, | ||
| NUMA ** | pnam, | ||
| NUMA ** | pnams, | ||
| NUMA ** | pnav, | ||
| NUMA ** | pnarv | ||
| ) |
Input: naa (numaa with two or more 256-element histograms)
wc (half-width of the smoothing window)
&nam (<optional return> mean values)
&nams (<optional return> mean square values)
&nav (<optional return> variances)
&narv (<optional return> rms deviations from the mean)
Return: 0 if OK, 1 on error
Notes: (1) The @naa has two or more 256-element numa histograms, which are to be compared value-wise at each of the 256 gray levels. The results are stats (mean, mean square, variance, root variance) aggregated across the set of histograms, and each is output as a 256 entry numa. Think of these histograms as a matrix, where each histogram is one row of the array. The stats are then aggregated column-wise, between the histograms. (2) These stats are:
Input: filename (input file) Return: 0 if OK; 1 on error or if the test fails
Notes: (1) This writes and reads a set of output files losslessly in different formats to /tmp/format/, and tests that the result before and after is unchanged. (2) This should work properly on input images of any depth, with and without colormaps. (3) All supported formats are tested for bmp, png, tiff and non-ascii pnm. Ascii pnm also works (but who'd ever want to use it?) We allow 2 bpp bmp, although it's not supported elsewhere. And we don't support reading 16 bpp png, although this can be turned on in pngio.c. (4) This silently skips png or tiff testing if HAVE_LIBPNG or HAVE_LIBTIFF are 0, respectively.
Input: filename
&interlaced (<return> 1 if interlaced png; 0 otherwise)
Return: 0 if OK, 1 on error
Input: pixaa (one pixa for each class)
&pna (<return> number of samples used to build each composite)
&ptat (<return> centroids of bordered composites)
Return: pixad (accumulated sum of samples in each class),
or null on error
Input: jbclasser
pixs (of input page)
Return: 0 if OK; 1 on error
| LEPT_DLL l_int32 jbAddPageComponents | ( | JBCLASSER * | classer, |
| PIX * | pixs, | ||
| BOXA * | boxas, | ||
| PIXA * | pixas | ||
| ) |
Input: jbclasser
pixs (of input page)
boxas (b.b. of components for this page)
pixas (components for this page)
Return: 0 if OK; 1 on error
Notes: (1) If there are no components on the page, we don't require input of empty boxas or pixas, although that's the typical situation.
Input: jbclasser
safiles (of page image file names)
Return: 0 if OK; 1 on error
Note: (1) jbclasser makes a copy of the array of file names. (2) The caller is still responsible for destroying the input array.
Input: method (JB_RANKHAUS, JB_CORRELATION)
components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
Return: jbclasser, or null on error
Input: jbclasser
boxa (of new components for classification)
pixas (of new components for classification)
Return: 0 if OK; 1 on error
Input: jbclasser
boxa (of new components for classification)
pixas (of new components for classification)
Return: 0 if OK; 1 on error
| LEPT_DLL l_int32 jbCorrelation | ( | const char * | dirin, |
| l_float32 | thresh, | ||
| l_float32 | weight, | ||
| l_int32 | components, | ||
| const char * | rootname, | ||
| l_int32 | firstpage, | ||
| l_int32 | npages, | ||
| l_int32 | renderflag | ||
| ) |
Input: dirin (directory of input images)
thresh (typically ~0.8)
weight (typically ~0.6)
components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
rootname (for output files)
firstpage (0-based)
npages (use 0 for all pages in dirin)
renderflag (1 to render from templates; 0 to skip)
Return: 0 if OK, 1 on error
Notes: (1) The images must be 1 bpp. If they are not, you can convert them using convertFilesTo1bpp(). (2) See prog/jbcorrelation for generating more output (e.g., for debugging)
| LEPT_DLL JBCLASSER* jbCorrelationInit | ( | l_int32 | components, |
| l_int32 | maxwidth, | ||
| l_int32 | maxheight, | ||
| l_float32 | thresh, | ||
| l_float32 | weightfactor | ||
| ) |
Input: components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
maxwidth (of component; use 0 for default)
maxheight (of component; use 0 for default)
thresh (value for correlation score: in [0.4 - 0.98])
weightfactor (corrects thresh for thick characters [0.0 - 1.0])
Return: jbclasser if OK; NULL on error
Notes: (1) For scanned text, suggested input values are: thresh ~ [0.8 - 0.85] weightfactor ~ [0.5 - 0.6] (2) For electronically generated fonts (e.g., rasterized pdf), a very high thresh (e.g., 0.95) will not cause a significant increase in the number of classes.
| LEPT_DLL JBCLASSER* jbCorrelationInitWithoutComponents | ( | l_int32 | components, |
| l_int32 | maxwidth, | ||
| l_int32 | maxheight, | ||
| l_float32 | thresh, | ||
| l_float32 | weightfactor | ||
| ) |
jbCorrelationInitWithoutComponents()
Input: same as jbCorrelationInit Output: same as jbCorrelationInit
Note: acts the same as jbCorrelationInit(), but the resulting object doesn't keep a list of all the components.
Input: rootname (for template and data files) Return: jbdata, or NULL on error
Input: jbdata
debugflag (if TRUE, writes into 2 bpp pix and adds
component outlines in color)
Return: pixa (reconstruction of original images, using templates) or
null on error
Input: jbclasser
latticew, latticeh (cell size used to store each
connected component in the composite)
Return: jbdata, or null on error
Notes: (1) This routine stores the jbig2-type data required for generating a lossy jbig2 version of the image. It can be losslessly written to (and read from) two files. (2) It generates and stores the mosaic of templates. (3) It clones the Numa and Pta arrays, so these must all be destroyed by the caller. (4) Input 0 to use the default values for latticew and/or latticeh,
Input: rootname (for output files; everything but the extension)
jbdata
Return: 0 if OK, 1 on error
Notes: (1) Serialization function that writes data in jbdata to file.
| LEPT_DLL l_int32 jbGetComponents | ( | PIX * | pixs, |
| l_int32 | components, | ||
| l_int32 | maxwidth, | ||
| l_int32 | maxheight, | ||
| BOXA ** | pboxad, | ||
| PIXA ** | ppixad | ||
| ) |
Input: pixs (1 bpp)
components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
maxwidth, maxheight (of saved components; larger are discarded)
&boxad (<return> b.b. of component items)
&pixad (<return> component items)
Return: 0 if OK, 1 on error
Input: jbclasser Return: 0 if OK, 1 on error
Notes: (1) This computes the ptall field, which has the global LL corners, adjusted for each specific component, so that each component can be replaced by the template for its class and have the centroid in the template in the same position as the centroid of the original connected component. It is important that this be done properly to avoid a wavy baseline in the result. (2) It is computed here from the corresponding UL corners, where the input templates and stored instances are all bordered. This should be done after all pages have been processed. (3) For proper substitution, the templates whose LL corners are placed in these locations must be UN-bordered. This is available for a realistic jbig2 encoder, which would (1) encode each template without a border, and (2) encode the position using the LL corner (rather than the UL corner) because the difference between y-values of successive instances is typically close to zero.
Input: jbclasser
pixs (full res image)
boxa (of c.c. bounding rectangles for this page)
Return: 0 if OK, 1 on error
Notes: (1) This computes the ptaul field, which has the global UL corners, adjusted for each specific component, so that each component can be replaced by the template for its class and have the centroid in the template in the same position as the centroid of the original connected component. It is important that this be done properly to avoid a wavy baseline in the result. (2) The array fields ptac and ptact give the centroids of those components relative to the UL corner of each component. Here, we compute the difference in each component, round to nearest integer, and correct the box->x and box->y by the appropriate integral difference. (3) The templates and stored instances are all bordered.
| LEPT_DLL l_int32 jbRankHaus | ( | const char * | dirin, |
| l_int32 | size, | ||
| l_float32 | rank, | ||
| l_int32 | components, | ||
| const char * | rootname, | ||
| l_int32 | firstpage, | ||
| l_int32 | npages, | ||
| l_int32 | renderflag | ||
| ) |
Input: dirin (directory of input images)
size (of Sel used for dilation; typ. 2)
rank (rank value of match; typ. 0.97)
components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
rootname (for output files)
firstpage (0-based)
npages (use 0 for all pages in dirin)
renderflag (1 to render from templates; 0 to skip)
Return: 0 if OK, 1 on error
Notes: (1) See prog/jbrankhaus for generating more output (e.g., for debugging)
| LEPT_DLL JBCLASSER* jbRankHausInit | ( | l_int32 | components, |
| l_int32 | maxwidth, | ||
| l_int32 | maxheight, | ||
| l_int32 | size, | ||
| l_float32 | rank | ||
| ) |
Input: components (JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS)
maxwidth (of component; use 0 for default)
maxheight (of component; use 0 for default)
size (of square structuring element; 2, representing
2x2 sel, is necessary for reasonable accuracy of
small components; combine this with rank ~ 0.97
to avoid undue class expansion)
rank (rank val of match, each way; in [0.5 - 1.0];
when using size = 2, 0.97 is a reasonable value)
Return: jbclasser if OK; NULL on error
Input: pixac (one pix of composites for each class)
na (number of samples used for each class composite)
Return: pixad (8 bpp templates for each class), or null on error
| LEPT_DLL JBCLASSER* jbWordsInTextlines | ( | const char * | dirin, |
| l_int32 | reduction, | ||
| l_int32 | maxwidth, | ||
| l_int32 | maxheight, | ||
| l_float32 | thresh, | ||
| l_float32 | weight, | ||
| NUMA ** | pnatl, | ||
| l_int32 | firstpage, | ||
| l_int32 | npages | ||
| ) |
Input: dirin (directory of input pages)
reduction (1 for full res; 2 for half-res)
maxwidth (of word mask components, to be kept)
maxheight (of word mask components, to be kept)
thresh (on correlation; 0.80 is reasonable)
weight (for handling thick text; 0.6 is reasonable)
&natl (<return> numa with textline index for each component)
firstpage (0-based)
npages (use 0 for all pages in dirin)
Return: classer (for the set of pages)
Notes: (1) This is a high-level function. See prog/jbwords for example of usage. (2) Typically, words can be found reasonably well at a resolution of about 150 ppi. For highest accuracy, you should use 300 ppi. Assuming that the input images are 300 ppi, use reduction = 1 for finding words at full res, and reduction = 2 for finding them at 150 ppi.
Input: kels (source kernel) Return: keld (copy of kels), or null on error
Input: height, width Return: kernel, or null on error
Notes: (1) kernelCreate() initializes all values to 0. (2) After this call, (cy,cx) and nonzero data values must be assigned.
Input: filename Return: kernel, or null on error
Notes: (1) The file contains, in the following order:
3 3
1 1
25.5 51 24.3
70.2 146.3 73.4
20 50.9 18.4
Input: pix
cy, cx (origin of kernel)
Return: kernel, or null on error
Notes: (1) The origin must be positive and within the dimensions of the pix.
| LEPT_DLL L_KERNEL* kernelCreateFromString | ( | l_int32 | h, |
| l_int32 | w, | ||
| l_int32 | cy, | ||
| l_int32 | cx, | ||
| const char * | kdata | ||
| ) |
Input: height, width
cy, cx (origin)
kdata
Return: kernel of the given size, or null on error
Notes: (1) The data is an array of chars, in row-major order, giving space separated integers in the range [-255 ... 255]. (2) The only other formatting limitation is that you must leave space between the last number in each row and the double-quote. If possible, it's also nice to have each line in the string represent a line in the kernel; e.g., static const char *kdata = " 20 50 20 " " 70 140 70 " " 20 50 20 ";
Input: &kel (<to be nulled>) Return: void
Input: kernel
size (of grid interiors; odd; either 1 or a minimum size
of 17 is enforced)
gthick (grid thickness; either 0 or a minimum size of 2
is enforced)
Return: pix (display of kernel), or null on error
Notes: (1) This gives a visual representation of a kernel. (2) There are two modes of display: (a) Grid lines of minimum width 2, surrounding regions representing kernel elements of minimum size 17, with a "plus" mark at the kernel origin, or (b) A pix without grid lines and using 1 pixel per kernel element. (3) For both cases, the kernel absolute value is displayed, normalized such that the maximum absolute value is 255. (4) Large 2D separable kernels should be used for convolution with two 1D kernels. However, for the bilateral filter, the computation time is independent of the size of the 2D content kernel.
Input: kel
row
col
&val
Return: 0 if OK; 1 on error
Input: kernel
&min (<optional return> minimum value)
&max (<optional return> maximum value)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 kernelGetParameters | ( | L_KERNEL * | kel, |
| l_int32 * | psy, | ||
| l_int32 * | psx, | ||
| l_int32 * | pcy, | ||
| l_int32 * | pcx | ||
| ) |
Input: kernel
&sy, &sx, &cy, &cx (<optional return>; each can be null)
Return: 0 if OK, 1 on error
Input: kernel
&sum (<return> sum of all kernel values)
Return: 0 if OK, 1 on error
Input: kels (source kel, to be inverted) Return: keld (spatially inverted, about the origin), or null on error
Notes: (1) For convolution, the kernel is spatially inverted before a "correlation" operation is done between the kernel and the image.
Input: kels (source kel, to be normalized)
normsum (desired sum of elements in keld)
Return: keld (normalized version of kels), or null on error
(or a copy of kels if the sum of elements is very close to 0)
Notes: (1) If the sum of kernel elements is close to 0, do not try to calculate the normalized kernel. Instead, return a copy of the input kernel, with a warning.
Input: filename Return: kernel, or null on error
Input: stream Return: kernel, or null on error
Input: kernel
row
col
val
Return: 0 if OK; 1 on error
Input: kernel
cy, cx
Return: 0 if OK; 1 on error
Input: fname (output file)
kernel
Return: 0 if OK, 1 on error
Input: stream
kel
Return: 0 if OK, 1 on error
| LEPT_DLL L_AMAP_NODE* l_amapGetFirst | ( | L_AMAP * | m | ) |
| LEPT_DLL L_AMAP_NODE* l_amapGetLast | ( | L_AMAP * | m | ) |
| LEPT_DLL L_AMAP_NODE* l_amapGetNext | ( | L_AMAP_NODE * | n | ) |
| LEPT_DLL L_AMAP_NODE* l_amapGetPrev | ( | L_AMAP_NODE * | n | ) |
Input: x1, y1 (end point of first vector)
x2, y2 (end point of second vector)
Return: angle (radians), or 0.0 on error
Notes: (1) This gives the angle between two vectors, going between vector1 (x1,y1) and vector2 (x2,y2). The angle is swept out from 1 --> 2. If this is clockwise, the angle is positive, but the result is folded into the interval [-pi, pi].
Input: na Return: set (using the floats in the numa as keys)
Input: pta Return: set (using a 64-bit hash of (x,y) as the key)
Input: sa Return: set (using a string hash into a uint32 as the key)
| LEPT_DLL L_ASET_NODE* l_asetGetFirst | ( | L_ASET * | s | ) |
| LEPT_DLL L_ASET_NODE* l_asetGetLast | ( | L_ASET * | s | ) |
| LEPT_DLL L_ASET_NODE* l_asetGetNext | ( | L_ASET_NODE * | n | ) |
| LEPT_DLL L_ASET_NODE* l_asetGetPrev | ( | L_ASET_NODE * | n | ) |
Input: datas
size (of data array)
Return: datad (on heap), or null on error
Notes: (1) We add 4 bytes to the zeroed output because in some cases (e.g., string handling) it is important to have the data be null terminated. This guarantees that after the memcpy, the result is automatically null terminated.
Input: filename
&nbytes (<return> number of bytes read)
Return: data, or null on error
| LEPT_DLL l_uint8* l_binaryReadSelect | ( | const char * | filename, |
| size_t | start, | ||
| size_t | nbytes, | ||
| size_t * | pnread | ||
| ) |
Input: filename
start (first byte to read)
nbytes (number of bytes to read; use 0 to read to end of file)
&nread (<return> number of bytes actually read)
Return: data, or null on error
Notes: (1) The returned array is terminated with a null byte so that it can be used to read ascii data from a file into a proper C string.
| LEPT_DLL l_uint8* l_binaryReadSelectStream | ( | FILE * | fp, |
| size_t | start, | ||
| size_t | nbytes, | ||
| size_t * | pnread | ||
| ) |
Input: stream
start (first byte to read)
nbytes (number of bytes to read; use 0 to read to end of file)
&nread (<return> number of bytes actually read)
Return: null-terminated array, or null on error
(reading 0 bytes is not an error)
Notes: (1) The returned array is terminated with a null byte so that it can be used to read ascii data from a file into a proper C string. If the file to be read is empty and @start == 0, an array with a single null byte is returned. (2) Side effect: the stream pointer is re-positioned to the beginning of the file.
Input: fp (stream opened to read; can be stdin)
&nbytes (<return> number of bytes read)
Return: null-terminated array, or null on error
(reading 0 bytes is not an error)
Notes: (1) The returned array is terminated with a null byte so that it can be used to read ascii data from a file into a proper C string. (2) This can be used to capture data that is piped in via stdin, because it does not require seeking within the file. (3) For example, you can read an image from stdin into memory using shell redirection, with one of these shell commands: cat <imagefile> | readprog readprog < <imagefile> where readprog is: l_uint8 *data = l_binaryReadStream(stdin, &nbytes); Pix *pix = pixReadMem(data, nbytes);
| LEPT_DLL l_int32 l_binaryWrite | ( | const char * | filename, |
| const char * | operation, | ||
| void * | data, | ||
| size_t | nbytes | ||
| ) |
Input: filename (output)
operation ("w" for write; "a" for append)
data (binary data to be written)
nbytes (size of data array)
Return: 0 if OK; 1 on error
| LEPT_DLL void* l_bootnum_gen1 | ( | void | ) |
Return: the bootnum1 pixa
Call this way: PIXA *pixa = (PIXA *)l_bootnum_gen1(); (C) Pixa *pixa = (Pixa *)l_bootnum_gen1(); (C++)
| LEPT_DLL void* l_bootnum_gen2 | ( | void | ) |
Return: the bootnum2 pixa
Call this way: PIXA *pixa = (PIXA *)l_bootnum_gen2(); (C) Pixa *pixa = (Pixa *)l_bootnum_gen2(); (C++)
Input: ba
newdata (byte array to be appended)
size (size of data array)
Return: 0 if OK, 1 on error
Input: ba
str (null-terminated string to be appended)
Return: 0 if OK, 1 on error
Input: bas (source lba)
copyflag (L_COPY, L_CLONE)
Return: clone or copy of bas, or null on error
Notes: (1) If cloning, up the refcount and return a ptr to @bas.
Input: ba
&size (<returned> size of data in lba)
Return: copy of data in use in the data array, or null on error.
Notes: (1) The returned data is owned by the caller. The input @ba still owns the original data array.
Input: n (determines initial size of data array) Return: l_bytea, or null on error
Notes: (1) The allocated array is n + 1 bytes. This allows room for null termination.
Input: &ba (<will be set to null before returning>) Return: void
Notes: (1) Decrements the ref count and, if 0, destroys the lba. (2) Always nulls the input ptr. (3) If the data has been previously removed, the lba will have been nulled, so this will do nothing.
| LEPT_DLL l_int32 l_byteaFindEachSequence | ( | L_BYTEA * | ba, |
| l_uint8 * | sequence, | ||
| l_int32 | seqlen, | ||
| L_DNA ** | pda | ||
| ) |
Input: ba
sequence (subarray of bytes to find in data)
seqlen (length of sequence, in bytes)
&da (<return> byte positions of each occurrence of @sequence)
Return: 0 if OK, 1 on error
Input: ba
&size (<returned> size of data in lba)
Return: ptr to existing data array, or NULL on error
Notes: (1) The returned ptr is owned by @ba. Do not free it!
Input: ba Return: size of stored byte array, or 0 on error
Input: fname Return: l_bytea, or null on error
Input: data (to be copied to the array)
size (amount of data)
Return: l_bytea, or null on error
Input: stream Return: l_bytea, or null on error
Input: ba1
&ba2 (data array is added to the one in ba1, and
then ba2 is destroyed)
Return: 0 if OK, 1 on error
Notes: (1) It is a no-op, not an error, for @ba2 to be null.
Input: ba1 (lba to split; array bytes nulled beyond the split loc)
splitloc (location in ba1 to split; ba2 begins there)
&ba2 (<return> with data starting at splitloc)
Return: 0 if OK, 1 on error
Input: fname (output file)
ba
startloc (first byte to output)
endloc (last byte to output; use 0 to write to the
end of the data array)
Return: 0 if OK, 1 on error
Input: stream (opened for binary write)
ba
startloc (first byte to output)
endloc (last byte to output; use 0 to write to the
end of the data array)
Return: 0 if OK, 1 on error
| LEPT_DLL void l_CIDataDestroy | ( | L_COMP_DATA ** | pcid | ) |
Input: &cid (<will be set to null before returning>) Return: void
Input: line (ptr to beginning of data line)
n (pixel index)
Return: void
Action: sets the (1-bit) pixel to 0
Input: line (ptr to beginning of data line)
n (pixel index)
Return: void
Action: sets the (2-bit) pixel to 0
Input: line (ptr to beginning of data line)
n (pixel index)
Return: void
Action: sets the (4-bit) pixel to 0
Input: numaa (set of 256-entry histograms)
w, h (size of image)
&size (<return> size of byte array)
Return: 0 if OK, 1 on error
Notes: (1) This first writes w and h to the byte array as 4 byte ints. (2) Then it normalizes each histogram to a max value of 255, and saves each value as a byte. If there are N histograms, the output bytearray has 8 + 256 * N bytes. (3) Further compression of the array with zlib yields only about a 25% decrease in size, so we don't bother. If size reduction were important, a lossy transform using a 1-dimensional DCT would be effective, because we don't care about the fine details of these histograms.
Input: str (input string representing one UTF-8 character;
not more than 4 bytes)
&val (<return> integer value for the input. Think of it
as a 1-to-1 hash code.)
Return: 0 if OK, 1 on error
Input: daa
da (to be added)
copyflag (L_INSERT, L_COPY, L_CLONE)
Return: 0 if OK, 1 on error
Input: daa
index (of l_dna within l_dnaa)
val (number to be added; stored as a double)
Return: 0 if OK, 1 on error
Notes: (1) Adds to an existing l_dna only.
Input: size of l_dna ptr array to be alloc'd (0 for default) Return: daa, or null on error
Input: nptr: size of dna ptr array to be alloc'd
n: size of individual dna arrays to be alloc'd (0 for default)
Return: daa, or null on error
Notes: (1) This allocates a dnaa and fills the array with allocated dnas. In use, after calling this function, use l_dnaaAddNumber(dnaa, index, val); to add val to the index-th dna in dnaa.
Input: da
val (float or int to be added; stored as a double)
Return: 0 if OK, 1 on error
Input: &dnaa <to be nulled if it exists> Return: void
Input: daa Return: count (number of l_dna), or 0 if no l_dna or on error
Input: daa
index (to the index-th l_dna)
accessflag (L_COPY or L_CLONE)
Return: l_dna, or null on error
Input: daa
index (of l_dna in daa)
Return: count of numbers in the referenced l_dna, or 0 on error.
Input: daa
Return: count (total number of numbers in the l_dnaa),
or 0 if no numbers or on error
Input: daa
i (index of l_dna within l_dnaa)
j (index into l_dna)
val (<return> double value)
Return: 0 if OK, 1 on error
Input: filename Return: daa, or null on error
Input: stream Return: daa, or null on error
Input: daa
index (to the index-th l_dna)
l_dna (insert and replace any existing one)
Return: 0 if OK, 1 on error
Notes: (1) Any existing l_dna is destroyed, and the input one is inserted in its place. (2) If the index is invalid, return 1 (error)
Input: daa Return: 0 if OK, 1 on error
Notes: (1) This identifies the largest index containing a dna that has any numbers within it, destroys all dna beyond that index, and resets the count.
Input: filename, daa Return: 0 if OK, 1 on error
Input: stream, daa Return: 0 if OK, 1 on error
l_dnaChangeRefCount()
Input: da
delta (change to be applied)
Return: 0 if OK, 1 on error
Input: da Return: ptr to same l_dna, or null on error
Input: da Return: na, or null on error
Input: da Return: copy of l_dna, or null on error
Notes: (1) This removes unused ptrs above da->n.
Input: dad (destination DNuma)
das (source DNuma)
Return: 0 if OK, 1 on error
Input: size of number array to be alloc'd (0 for default) Return: da, or null on error
Input: da (float)
size (of the array)
copyflag (L_INSERT or L_COPY)
Return: da, or null on error
Notes: (1) With L_INSERT, ownership of the input array is transferred to the returned l_dna, and all @size elements are considered to be valid.
Input: iarray (integer)
size (of the array)
Return: da, or null on error
Notes: (1) We can't insert this int array into the l_dna, because a l_dna takes a double array. So this just copies the data from the input array into the l_dna. The input array continues to be owned by the caller.
Input: &da (<to be nulled if it exists>) Return: void
Notes: (1) Decrements the ref count and, if 0, destroys the l_dna. (2) Always nulls the input ptr.
Input: da Return: 0 if OK; 1 on error
Notes: (1) This does not change the allocation of the array. It just clears the number of stored numbers, so that the array appears to be empty.
| LEPT_DLL l_int32 l_dnaFindValByHash | ( | L_DNA * | da, |
| L_DNAHASH * | dahash, | ||
| l_float64 | val, | ||
| l_int32 * | pindex | ||
| ) |
Input: da
dahash (containing indices into @da)
val (searching for this number in @da)
&index (<return> index into da if found; -1 otherwise)
Return: 0 if OK; 1 on error
Notes: (1) Algo: hash @val into a key; hash the key to get the dna in @dahash (that holds indices into @da); traverse the dna of indices looking for @val in @da.
Input: da Return: count, or 0 if no numbers or on error
Input: da
copyflag (L_NOCOPY or L_COPY)
Return: either the bare internal array or a copy of it,
or null on error
Notes: (1) If copyflag == L_COPY, it makes a copy which the caller is responsible for freeing. Otherwise, it operates directly on the bare array of the l_dna. (2) Very important: for L_NOCOPY, any writes to the array will be in the l_dna. Do not write beyond the size of the count field, because it will not be accessible from the l_dna! If necessary, be sure to set the count field to a larger number (such as the alloc size) BEFORE calling this function. Creating with l_dnaMakeConstant() is another way to ensure full initialization.
Input: da
index (into l_dna)
&val (<return> double value; 0.0 on error)
Return: 0 if OK; 1 on error
Notes: (1) Caller may need to check the function return value to decide if a 0.0 in the returned val is valid.
Input: da
Return: a copy of the bare internal array, integerized
by rounding, or null on error
Notes: (1) A copy of the array is made, because we need to generate an integer array from the bare double array. The caller is responsible for freeing the array. (2) The array size is determined by the number of stored numbers, not by the size of the allocated array in the l_dna. (3) This function is provided to simplify calculations using the bare internal array, rather than continually calling accessors on the l_dna. It is typically used on an array of size 256.
Input: da
index (into l_dna)
&ival (<return> integer value; 0 on error)
Return: 0 if OK; 1 on error
Notes: (1) Caller may need to check the function return value to decide if a 0 in the returned ival is valid.
Input: da
&startx (<optional return> startx)
&delx (<optional return> delx)
Return: 0 if OK, 1 on error
l_dnaGetRefCount()
Input: da Return: refcount, or UNDEF on error
Input: dahash
key (key to be hashed into a bucket number)
value (float value to be appended to the specific dna)
Return: 0 if OK; 1 on error
Input: nbuckets (the number of buckets in the hash table,
which should be prime.)
initsize (initial size of each allocated dna; 0 for default)
Return: ptr to new dnahash, or null on error
Note: actual dna are created only as required by l_dnaHashAdd()
Input: da Return: dahash, or null on error
Notes: (1) The values stored in the @dahash are indices into @da; @dahash has no use without @da.
Input: pta Return: dahash, or null on error
Input: sa Return: dahash, or null on error
Input: &dahash (<to be nulled, if it exists>) Return: void
Input: dahash Return: nbuckets (allocated, or 0 on error)
Input: dahash
key (key to be hashed into a bucket number)
copyflag (L_NOCOPY, L_COPY, L_CLONE)
Return: ptr to dna
Input: dahash Return: n (number of numbers in all dna, or 0 on error)
Input: da
index (location in da to insert new value)
val (float64 or integer to be added)
Return: 0 if OK, 1 on error
Notes: (1) This shifts da[i] --> da[i + 1] for all i >= index, and then inserts val as da[index]. (2) It should not be used repeatedly on large arrays, because the function is O(n).
Input: da1, da2 Return: dad (intersection of the number arrays), or null on error
Notes: (1) This uses the same method for building the intersection set as ptaIntersectionByHash() and sarrayIntersectionByHash().
Input: dad (dest dna; add to this one)
das (<optional> source dna; add from this one)
istart (starting index in das)
iend (ending index in das; use -1 to cat all)
Return: 0 if OK, 1 on error
Notes: (1) istart < 0 is taken to mean 'read from the start' (istart = 0) (2) iend < 0 means 'read to the end' (3) if das == NULL, this is a no-op
Input: das (input l_dna)
Return: dad (of difference values val[i+1] - val[i]),
or null on error
| LEPT_DLL l_int32 l_dnaMakeHistoByHash | ( | L_DNA * | das, |
| L_DNAHASH ** | pdahash, | ||
| L_DNA ** | pdav, | ||
| L_DNA ** | pdac | ||
| ) |
Input: das
&dahash (<return> hash map: val --> index)
&dav (<return> array of values: index --> val)
&dac (<return> histo array of counts: index --> count)
Return: 0 if OK; 1 on error
Notes: (1) Generates and returns a dna of occurrences (histogram), an aligned dna of values, and an associated hashmap. The hashmap takes @dav and a value, and points into the histogram in @dac. (2) The dna of values, @dav, is aligned with the histogram @dac, and is needed for fast lookup. It is a hash set, because the values are unique. (3) Lookup is simple: l_dnaFindValByHash(dav, dahash, val, &index); if (index >= 0) l_dnaGetIValue(dac, index, &icount); else icount = 0;
Input: startval
increment
size (of sequence)
Return: l_dna of sequence of evenly spaced values, or null on error
Input: filename Return: da, or null on error
Input: stream Return: da, or null on error
Notes: (1) fscanf takes %lf to read a double; fprintf takes %f to write it.
Input: das
&dad (<return> hash set)
&dahash (<optional return> dnahash used for lookup)
Return: 0 if OK; 1 on error
Notes: (1) Generates a dna with unique values. (2) The dnahash is built up with dad to assure uniqueness. It can be used to find if an element is in the set: l_dnaFindValByHash(dad, dahash, val, &index)
Input: da
index (element to be removed)
Return: 0 if OK, 1 on error
Notes: (1) This shifts da[i] --> da[i - 1] for all i > index. (2) It should not be used repeatedly on large arrays, because the function is O(n).
Input: da
index (element to be replaced)
val (new value to replace old one)
Return: 0 if OK, 1 on error
Input: da
newcount
Return: 0 if OK, 1 on error
Notes: (1) If newcount <= da->nalloc, this resets da->n. Using newcount = 0 is equivalent to l_dnaEmpty(). (2) If newcount > da->nalloc, this causes a realloc to a size da->nalloc = newcount. (3) All the previously unused values in da are set to 0.0.
Input: da
startx (x value corresponding to da[0])
delx (difference in x values for the situation where the
elements of da correspond to the evaluation of a
function at equal intervals of size @delx)
Return: 0 if OK, 1 on error
Input: da
index (to element to be set)
val (to set element)
Return: 0 if OK; 1 on error
Input: da
index (to element to change relative to the current value)
diff (increment if diff > 0 or decrement if diff < 0)
Return: 0 if OK; 1 on error
Input: filename, da Return: 0 if OK, 1 on error
Input: stream, da Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 l_generateCIData | ( | const char * | fname, |
| l_int32 | type, | ||
| l_int32 | quality, | ||
| l_int32 | ascii85, | ||
| L_COMP_DATA ** | pcid | ||
| ) |
Input: fname
type (L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE, L_JP2K_ENCODE)
quality (used for jpeg only; 0 for default (75))
ascii85 (0 for binary; 1 for ascii85-encoded)
&cid (<return> compressed data)
Return: 0 if OK, 1 on error
Notes: (1) This can be used for both PostScript and pdf. (2) Set ascii85:
| LEPT_DLL l_int32 l_generateCIDataForPdf | ( | const char * | fname, |
| PIX * | pix, | ||
| l_int32 | quality, | ||
| L_COMP_DATA ** | pcid | ||
| ) |
Input: fname
pix (<optional>; can be null)
quality (for jpeg if transcoded; 75 is standard)
&cid (<return> compressed data)
Return: 0 if OK, 1 on error
Notes: (1) Given an image file and optionally a pix raster of that data, this provides a CID that is compatible with PDF, preferably without transcoding. (2) The pix is included for efficiency, in case transcoding is required and the pix is available to the caller.
| LEPT_DLL L_COMP_DATA* l_generateFlateData | ( | const char * | fname, |
| l_int32 | ascii85flag | ||
| ) |
Input: fname
ascii85flag (0 for gzipped; 1 for ascii85-encoded gzipped)
Return: cid (flate compressed image data), or null on error
Notes: (1) The input image is converted to one of these 4 types:
| LEPT_DLL L_COMP_DATA* l_generateFlateDataPdf | ( | const char * | fname, |
| PIX * | pixs | ||
| ) |
Input: fname (preferably png)
pix (<optional>; can be null)
Return: cid (containing png data), or null on error
Notes: (1) If you hand this a png file, you are going to get png predictors embedded in the flate data. So it has come to this. http://xkcd.com/1022/ (2) Exception: if the png is interlaced or if it is RGBA, it will be transcoded. (3) If transcoding is required, this will not have to read from file if you also input a pix.
| LEPT_DLL L_COMP_DATA* l_generateG4Data | ( | const char * | fname, |
| l_int32 | ascii85flag | ||
| ) |
Input: fname (of g4 compressed file)
ascii85flag (0 for g4 compressed; 1 for ascii85-encoded g4)
Return: cid (g4 compressed image data), or null on error
Notes: (1) Set ascii85flag:
| LEPT_DLL L_COMP_DATA* l_generateJpegData | ( | const char * | fname, |
| l_int32 | ascii85flag | ||
| ) |
Input: fname (of jpeg file)
ascii85flag (0 for jpeg; 1 for ascii85-encoded jpeg)
Return: cid (containing jpeg data), or null on error
Notes: (1) Set ascii85flag:
Input: &sec (<optional return> in seconds since birth of Unix)
&usec (<optional return> in microseconds since birth of Unix)
Return: void
Input: line (ptr to beginning of data line)
n (pixel index)
Return: val of the nth (1-bit) pixel.
Input: line (ptr to beginning of data line)
n (pixel index)
Return: value of the n-th (byte) pixel
Input: line (ptr to beginning of data line)
n (pixel index)
Return: val of the nth (2-bit) pixel.
Input: line (ptr to beginning of data line)
n (pixel index)
Return: value of the n-th (4-byte) pixel
Input: line (ptr to beginning of data line)
n (pixel index)
Return: val of the nth (4-bit) pixel.
Input: line (ptr to beginning of data line)
n (pixel index)
Return: value of the n-th (2-byte) pixel
| LEPT_DLL char* l_getFormattedDate | ( | ) |
Input: (none) Return: formatted date string, or null on error
Notes: (1) This is used in pdf, in the form specified in section 3.8.2 of http://partners.adobe.com/public/developer/en/pdf/PDFReference.pdf (2) Contributed by Dave Bryan. Works on all platforms.
Input: filename
&sn (<return> structname; e.g., "Pixa")
Return: 0 if found, 1 on error.
Input: nbuckets
val
&hash (<return>)
Return: 0 if OK, 1 on error
Notes: (1) Simple, fast hash for using dnaHash with 64-bit data (e.g., sets and histograms). (2) The resulting hash is called a "key" in a lookup operation. The bucket for @val in a dnaHash is simply found by taking the mod of the hash with the number of buckets (which is prime). What gets stored in the dna in that bucket could depend on use, but for the most flexibility, we store an index into the associated dna. This is all that is required for generating either a hash set or a histogram (an example of a hash map). (3) For example, to generate a histogram, the histogram dna, a histogram of unique values aligned with the histogram dna, and a dnahash hashmap are built. See l_dnaMakeHistoByHash().
Input: x, y
&hash (<return>)
Return: 0 if OK, 1 on error
Notes: (1) I just made up a hash function and fiddled with it to get decent coverage over the 2^64 values. There are no collisions for any of 100 million points with x and y up to 10000.
Input: nbuckets
x, y
&hash (<return>)
Return: 0 if OK, 1 on error
Notes: (1) This is a simple, fast hash that is used with the dna hash map, which takes the mod with a prime number of buckets. The number of buckets is selected so that collisions occur, aiming for about 20 results in each bucket. The design goal is that the hash is fast (mult/add) and approximately the same number of points are hashed to each bucket.
Input: str
&hash (<return>)
Return: 0 if OK, 1 on error
Notes: (1) The intent of the hash is to avoid collisions by mapping the string as randomly as possible into 64 bits. (2) To the extent that the hashes are random, the probability of a collision can be approximated by the square of the number of strings divided by 2^64. For 1 million strings, the collision probability is about 1 in 16 million. (3) I expect non-randomness of the distribution to be most evident for small text strings. This hash function has been tested for all text strings of 1 to 5 characters composed of 26 letters, of which there are 26 + 26^2 + ... + 26^5 = 12356630. There are no hash collisions for this set.
Input: flag (1 for writing date/time and leptonica version;
0 for omitting this from the metadata)
Return: void
Notes: (1) The default is for writing this data. For regression tests that compare output against golden files, it is useful to omit.
Input: flag (1 for writing g4 data as fg only through a mask;
0 for writing fg and bg)
Return: void
Notes: (1) The default is for writing only the fg (through the mask). That way when you write a 1 bpp image, the bg is transparent, so any previously written image remains visible behind it.
Input: flag (1 for stripping 16 bpp to 8 bpp on reading;
0 for leaving 16 bpp)
Return: void
| LEPT_DLL l_int32 l_productMat2 | ( | l_float32 * | mat1, |
| l_float32 * | mat2, | ||
| l_float32 * | matd, | ||
| l_int32 | size | ||
| ) |
Input: mat1 (square matrix, as a 1-dimensional size^2 array)
mat2 (square matrix, as a 1-dimensional size^2 array)
matd (square matrix; product stored here)
size (of matrices)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 l_productMat3 | ( | l_float32 * | mat1, |
| l_float32 * | mat2, | ||
| l_float32 * | mat3, | ||
| l_float32 * | matd, | ||
| l_int32 | size | ||
| ) |
Input: mat1 (square matrix, as a 1-dimensional size^2 array)
mat2 (square matrix, as a 1-dimensional size^2 array)
mat3 (square matrix, as a 1-dimensional size^2 array)
matd (square matrix; product stored here)
size (of matrices)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 l_productMat4 | ( | l_float32 * | mat1, |
| l_float32 * | mat2, | ||
| l_float32 * | mat3, | ||
| l_float32 * | mat4, | ||
| l_float32 * | matd, | ||
| l_int32 | size | ||
| ) |
Input: mat1 (square matrix, as a 1-dimensional size^2 array)
mat2 (square matrix, as a 1-dimensional size^2 array)
mat3 (square matrix, as a 1-dimensional size^2 array)
mat4 (square matrix, as a 1-dimensional size^2 array)
matd (square matrix; product stored here)
size (of matrices)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 l_productMatVec | ( | l_float32 * | mat, |
| l_float32 * | vecs, | ||
| l_float32 * | vecd, | ||
| l_int32 | size | ||
| ) |
Input: mat (square matrix, as a 1-dimensional @size^2 array)
vecs (input column vector of length @size)
vecd (result column vector)
size (matrix is @size x @size; vectors are length @size)
Return: 0 if OK, 1 on error
| LEPT_DLL L_RBTREE_NODE* l_rbtreeGetFirst | ( | L_RBTREE * | t | ) |
| LEPT_DLL L_RBTREE_NODE* l_rbtreeGetLast | ( | L_RBTREE * | t | ) |
| LEPT_DLL L_RBTREE_NODE* l_rbtreeGetNext | ( | L_RBTREE_NODE * | n | ) |
| LEPT_DLL L_RBTREE_NODE* l_rbtreeGetPrev | ( | L_RBTREE_NODE * | n | ) |
Input: val1, val2 (in [0.0 ... 1.0]) Return: void
Notes: (1) This sets the opacity values used to generate the two outer boundary rings in the alpha mask associated with geometric transforms such as pixRotateWithAlpha(). (2) The default values are val1 = 0.0 (completely transparent in the outermost ring) and val2 = 0.5 (half transparent in the second ring). When the image is blended, this completely removes the outer ring (shrinking the image by 2 in each direction), and alpha-blends with 0.5 the second ring. Using val1 = 0.25 and val2 = 0.75 gives a slightly more blurred border, with no perceptual difference at screen resolution. (3) The actual mask values are found by multiplying these normalized opacity values by 255.
Input: xfact, yfact (integer >= 1) Return: void
Notes: (1) This sets the x and y output subsampling factors for generic pix and fpix convolution. The default values are 1 (no subsampling).
Input: line (ptr to beginning of data line)
n (pixel index)
Return: void
Action: sets the pixel to 1
Input: line (ptr to beginning of data line)
n (pixel index)
val (val to be inserted: 0 or 1)
Return: void
Notes: (1) This is an accessor for a 1 bpp pix. (2) It is actually a little slower than using: if (val == 0) l_ClearDataBit(line, n); else l_SetDataBit(line, n);
Input: line (ptr to beginning of data line)
n (pixel index)
val (val to be inserted: 0 - 0xff)
Return: void
Input: line (ptr to beginning of data line)
n (pixel index)
val (val to be inserted: 0 - 3)
Return: void
Input: line (ptr to beginning of data line)
n (pixel index)
val (val to be inserted: 0 - 0xffffffff)
Return: void
Input: line (ptr to beginning of data line)
n (pixel index)
val (val to be inserted: 0 - 0xf)
Return: void
Input: line (ptr to beginning of data line)
n (pixel index)
val (val to be inserted: 0 - 0xffff)
Return: void
| LEPT_DLL NUMAA* l_uncompressGrayHistograms | ( | l_uint8 * | bytea, |
| size_t | size, | ||
| l_int32 * | pw, | ||
| l_int32 * | ph | ||
| ) |
Input: bytea (byte array of size 8 + 256 * N, N an integer)
size (size of byte array)
&w (<return> width of the image that generated the histograms)
&h (<return> height of the image)
Return: numaa (representing N histograms, each with 256 bins),
or null on error.
Notes: (1) The first 8 bytes are read as two 32-bit ints. (2) Then this constructs a numaa representing some number of gray histograms that are normalized such that the max value in each histogram is 255. The data is stored as a byte array, with 256 bytes holding the data for each histogram. Each gray histogram was computed from a tile of a grayscale image.
| LEPT_DLL void* lept_calloc | ( | size_t | nmemb, |
| size_t | size | ||
| ) |
Input: nmemb (number of members)
size (of each member)
Return: void ptr, or null on error
Notes: (1) For safety with windows DLLs, this can be used in conjunction with lept_free() to avoid C-runtime boundary problems. Just use these two functions throughout your application.
| LEPT_DLL l_int32 lept_cp | ( | const char * | srcfile, |
| const char * | newdir, | ||
| const char * | newtail, | ||
| char ** | pnewpath | ||
| ) |
Input: srcfile
newdir (<optional>; can be NULL)
newtail (<optional>; can be NULL)
&newpath (<optional return> of actual path; can be NULL)
Return: 0 on success, non-zero on failure
Notes: (1) This copies @srcfile to /tmp or to a subdirectory of /tmp. (2) @srcfile can either be a full path or relative to the current directory. (3) @newdir can either specify an existing subdirectory of /tmp, or can be NULL. In the latter case, the file will be written into /tmp. (4) @newtail can either specify a filename tail or, if NULL, the filename is taken from src-tail, the tail of @srcfile. (5) For debugging, the computed newpath can be returned. It must be freed by the caller. (6) Reminders: (a) specify files using unix pathnames (b) for windows, translates /tmp ==> <Temp> where <Temp> is the windows temp directory (7) Examples:
Input: dir
&exists (<return> 1 if it exists; 0 otherwise)
Return: void
Notes: (1) Always use unix pathname separators. (2) By calling genPathname(), if the pathname begins with "/tmp" this does an automatic directory translation on windows to a path in the windows <Temp> directory: "/tmp" ==> <Temp> (windows)
Input: fp (stream handle) Return: 0 if OK, 1 on error
Notes: (1) This should be used by any application that accepts a file handle generated by a leptonica Windows DLL.
| LEPT_DLL FILE* lept_fopen | ( | const char * | filename, |
| const char * | mode | ||
| ) |
Input: filename
mode (same as for fopen(); e.g., "rb")
Return: stream or null on error
Notes: (1) This must be used by any application that passes a file handle to a leptonica Windows DLL.
| LEPT_DLL void lept_free | ( | void * | ptr | ) |
Input: void ptr Return: void
Notes: (1) This should be used by any application that accepts heap data allocated by a leptonica Windows DLL.
Input: n (64-bit unsigned)
&is_prime (<return> 1 if prime, 0 otherwise)
&factor (<optional return> smallest divisor,
or 0 on error or if prime)
Return: 0 if OK, 1 on error
Input: subdir (of /tmp or its equivalent on Windows) Return: 0 on success, non-zero on failure
Notes: (1) @subdir is a partial path that can consist of one or more directories. (2) This makes any subdirectories of /tmp that are required. (3) The root temp directory is: /tmp (unix) [default] <Temp> (windows)
| LEPT_DLL l_int32 lept_mv | ( | const char * | srcfile, |
| const char * | newdir, | ||
| const char * | newtail, | ||
| char ** | pnewpath | ||
| ) |
Input: srcfile
newdir (<optional>; can be NULL)
newtail (<optional>; can be NULL)
&newpath (<optional return> of actual path; can be NULL)
Return: 0 on success, non-zero on failure
Notes: (1) This moves @srcfile to /tmp or to a subdirectory of /tmp. (2) @srcfile can either be a full path or relative to the current directory. (3) @newdir can either specify an existing subdirectory of /tmp or can be NULL. In the latter case, the file will be written into /tmp. (4) @newtail can either specify a filename tail or, if NULL, the filename is taken from src-tail, the tail of @srcfile. (5) For debugging, the computed newpath can be returned. It must be freed by the caller. (6) Reminders: (a) specify files using unix pathnames (b) for windows, translates /tmp ==> <Temp> where <Temp> is the windows temp directory (7) Examples:
Input: subdir (<optional> of '/tmp'; can be NULL)
tail (filename without the directory)
Return: 0 on success, non-zero on failure
Notes: (1) By calling genPathname(), this does an automatic directory translation on windows to a path in the windows <Temp> directory: "/tmp/..." ==> <Temp>/... (windows)
Input: subdir (<optional> If NULL, the removed files are in /tmp)
substr (<optional> pattern to match in filename)
Return: 0 on success, non-zero on failure
Notes: (1) This removes the matched files in /tmp or a subdirectory of /tmp. Use NULL for @subdir if the files are in /tmp. (2) If @substr == NULL, this removes all files in the directory. If @substr == "" (empty), this removes no files. If both @subdir == NULL and @substr == NULL, this removes all files in /tmp. (3) Use unix pathname separators. (4) By calling genPathname(), if the pathname begins with "/tmp" this does an automatic directory translation on windows to a path in the windows <Temp> directory: "/tmp" ==> <Temp> (windows) (5) Error conditions:
Input: subdir (of /tmp or its equivalent on Windows) Return: 0 on success, non-zero on failure
Notes: (1) @subdir is a partial path that can consist of one or more directories. (2) This removes all files from the specified subdirectory of the root temp directory: /tmp (unix) <Temp> (windows) and then removes the subdirectory. (3) The combination lept_rmdir(subdir); lept_mkdir(subdir); is guaranteed to give you an empty subdirectory.
TODO: Remove this function ?
Input: filepath (full path to file including the directory) Return: 0 on success, non-zero on failure
Notes: (1) This removes the named file. (2) Use unix pathname separators. (3) Unlike the other lept_* functions in this section, this can remove any file – it is not restricted to files that are in /tmp or a subdirectory of it.
Input: fval Return: value rounded to int
Notes: (1) For fval >= 0, fval --> round(fval) == floor(fval + 0.5) For fval < 0, fval --> -round(-fval) This is symmetric around 0. e.g., for fval in (-0.5 ... 0.5), fval --> 0
Input: lheap
item to be added to the tail of the heap
Return: 0 if OK, 1 on error
Input: size of ptr array to be alloc'd (0 for default)
direction (L_SORT_INCREASING, L_SORT_DECREASING)
Return: lheap, or null on error
Input: &lheap (<to be nulled>)
freeflag (TRUE to free each remaining struct in the array)
Return: void
Notes: (1) Use freeflag == TRUE when the items in the array can be simply destroyed using free. If those items require their own destroy function, they must be destroyed before calling this function, and then this function is called with freeflag == FALSE. (2) To destroy the lheap, we destroy the ptr array, then the lheap, and then null the contents of the input ptr.
Input: lheap Return: count, or 0 on error
Input: stream
lheap
Return: 0 if OK; 1 on error
Input: lheap
Return: ptr to item popped from the root of the heap,
or null if the heap is empty or on error
Input: lh (heap, with internal array) Return: 0 if OK, 1 on error
Notes: (1) This sorts an array into heap order. If the heap is already in heap order for the direction given, this has no effect.
Input: lh (heap, with internal array) Return: 0 if OK, 1 on error
Notes: (1) This sorts a heap into strict order. (2) For each element, starting at the end of the array and working forward, the element is swapped with the head element and then allowed to swap down onto a heap of size reduced by one. The result is that the heap is reversed but in strict order. The array elements are then reversed to put it in the original order.
Input: lh (heap) Return: 0 if OK, 1 on error
Notes: (1) This is called after an item has been popped off the root of the heap, and the last item in the heap has been placed at the root. (2) To regain the heap order, we let it bubble down, iteratively swapping with one of its children. For a decreasing sort, it swaps with the largest child; for an increasing sort, the smallest. This continues until it either reaches the lowest level in the heap, or the parent finds that neither child should swap with it (e.g., for a decreasing heap, the parent is larger than or equal to both children).
Input: lh (heap)
index (of array corresponding to node to be swapped up)
Return: 0 if OK, 1 on error
Notes: (1) This is called after a new item is put on the heap, at the bottom of a complete tree. (2) To regain the heap order, we let it bubble up, iteratively swapping with its parent, until it either reaches the root of the heap or it finds a parent that is in the correct position already vis-a-vis the child.
| LEPT_DLL l_int32 linearInterpolatePixelColor | ( | l_uint32 * | datas, |
| l_int32 | wpls, | ||
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_float32 | x, | ||
| l_float32 | y, | ||
| l_uint32 | colorval, | ||
| l_uint32 * | pval | ||
| ) |
Input: datas (ptr to beginning of image data)
wpls (32-bit word/line for this data array)
w, h (of image)
x, y (floating pt location for evaluation)
colorval (color brought in from the outside when the
input x,y location is outside the image;
in 0xrrggbb00 format)
&val (<return> interpolated color value)
Return: 0 if OK, 1 on error
Notes: (1) This is a standard linear interpolation function. It is equivalent to area weighting on each component, and avoids "jaggies" when rendering sharp edges.
| LEPT_DLL l_int32 linearInterpolatePixelFloat | ( | l_float32 * | datas, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_float32 | x, | ||
| l_float32 | y, | ||
| l_float32 | inval, | ||
| l_float32 * | pval | ||
| ) |
Input: datas (ptr to beginning of float image data)
wpls (32-bit word/line for this data array)
w, h (of image)
x, y (floating pt location for evaluation)
inval (float value brought in from the outside when the
input x,y location is outside the image)
&val (<return> interpolated float value)
Return: 0 if OK, 1 on error
Notes: (1) This is a standard linear interpolation function. It is equivalent to area weighting on each component, and avoids "jaggies" when rendering sharp edges.
| LEPT_DLL l_int32 linearInterpolatePixelGray | ( | l_uint32 * | datas, |
| l_int32 | wpls, | ||
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_float32 | x, | ||
| l_float32 | y, | ||
| l_int32 | grayval, | ||
| l_int32 * | pval | ||
| ) |
Input: datas (ptr to beginning of image data)
wpls (32-bit word/line for this data array)
w, h (of image)
x, y (floating pt location for evaluation)
grayval (color brought in from the outside when the
input x,y location is outside the image)
&val (<return> interpolated gray value)
Return: 0 if OK, 1 on error
Notes: (1) This is a standard linear interpolation function. It is equivalent to area weighting on each component, and avoids "jaggies" when rendering sharp edges.
Input: datad (dest byte array data, reordered on little-endians)
datas (a src line of pix data)
wpl (number of 32 bit words in the line)
Return: 0 if OK, 1 on error
Notes: (1) This is used on little-endian platforms to swap the bytes within each word in the line of image data. Bytes 0 <==> 3 and 1 <==> 2 are swapped in the dest byte array data8d, relative to the pix data in datas. (2) The bytes represent 8 bit pixel values. They are swapped for little endians so that when the dest array (char *)datad is addressed by bytes, the pixels are chosen sequentially from left to right in the image.
Input: &head (<optional> input head)
data (void* ptr, to be added)
Return: 0 if OK; 1 on error
Notes: (1) This makes a new cell, attaches the data, and adds the cell to the head of the list. (2) When consing from NULL, be sure to initialize head to NULL before calling this function.
Input: &head (<may be updated>, head can be null)
&tail (<updated>, tail can be null)
data (void* ptr, to be hung on tail cons cell)
Return: 0 if OK; 1 on error
Notes: (1) This makes a new cell, attaches the data, and adds the cell to the tail of the list. (2) &head is input to allow the list to be "cons'd" up from NULL. (3) &tail is input to allow the tail to be updated for efficient sequential operation with this function. (4) We assume that if *phead and/or *ptail are not NULL, then they are valid addresses. Therefore: (a) when consing from NULL, be sure to initialize both head and tail to NULL. (b) when tail == NULL for an existing list, the tail will be found and updated.
Input: &head (<to be nulled> head of list) Return: void
Notes: (1) This only destroys the cons cells. Before destroying the list, it is necessary to remove all data and set the data pointers in each cons cell to NULL. (2) listDestroy() will give a warning message for each data ptr that is not NULL.
Input: head (list head)
data (void* address, to be searched for)
Return: cell (the containing cell, or null if not found or on error)
Notes: (1) This returns a ptr to the cell, which is still embedded in the list. (2) This handle and the attached data have not been copied or reference counted, so they must not be destroyed. This violates our basic rule that every handle returned from a function is owned by that function and must be destroyed, but if rules aren't there to be broken, why have them?
Input: head Return: tail, or null on error
Input: head (of list) Return: number of elements; 0 if no list or on error
Input: &head (<optional> input head)
elem (list element to be inserted after;
must be null if head is null)
data (void* ptr, to be added)
Return: 0 if OK; 1 on error
Notes: (1) This can be called on a null list, in which case both head and elem must be null. The head is included in the call to allow "consing" up from NULL. (2) If you are searching through a list, looking for a condition to add an element, you can do something like this: L_BEGIN_LIST_FORWARD(head, elem) <identify an elem to insert after> listInsertAfter(&head, elem, data); L_END_LIST
Input: &head (<optional> input head)
elem (list element to be inserted in front of;
must be null if head is null)
data (void* address, to be added)
Return: 0 if OK; 1 on error
Notes: (1) This can be called on a null list, in which case both head and elem must be null. (2) If you are searching through a list, looking for a condition to add an element, you can do something like this: L_BEGIN_LIST_FORWARD(head, elem) <identify an elem to insert before> listInsertBefore(&head, elem, data); L_END_LIST
Input: &head1 (<may be changed> head of first list)
&head2 (<to be nulled> head of second list)
Return: 0 if OK, 1 on error
Notes: (1) The concatenated list is returned with head1 as the new head. (2) Both input ptrs must exist, though either can have the value NULL.
Input: &head (<can be changed> input head)
elem (list element to be removed)
Return: data (void* struct on cell)
Notes: (1) in ANSI C, it is not necessary to cast return to actual type; e.g., pix = listRemoveElement(&head, elem); but in ANSI C++, it is necessary to do the cast: pix = (Pix *)listRemoveElement(&head, elem);
Input: &head (<to be updated> head of list) Return: data (void* struct on cell), or null on error
Notes: (1) in ANSI C, it is not necessary to cast return to actual type; e.g., pix = listRemoveFromHead(&head); but in ANSI C++, it is necessary to do the cast; e.g., pix = (Pix *)listRemoveFromHead(&head);
Input: &head (<may be changed>, head must NOT be null)
&tail (<always updated>, tail may be null)
Return: data (void* struct on cell) or null on error
Notes: (1) We include &head so that it can be set to NULL if the only element in the list is removed. (2) The function is relying on the fact that if tail is not NULL, then it is a valid address. You can use this function with tail == NULL for an existing list, in which case the tail is found and updated, and the removed element is returned. (3) In ANSI C, it is not necessary to cast return to actual type; e.g., pix = listRemoveFromTail(&head, &tail); but in ANSI C++, it is necessary to do the cast; e.g., pix = (Pix *)listRemoveFromTail(&head, &tail);
Input: &head (<may be changed> list head) Return: 0 if OK, 1 on error
Notes: (1) This reverses the list in-place.
| LEPT_DLL void locateOutsideSeedPixel | ( | l_int32 | fpx, |
| l_int32 | fpy, | ||
| l_int32 | spx, | ||
| l_int32 | spy, | ||
| l_int32 * | pxs, | ||
| l_int32 * | pys | ||
| ) |
Input: fpx, fpy (location of first pixel)
spx, spy (location of second pixel)
&xs, &ys (seed pixel to be returned)
Notes: (1) the first and second pixels must be 8-adjacent, so |dx| <= 1 and |dy| <= 1 and both dx and dy cannot be 0. There are 8 possible cases. (2) the seed pixel is OUTSIDE the foreground of the c.c. (3) these rules are for the situation where the INSIDE of the c.c. is on the right as you follow the border: cw for an exterior border and ccw for a hole border.
| LEPT_DLL l_int32 locatePtRadially | ( | l_int32 | xr, |
| l_int32 | yr, | ||
| l_float64 | dist, | ||
| l_float64 | radang, | ||
| l_float64 * | px, | ||
| l_float64 * | py | ||
| ) |
Input: xr, yr (reference point)
radang (angle in radians, CW from horizontal)
dist (distance of point from reference point along line
given by the specified angle)
&x, &y (<return> location of point)
Return: 0 if OK, 1 on error
Input: lqueue
item to be added to the tail of the queue
Return: 0 if OK, 1 on error
Notes: (1) The algorithm is as follows. If the queue is populated to the end of the allocated array, shift all ptrs toward the beginning of the array, so that the head of the queue is at the beginning of the array. Then, if the array is more than 0.75 full, realloc with double the array size. Finally, add the item to the tail of the queue.
Input: size of ptr array to be alloc'd (0 for default) Return: lqueue, or null on error
Notes: (1) Allocates a ptr array of given size, and initializes counters.
Input: &lqueue (<to be nulled>)
freeflag (TRUE to free each remaining struct in the array)
Return: void
Notes: (1) If freeflag is TRUE, frees each struct in the array. (2) If freeflag is FALSE but there are elements on the array, gives a warning and destroys the array. This will cause a memory leak of all the items that were on the queue. So if the items require their own destroy function, they must be destroyed before the queue. The same applies to the auxiliary stack, if it is used. (3) To destroy the L_Queue, we destroy the ptr array, then the lqueue, and then null the contents of the input ptr.
Input: lqueue Return: count, or 0 on error
Input: stream
lqueue
Return: 0 if OK; 1 on error
Input: lqueue
Return: ptr to item popped from the head of the queue,
or null if the queue is empty or on error
Notes: (1) If this is the last item on the queue, so that the queue becomes empty, nhead is reset to the beginning of the array.
Input: lstack
item to be added to the lstack
Return: 0 if OK; 1 on error.
Input: nalloc (initial ptr array size; use 0 for default) Return: lstack, or null on error
Input: &lstack (<to be nulled>)
freeflag (TRUE to free each remaining struct in the array)
Return: void
Notes: (1) If freeflag is TRUE, frees each struct in the array. (2) If freeflag is FALSE but there are elements on the array, gives a warning and destroys the array. This will cause a memory leak of all the items that were on the lstack. So if the items require their own destroy function, they must be destroyed before the lstack. (3) To destroy the lstack, we destroy the ptr array, then the lstack, and then null the contents of the input ptr.
Input: lstack Return: count, or 0 on error
Input: stream
lstack
Return: 0 if OK; 1 on error
Input: lstack
Return: ptr to item popped from the top of the lstack,
or null if the lstack is empty or on error
| LEPT_DLL l_int32 make8To1DitherTables | ( | l_int32 ** | ptabval, |
| l_int32 ** | ptab38, | ||
| l_int32 ** | ptab14, | ||
| l_int32 | lowerclip, | ||
| l_int32 | upperclip | ||
| ) |
Input: &tabval (value assigned to output pixel; 0 or 1)
&tab38 (amount propagated to pixels left and below)
&tab14 (amount propagated to pixel to left and down)
lowerclip (values near 0 where the excess is not propagated)
upperclip (values near 255 where the deficit is not propagated)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 make8To2DitherTables | ( | l_int32 ** | ptabval, |
| l_int32 ** | ptab38, | ||
| l_int32 ** | ptab14, | ||
| l_int32 | cliptoblack, | ||
| l_int32 | cliptowhite | ||
| ) |
Input: &tabval (value assigned to output pixel; 0, 1, 2 or 3)
&tab38 (amount propagated to pixels left and below)
&tab14 (amount propagated to pixel to left and down)
cliptoblack (values near 0 where the excess is not propagated)
cliptowhite (values near 255 where the deficit is not propagated)
Return: 0 if OK, 1 on error
| LEPT_DLL L_KERNEL* makeDoGKernel | ( | l_int32 | halfheight, |
| l_int32 | halfwidth, | ||
| l_float32 | stdev, | ||
| l_float32 | ratio | ||
| ) |
Input: halfheight, halfwidth (sx = 2 * halfwidth + 1, etc)
stdev (standard deviation of narrower gaussian)
ratio (of stdev for wide filter to stdev for narrow one)
Return: kernel, or null on error
Notes: (1) The DoG (difference of gaussians) is a wavelet mother function with null total sum. By subtracting two blurred versions of the image, it acts as a bandpass filter for frequencies passed by the narrow gaussian but stopped by the wide one. See: http://en.wikipedia.org/wiki/Difference_of_Gaussians (2) The kernel size (sx, sy) = (2 * halfwidth + 1, 2 * halfheight + 1). (3) The kernel center (cx, cy) = (halfwidth, halfheight). (4) The halfwidth and halfheight are typically equal, and are typically several times larger than the standard deviation. (5) The ratio is the ratio of standard deviations of the wide to narrow gaussian. It must be >= 1.0; 1.0 is a no-op. (6) Because the kernel is a null sum, it must be invoked without normalization in pixConvolve().
Input: height, width
cy, cx (origin of kernel)
Return: kernel, or null on error
Notes: (1) This is the same low-pass filtering kernel that is used in the block convolution functions. (2) The kernel origin (@cy, @cx) is typically placed as near the center of the kernel as possible. If height and width are odd, then using cy = height / 2 and cx = width / 2 places the origin at the exact center. (3) This returns a normalized kernel.
| LEPT_DLL L_KERNEL* makeGaussianKernel | ( | l_int32 | halfheight, |
| l_int32 | halfwidth, | ||
| l_float32 | stdev, | ||
| l_float32 | max | ||
| ) |
Input: halfheight, halfwidth (sx = 2 * halfwidth + 1, etc)
stdev (standard deviation)
max (value at (cx,cy))
Return: kernel, or null on error
Notes: (1) The kernel size (sx, sy) = (2 * halfwidth + 1, 2 * halfheight + 1). (2) The kernel center (cx, cy) = (halfwidth, halfheight). (3) The halfwidth and halfheight are typically equal, and are typically several times larger than the standard deviation. (4) If pixConvolve() is invoked with normalization (the sum of kernel elements = 1.0), use 1.0 for max (or any number that's not too small or too large).
| LEPT_DLL l_int32 makeGaussianKernelSep | ( | l_int32 | halfheight, |
| l_int32 | halfwidth, | ||
| l_float32 | stdev, | ||
| l_float32 | max, | ||
| L_KERNEL ** | pkelx, | ||
| L_KERNEL ** | pkely | ||
| ) |
Input: halfheight, halfwidth (sx = 2 * halfwidth + 1, etc)
stdev (standard deviation)
max (value at (cx,cy))
&kelx (<return> x part of kernel)
&kely (<return> y part of kernel)
Return: 0 if OK, 1 on error
Notes: (1) See makeGaussianKernel() for description of input parameters. (2) These kernels are constructed so that the result of both normalized and un-normalized convolution will be the same as when convolving with pixConvolve() using the full kernel. (3) The trick for the un-normalized convolution is to have the product of the two kernel elements at (cx,cy) be equal to max, not max**2. That's why the max for kely is 1.0. If instead we use sqrt(max) for both, the results are slightly less accurate, when compared to using the full kernel in makeGaussianKernel().
| LEPT_DLL l_int32 makeGrayQuantColormapArb | ( | PIX * | pixs, |
| l_int32 * | tab, | ||
| l_int32 | outdepth, | ||
| PIXCMAP ** | pcmap | ||
| ) |
Input: pixs (8 bpp)
tab (table mapping input gray level to cmap index)
outdepth (of colormap: 1, 2, 4 or 8)
&cmap (<return> colormap)
Return: 0 if OK, 1 on error
Notes: (1) The table is a 256-entry inverse colormap: it maps input gray level to colormap index (the bin number). It is computed using makeGrayQuantTableArb(). (2) The colormap generated here has quantized values at the average gray value of the pixels that are in each bin. (3) Returns an error if there are not enough levels in the output colormap for the number of bins. The number of bins must not exceed 2^outdepth.
Input: nlevels (number of output levels)
Return: table (maps input gray level to colormap index,
or null on error)
Notes: (1) 'nlevels' is some number between 2 and 256 (typically 8 or less). (2) The table is typically used for quantizing 2, 4 and 8 bpp grayscale src pix, and generating a colormapped dest pix.
| LEPT_DLL l_int32 makeGrayQuantTableArb | ( | NUMA * | na, |
| l_int32 | outdepth, | ||
| l_int32 ** | ptab, | ||
| PIXCMAP ** | pcmap | ||
| ) |
Input: na (numa of bin boundaries)
outdepth (of colormap: 1, 2, 4 or 8)
&tab (<return> table mapping input gray level to cmap index)
&cmap (<return> colormap)
Return: 0 if OK, 1 on error
Notes: (1) The number of bins is the count of @na + 1. (2) The bin boundaries in na must be sorted in increasing order. (3) The table is an inverse colormap: it maps input gray level to colormap index (the bin number). (4) The colormap generated here has quantized values at the center of each bin. If you want to use the average gray value of pixels within the bin, discard the colormap and compute it using makeGrayQuantColormapArb(). (5) Returns an error if there are not enough levels in the output colormap for the number of bins. The number of bins must not exceed 2^outdepth.
Input: nlevels (number of output levels)
depth (of dest pix, in bpp; 2, 4 or 8 bpp)
Return: table (maps input gray level to thresholded gray level,
or null on error)
Notes: (1) nlevels is some number between 2 and 2^(depth) (2) The table is used in two similar ways:
Input: w, h
direction (L_SCAN_HORIZONTAL or L_SCAN_VERTICAL)
size (of strips in the scan direction)
Return: boxa, or null on error
Notes: (1) For example, this can be used to generate a pixa of vertical strips of width 10 from an image, using: pixGetDimensions(pix, &w, &h, NULL); boxa = makeMosaicStrips(w, h, L_SCAN_HORIZONTAL, 10); pixa = pixClipRectangles(pix, boxa); All strips except the last will be the same width. The last strip will have width w % 10.
Input: bitval (either 0 or 1)
Return: table (giving, for an input byte, the MS bit location,
starting at 0 with the MSBit in the byte),
or null on error.
Notes: (1) If bitval == 1, it finds the leftmost ON pixel in a byte; otherwise if bitval == 0, it finds the leftmost OFF pixel. (2) If there are no pixels of the indicated color in the byte, this returns 8.
| LEPT_DLL l_int32 makeOrientDecision | ( | l_float32 | upconf, |
| l_float32 | leftconf, | ||
| l_float32 | minupconf, | ||
| l_float32 | minratio, | ||
| l_int32 * | porient, | ||
| l_int32 | debug | ||
| ) |
Input: upconf (nonzero)
leftconf (nonzero)
minupconf (minimum value for which a decision can be made)
minratio (minimum conf ratio required for a decision)
&orient (<return> text orientation enum {0,1,2,3,4})
debug (1 for debug output; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) This can be run after pixOrientDetect() (2) Both upconf and leftconf must be nonzero; otherwise the orientation cannot be determined. (3) The abs values of the input confidences are compared to minupconf. (4) The abs value of the largest of (upconf/leftconf) and (leftconf/upconf) is compared with minratio. (5) Input 0.0 for the default values for minupconf and minratio. (6) The return value of orient is interpreted thus: L_TEXT_ORIENT_UNKNOWN: not enough evidence to determine L_TEXT_ORIENT_UP: text rightside-up L_TEXT_ORIENT_LEFT: landscape, text up facing left L_TEXT_ORIENT_DOWN: text upside-down L_TEXT_ORIENT_RIGHT: landscape, text up facing right
Input: void Return: table of 256 l_int32, or null on error
Notes: (1) This table of integers gives the centroid weight of the 1 bits in the 8 bit index. In other words, if sumtab is obtained by makePixelSumTab8, and centroidtab is obtained by makePixelCentroidTab8, then, for 1 <= i <= 255, centroidtab[i] / (float)sumtab[i] is the centroid of the 1 bits in the 8-bit index i, where the MSB is considered to have position 0 and the LSB is considered to have position 7.
Input: void Return: table of 256 l_int32, or null on error
Notes: (1) This table of integers gives the number of 1 bits in the 8 bit index.
| LEPT_DLL PTA* makePlotPtaFromNuma | ( | NUMA * | na, |
| l_int32 | size, | ||
| l_int32 | plotloc, | ||
| l_int32 | linewidth, | ||
| l_int32 | max | ||
| ) |
Input: numa
size (pix height for horizontal plot; width for vertical plot)
plotloc (location of plot: L_PLOT_AT_TOP, etc)
linewidth (width of "line" that is drawn; between 1 and 7)
max (maximum excursion in pixels from baseline)
Return: ptad, or null on error
Notes: (1) This generates points from @numa representing y(x) or x(y) with respect to a pix. A horizontal plot y(x) is drawn for a function of column position, and a vertical plot is drawn for a function x(y) of row position. The baseline is located so that all plot points will fit in the pix. (2) See makePlotPtaFromNumaGen() for more details.
| LEPT_DLL PTA* makePlotPtaFromNumaGen | ( | NUMA * | na, |
| l_int32 | orient, | ||
| l_int32 | linewidth, | ||
| l_int32 | refpos, | ||
| l_int32 | max, | ||
| l_int32 | drawref | ||
| ) |
Input: numa
orient (L_HORIZONTAL_LINE, L_VERTICAL_LINE)
linewidth (width of "line" that is drawn; between 1 and 7)
refpos (reference position: y for horizontal and x for vertical)
max (maximum excursion in pixels from baseline)
drawref (1 to draw the reference line and the normal to it)
Return: ptad, or null on error
Notes: (1) This generates points from @numa representing y(x) or x(y) with respect to a pix. For y(x), we draw a horizontal line at the reference position and a vertical line at the edge; then we draw the values of @numa, scaled so that the maximum excursion from the reference position is @max pixels. (2) The start and delx parameters of @numa are used to refer its values to the raster lines (L_VERTICAL_LINE) or columns (L_HORIZONTAL_LINE). (3) The linewidth is chosen in the interval [1 ... 7]. (4) @refpos should be chosen so the plot is entirely within the pix that it will be painted onto. (5) This would typically be used to plot, in place, a function computed along pixel rows or columns.
Input: range_stdev (> 0) Return: kel, or null on error
Notes: (1) Creates a one-sided Gaussian kernel with the given standard deviation. At grayscale difference of one stdev, the kernel falls to 0.6, and to 0.01 at three stdev. (2) A typical input number might be 20. Then pixels whose value differs by 60 from the center pixel have their weight in the convolution reduced by a factor of about 0.01.
| LEPT_DLL l_int32 makeRGBIndexTables | ( | l_uint32 ** | prtab, |
| l_uint32 ** | pgtab, | ||
| l_uint32 ** | pbtab, | ||
| l_int32 | sigbits | ||
| ) |
Input: &rtab, &gtab, &btab (<return> 256-entry index tables)
sigbits (2-6, significant bits retained in the quantizer
for each component of the input image)
Return: 0 if OK, 1 on error
Notes: (1) These tables are used to map from rgb sample values to an rgb index, using rgbindex = rtab[rval] | gtab[gval] | btab[bval] where, e.g., if sigbits = 3, the index is a 9 bit integer: r7 r6 r5 g7 g6 g5 b7 b6 b5
| LEPT_DLL l_int32 makeRGBToIndexTables | ( | l_uint32 ** | prtab, |
| l_uint32 ** | pgtab, | ||
| l_uint32 ** | pbtab, | ||
| l_int32 | cqlevels | ||
| ) |
Input: &rtab, &gtab, &btab (<return> tables)
cqlevels (can be 1, 2, 3, 4, 5 or 6)
Return: 0 if OK; 1 on error
Set up tables. e.g., for cqlevels = 5, we need an integer 0 < i < 2^15: rtab = (0 i7 0 0 i6 0 0 i5 0 0 i4 0 0 i3 0 0) gtab = (0 0 i7 0 0 i6 0 0 i5 0 0 i4 0 0 i3 0) btab = (0 0 0 i7 0 0 i6 0 0 i5 0 0 i4 0 0 i3)
The tables are then used to map from rgb --> index as follows: index = (0 r7 g7 b7 r6 g6 b6 r5 g5 b5 r4 g4 b4 r3 g3 b3)
e.g., for cqlevels = 4, we map to index = (0 0 0 0 r7 g7 b7 r6 g6 b6 r5 g5 b5 r4 g4 b4)
This may look a bit strange. The notation 'r7' means the MSBit of the r value (which has 8 bits, going down from r7 to r0). Keep in mind that r7 is actually the r component bit for level 1 of the octtree. Level 1 is composed of 8 octcubes, represented by the bits (r7 g7 b7), which divide the entire color space into 8 cubes. At level 2, each of these 8 octcubes is further divided into 8 cubes, each labeled by the second most significant bits (r6 g6 b6) of the rgb color.
This table permutes the bits in a byte, from 0 4 1 5 2 6 3 7 to 0 1 2 3 4 5 6 7
Returns a table of 256 l_uint32s, giving the four output 8-bit grayscale sums corresponding to 8 input bits of a binary image, for a 2x scale-to-gray op. The sums from two adjacent scanlines are then added and transformed to output four 8 bpp pixel values, using makeValTabSG2().
Returns a table of 64 l_uint32s, giving the two output 8-bit grayscale sums corresponding to 6 input bits of a binary image, for a 3x scale-to-gray op. In practice, this would be used three times (on adjacent scanlines), and the sums would be added and then transformed to output 8 bpp pixel values, using makeValTabSG3().
Returns a table of 256 l_uint32s, giving the two output 8-bit grayscale sums corresponding to 8 input bits of a binary image, for a 4x scale-to-gray op. The sums from four adjacent scanlines are then added and transformed to output 8 bpp pixel values, using makeValTabSG4().
Input: result (preallocated on stack or heap and passed in)
nbytes (size of @result array, in bytes)
subdirs (<optional>; can be NULL or an empty string)
Return: 0 if OK, 1 on error
Notes: (1) This generates the directory path for output temp files, written into
Returns an 8 bit value for the sum of ON pixels in a 2x2 square, according to
val = 255 - (255 * sum)/4
where sum is in set {0,1,2,3,4}
Returns an 8 bit value for the sum of ON pixels in a 3x3 square, according to val = 255 - (255 * sum)/9 where sum is in set {0, ... ,9}
Returns an 8 bit value for the sum of ON pixels in a 4x4 square, according to
val = 255 - (255 * sum)/16
where sum is in set {0, ... ,16}
Returns an 8 bit value for the sum of ON pixels in a 6x6 square, according to val = 255 - (255 * sum)/36 where sum is in set {0, ... ,36}
Returns an 8 bit value for the sum of ON pixels in an 8x8 square, according to val = 255 - (255 * sum)/64 where sum is in set {0, ... ,64}
Input: path (preallocated on stack or heap and passed in)
nbytes (size of @path array, in bytes)
flag (L_ADD_TRAIL_SLASH or L_REMOVE_TRAIL_SLASH)
Return: 0 if OK, 1 on error
Notes: (1) This carries out the requested action if necessary.
Input: sarray (of operation sequence) Return: TRUE if valid; FALSE otherwise or on error
Notes: (1) This does verification of valid binary morphological operation sequences. (2) See pixMorphSequence() for notes on valid operations in the sequence.
| LEPT_DLL size_t nbytesInFile | ( | const char * | filename | ) |
Input: filename Return: nbytes in file; 0 on error
| LEPT_DLL l_int32 nextOnPixelInRaster | ( | PIX * | pixs, |
| l_int32 | xstart, | ||
| l_int32 | ystart, | ||
| l_int32 * | px, | ||
| l_int32 * | py | ||
| ) |
Input: pixs (1 bpp)
xstart, ystart (starting point for search)
&x, &y (<return> coord value of next ON pixel)
Return: 1 if a pixel is found; 0 otherwise or on error
| LEPT_DLL l_int32 nextOnPixelInRasterLow | ( | l_uint32 * | data, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpl, | ||
| l_int32 | xstart, | ||
| l_int32 | ystart, | ||
| l_int32 * | px, | ||
| l_int32 * | py | ||
| ) |
Input: naa
na (to be added)
copyflag (L_INSERT, L_COPY, L_CLONE)
Return: 0 if OK, 1 on error
Input: naa
index (of numa within numaa)
val (float or int to be added; stored as a float)
Return: 0 if OK, 1 on error
Notes: (1) Adds to an existing numa only.
| LEPT_DLL l_int32 numaaCompareImagesByBoxes | ( | NUMAA * | naa1, |
| NUMAA * | naa2, | ||
| l_int32 | nperline, | ||
| l_int32 | nreq, | ||
| l_int32 | maxshiftx, | ||
| l_int32 | maxshifty, | ||
| l_int32 | delx, | ||
| l_int32 | dely, | ||
| l_int32 * | psame, | ||
| l_int32 | debugflag | ||
| ) |
Input: naa1 (for image 1, formatted by boxaExtractSortedPattern())
naa2 (ditto; for image 2)
nperline (number of box regions to be used in each textline)
nreq (number of complete row matches required)
maxshiftx (max allowed x shift between two patterns, in pixels)
maxshifty (max allowed y shift between two patterns, in pixels)
delx (max allowed difference in x data, after alignment)
dely (max allowed difference in y data, after alignment)
&same (<return> 1 if @nreq row matches are found; 0 otherwise)
debugflag (1 for debug output)
Return: 0 if OK, 1 on error
Notes: (1) Each input numaa describes a set of sorted bounding boxes (sorted by textline and, within each textline, from left to right) in the images from which they are derived. See boxaExtractSortedPattern() for a description of the data format in each of the input numaa. (2) This function does an alignment between the input descriptions of bounding boxes for two images. The input parameter @nperline specifies the number of boxes to consider in each line when testing for a match, and @nreq is the required number of lines that must be well-aligned to get a match. (3) Testing by alignment has 3 steps: (a) Generating the location of word bounding boxes from the images (prior to calling this function). (b) Listing all possible pairs of aligned rows, based on tolerances in horizontal and vertical positions of the boxes. Specifically, all pairs of rows are enumerated whose first @nperline boxes can be brought into close alignment, based on the delx parameter for boxes in the line and within the overall @maxshiftx and @maxshifty constraints. (c) Each pair, starting with the first, is used to search for a set of @nreq - 1 other pairs that can all be aligned with a difference in global translation of not more than (@delx, @dely).
Input: size of numa ptr array to be alloc'd (0 for default) Return: naa, or null on error
Input: nptr: size of numa ptr array to be alloc'd
n: size of individual numa arrays to be alloc'd (0 for default)
Return: naa, or null on error
Notes: (1) This allocates numaa and fills the array with allocated numas. In use, after calling this function, use numaaAddNumber(naa, index, val); to add val to the index-th numa in naa.
Input: nas
left, right (number of elements to add on each side)
val (initialize border elements)
Return: nad (with added elements at left and right), or null on error
Input: na
val (float or int to be added; stored as a float)
Return: 0 if OK, 1 on error
Input: nas
left, right (number of elements to add on each side)
type (L_CONTINUED_BORDER, L_MIRRORED_BORDER)
Return: nad (with added elements at left and right), or null on error
Input: na
index (element to be changed)
val (new value to be added)
Return: 0 if OK, 1 on error
Notes: (1) This is useful for accumulating sums, regardless of the index order in which the values are made available. (2) Before use, the numa has to be filled up to @index. This would typically be used by creating the numa with the full sized array, initialized to 0.0, using numaMakeConstant().
Input: &numaa <to be nulled if it exists> Return: void
Input: naa Return: count (number of numa), or 0 if no numa or on error
Input: naa
index (to the index-th numa)
accessflag (L_COPY or L_CLONE)
Return: numa, or null on error
Input: naa
index (of numa in naa)
Return: count of numbers in the referenced numa, or 0 on error.
Input: naa
Return: count (total number of numbers in the numaa),
or 0 if no numbers or on error
Input: naa Return: the internal array of ptrs to Numa, or null on error
Notes: (1) This function is convenient for doing direct manipulation on a fixed size array of Numas. To do this, it sets the count to the full size of the allocated array of Numa ptrs. The originating Numaa owns this array: DO NOT free it! (2) Intended usage: Numaa *naa = numaaCreate(n); Numa **array = numaaGetPtrArray(naa); ... [manipulate Numas directly on the array] numaaDestroy(&naa); (3) Cautions:
| LEPT_DLL l_int32 numaaGetValue | ( | NUMAA * | naa, |
| l_int32 | i, | ||
| l_int32 | j, | ||
| l_float32 * | pfval, | ||
| l_int32 * | pival | ||
| ) |
Input: naa
i (index of numa within numaa)
j (index into numa)
fval (<optional return> float value)
ival (<optional return> int value)
Return: 0 if OK, 1 on error
Input: naad (dest naa; add to this one)
naas (<optional> source naa; add from this one)
istart (starting index in naas)
iend (ending index in naas; use -1 to cat all)
Return: 0 if OK, 1 on error
Notes: (1) istart < 0 is taken to mean 'read from the start' (istart = 0) (2) iend < 0 means 'read to the end' (3) if naas == NULL, this is a no-op
Input: filename Return: naa, or null on error
Input: stream Return: naa, or null on error
Input: naa
index (to the index-th numa)
numa (insert and replace any existing one)
Return: 0 if OK, 1 on error
Notes: (1) Any existing numa is destroyed, and the input one is inserted in its place. (2) If the index is invalid, return 1 (error)
Input: nad (<optional>; can be null or equal to na1 (in-place))
na1
na2
op (L_ARITH_ADD, L_ARITH_SUBTRACT,
L_ARITH_MULTIPLY, L_ARITH_DIVIDE)
Return: nad (always: operation applied to na1 and na2)
Notes: (1) The sizes of na1 and na2 must be equal. (2) nad can only be null or equal to na1. (3) To add a constant to a numa, or to multiply a numa by a constant, use numaTransform().
Input: naa Return: 0 if OK, 1 on error
Notes: (1) This identifies the largest index containing a numa that has any numbers within it, destroys all numa beyond that index, and resets the count.
Input: filename, naa Return: 0 if OK, 1 on error
Input: stream, naa Return: 0 if OK, 1 on error
Input: nas (of non-negative integers with a max that is
typically less than 50,000)
sortorder (L_SORT_INCREASING or L_SORT_DECREASING)
Return: na (sorted), or null on error
Notes: (1) Because this uses a bin sort with buckets of size 1, it is not appropriate for sorting either small arrays or arrays containing very large integer values. For such arrays, use a standard general sort function like numaSort().
numaChangeRefCount()
Input: na
delta (change to be applied)
Return: 0 if OK, 1 on error
Input: na (to be sorted) Return: sorttype (L_SHELL_SORT or L_BIN_SORT), or UNDEF on error.
Notes: (1) This selects either a shell sort or a bin sort, depending on the number of elements in nas and the dynamic range. (2) If there are negative values in nas, it selects shell sort.
Input: numa
first, last (clipping interval)
Return: numa with the same values as the input, but clipped
to the specified interval
Note: If you want the indices of the array values to be unchanged, use first = 0. Usage: This is useful to clip a histogram that has a few nonzero values to its nonzero range.
Input: na Return: ptr to same numa, or null on error
Input: nas
size (of sel; greater than 0, odd; origin implicitly in center)
Return: nad (opened), or null on error
Notes: (1) The structuring element (sel) is linear, all "hits" (2) If size == 1, this returns a copy (3) We add a border before doing this operation, for the same reason that we add a border to a pix before doing a safe closing. Without the border, a small component near the border gets clipped at the border on dilation, and can be entirely removed by the following erosion, violating the basic extensivity property of closing.
Input: factor (generally between 0.0 (no enhancement)
and 1.0, but can be larger than 1.0)
Return: na, or null on error
Notes: (1) The mapping is monotonic increasing, where 0 is mapped to 0 and 255 is mapped to 255. (2) As 'factor' is increased from 0.0 (where the mapping is linear), the map gets closer to its limit as a step function that jumps from 0 to 255 at the center (input value = 127).
numaConvertToDna
Input: na Return: da, or null on error
Input: na
Return: na with all values rounded to nearest integer, or
null on error
| LEPT_DLL SARRAY* numaConvertToSarray | ( | NUMA * | na, |
| l_int32 | size1, | ||
| l_int32 | size2, | ||
| l_int32 | addzeros, | ||
| l_int32 | type | ||
| ) |
Input: na
size1 (size of conversion field)
size2 (for float conversion: size of field to the right
of the decimal point)
addzeros (for integer conversion: to add lead zeros)
type (L_INTEGER_VALUE, L_FLOAT_VALUE)
Return: a sarray of the float values converted to strings
representing either integer or float values; or null on error.
Notes: (1) For integer conversion, size2 is ignored. For float conversion, addzeros is ignored.
Input: na Return: copy of numa, or null on error
Input: nad (destination Numa)
nas (source Numa)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 numaCountReversals | ( | NUMA * | nas, |
| l_float32 | minreversal, | ||
| l_int32 * | pnr, | ||
| l_float32 * | pnrpl | ||
| ) |
Input: nas (input values)
minreversal (relative amount to resolve peaks and valleys)
&nr (<optional return> number of reversals)
&nrpl (<optional return> reversal density: reversals/length)
Return: 0 if OK, 1 on error
Notes: (1) The input numa can be generated from pixExtractAlongLine(). If so, the x parameters can be used to find the reversal frequency along a line.
Input: size of number array to be alloc'd (0 for default) Return: na, or null on error
Input: farray (float)
size (of the array)
copyflag (L_INSERT or L_COPY)
Return: na, or null on error
Notes: (1) With L_INSERT, ownership of the input array is transferred to the returned numa, and all @size elements are considered to be valid.
Input: iarray (integer)
size (of the array)
Return: na, or null on error
Notes: (1) We can't insert this int array into the numa, because a numa takes a float array. So this just copies the data from the input array into the numa. The input array continues to be owned by the caller.
Input: string (of comma-separated numbers) Return: na, or null on error
Notes: (1) The numbers can be ints or floats; they will be interpreted and stored as floats. To use them as integers (e.g., for indexing into arrays), use numaGetIValue(...).
Input: nax (<optional> numa of abscissa values)
nay (numa of ordinate values, corresponding to nax)
delta (parameter used to identify when a new peak can be found)
Return: nad (abscissa pts at threshold), or null on error
Notes: (1) If nax == NULL, we use startx and delx from nay to compute the crossing values in nad.
Input: nax (<optional> numa of abscissa values; can be NULL)
nay (numa of ordinate values, corresponding to nax)
thresh (threshold value for nay)
Return: nad (abscissa pts at threshold), or null on error
Notes: (1) If nax == NULL, we use startx and delx from nay to compute the crossing values in nad.
Input: &na (<to be nulled if it exists>) Return: void
Notes: (1) Decrements the ref count and, if 0, destroys the numa. (2) Always nulls the input ptr.
| LEPT_DLL l_int32 numaDifferentiateInterval | ( | NUMA * | nax, |
| NUMA * | nay, | ||
| l_float32 | x0, | ||
| l_float32 | x1, | ||
| l_int32 | npts, | ||
| NUMA ** | pnadx, | ||
| NUMA ** | pnady | ||
| ) |
Input: nax (numa of abscissa values)
nay (numa of ordinate values, corresponding to nax)
x0 (start value of interval)
x1 (end value of interval)
npts (number of points to evaluate function in interval)
&nadx (<optional return> array of x values in interval)
&nady (<return> array of derivatives in interval)
Return: 0 if OK, 1 on error (e.g., if x0 or x1 is outside range)
Notes: (1) The values in nax must be sorted in increasing order. If they are not sorted, it is done in the interpolation step, and a warning is issued. (2) Caller should check for valid return.
Input: nas
size (of sel; greater than 0, odd; origin implicitly in center)
Return: nad (dilated), or null on error
Notes: (1) The structuring element (sel) is linear, all "hits" (2) If size == 1, this returns a copy
| LEPT_DLL l_int32 numaDiscretizeRankAndIntensity | ( | NUMA * | na, |
| l_int32 | nbins, | ||
| NUMA ** | pnarbin, | ||
| NUMA ** | pnam, | ||
| NUMA ** | pnar, | ||
| NUMA ** | pnabb | ||
| ) |
numaDiscretizeRankAndIntensity()
Input: na (normalized histogram of probability density vs intensity)
nbins (number of bins at which the rank is divided)
&pnarbin (<optional return> rank bin value vs intensity)
&pnam (<optional return> median intensity in a bin vs
rank bin value, with @nbins of discretized rank values)
&pnar (<optional return> rank vs intensity; this is
a cumulative norm histogram)
&pnabb (<optional return> intensity at the right bin boundary
vs rank bin)
Return: 0 if OK, 1 on error
Notes: (1) We are inverting the rank(intensity) function to get the intensity(rank) function at @nbins equally spaced values of rank between 0.0 and 1.0. We save integer values for the intensity. (2) We are using the word "intensity" to describe the type of array values, but any array of non-negative numbers will work. (3) The output arrays give the following mappings, where the input is a normalized histogram of array values: array values --> rank bin number (narbin) rank bin number --> median array value in bin (nam) array values --> cumulative norm = rank (nar) rank bin number --> array value at right bin edge (nabb)
Input: na1, na2 (two numas of the same size, typically histograms)
&dist (<return> EM distance)
Return: 0 if OK, 1 on error
Notes: (1) The two numas must have the same size. They do not need to be normalized to the same sum before applying the function. (2) For a 1D discrete function, the implementation of the EMD is trivial. Just keep filling or emptying buckets in one numa to match the amount in the other, moving sequentially along both arrays. (3) We divide the sum of the absolute value of everything moved (by 1 unit at a time) by the sum of the numa (amount of "earth") to get the average distance that the "earth" was moved. This is the value returned here. (4) The caller can do a further normalization, by the number of buckets (minus 1), to get the EM distance as a fraction of the maximum possible distance, which is n-1. This fraction is 1.0 for the situation where all the 'earth' in the first array is at one end, and all in the second array is at the other end.
Input: na Return: 0 if OK; 1 on error
Notes: (1) This does not change the allocation of the array. It just clears the number of stored numbers, so that the array appears to be empty.
Input: pix (8 bpp, no colormap)
fract (fraction of equalization movement of pixel values)
factor (subsampling factor; integer >= 1)
Return: nad, or null on error
Notes: (1) If fract == 0.0, no equalization will be performed. If fract == 1.0, equalization is complete. (2) Set the subsampling factor > 1 to reduce the amount of computation. (3) The map is returned as a numa with 256 values, specifying the equalized value (array value) for every input value (the array index).
Input: nas
size (of sel; greater than 0, odd; origin implicitly in center)
Return: nad (eroded), or null on error
Notes: (1) The structuring element (sel) is linear, all "hits" (2) If size == 1, this returns a copy (3) General comment. The morphological operations are equivalent to those that would be performed on a 1-dimensional fpix. However, because we have not implemented morphological operations on fpix, we do this here. Because it is only 1 dimensional, there is no reason to use the more complicated van Herk/Gil-Werman algorithm, and we do it by brute force.
| LEPT_DLL l_int32 numaEvalBestHaarParameters | ( | NUMA * | nas, |
| l_float32 | relweight, | ||
| l_int32 | nwidth, | ||
| l_int32 | nshift, | ||
| l_float32 | minwidth, | ||
| l_float32 | maxwidth, | ||
| l_float32 * | pbestwidth, | ||
| l_float32 * | pbestshift, | ||
| l_float32 * | pbestscore | ||
| ) |
Input: nas (numa of non-negative signal values)
relweight (relative weight of (-1 comb) / (+1 comb)
contributions to the 'convolution'. In effect,
the convolution kernel is a comb consisting of
alternating +1 and -weight.)
nwidth (number of widths to consider)
nshift (number of shifts to consider for each width)
minwidth (smallest width to consider)
maxwidth (largest width to consider)
&bestwidth (<return> width giving largest score)
&bestshift (<return> shift giving largest score)
&bestscore (<optional return> convolution with
"Haar"-like comb)
Return: 0 if OK, 1 on error
Notes: (1) This does a linear sweep of widths, evaluating at @nshift shifts for each width, computing the score from a convolution with a long comb, and finding the (width, shift) pair that gives the maximum score. The best width is the "half-wavelength" of the signal. (2) The convolving function is a comb of alternating values +1 and -1 * relweight, separated by the width and phased by the shift. This is similar to a Haar transform, except there the convolution is performed with a square wave. (3) The function is useful for finding the line spacing and strength of line signal from pixel sum projections. (4) The score is normalized to the size of nas divided by the number of half-widths. For image applications, the input is typically an array of pixel projections, so one should normalize by dividing the score by the image width in the pixel projection direction.
| LEPT_DLL l_int32 numaEvalHaarSum | ( | NUMA * | nas, |
| l_float32 | width, | ||
| l_float32 | shift, | ||
| l_float32 | relweight, | ||
| l_float32 * | pscore | ||
| ) |
Input: nas (numa of non-negative signal values)
width (distance between +1 and -1 in convolution comb)
shift (phase of the comb: location of first +1)
relweight (relative weight of (-1 comb) / (+1 comb)
contributions to the 'convolution'. In effect,
the convolution kernel is a comb consisting of
alternating +1 and -weight.)
&score (<return> convolution with "Haar"-like comb)
Return: 0 if OK, 1 on error
Notes: (1) This does a convolution with a comb of alternating values +1 and -relweight, separated by the width and phased by the shift. This is similar to a Haar transform, except that for Haar, (1) the convolution kernel is symmetric about 0, so the relweight is 1.0, and (2) the convolution is performed with a square wave. (2) The score is normalized to the size of nas divided by twice the "width". For image applications, the input is typically an array of pixel projections, so one should normalize by dividing the score by the image width in the pixel projection direction. (3) To get a Haar-like result, use relweight = 1.0. For detecting signals where you expect every other sample to be close to zero, as with barcodes or filtered text lines, you can use relweight > 1.0.
Input: nas (input values)
delta (relative amount to resolve peaks and valleys)
Return: nad (locations of extrema), or null on error
Notes: (1) This returns a sequence of extrema (peaks and valleys). (2) The algorithm is analogous to that for determining mountain peaks. Suppose we have a local peak, with bumps on the side. Under what conditions can we consider those 'bumps' to be actual peaks? The answer: if the bump is separated from the peak by a saddle that is at least 500 feet below the bump. (3) Operationally, suppose we are looking for a peak. We are keeping the largest value we've seen since the last valley, and are looking for a value that is delta BELOW our current peak. When we find such a value, we label the peak, use the current value to label the valley, and then do the same operation in reverse (looking for a valley).
Input: source na
max number of peaks to be found
fract1 (min fraction of peak value)
fract2 (min slope)
Return: peak na, or null on error.
Notes: (1) The returned na consists of sets of four numbers representing the peak, in the following order: left edge; peak center; right edge; normalized peak area
Input: na (numa of ordinate values, to fit a max to)
&maxval (<return> max value)
naloc (<optional> associated numa of abscissa values)
&maxloc (<return> abscissa value that gives max value in na;
if naloc == null, this is given as an interpolated
index value)
Return: 0 if OK; 1 on error
Note: if naloc is given, there is no requirement that the data points are evenly spaced. Lagrangian interpolation handles that. The only requirement is that the data points are ordered so that the values in naloc are either increasing or decreasing. We test to make sure that the sizes of na and naloc are equal, and it is assumed that the correspondences na[i] as a function of naloc[i] are properly arranged for all i.
The formula for Lagrangian interpolation through 3 data pts is: y(x) = y1(x-x2)(x-x3)/((x1-x2)(x1-x3)) + y2(x-x1)(x-x3)/((x2-x1)(x2-x3)) + y3(x-x1)(x-x2)/((x3-x1)(x3-x2))
Then the derivative, using the constants (c1,c2,c3) defined below, is set to 0: y'(x) = 2x(c1+c2+c3) - c1(x2+x3) - c2(x1+x3) - c3(x1+x2) = 0
Input: gamma (gamma factor; must be > 0.0)
minval (input value that gives 0 for output)
maxval (input value that gives 255 for output)
Return: na, or null on error
Notes: (1) The map is returned as a numa; values are clipped to [0, 255]. (2) To force all intensities into a range within fraction delta of white, use: minval = -256 * (1 - delta) / delta maxval = 255 (3) To force all intensities into a range within fraction delta of black, use: minval = 0 maxval = 256 * (1 - delta) / delta
Input: na
&val (<return> integer median value)
Return: 0 if OK; 1 on error
Notes: (1) Computes the median value of the numbers in the numa, using bin sort and finding the middle value in the sorted array. (2) See numaGetRankValue() for conditions on na for which this should be used. Otherwise, use numaGetMedian().
Input: na (of non-negative integers with a max that is typically
less than 1,000,000)
sortorder (L_SORT_INCREASING or L_SORT_DECREASING)
Return: na (sorted), or null on error
Notes: (1) This creates an array (or lookup table) that contains the sorted position of the elements in the input Numa. (2) Because it uses a bin sort with buckets of size 1, it is not appropriate for sorting either small arrays or arrays containing very large integer values. For such arrays, use a standard general sort function like numaGetSortIndex().
Input: na Return: count, or 0 if no numbers or on error
Input: numa
type (L_LESS_THAN_ZERO, L_EQUAL_TO_ZERO, L_GREATER_THAN_ZERO)
&count (<return> count of values of given type)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 numaGetEdgeValues | ( | NUMA * | na, |
| l_int32 | edge, | ||
| l_int32 * | pstart, | ||
| l_int32 * | pend, | ||
| l_int32 * | psign | ||
| ) |
Input: na (numa that is output of numaThresholdEdges())
edge (edge number, zero-based)
&start (<optional return> location of start of transition)
&end (<optional return> location of end of transition)
&sign (<optional return> transition sign: +1 is rising,
-1 is falling)
Return: 0 if OK, 1 on error
Input: na
copyflag (L_NOCOPY or L_COPY)
Return: either the bare internal array or a copy of it,
or null on error
Notes: (1) If copyflag == L_COPY, it makes a copy which the caller is responsible for freeing. Otherwise, it operates directly on the bare array of the numa. (2) Very important: for L_NOCOPY, any writes to the array will be in the numa. Do not write beyond the size of the count field, because it will not be accessible from the numa! If necessary, be sure to set the count field to a larger number (such as the alloc size) BEFORE calling this function. Creating with numaMakeConstant() is another way to ensure full initialization.
Input: na
index (into numa)
&val (<return> float value; 0.0 on error)
Return: 0 if OK; 1 on error
Notes: (1) Caller may need to check the function return value to decide if a 0.0 in the returned val is valid.
| LEPT_DLL l_int32 numaGetHistogramStats | ( | NUMA * | nahisto, |
| l_float32 | startx, | ||
| l_float32 | deltax, | ||
| l_float32 * | pxmean, | ||
| l_float32 * | pxmedian, | ||
| l_float32 * | pxmode, | ||
| l_float32 * | pxvariance | ||
| ) |
Input: nahisto (histogram: y(x(i)), i = 0 ... nbins - 1)
startx (x value of first bin: x(0))
deltax (x increment between bins; the bin size; x(1) - x(0))
&xmean (<optional return> mean value of histogram)
&xmedian (<optional return> median value of histogram)
&xmode (<optional return> mode value of histogram:
xmode = x(imode), where y(xmode) >= y(x(i)) for
all i != imode)
&xvariance (<optional return> variance of x)
Return: 0 if OK, 1 on error
Notes: (1) If the histogram represents the relation y(x), the computed values that are returned are the x values. These are NOT the bucket indices i; they are related to the bucket indices by x(i) = startx + i * deltax
| LEPT_DLL l_int32 numaGetHistogramStatsOnInterval | ( | NUMA * | nahisto, |
| l_float32 | startx, | ||
| l_float32 | deltax, | ||
| l_int32 | ifirst, | ||
| l_int32 | ilast, | ||
| l_float32 * | pxmean, | ||
| l_float32 * | pxmedian, | ||
| l_float32 * | pxmode, | ||
| l_float32 * | pxvariance | ||
| ) |
numaGetHistogramStatsOnInterval()
Input: nahisto (histogram: y(x(i)), i = 0 ... nbins - 1)
startx (x value of first bin: x(0))
deltax (x increment between bins; the bin size; x(1) - x(0))
ifirst (first bin to use for collecting stats)
ilast (last bin for collecting stats; use 0 to go to the end)
&xmean (<optional return> mean value of histogram)
&xmedian (<optional return> median value of histogram)
&xmode (<optional return> mode value of histogram:
xmode = x(imode), where y(xmode) >= y(x(i)) for
all i != imode)
&xvariance (<optional return> variance of x)
Return: 0 if OK, 1 on error
Notes: (1) If the histogram represents the relation y(x), the computed values that are returned are the x values. These are NOT the bucket indices i; they are related to the bucket indices by x(i) = startx + i * deltax
Input: na
Return: a copy of the bare internal array, integerized
by rounding, or null on error
Notes: (1) A copy of the array is always made, because we need to generate an integer array from the bare float array. The caller is responsible for freeing the array. (2) The array size is determined by the number of stored numbers, not by the size of the allocated array in the Numa. (3) This function is provided to simplify calculations using the bare internal array, rather than continually calling accessors on the numa. It is typically used on an array of size 256.
Input: na
index (into numa)
&ival (<return> integer value; 0 on error)
Return: 0 if OK; 1 on error
Notes: (1) Caller may need to check the function return value to decide if a 0 in the returned ival is valid.
Input: na
&maxval (<optional return> max value)
&imaxloc (<optional return> index of max location)
Return: 0 if OK; 1 on error
Input: na
&val (<return> median value)
Return: 0 if OK; 1 on error
Notes: (1) Computes the median value of the numbers in the numa, by sorting and finding the middle value in the sorted array.
Input: na
&medval (<optional return> median value)
&medvar (<return> median variation from median val)
Return: 0 if OK; 1 on error
Notes: (1) Finds the median of the absolute value of the variation from the median value in the array. Why take the absolute value? Consider the case where you have values equally distributed about both sides of a median value. Without taking the absolute value of the differences, you will get 0 for the variation, and this is not useful.
Input: na
&minval (<optional return> min value)
&iminloc (<optional return> index of min location)
Return: 0 if OK; 1 on error
Input: na
&val (<return> mode val)
&count (<optional return> mode count)
Return: 0 if OK; 1 on error
Notes: (1) Computes the mode value of the numbers in the numa, by sorting and finding the value of the number with the largest count. (2) Optionally, also returns that count.
| LEPT_DLL l_int32 numaGetNonzeroRange | ( | NUMA * | na, |
| l_float32 | eps, | ||
| l_int32 * | pfirst, | ||
| l_int32 * | plast | ||
| ) |
Input: numa
eps (largest value considered to be zero)
&first, &last (<return> interval of array indices
where values are nonzero)
Return: 0 if OK, 1 on error or if no nonzero range is found.
Input: na
&startx (<optional return> startx)
&delx (<optional return> delx)
Return: 0 if OK, 1 on error
Input: na Return: nasum, or null on error
Notes: (1) nasum[i] is the sum for all j <= i of na[j]. So nasum[0] = na[0]. (2) If you want to generate a rank function, where rank[0] = 0.0, insert a 0.0 at the beginning of the nasum array.
Input: na (just an array of values)
nbins (number of bins at which the rank is divided)
&pnarbin (<optional return> rank bin value vs array value)
&pnam (<optional return> median intensity in a bin vs
rank bin value, with @nbins of discretized rank values)
Return: 0 if OK, 1 on error
Notes: (1) Simple interface for getting a binned rank representation of an input array of values. This returns two mappings: array value --> rank bin number (narbin) rank bin number --> median array value in each rank bin (nam)
| LEPT_DLL l_int32 numaGetRankValue | ( | NUMA * | na, |
| l_float32 | fract, | ||
| NUMA * | nasort, | ||
| l_int32 | usebins, | ||
| l_float32 * | pval | ||
| ) |
Input: na
fract (use 0.0 for smallest, 1.0 for largest)
nasort (<optional> increasing sorted version of na)
usebins (0 for general sort; 1 for bin sort)
&val (<return> rank val)
Return: 0 if OK; 1 on error
Notes: (1) Computes the rank value of a number in the @na, which is the number that is a fraction @fract from the small end of the sorted version of @na. (2) If you do this multiple times for different rank values, sort the array in advance and use that for @nasort; if you're only calling this once, input @nasort == NULL. (3) If @usebins == 1, this uses a bin sorting method. Use this only where:
numaGetRefCount()
Input: na Return: refcount, or UNDEF on error
Input: na
sortorder (L_SORT_INCREASING or L_SORT_DECREASING)
Return: na giving an array of indices that would sort
the input array, or null on error
Input: na (numa that is output of numaLowPassIntervals())
span (span number, zero-based)
&start (<optional return> location of start of transition)
&end (<optional return> location of end of transition)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 numaGetStatsUsingHistogram | ( | NUMA * | na, |
| l_int32 | maxbins, | ||
| l_float32 * | pmin, | ||
| l_float32 * | pmax, | ||
| l_float32 * | pmean, | ||
| l_float32 * | pvariance, | ||
| l_float32 * | pmedian, | ||
| l_float32 | rank, | ||
| l_float32 * | prval, | ||
| NUMA ** | phisto | ||
| ) |
Input: na (an arbitrary set of numbers; not ordered and not
a histogram)
maxbins (the maximum number of bins to be allowed in
the histogram; use an integer larger than the
largest number in @na for consecutive integer bins)
&min (<optional return> min value of set)
&max (<optional return> max value of set)
&mean (<optional return> mean value of set)
&variance (<optional return> variance)
&median (<optional return> median value of set)
rank (in [0.0 ... 1.0]; median has a rank 0.5; ignored
if &rval == NULL)
&rval (<optional return> value in na corresponding to @rank)
&histo (<optional return> Numa histogram; use NULL to prevent)
Return: 0 if OK, 1 on error
Notes: (1) This is a simple interface for gathering statistics from a numa, where a histogram is used 'under the covers' to avoid sorting if a rank value is requested. In that case, by using a histogram we are trading speed for accuracy, because the values in @na are quantized to the center of a set of bins. (2) If the median, other rank value, or histogram are not requested, the calculation is all performed on the input Numa. (3) The variance is the average of the square of the difference from the mean. The median is the value in na with rank 0.5. (4) There are two situations where this gives rank results with accuracy comparable to computing statistics directly on the input data, without binning into a histogram: (a) the data is integers and the range of data is less than @maxbins, and (b) the data is floats and the range is small compared to @maxbins, so that the binsize is much less than 1. (5) If a histogram is used and the numbers in the Numa extend over a large range, you can limit the required storage by specifying the maximum number of bins in the histogram. Use @maxbins == 0 to force the bin size to be 1. (6) This optionally returns the median and one arbitrary rank value. If you need several rank values, return the histogram and use numaHistogramGetValFromRank(nah, rank, &rval) multiple times.
Input: na
&sum (<return> sum of values)
Return: 0 if OK, 1 on error
Input: na
first (beginning index)
last (final index)
&sum (<return> sum of values in the index interval range)
Return: 0 if OK, 1 on error
Input: na
maxsamples (maximum number of samples to check)
&allints (<return> 1 if all sampled values are ints; else 0)
Return: 0 if OK, 1 on error
Notes: (1) Set @maxsamples == 0 to check every integer in na. Otherwise, this samples no more than @maxsamples.
Input: na (histogram)
rval (value of input sample for which we want the rank)
&rank (<return> fraction of total samples below rval)
Return: 0 if OK, 1 on error
Notes: (1) If we think of the histogram as a function y(x), normalized to 1, for a given input value of x, this computes the rank of x, which is the integral of y(x) from the start value of x to the input value. (2) This function only makes sense when applied to a Numa that is a histogram. The values in the histogram can be ints and floats, and are computed as floats. The rank is returned as a float between 0.0 and 1.0. (3) The numa parameters startx and binsize are used to compute x from the Numa index i.
Input: na (histogram)
rank (fraction of total samples)
&rval (<return> approx. to the bin value)
Return: 0 if OK, 1 on error
Notes: (1) If we think of the histogram as a function y(x), this returns the value x such that the integral of y(x) from the start value to x gives the fraction 'rank' of the integral of y(x) over all bins. (2) This function only makes sense when applied to a Numa that is a histogram. The values in the histogram can be ints and floats, and are computed as floats. The val is returned as a float, even though the buckets are of integer width. (3) The numa parameters startx and binsize are used to compute x from the Numa index i.
Input: na
index (location in na to insert new value)
val (float32 or integer to be added)
Return: 0 if OK, 1 on error
Notes: (1) This shifts na[i] --> na[i + 1] for all i >= index, and then inserts val as na[index]. (2) It should not be used repeatedly on large arrays, because the function is O(n).
| LEPT_DLL l_int32 numaIntegrateInterval | ( | NUMA * | nax, |
| NUMA * | nay, | ||
| l_float32 | x0, | ||
| l_float32 | x1, | ||
| l_int32 | npts, | ||
| l_float32 * | psum | ||
| ) |
Input: nax (numa of abscissa values)
nay (numa of ordinate values, corresponding to nax)
x0 (start value of interval)
x1 (end value of interval)
npts (number of points to evaluate function in interval)
&sum (<return> integral of function over interval)
Return: 0 if OK, 1 on error (e.g., if x0 or x1 is outside range)
Notes: (1) The values in nax must be sorted in increasing order. If they are not sorted, it is done in the interpolation step, and a warning is issued. (2) Caller should check for valid return.
| LEPT_DLL l_int32 numaInterpolateArbxInterval | ( | NUMA * | nax, |
| NUMA * | nay, | ||
| l_int32 | type, | ||
| l_float32 | x0, | ||
| l_float32 | x1, | ||
| l_int32 | npts, | ||
| NUMA ** | pnadx, | ||
| NUMA ** | pnady | ||
| ) |
Input: nax (numa of abscissa values)
nay (numa of ordinate values, corresponding to nax)
type (L_LINEAR_INTERP, L_QUADRATIC_INTERP)
x0 (start value of interval)
x1 (end value of interval)
npts (number of points to evaluate function in interval)
&nadx (<optional return> array of x values in interval)
&nady (<return> array of y values in interval)
Return: 0 if OK, 1 on error (e.g., if x0 or x1 is outside range)
Notes: (1) The values in nax must be sorted in increasing order. If they are not sorted, we do it here, and complain. (2) If the values in nax are equally spaced, you can use numaInterpolateEqxInterval(). (3) Caller should check for valid return. (4) We don't call numaInterpolateArbxVal() for each output point, because that requires an O(n) search for each point. Instead, we do a single O(n) pass through nax, saving the indices to be used for each output yval. (5) Uses lagrangian interpolation. See numaInterpolateEqxVal() for formulas.
| LEPT_DLL l_int32 numaInterpolateArbxVal | ( | NUMA * | nax, |
| NUMA * | nay, | ||
| l_int32 | type, | ||
| l_float32 | xval, | ||
| l_float32 * | pyval | ||
| ) |
Input: nax (numa of abscissa values)
nay (numa of ordinate values, corresponding to nax)
type (L_LINEAR_INTERP, L_QUADRATIC_INTERP)
xval
&yval (<return> interpolated value)
Return: 0 if OK, 1 on error (e.g., if xval is outside range)
Notes: (1) The values in nax must be sorted in increasing order. If, additionally, they are equally spaced, you can use numaInterpolateEqxVal(). (2) Caller should check for valid return. (3) Uses lagrangian interpolation. See numaInterpolateEqxVal() for formulas.
| LEPT_DLL l_int32 numaInterpolateEqxInterval | ( | l_float32 | startx, |
| l_float32 | deltax, | ||
| NUMA * | nasy, | ||
| l_int32 | type, | ||
| l_float32 | x0, | ||
| l_float32 | x1, | ||
| l_int32 | npts, | ||
| NUMA ** | pnax, | ||
| NUMA ** | pnay | ||
| ) |
Input: startx (xval corresponding to first element in nasy)
deltax (x increment between array elements in nasy)
nasy (numa of ordinate values, assumed equally spaced)
type (L_LINEAR_INTERP, L_QUADRATIC_INTERP)
x0 (start value of interval)
x1 (end value of interval)
npts (number of points to evaluate function in interval)
&nax (<optional return> array of x values in interval)
&nay (<return> array of y values in interval)
Return: 0 if OK, 1 on error
Notes: (1) Considering nasy as a function of x, the x values are equally spaced. (2) This creates nay (and optionally nax) of interpolated values over the specified interval (x0, x1). (3) If the interval (x0, x1) lies partially outside the array nasy (as interpreted by startx and deltax), it is an error and returns 1. (4) Note that deltax is the intrinsic x-increment for the input array nasy, whereas delx is the intrinsic x-increment for the output interpolated array nay.
| LEPT_DLL l_int32 numaInterpolateEqxVal | ( | l_float32 | startx, |
| l_float32 | deltax, | ||
| NUMA * | nay, | ||
| l_int32 | type, | ||
| l_float32 | xval, | ||
| l_float32 * | pyval | ||
| ) |
Input: startx (xval corresponding to first element in array)
deltax (x increment between array elements)
nay (numa of ordinate values, assumed equally spaced)
type (L_LINEAR_INTERP, L_QUADRATIC_INTERP)
xval
&yval (<return> interpolated value)
Return: 0 if OK, 1 on error (e.g., if xval is outside range)
Notes: (1) Considering nay as a function of x, the x values are equally spaced. (2) Caller should check for valid return.
For linear Lagrangian interpolation (through 2 data pts): y(x) = y1(x-x2)/(x1-x2) + y2(x-x1)/(x2-x1)
For quadratic Lagrangian interpolation (through 3 data pts): y(x) = y1(x-x2)(x-x3)/((x1-x2)(x1-x3)) + y2(x-x1)(x-x3)/((x2-x1)(x2-x3)) + y3(x-x1)(x-x2)/((x3-x1)(x3-x2))
Input: na1, na2 Return: nad (with the intersection of the numa set), or null on error
Notes: (1) See sarrayIntersection() for the approach. (2) Here, the key in building the sorted tree is the number itself. (3) A bucket sort approach can be used if the numbers are integers and if they are small enough, because that is O(n) instead of O(nlogn).
Input: nad (<optional> can be null or equal to nas (in-place))
nas
Return: nad (always: 'inverts' nas)
Notes: (1) This is intended for use with indicator arrays (0s and 1s). It gives a boolean-type output, taking the input as an integer and inverting it: 0 --> 1 anything else --> 0
Input: nas Return: nad (the inverted map), or null on error or if not invertible
Notes: (1) This requires that nas contain each integer from 0 to n-1. The array is typically an index array into a sort or permutation of another array.
Input: nas
sortorder (L_SORT_INCREASING or L_SORT_DECREASING)
&sorted (<return> 1 if sorted; 0 if not)
Return: 0 if OK; 1 on error
Notes: (1) This is a quick O(n) test if nas is sorted. It is useful in situations where the array is likely to be already sorted, and a sort operation can be avoided.
Input: nad (dest numa; add to this one)
nas (<optional> source numa; add from this one)
istart (starting index in nas)
iend (ending index in nas; use -1 to cat all)
Return: 0 if OK, 1 on error
Notes: (1) istart < 0 is taken to mean 'read from the start' (istart = 0) (2) iend < 0 means 'read to the end' (3) if nas == NULL, this is a no-op
Input: nad (<optional> can be null or equal to na1 (in-place))
na1
na2
op (L_UNION, L_INTERSECTION, L_SUBTRACTION, L_EXCLUSIVE_OR)
Return: nad (always: operation applied to na1 and na2)
Notes: (1) The sizes of na1 and na2 must be equal. (2) nad can only be null or equal to na1. (3) This is intended for use with indicator arrays (0s and 1s). Input data is extracted as integers (0 == false, anything else == true); output results are 0 and 1. (4) L_SUBTRACTION is subtraction of val2 from val1. For bit logical arithmetic this is (val1 & ~val2), but because these values are integers, we use (val1 && !val2).
Input: nas (input numa)
thresh (threshold fraction of max; in [0.0 ... 1.0])
maxn (for normalizing; set maxn = 0.0 to use the max in nas)
Output: nad (interval abscissa pairs), or null on error
Notes: (1) For each interval where the value is less than a specified fraction of the maximum, this records the left and right "x" value.
Input: nad (can be null for new array, or the same as nas for inplace)
nas (input numa)
Return: nad (with all numbers being the absval of the input),
or null on error
Input: val
size (of numa)
Return: numa (of given size with all entries equal to 'val'),
or null on error
Input: nas (input numa)
Return: numa (of difference values val[i+1] - val[i]),
or null on error
| LEPT_DLL NUMA* numaMakeHistogram | ( | NUMA * | na, |
| l_int32 | maxbins, | ||
| l_int32 * | pbinsize, | ||
| l_int32 * | pbinstart | ||
| ) |
Input: na
maxbins (max number of histogram bins)
&binsize (<return> size of histogram bins)
&binstart (<optional return> start val of minimum bin;
input NULL to force start at 0)
Return: na consisting of histogram of integerized values,
or null on error.
Note: (1) This simple interface is designed for integer data. The bins are of integer width and start on integer boundaries, so the results on float data will not have high precision. (2) Specify the max number of input bins. Then @binsize, the size of bins necessary to accommodate the input data, is returned. It is one of the sequence: {1, 2, 5, 10, 20, 50, ...}. (3) If &binstart is given, all values are accommodated, and the min value of the starting bin is returned. Otherwise, all negative values are discarded and the histogram bins start at 0.
Input: na (numa of floats; these may be integers)
maxbins (max number of histogram bins; >= 1)
Return: na consisting of histogram of quantized float values,
or null on error.
Notes: (1) This simple interface is designed for accurate binning of both integer and float data. (2) If the array data is integers, and the range of integers is smaller than @maxbins, they are binned as they fall, with binsize = 1. (3) If the range of data, (maxval - minval), is larger than @maxbins, or if the data is floats, they are binned into exactly @maxbins bins. (4) Unlike numaMakeHistogram(), these bins in general have non-integer location and width, even for integer data.
Input: na
binsize (typically 1.0)
maxsize (of histogram ordinate)
Return: na (histogram of bins of size @binsize, starting with
the na[0] (x = 0.0) and going up to a maximum of
x = @maxsize, by increments of @binsize), or null on error
Notes: (1) This simple function generates a histogram of values from na, discarding all values < 0.0 or greater than min(@maxsize, maxval), where maxval is the maximum value in na. The histogram data is put in bins of size delx = @binsize, starting at x = 0.0. We use as many bins as are needed to hold the data.
| LEPT_DLL l_int32 numaMakeRankFromHistogram | ( | l_float32 | startx, |
| l_float32 | deltax, | ||
| NUMA * | nasy, | ||
| l_int32 | npts, | ||
| NUMA ** | pnax, | ||
| NUMA ** | pnay | ||
| ) |
Input: startx (xval corresponding to first element in nasy)
deltax (x increment between array elements in nasy)
nasy (input histogram, assumed equally spaced)
npts (number of points to evaluate rank function)
&nax (<optional return> array of x values in range)
&nay (<return> rank array of specified npts)
Return: 0 if OK, 1 on error
Input: startval
increment
size (of sequence)
Return: numa of sequence of evenly spaced values, or null on error
Input: nas (input numa)
thresh (threshold value)
type (L_SELECT_IF_LT, L_SELECT_IF_GT,
L_SELECT_IF_LTE, L_SELECT_IF_GTE)
Output: nad (indicator array: values are 0 and 1)
Notes: (1) For each element in nas, if the constraint given by 'type' correctly specifies its relation to thresh, a value of 1 is recorded in nad.
Input: nas (input histogram)
tsum (target sum of all numbers in dest histogram;
e.g., use @tsum= 1.0 if this represents a
probability distribution)
Return: nad (normalized histogram), or null on error
Input: nas
size (of sel; greater than 0, odd; origin implicitly in center)
Return: nad (opened), or null on error
Notes: (1) The structuring element (sel) is linear, all "hits" (2) If size == 1, this returns a copy
Input: size (of sequence)
seed (for random number generation)
Return: na (pseudorandom on {0,...,size - 1}), or null on error
Notes: (1) This uses the Durstenfeld shuffle. See: http://en.wikipedia.org/wiki/Fisher–Yates_shuffle. Result is a pseudorandom permutation of the sequence of integers from 0 to size - 1.
| LEPT_DLL NUMA* numaQuantizeCrossingsByWidth | ( | NUMA * | nas, |
| l_float32 | binfract, | ||
| NUMA ** | pnaehist, | ||
| NUMA ** | pnaohist, | ||
| l_int32 | debugflag | ||
| ) |
numaQuantizeCrossingsByWidth()
Input: nas (numa of crossing locations, in pixel units)
binfract (histo binsize as a fraction of minsize; e.g., 0.25)
&naehist (<optional return> histo of even (black) bar widths)
&naohist (<optional return> histo of odd (white) bar widths)
debugflag (1 to generate plots of histograms of bar widths)
Return: nad (sequence of widths, in unit sizes), or null on error
Notes: (1) This first computes the histogram of black and white bar widths, binned in appropriate units. There should be well-defined peaks, each corresponding to a specific width. The sequence of barcode widths (namely, the integers from the set {1,2,3,4}) is returned. (2) The optional returned histograms are binned in width units that are inversely proportional to @binfract. For example, if @binfract = 0.25, there are 4.0 bins in the distance of the width of the narrowest bar.
| LEPT_DLL NUMA* numaQuantizeCrossingsByWindow | ( | NUMA * | nas, |
| l_float32 | ratio, | ||
| l_float32 * | pwidth, | ||
| l_float32 * | pfirstloc, | ||
| NUMA ** | pnac, | ||
| l_int32 | debugflag | ||
| ) |
numaQuantizeCrossingsByWindow()
Input: nas (numa of crossing locations)
ratio (of max window size over min window size in search;
typ. 2.0)
&width (<optional return> best window width)
&firstloc (<optional return> center of window for first xing)
&nac (<optional return> array of window crossings (0, 1, 2))
debugflag (1 to generate various plots of intermediate results)
Return: nad (sequence of widths, in unit sizes), or null on error
Notes: (1) The minimum size of the window is set by the minimum distance between zero crossings. (2) The optional return signal @nac is a sequence of 0s, 1s, and perhaps a few 2s, giving the number of crossings in each window. On the occasion where there is a '2', it is interpreted as ending two runs: the previous one and another one that has length 1.
Input: nas (input array)
seed (for random number generation)
Return: nas (randomly shuffled array), or null on error
Input: filename Return: na, or null on error
Input: stream Return: numa, or null on error
Input: nas (input histogram)
newsize (number of old bins contained in each new bin)
Return: nad (more coarsely re-binned histogram), or null on error
Input: nas
left, right (number of elements to remove from each side)
Return: nad (with removed elements at left and right), or null on error
Input: nas Return: nad (with duplicates removed), or null on error
Input: na
index (element to be removed)
Return: 0 if OK, 1 on error
Notes: (1) This shifts na[i] --> na[i - 1] for all i > index. (2) It should not be used repeatedly on large arrays, because the function is O(n).
Input: na
index (element to be replaced)
val (new value to replace old one)
Return: 0 if OK, 1 on error
Input: nad (<optional> can be null or equal to nas)
nas (input numa)
Output: nad (reversed), or null on error
Notes: (1) Usage: numaReverse(nas, nas); // in-place nad = numaReverse(NULL, nas); // makes a new one
| LEPT_DLL l_int32 numaSelectCrossingThreshold | ( | NUMA * | nax, |
| NUMA * | nay, | ||
| l_float32 | estthresh, | ||
| l_float32 * | pbestthresh | ||
| ) |
Input: nax (<optional> numa of abscissa values; can be NULL)
nay (signal)
estthresh (estimated pixel threshold for crossing: e.g., for
images, white <--> black; typ. ~120)
&bestthresh (<return> robust estimate of threshold to use)
Return: 0 if OK, 1 on error
Note: (1) When a valid threshold is used, the number of crossings is a maximum, because none are missed. If no threshold intersects all the crossings, the crossings must be determined with numaCrossingsByPeaks(). (2) @estthresh is an input estimate of the threshold that should be used. We compute the crossings with 41 thresholds (20 below and 20 above). There is a range in which the number of crossings is a maximum. Return a threshold in the center of this stable plateau of crossings. This can then be used with numaCrossingsByThreshold() to get a good estimate of crossing locations.
Input: na
newcount
Return: 0 if OK, 1 on error
Notes: (1) If newcount <= na->nalloc, this resets na->n. Using newcount = 0 is equivalent to numaEmpty(). (2) If newcount > na->nalloc, this causes a realloc to a size na->nalloc = newcount. (3) All the previously unused values in na are set to 0.0.
Input: na
startx (x value corresponding to na[0])
delx (difference in x values for the situation where the
elements of na correspond to the evaluation of a
function at equal intervals of size @delx)
Return: 0 if OK, 1 on error
Input: na
index (to element to be set)
val (to set element)
Return: 0 if OK; 1 on error
Input: na
index (to element to change relative to the current value)
diff (increment if diff > 0 or decrement if diff < 0)
Return: 0 if OK; 1 on error
Input: na1
na2
maxdiff (use 0.0 for exact equality)
&similar (<return> 1 if similar; 0 if different)
Return: 0 if OK, 1 on error
Notes: (1) Float values can differ slightly due to roundoff and accumulated errors. Using @maxdiff > 0.0 allows similar arrays to be identified.
| LEPT_DLL l_int32 numaSimpleStats | ( | NUMA * | na, |
| l_int32 | first, | ||
| l_int32 | last, | ||
| l_float32 * | pmean, | ||
| l_float32 * | pvar, | ||
| l_float32 * | prvar | ||
| ) |
Input: na (input numa)
first (first element to use)
last (last element to use; 0 to go to the end)
&mean (<optional return> mean value)
&var (<optional return> variance)
&rvar (<optional return> rms deviation from the mean)
Return: 0 if OK, 1 on error
Input: naout (output numa; can be NULL or equal to nain)
nain (input numa)
sortorder (L_SORT_INCREASING or L_SORT_DECREASING)
Return: naout (output sorted numa), or null on error
Notes: (1) Set naout = nain for in-place; otherwise, set naout = NULL. (2) Source: Shell sort, modified from K&R, 2nd edition, p.62. Slow but simple O(n logn) sort.
Input: nas (input numa)
sortorder (L_SORT_INCREASING or L_SORT_DECREASING)
Return: naout (output sorted numa), or null on error
Notes: (1) This does either a shell sort or a bin sort, depending on the number of elements in nas and the dynamic range.
Input: nas
naindex (na that maps from the new numa to the input numa)
Return: nad (sorted), or null on error
| LEPT_DLL l_int32 numaSortGeneral | ( | NUMA * | na, |
| NUMA ** | pnasort, | ||
| NUMA ** | pnaindex, | ||
| NUMA ** | pnainvert, | ||
| l_int32 | sortorder, | ||
| l_int32 | sorttype | ||
| ) |
Input: na (source numa)
nasort (<optional> sorted numa)
naindex (<optional> index of elements in na associated
with each element of nasort)
nainvert (<optional> index of elements in nasort associated
with each element of na)
sortorder (L_SORT_INCREASING or L_SORT_DECREASING)
sorttype (L_SHELL_SORT or L_BIN_SORT)
Return: 0 if OK, 1 on error
Notes: (1) Sorting can be confusing. Here's an array of five values with the results shown for the 3 output arrays.
na      nasort   naindex   nainvert
-----------------------------------
3         9         2         3
4         6         3         2
9         4         1         0
6         3         0         1
1         1         4         4
Note that naindex is a LUT into na for the sorted array values, and nainvert directly gives the sorted index values for the input array. It is useful to view naindex as a map: 0 --> 2, 1 --> 3, 2 --> 1, 3 --> 0, 4 --> 4; and nainvert as the inverse of this map: 0 --> 3, 1 --> 2, 2 --> 0, 3 --> 1, 4 --> 4.
We can write these relations symbolically as: nasort[i] = na[naindex[i]] na[i] = nasort[nainvert[i]]
Input: nas
sortorder (L_SORT_INCREASING or L_SORT_DECREASING)
Return: nad (indices of nas, sorted by value in nas), or null on error
Notes: (1) This does either a shell sort or a bin sort, depending on the number of elements in nas and the dynamic range.
| LEPT_DLL l_int32 numaSortPair | ( | NUMA * | nax, |
| NUMA * | nay, | ||
| l_int32 | sortorder, | ||
| NUMA ** | pnasx, | ||
| NUMA ** | pnasy | ||
| ) |
Input: nax, nay (input arrays)
sortorder (L_SORT_INCREASING or L_SORT_DECREASING)
&nasx (<return> sorted)
&nasy (<return> sorted exactly in order of nasx)
Return: 0 if OK, 1 on error
Notes: (1) This function sorts the two input arrays, nax and nay, together, using nax as the key for sorting.
| LEPT_DLL l_int32 numaSplitDistribution | ( | NUMA * | na, |
| l_float32 | scorefract, | ||
| l_int32 * | psplitindex, | ||
| l_float32 * | pave1, | ||
| l_float32 * | pave2, | ||
| l_float32 * | pnum1, | ||
| l_float32 * | pnum2, | ||
| NUMA ** | pnascore | ||
| ) |
Input: na (histogram)
scorefract (fraction of the max score, used to determine
the range over which the histogram min is searched)
&splitindex (<optional return> index for splitting)
&ave1 (<optional return> average of lower distribution)
&ave2 (<optional return> average of upper distribution)
&num1 (<optional return> population of lower distribution)
&num2 (<optional return> population of upper distribution)
&nascore (<optional return> for debugging; otherwise use null)
Return: 0 if OK, 1 on error
Notes: (1) This function is intended to be used on a distribution of values that represent two sets, such as a histogram of pixel values for an image with a fg and bg, and the goal is to determine the averages of the two sets and the best splitting point. (2) The Otsu method finds a split point that divides the distribution into two parts by maximizing a score function that is the product of two terms: (a) the square of the difference of centroids, (ave1 - ave2)^2; (b) fract1 * (1 - fract1), where fract1 is the fraction in the lower distribution. (3) This works well for images where the fg and bg are each relatively homogeneous and well-separated in color. However, if the actual fg and bg sets are very different in size, and the bg is highly varied, as can occur in some scanned document images, this will bias the split point into the larger "bump" (i.e., toward the point where the (b) term reaches its maximum of 0.25 at fract1 = 0.5). To avoid this, we define a range of values near the maximum of the score function, and choose the value within this range such that the histogram itself has a minimum value. The range is determined by scorefract: we include all abscissa values to the left and right of the value that maximizes the score, such that the score stays above (1 - scorefract) * maxscore. The intuition behind this modification is to try to find a split point that both has a high variance score and is at or near a minimum in the histogram, so that the histogram slope is small at the split point. (4) We normalize the score so that if the two distributions were of equal size and at opposite ends of the numa, the score would be 1.0.
Input: nas
subfactor (subsample factor, >= 1)
Return: nad (evenly sampled values from nas), or null on error
| LEPT_DLL NUMA* numaThresholdEdges | ( | NUMA * | nas, |
| l_float32 | thresh1, | ||
| l_float32 | thresh2, | ||
| l_float32 | maxn | ||
| ) |
Input: nas (input numa)
thresh1 (low threshold as fraction of max; in [0.0 ... 1.0])
thresh2 (high threshold as fraction of max; in [0.0 ... 1.0])
maxn (for normalizing; set maxn = 0.0 to use the max in nas)
Output: nad (edge interval triplets), or null on error
Notes: (1) For each edge interval, where the value is less than @thresh1 on one side, greater than @thresh2 on the other, and between these thresholds throughout the interval, this records a triplet of values: the 'left' and 'right' edges, and either +1 or -1, depending on whether the edge is rising or falling. (2) No assumption is made about the value outside the array, so if the value at the array edge is between the threshold values, it is not considered part of an edge. We start looking for edge intervals only after leaving the thresholded band.
Input: nas (input numa)
nsamp (number of samples)
Output: nad (resampled array), or null on error
Notes: (1) This resamples the values in the array, using @nsamp equal divisions.
Input: na1, na2 Return: nad (with the union of the set of numbers), or null on error
Notes: (1) See sarrayUnion() for the approach. (2) Here, the key in building the sorted tree is the number itself. (3) A bucket sort approach can be used if the numbers are integers and if they are small enough, because that is O(n) instead of O(nlogn).
Input: nas
wc (half width of the convolution window)
Return: nad (after low-pass filtering), or null on error
Notes: (1) This is a convolution. The window has width = 2 * @wc + 1. (2) We add a mirrored border of size @wc to each end of the array.
Input: nas
wc (half width of the window)
Return: nad (containing windowed mean square values), or null on error
Notes: (1) The window has width = 2 * @wc + 1. (2) We add a mirrored border of size @wc to each end of the array.
Input: nas
halfwin (half width of window over which the median is found)
Return: nad (after windowed median filtering), or null on error
Notes: (1) The requested window has width = 2 * @halfwin + 1. (2) If the input nas has less than 3 elements, return a copy. (3) If the filter is too small (@halfwin <= 0), return a copy. (4) If the filter is too large, it is reduced in size. (5) We add a mirrored border of size @halfwin to each end of the array to simplify the calculation by avoiding end-effects.
| LEPT_DLL l_int32 numaWindowedStats | ( | NUMA * | nas, |
| l_int32 | wc, | ||
| NUMA ** | pnam, | ||
| NUMA ** | pnams, | ||
| NUMA ** | pnav, | ||
| NUMA ** | pnarv | ||
| ) |
Input: nas (input numa)
wc (half width of the window)
&nam (<optional return> mean value in window)
&nams (<optional return> mean square value in window)
&nav (<optional return> variance in window)
&narv (<optional return> rms deviation from the mean)
Return: 0 if OK, 1 on error
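Notes: (1) This is a high-level convenience function for calculating any or all of these derived arrays. (2) These statistical measures over the values in the rectangular window are: the mean value <x> (nam); the mean square value <x*x> (nams); the variance <(x - <x>)*(x - <x>)> = <x*x> - <x>*<x> (nav); and the rms deviation from the mean, which is the square root of the variance (narv). Here the brackets < .. > denote an average taken over the window.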
Input: nam (windowed mean values)
nams (windowed mean square values)
&nav (<optional return> numa of variance -- the ms deviation
from the mean)
&narv (<optional return> numa of rms deviation from the mean)
Return: 0 if OK, 1 on error
Notes: (1) The numas of windowed mean and mean square are precomputed, using numaWindowedMean() and numaWindowedMeanSquare(). (2) Either or both of the variance and square-root of variance are returned, where the variance is the average over the window of the mean square difference of the pixel value from the mean: <(x - <x>)*(x - <x>)> = <x*x> - <x>*<x>
Input: filename, na Return: 0 if OK, 1 on error
Input: stream, na Return: 0 if OK, 1 on error
| LEPT_DLL char* parseForProtos | ( | const char * | filein, |
| const char * | prestring | ||
| ) |
Input: string (containing numbers; not changed)
seps (string of characters that can be used between ints)
Return: numa (of numbers found), or null on error
Note: (1) The numbers can be ints or floats.
| LEPT_DLL char* pathJoin | ( | const char * | dir, |
| const char * | fname | ||
| ) |
Input: paa
box
copyflag (L_INSERT, L_COPY, L_CLONE)
Return: 0 if OK, 1 on error
Notes: (1) The box can be used, for example, to hold the support region of a pixa that is being added to the pixaa.
Input: paa (input paa)
index (index of pixa in paa)
pix (to be added)
box (<optional> to be added)
copyflag (L_INSERT, L_COPY, L_CLONE)
Return: 0 if OK; 1 on error
Input: paa
pixa (to be added)
copyflag:
L_INSERT inserts the pixa directly
L_COPY makes a new pixa and copies each pix and each box
L_CLONE gives a new handle to the input pixa
L_COPY_CLONE makes a new pixa and inserts clones of
all pix and boxes
Return: 0 if OK; 1 on error
| LEPT_DLL l_int32 pixaAccumulateSamples | ( | PIXA * | pixa, |
| PTA * | pta, | ||
| PIX ** | ppixd, | ||
| l_float32 * | px, | ||
| l_float32 * | py | ||
| ) |
Input: pixa (of samples from the same class, 1 bpp)
pta (<optional> of centroids of the samples)
&ppixd (<return> accumulated samples, 8 bpp)
&px (<optional return> average x coordinate of centroids)
&py (<optional return> average y coordinate of centroids)
Return: 0 on success, 1 on failure
Notes: (1) This generates an aligned (by centroid) sum of the input pix. (2) We use only the first 256 samples; that's plenty. (3) If pta is not input, we generate two tables, and discard after use. If this is called many times, it is better to precompute the pta.
Input: paa Return: 0 if OK, 1 on error
Notes: (1) This destroys all pixa in the pixaa, and nulls the ptrs in the pixa ptr array.
Input: n (initial number of pixa ptrs) Return: paa, or null on error
Notes: (1) A pixaa provides a 2-level hierarchy of images. A common use is for segmentation masks, which are inexpensive to store in png format. (2) For example, suppose you want a mask for each textline in a two-column page. The textline masks for each column can be represented by a pixa, of which there are 2 in the pixaa. The boxes for the textline mask components within a column can have their origin referred to the column rather than the page. Then the boxa field can be used to represent the two box (regions) for the columns, and the (x,y) components of each box can be used to get the absolute position of the textlines on the page.
Input: pixa
n (number specifying subdivision of pixa)
type (L_CHOOSE_CONSECUTIVE, L_CHOOSE_SKIP_BY)
copyflag (L_CLONE, L_COPY)
Return: paa, or null on error
Notes: (1) This subdivides a pixa into a set of smaller pixa that are accumulated into a pixaa. (2) If type == L_CHOOSE_CONSECUTIVE, the first 'n' pix are put in a pixa and added to pixaa, then the next 'n', etc. If type == L_CHOOSE_SKIP_BY, the first pixa is made by aggregating pix[0], pix[n], pix[2*n], etc. (3) The copyflag specifies if each new pix is a copy or a clone.
| LEPT_DLL PIXA* pixaAddBorderGeneral | ( | PIXA * | pixad, |
| PIXA * | pixas, | ||
| l_int32 | left, | ||
| l_int32 | right, | ||
| l_int32 | top, | ||
| l_int32 | bot, | ||
| l_uint32 | val | ||
| ) |
Input: pixad (can be null or equal to pixas)
pixas (containing pix of all depths; colormap ok)
left, right, top, bot (number of pixels added)
val (value of added border pixels)
Return: pixad (with border added to each pix), including on error
Notes: (1) For binary images: white: val = 0 black: val = 1 For grayscale images: white: val = 2 ** d - 1 black: val = 0 For rgb color images: white: val = 0xffffff00 black: val = 0 For colormapped images, use 'index' found this way: white: pixcmapGetRankIntensity(cmap, 1.0, &index); black: pixcmapGetRankIntensity(cmap, 0.0, &index); (2) For in-place replacement of each pix with a bordered version, use @pixad = @pixas. To make a new pixa, use @pixad = NULL. (3) In both cases, the boxa has sides adjusted as if it were expanded by the border.
Input: pixa
box
copyflag (L_INSERT, L_COPY, L_CLONE)
Return: 0 if OK, 1 on error
Input: pixa
pix (to be added)
copyflag (L_INSERT, L_COPY, L_CLONE)
Return: 0 if OK; 1 on error
| LEPT_DLL l_int32 pixaAddPixWithText | ( | PIXA * | pixa, |
| PIX * | pixs, | ||
| l_int32 | reduction, | ||
| L_BMF * | bmf, | ||
| const char * | textstr, | ||
| l_uint32 | val, | ||
| l_int32 | location | ||
| ) |
Input: pixa
pixs (any depth, colormap ok)
reduction (integer subsampling factor)
bmf (<optional> bitmap font data)
textstr (<optional> text string to be added)
val (color to set the text)
location (L_ADD_ABOVE, L_ADD_BELOW, L_ADD_LEFT, L_ADD_RIGHT)
Return: 0 if OK, 1 on error.
Notes: (1) This function generates a new pix with added text, and adds it by insertion into the pixa. (2) If the input pixs is not cmapped and not 32 bpp, it is converted to 32 bpp rgb. @val is a standard 32 bpp pixel, expressed as 0xrrggbb00. If there is a colormap, this does the best it can to use the requested color, or something close. (3) if @bmf == NULL, generate an 8 pt font; this takes about 5 msec. (4) If @textstr == NULL, use the text field in the pix. (5) In general, the text string can be written in multiple lines; use newlines as the separators. (6) Typical usage is for debugging, where the pixa of labelled images is used to generate a pdf. Suggest using 1.0 for scalefactor.
| LEPT_DLL PIXA* pixaAddTextlines | ( | PIXA * | pixas, |
| L_BMF * | bmf, | ||
| SARRAY * | sa, | ||
| l_uint32 | val, | ||
| l_int32 | location | ||
| ) |
Input: pixas (input pixa; colormap ok)
bmf (bitmap font data)
sa (<optional> sarray; use text embedded in each pix if null)
val (color to set the text)
location (L_ADD_ABOVE, L_ADD_BELOW, L_ADD_LEFT, L_ADD_RIGHT)
Return: pixad (new pixa with rendered text), or null on error
Notes: (1) This function adds one or more lines of text externally to each pix, in a position given by @location. In all cases, the pix is expanded as necessary to accommodate the text. (2) @val is the pixel value to be painted through the font mask. It should be chosen to agree with the depth of pixs. If it is out of bounds, an intermediate value is chosen. For RGB, use hex notation: 0xRRGGBB00, where RR is the hex representation of the red intensity, etc. (3) If sa == NULL, use the text embedded in each pix. In all cases, newlines in the text string are used to separate the lines of text that are added to the pix. (4) If sa has a smaller count than pixa, issue a warning and do not use any embedded text. (5) If there is a colormap, this does the best it can to use the requested color, or something similar to it.
| LEPT_DLL PIXA* pixaAddTextNumber | ( | PIXA * | pixas, |
| L_BMF * | bmf, | ||
| NUMA * | na, | ||
| l_uint32 | val, | ||
| l_int32 | location | ||
| ) |
Input: pixas (input pixa; colormap ok)
bmf (bitmap font data)
numa (<optional> number array; use 1 ... n if null)
val (color to set the text)
location (L_ADD_ABOVE, L_ADD_BELOW, L_ADD_LEFT, L_ADD_RIGHT)
Return: pixad (new pixa with rendered numbers), or null on error
Notes: (1) Typical usage is for labelling each pix in a pixa with a number. (2) This function paints numbers external to each pix, in a position given by @location. In all cases, the pix is expanded on one side and the number is painted over white in the added region. (3) @val is the pixel value to be painted through the font mask. It should be chosen to agree with the depth of pixs. If it is out of bounds, an intermediate value is chosen. For RGB, use hex notation: 0xRRGGBB00, where RR is the hex representation of the red intensity, etc. (4) If na == NULL, number each pix sequentially, starting with 1. (5) If there is a colormap, this does the best it can to use the requested color, or something similar to it.
Input: &paa <to be nulled> Return: void
Input: paa
w, h (if set to 0, determines the size from the
b.b. of the components in paa)
Return: pix, or null on error
Notes: (1) Each pix of the paa is displayed at the location given by its box, translated by the box of the containing pixa if it exists.
Input: paa (with pix that may have different depths)
xspace between pix in pixa
yspace between pixa
max width of output pix
Return: pixd, or null on error
Notes: (1) Displays each pixa on a line (or set of lines), in order from top to bottom. Within each pixa, the pix are displayed in order from left to right. (2) The sizes and depths of each pix can differ. The output pix has a depth equal to the max depth of all the pix. (3) This ignores the boxa of the paa.
| LEPT_DLL PIXA* pixaaDisplayTiledAndScaled | ( | PIXAA * | paa, |
| l_int32 | outdepth, | ||
| l_int32 | tilewidth, | ||
| l_int32 | ncols, | ||
| l_int32 | background, | ||
| l_int32 | spacing, | ||
| l_int32 | border | ||
| ) |
Input: paa
outdepth (output depth: 1, 8 or 32 bpp)
tilewidth (each pix is scaled to this width)
ncols (number of tiles in each row)
background (0 for white, 1 for black; this is the color
of the spacing between the images)
spacing (between images, and on outside)
border (width of additional black border on each image;
use 0 for no border)
Return: pixa (of tiled images, one image for each pixa in
the paa), or null on error
Notes: (1) For each pixa, this generates from all the pix a tiled/scaled output pix, and puts it in the output pixa. (2) See comments in pixaDisplayTiledAndScaled().
Input: paa Return: 0 if OK; 1 on error
Input: paa
&naindex (<optional return> the pixa index in the pixaa)
copyflag (L_COPY or L_CLONE)
Return: pixa, or null on error
Notes: (1) This 'flattens' the pixaa to a pixa, taking the pix in order in the first pixa, then the second, etc. (2) If &naindex is defined, we generate a Numa that gives, for each pix in the pixaa, the index of the pixa to which it belongs.
Input: paa
accesstype (L_COPY, L_CLONE)
Return: boxa, or null on error
Notes: (1) L_COPY returns a copy; L_CLONE returns a new reference to the boxa. (2) In both cases, invoke boxaDestroy() on the returned boxa.
Input: paa
&na (<optional return> number of pix in each pixa)
Return: count, or 0 if no pixaa
Notes: (1) If paa is empty, a returned na will also be empty.
Input: paa
index (index into the pixa array in the pixaa)
ipix (index into the pix array in the pixa)
accessflag (L_COPY or L_CLONE)
Return: pix, or null on error
Input: paa
index (to the index-th pixa)
accesstype (L_COPY, L_CLONE, L_COPY_CLONE)
Return: pixa, or null on error
Notes: (1) L_COPY makes a new pixa with a copy of every pix (2) L_CLONE just makes a new reference to the pixa, and bumps the counter. You would use this, for example, when you need to extract some data from a pix within a pixa within a pixaa. (3) L_COPY_CLONE makes a new pixa with a clone of every pix and box (4) In all cases, you must invoke pixaDestroy() on the returned pixa
Input: paa (typically empty)
pixa (to be replicated into the entire pixa ptr array)
Return: 0 if OK, 1 on error
Notes: (1) This initializes a pixaa by filling up the entire pixa ptr array with copies of @pixa. Any existing pixa are destroyed. (2) Example usage. This function is useful to prepare for a random insertion (or replacement) of pixa into a pixaa. To randomly insert pixa into a pixaa, up to some index "max":
    Pixaa *paa = pixaaCreate(max);
    Pixa *pixa = pixaCreate(1);  // if you want little memory
    pixaaInitFull(paa, pixa);  // copy it to entire array
    pixaDestroy(&pixa);  // no longer needed
The initialization allows the pixaa to always be properly filled.
Input: paa
&full (<return> 1 if all pixa in the paa have full pix arrays)
Return: 0 if OK, 1 on error
Notes: (1) Does not require boxa associated with each pixa to be full.
Input: paad (dest pixaa; add to this one)
paas (<optional> source pixaa; add from this one)
istart (starting index in paas)
iend (ending index in paas; use -1 to cat all)
Return: 0 if OK, 1 on error
Notes: (1) This appends a clone of each indicated pixa in paas to paad (2) istart < 0 is taken to mean 'read from the start' (istart = 0) (3) iend < 0 means 'read to the end'
Input: pixa
&hascmap (<return> 1 if any pix has a colormap; 0 otherwise)
Return: 0 if OK; 1 on error
Input: filename Return: paa, or null on error
Notes: (1) The pix are stored in the file as png. If the png library is not linked, this will fail.
| LEPT_DLL PIXAA* pixaaReadFromFiles | ( | const char * | dirname, |
| const char * | substr, | ||
| l_int32 | first, | ||
| l_int32 | nfiles | ||
| ) |
Input: dirname (directory)
substr (<optional> substring filter on filenames; can be NULL)
first (0-based)
nfiles (use 0 for everything from @first to the end)
Return: paa, or null on error or if no pixa files are found.
Notes: (1) The files must be serialized pixa files (e.g., *.pa) If some files cannot be read, warnings are issued. (2) Use @substr to filter filenames in the directory. If @substr == NULL, this takes all files. (3) After filtering, use @first and @nfiles to select a contiguous set of files, that have been lexically sorted in increasing order.
Input: stream Return: paa, or null on error
Notes: (1) The pix are stored in the file as png. If the png library is not linked, this will fail.
Input: paa
index (to the index-th pixa)
pixa (insert to replace existing one)
Return: 0 if OK, 1 on error
Notes: (1) This allows random insertion of a pixa into a pixaa, with destruction of any existing pixa at that location. The input pixa is now owned by the pixaa. (2) No other pixa in the array are affected. (3) The index must be within the allowed set.
Input: paas
wd (target width; use 0 if using height as target)
hd (target height; use 0 if using width as target)
Return: paad, or null on error
Notes: (1) This guarantees that each output scaled image has the dimension(s) you specify.
Input: paas
nawd (<optional> target widths; use NULL if using height)
nahd (<optional> target height; use NULL if using width)
Return: paad, or null on error
Notes: (1) This guarantees that the scaled images in each pixa have the dimension(s) you specify in the numas.
Input: paas
first (use 0 to select from the beginning)
last (use 0 to select to the end)
copyflag (L_COPY, L_CLONE)
Return: paad, or null on error
Notes: (1) The copyflag specifies what we do with each pixa from paas. Specifically, L_CLONE inserts a clone into paad of each selected pixa from paas.
| LEPT_DLL l_int32 pixaaSizeRange | ( | PIXAA * | paa, |
| l_int32 * | pminw, | ||
| l_int32 * | pminh, | ||
| l_int32 * | pmaxw, | ||
| l_int32 * | pmaxh | ||
| ) |
Input: paa
&minw, &minh, &maxw, &maxh (<optional return> range of
dimensions of all boxes)
Return: 0 if OK, 1 on error
Input: paa Return: 0 if OK, 1 on error
Notes: (1) This identifies the largest index containing a pixa that has any pix within it, destroys all pixa above that index, and resets the count.
Input: paa
&maxdepth (<optional return> max depth of all pix in pixaa)
Return: depth (return 0 if they're not all the same, or on error)
Input: filename
paa
Return: 0 if OK, 1 on error
Notes: (1) The pix are stored in the file as png. If the png library is not linked, this will fail.
Input: stream (opened for "wb")
paa
Return: 0 if OK, 1 on error
Notes: (1) The pix are stored in the file as png. If the png library is not linked, this will fail.
| LEPT_DLL PIXA* pixaBinSort | ( | PIXA * | pixas, |
| l_int32 | sorttype, | ||
| l_int32 | sortorder, | ||
| NUMA ** | pnaindex, | ||
| l_int32 | copyflag | ||
| ) |
Input: pixas
sorttype (L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH,
L_SORT_BY_HEIGHT, L_SORT_BY_PERIMETER)
sortorder (L_SORT_INCREASING, L_SORT_DECREASING)
&naindex (<optional return> index of sorted order into
original array)
copyflag (L_COPY, L_CLONE)
Return: pixad (sorted version of pixas), or null on error
Notes: (1) This sorts based on the data in the boxa. If the boxa count is not the same as the pixa count, this returns an error. (2) The copyflag refers to the pix and box copies that are inserted into the sorted pixa. These are either L_COPY or L_CLONE. (3) For a large number of boxes (say, greater than 1000), this O(n) binsort is much faster than the O(nlogn) shellsort. For 5000 components, this is over 20x faster than boxaSort(). (4) Consequently, pixaSort() calls this function if it will likely go much faster.
Input: pix (8 bpp; no colormap)
box (<optional> clipping box for region; can be null)
Return: na of abs val pixel difference averages by column,
or null on error
Notes: (1) This is an average over differences of adjacent pixels along each column. (2) To resample for a bin size different from 1, use numaUniformSampling() on the result of this function.
Input: pix (8 bpp; no colormap)
box (<optional> clipping box for region; can be null)
Return: na of abs val pixel difference averages by row, or null on error
Notes: (1) This is an average over differences of adjacent pixels along each row. (2) To resample for a bin size different from 1, use numaUniformSampling() on the result of this function.
Input: pixs1, pixs2 (both either 8 or 16 bpp gray, or 32 bpp RGB) Return: pixd, or null on error
Notes: (1) The depth of pixs1 and pixs2 must be equal. (2) Clips computation to the min size, aligning the UL corners (3) For 8 and 16 bpp, assumes one gray component. (4) For 32 bpp, assumes 3 color components, and ignores the LSB of each word (the alpha channel) (5) Computes the absolute value of the difference between each component value.
Input: pix (8 bpp; not cmapped)
box (<optional> if null, use entire image)
dir (differences along L_HORIZONTAL_LINE or L_VERTICAL_LINE)
&absdiff (<return> average of abs diff pixel values in region)
Return: 0 if OK; 1 on error
Notes: (1) This gives the average over the abs val of differences of adjacent pixel values, along either each row (dir == L_HORIZONTAL_LINE) or each column (dir == L_VERTICAL_LINE).
| LEPT_DLL l_int32 pixAbsDiffOnLine | ( | PIX * | pix, |
| l_int32 | x1, | ||
| l_int32 | y1, | ||
| l_int32 | x2, | ||
| l_int32 | y2, | ||
| l_float32 * | pabsdiff | ||
| ) |
Input: pix (8 bpp; not cmapped)
x1, y1 (first point; x1 <= x2, y1 <= y2)
x2, y2 (second point)
&absdiff (<return> average of abs diff pixel values on line)
Return: 0 if OK; 1 on error
Notes: (1) This gives the average over the abs val of differences of adjacent pixel values, along a line that is either horizontal or vertical. (2) If horizontal, require x1 < x2; if vertical, require y1 < y2.
Input: pixacc
pix (to be added)
Return: 0 if OK, 1 on error
Input: w, h (of 32 bpp internal Pix)
negflag (0 if only positive numbers are involved;
1 if there will be negative numbers)
Return: pixacc, or null on error
Notes: (1) Use @negflag = 1 for safety if any negative numbers are going to be used in the chain of operations. Negative numbers arise, e.g., by subtracting a pix, or by adding a pix that has been pre-multiplied by a negative number. (2) Initializes the internal 32 bpp pix, similarly to the initialization in pixInitAccumulate().
Input: pix
negflag (0 if only positive numbers are involved;
1 if there will be negative numbers)
Return: pixacc, or null on error
Notes: (1) See pixaccCreate()
Input: pixacc
outdepth (8, 16 or 32 bpp)
Return: pixd (8, 16 or 32 bpp), or null on error
Input: pixacc Return: offset, or -1 on error
Input: pixacc Return: pix, or null on error
Input: pixacc
factor
Return: 0 if OK, 1 on error
Input: pixacc
pix
factor
Return: 0 if OK, 1 on error
Notes: (1) This creates a temp pix that is @pix multiplied by the constant @factor. It then adds that into @pixacc.
Input: pixacc
pix (to be subtracted)
Return: 0 if OK, 1 on error
Input: pixd (32 bpp)
pixs (1, 8, 16 or 32 bpp)
op (L_ARITH_ADD or L_ARITH_SUBTRACT)
Return: 0 if OK; 1 on error
Notes: (1) This adds or subtracts each pixs value from pixd. (2) This clips to the minimum of pixs and pixd, so they do not need to be the same size. (3) The alignment is to the origin (UL corner) of pixs & pixd.
Input: pixa of components (1 or 8 bpp)
Return: pta of centroids relative to the UL corner of
each pix, or null on error
Notes: (1) An error message is returned if any pix has something other than 1 bpp or 8 bpp depth, and the centroid from that pix is saved as (0, 0).
Input: pixa Return: 0 if OK, 1 on error
Input: pixa Return: 0 if OK, 1 on error
Notes: (1) This destroys all pix in the pixa, as well as all boxes in the boxa. The ptrs in the pix ptr array are all null'd. The number of allocated pix, n, is set to 0.
Input: pixas
pixs
Return: pixad, or null on error
Notes: (1) This is intended for use in situations where pixas was originally generated from the input pixs. (2) Returns a pixad where each pix in pixas is ANDed with its associated region of the input pixs. This region is specified by the box that is associated with the pix. (3) In a typical application of this function, pixas has a set of region masks, so this generates a pixa of the parts of pixs that correspond to each region mask component, along with the bounding box for the region.
Input: pixac
box
copyflag (L_INSERT, L_COPY)
Return: 0 if OK, 1 on error
Input: pixac
pix (to be added)
comptype (IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG)
Return: 0 if OK; 1 on error
Notes: (1) The array is filled up to the (n-1)-th element, and this converts the input pix to a pixcomp and adds it at the n-th position.
Input: pixac
pixc (to be added by insertion)
Return: 0 if OK; 1 on error
| LEPT_DLL l_int32 pixaComparePhotoRegionsByHisto | ( | PIXA * | pixa, |
| l_float32 | minratio, | ||
| l_float32 | textthresh, | ||
| l_int32 | factor, | ||
| l_int32 | nx, | ||
| l_int32 | ny, | ||
| l_float32 | simthresh, | ||
| NUMA ** | pnai, | ||
| l_float32 ** | pscores, | ||
| PIX ** | ppixd | ||
| ) |
pixaComparePhotoRegionsByHisto()
Input: pixa (any depth; colormap OK)
minratio (requiring sizes be compatible; < 1.0)
textthresh (threshold for text/photo; use 0 for default)
factor (subsampling; >= 1)
nx, ny (number of subregions to use for histograms; e.g. 3x3)
simthresh (threshold for similarity; use 0 for default)
&nai (<return> array giving similarity class indices)
&scores (<optional return> score matrix as 1-D array of
size N^2)
&pixd (<optional return> pix of similarity classes)
Return: 0 if OK, 1 on error
Notes: (1) This function takes a pixa of cropped photo images and compares each one to the others for similarity. Each image is first tested to see if it is a photo that can be compared by tiled histograms. If so, it is padded to put the centroid in the center of the image, and the histograms are generated. The final step of comparing each histogram with all the others is very fast. (2) An initial filter gives @score = 0 if the ratio of widths and heights (smallest / largest) does not exceed a threshold @minratio. If set at 1.0, both images must be exactly the same size. A typical value for @minratio is 0.9. (3) The comparison score between two images is a value in [0.0 .. 1.0]. If the comparison score >= @simthresh, the images are placed in the same similarity class. Default value for @simthresh is 0.25. (4) An array @nai of similarity class indices for pix in the input pixa is returned. (5) There are two debugging options:
| LEPT_DLL l_int32 pixacompConvertToPdf | ( | PIXAC * | pixac, |
| l_int32 | res, | ||
| l_float32 | scalefactor, | ||
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | title, | ||
| const char * | fileout | ||
| ) |
Input: pixac (containing images all at the same resolution)
res (override the resolution of each input image, in ppi;
use 0 to respect the resolution embedded in the input)
scalefactor (scaling factor applied to each image; > 0.0)
type (encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
L_FLATE_ENCODE, or L_DEFAULT_ENCODE for default))
quality (used for JPEG only; 0 for default (75))
title (<optional> pdf title)
fileout (pdf file of all images)
Return: 0 if OK, 1 on error
Notes: (1) This follows closely the function pixaConvertToPdf() in pdfio.c. (2) The images are encoded with G4 if 1 bpp; JPEG if 8 bpp without colormap and many colors, or 32 bpp; FLATE for anything else. (3) The scalefactor must be > 0.0; otherwise it is set to 1.0. (4) Specifying one of the three encoding types for @type forces all images to be compressed with that type. Use 0 to have the type determined for each image based on depth and whether or not it has a colormap.
| LEPT_DLL l_int32 pixacompConvertToPdfData | ( | PIXAC * | pixac, |
| l_int32 | res, | ||
| l_float32 | scalefactor, | ||
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | title, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
Input: pixac (containing images all at the same resolution)
res (input resolution of all images)
scalefactor (scaling factor applied to each image; > 0.0)
type (encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
L_FLATE_ENCODE, or L_DEFAULT_ENCODE for default))
quality (used for JPEG only; 0 for default (75))
title (<optional> pdf title)
&data (<return> output pdf data (of all images))
&nbytes (<return> size of output pdf data)
Return: 0 if OK, 1 on error
Notes: (1) See pixacompConvertToPdf().
Input: n (initial number of ptrs) Return: pixac, or null on error
| LEPT_DLL PIXAC* pixacompCreateFromFiles | ( | const char * | dirname, |
| const char * | substr, | ||
| l_int32 | comptype | ||
| ) |
Input: dirname
substr (<optional> substring filter on filenames; can be null)
comptype (IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG)
Return: pixac, or null on error
Notes: (1) @dirname is the full path for the directory. (2) @substr is the part of the file name (excluding the directory) that is to be matched. All matching filenames are read into the Pixacomp. If substr is NULL, all filenames are read into the Pixacomp. (3) Use @comptype == IFF_DEFAULT to have the compression type automatically determined for each file. (4) If the comptype is invalid for a file, the default will be substituted.
Input: pixa
comptype (IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG)
accesstype (L_COPY, L_CLONE, L_COPY_CLONE)
Return: 0 if OK, 1 on error
Notes: (1) If @comptype == IFF_DEFAULT, the conversion format for each image is chosen automatically. Otherwise, we use the specified format unless it can't be done (e.g., jpeg for a 1, 2 or 4 bpp pix, or a pix with a colormap), in which case we use the default (assumed best) compression. (2) @accesstype is used to extract a boxa from @pixa.
Input: sarray (full pathnames for all files)
comptype (IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG)
Return: pixac, or null on error
Notes: (1) Use @comptype == IFF_DEFAULT to have the compression type automatically determined for each file. (2) If the comptype is invalid for a file, the default will be substituted.
Input: n (initial number of ptrs)
offset (difference: accessor index - pixacomp array index)
pix (<optional> initialize each ptr in pixacomp to this pix;
can be NULL)
comptype (IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG)
Return: pixac, or null on error
Notes: (1) Initializes a pixacomp to be fully populated with @pix, compressed using @comptype. If @pix == NULL, @comptype is ignored. (2) Typically, the array is initialized with a tiny pix. This is most easily done by setting @pix == NULL, causing initialization of each array element with a tiny placeholder pix (w = h = d = 1), using comptype = IFF_TIFF_G4. (3) Example usage:
    // Generate pixacomp for pages 30 - 49. This has an array
    // size of 20 and the page number offset is 30.
    PixaComp *pixac = pixacompCreateWithInit(20, 30, NULL, IFF_TIFF_G4);
    // Now insert png-compressed images into the initialized array
    for (pageno = 30; pageno < 50; pageno++) {
        Pix *pixt = ... // derived from image[pageno]
        if (pixt)
            pixacompReplacePix(pixac, pageno, pixt, IFF_PNG);
        pixDestroy(&pixt);
    }
The result is a pixac with 20 compressed strings, and with selected pixt replacing the placeholders. To extract the image for page 38, which is decompressed from element 8 in the array, use:
    pixt = pixacompGetPix(pixac, 38);
Input: &pixac (<to be nulled>) Return: void
Notes: (1) Always nulls the input ptr.
| LEPT_DLL PIX* pixacompDisplayTiledAndScaled | ( | PIXAC * | pixac, |
| l_int32 | outdepth, | ||
| l_int32 | tilewidth, | ||
| l_int32 | ncols, | ||
| l_int32 | background, | ||
| l_int32 | spacing, | ||
| l_int32 | border | ||
| ) |
pixacompDisplayTiledAndScaled()
Input: pixac
outdepth (output depth: 1, 8 or 32 bpp)
tilewidth (each pix is scaled to this width)
ncols (number of tiles in each row)
background (0 for white, 1 for black; this is the color
of the spacing between the images)
spacing (between images, and on outside)
border (width of additional black border on each image;
use 0 for no border)
Return: pix of tiled images, or null on error
Notes: (1) This is the same function as pixaDisplayTiledAndScaled(), except it works on a Pixacomp instead of a Pixa. It is particularly useful for showing the images in a Pixacomp at reduced resolution. (2) This can be used to tile a number of renderings of an image that are at different scales and depths. (3) Each image, after scaling and optionally adding the black border, has width 'tilewidth'. Thus, the border does not affect the spacing between the image tiles. The maximum allowed border width is tilewidth / 5.
Input: pixac
index (caller's view of index within pixac; includes offset)
accesstype (L_COPY or L_CLONE)
Return: box (if null, not automatically an error), or null on error
Notes: (1) The @index includes the offset, which must be subtracted to get the actual index into the ptr array. (2) There is always a boxa with a pixac, and it is initialized so that each box ptr is NULL. (3) In general, we expect that there is either a box associated with each pixc, or no boxes at all in the boxa. (4) Having no boxes is thus not an automatic error. Whether it is an actual error is determined by the calling program. If the caller expects to get a box, it is an error; see, e.g., pixacompGetBoxGeometry().
Input: pixac
accesstype (L_COPY, L_CLONE, L_COPY_CLONE)
Return: boxa, or null on error
Input: pixac Return: count, or 0 on error
| LEPT_DLL l_int32 pixacompGetBoxGeometry | ( | PIXAC * | pixac, |
| l_int32 | index, | ||
| l_int32 * | px, | ||
| l_int32 * | py, | ||
| l_int32 * | pw, | ||
| l_int32 * | ph | ||
| ) |
Input: pixac
index (caller's view of index within pixac; includes offset)
&x, &y, &w, &h (<optional return>; each can be null)
Return: 0 if OK, 1 on error
Notes: (1) The @index includes the offset, which must be subtracted to get the actual index into the ptr array.
Input: pixac Return: count, or 0 if no pixa
Input: pixac Return: offset, or 0 on error
Notes: (1) The offset is the difference between the caller's view of the index into the array and the actual array index. By default it is 0.
Input: pixac
index (caller's view of index within pixac; includes offset)
Return: pix, or null on error
Notes: (1) The @index includes the offset, which must be subtracted to get the actual index into the ptr array.
Input: pixac
index (caller's view of index within pixac; includes offset)
Return: pixc, or null on error
Notes: (1) The @index includes the offset, which must be subtracted to get the actual index into the ptr array. (2) Important: this is just a ptr to the pixc owned by the pixac. Do not destroy unless you are replacing the pixc.
| LEPT_DLL l_int32 pixacompGetPixDimensions | ( | PIXAC * | pixac, |
| l_int32 | index, | ||
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pd | ||
| ) |
Input: pixac
index (caller's view of index within pixac; includes offset)
&w, &h, &d (<optional return>; each can be null)
Return: 0 if OK, 1 on error
Notes: (1) The @index includes the offset, which must be subtracted to get the actual index into the ptr array.
Input: filename Return: pixac, or null on error
Notes: (1) Unlike the situation with serialized Pixa, where the image data is stored in png format, the Pixacomp image data can be stored in tiffg4, png and jpg formats.
Input: stream Return: pixac, or null on error
Input: pixac
index (caller's view of index within pixac; includes offset)
pix (owned by the caller)
comptype (IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG)
Return: 0 if OK; 1 on error
Notes: (1) The @index includes the offset, which must be subtracted to get the actual index into the ptr array. (2) The input @pix is converted to a pixc, which is then inserted into the pixac.
Input: pixac
index (caller's view of index within pixac; includes offset)
pixc (to replace existing one, which is destroyed)
Return: 0 if OK; 1 on error
Notes: (1) The @index includes the offset, which must be subtracted to get the actual index into the ptr array. (2) The inserted @pixc is now owned by the pixac. The caller must not destroy it.
Input: pixac
offset (non-negative)
Return: 0 if OK, 1 on error
Notes: (1) The offset is the difference between the caller's view of the index into the array and the actual array index. By default it is 0.
Input: filename
pixac
Return: 0 if OK, 1 on error
Notes: (1) Unlike the situation with serialized Pixa, where the image data is stored in png format, the Pixacomp image data can be stored in tiffg4, png and jpg formats.
Input: stream
pixac
Return: 0 if OK, 1 on error
Input: fp (file stream)
pixac
text (<optional> identifying string; can be null)
Return: 0 if OK, 1 on error
Input: pixas
thresh (threshold for final binarization from 8 bpp gray)
Return: pixad, or null on error
Input: pixas Return: pixad (32 bpp rgb), or null on error
Notes: (1) See notes for pixConvertTo32(), applied to each pix in pixas.
Input: pixas
cmapflag (1 to give pixd a colormap; 0 otherwise)
Return: pixad (each pix is 8 bpp), or null on error
Notes: (1) See notes for pixConvertTo8(), applied to each pix in pixas.
Input: pixas
ditherflag (1 to dither if necessary; 0 otherwise)
Return: pixad (each pix is 8 bpp), or null on error
Notes: (1) See notes for pixConvertTo8Color(), applied to each pix in pixas.
| LEPT_DLL l_int32 pixaConvertToPdf | ( | PIXA * | pixa, |
| l_int32 | res, | ||
| l_float32 | scalefactor, | ||
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | title, | ||
| const char * | fileout | ||
| ) |
Input: pixa (containing images all at the same resolution)
res (override the resolution of each input image, in ppi;
use 0 to respect the resolution embedded in the input)
scalefactor (scaling factor applied to each image; > 0.0)
type (encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
L_FLATE_ENCODE, or 0 for default))
quality (used for JPEG only; 0 for default (75))
title (<optional> pdf title; if null, taken from the first
image filename)
fileout (pdf file of all images)
Return: 0 if OK, 1 on error
Notes: (1) The images are encoded with G4 if 1 bpp; JPEG if 8 bpp without colormap and many colors, or 32 bpp; FLATE for anything else. (2) The scalefactor must be > 0.0; otherwise it is set to 1.0. (3) Specifying one of the three encoding types for @type forces all images to be compressed with that type. Use 0 to have the type determined for each image based on depth and whether or not it has a colormap.
| LEPT_DLL l_int32 pixaConvertToPdfData | ( | PIXA * | pixa, |
| l_int32 | res, | ||
| l_float32 | scalefactor, | ||
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | title, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
Input: pixa (containing images all at the same resolution)
res (input resolution of all images)
scalefactor (scaling factor applied to each image; > 0.0)
type (encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
L_FLATE_ENCODE, or 0 for default))
quality (used for JPEG only; 0 for default (75))
title (<optional> pdf title)
&data (<return> output pdf data (of all images))
&nbytes (<return> size of output pdf data)
Return: 0 if OK, 1 on error
Notes: (1) See pixaConvertToPdf().
Input: pixas Return: pixad, or null on error
Notes: (1) If any pix has a colormap, they are all converted to rgb. Otherwise, they are all converted to the maximum depth of all the pix. (2) This can be used to allow lossless rendering onto a single pix.
Input: pixas
copyflag (see pix.h for details):
L_COPY makes a new pixa and copies each pix and each box
L_CLONE gives a new ref-counted handle to the input pixa
L_COPY_CLONE makes a new pixa and inserts clones of
all pix and boxes
Return: new pixa, or null on error
Input: pixa (array of 1 bpp pix) Return: na of ON pixels in each pix, or null on error
Input: pixa
&ntext (<return> number of pix with non-empty text strings)
Return: 0 if OK, 1 on error.
Notes: (1) All pix have non-empty text strings if the returned value @ntext equals the pixa count.
Input: n (initial number of ptrs) Return: pixa, or null on error
Notes: (1) This creates an empty boxa.
Input: pixs
boxa
&cropwarn (<optional return> TRUE if the boxa extent
is larger than pixs)
Return: pixad, or null on error
Notes: (1) This simply extracts from pixs the region corresponding to each box in the boxa. (2) The 3rd arg is optional. If the extent of the boxa exceeds the size of pixs, so that some boxes are either clipped or entirely outside the pix, a warning is returned as TRUE. (3) pixad will have only the properly clipped elements, and the internal boxa will be correct.
Input: pixs (with individual components on a lattice)
n (number of components)
cellw (width of each cell)
cellh (height of each cell)
Return: pixa, or null on error
Notes: (1) For bpp = 1, we truncate each retrieved pix to the ON pixels, which we assume for now start at (0,0)
Input: pixac
accesstype (L_COPY, L_CLONE, L_COPY_CLONE; for boxa)
Return: pixa if OK, or null on error
Input: pixs (8 bpp)
pixm (<optional> 1 bpp image mask; can be null)
gamma (gamma correction; must be > 0.0; typically ~1.0)
Return: pixd (1 bpp), or null on error
Notes: (1) This is a simple convenience function for doing adaptive thresholding on a grayscale image with variable background. It uses default parameters appropriate for typical text images. (2) @pixm is a 1 bpp mask over "image" regions, which are not expected to have a white background. The mask inhibits background finding under the fg pixels of the mask. For images with both text and image, the image regions would be binarized (or quantized) by a different set of operations. (3) As @gamma is increased, the foreground pixels are reduced. (4) Under the covers: The default background value for normalization is 200, so we choose 170 for 'maxval' in pixGammaTRC. Likewise, the default foreground threshold for normalization is 60, so we choose 50 for 'minval' in pixGammaTRC. Because 170 was mapped to 255, choosing 200 for the threshold is quite safe for avoiding speckle noise from the background.
| LEPT_DLL PIX* pixAdaptThresholdToBinaryGen | ( | PIX * | pixs, |
| PIX * | pixm, | ||
| l_float32 | gamma, | ||
| l_int32 | blackval, | ||
| l_int32 | whiteval, | ||
| l_int32 | thresh | ||
| ) |
pixAdaptThresholdToBinaryGen()
Input: pixs (8 bpp)
pixm (<optional> 1 bpp image mask; can be null)
gamma (gamma correction; must be > 0.0; typically ~1.0)
blackval (dark value to set to black (0))
whiteval (light value to set to white (255))
thresh (final threshold for binarization)
Return: pixd (1 bpp), or null on error
Notes: (1) This is a convenience function for doing adaptive thresholding on a grayscale image with variable background. Also see notes in pixAdaptThresholdToBinary(). (2) Reducing @gamma increases the foreground (text) pixels. Use a low value (e.g., 0.5) for images with light text. (3) For normal images, see default args in pixAdaptThresholdToBinary(). For images with very light text, these values are appropriate: gamma ~0.5 blackval ~70 whiteval ~190 thresh ~200
Input: pixd (<optional> 1 bpp, can be null or equal to pixs)
pixs (1 bpp)
Return: pixd (1 bpp with colormap and non-opaque alpha),
or null on error
Notes: (1) We don't use 1 bpp colormapped images with alpha in leptonica, but we support generating them (here), writing to png, and reading the png. On reading, they are converted to 32 bpp RGBA. (2) The background (0) pixels in pixs become fully transparent, and the foreground (1) pixels are fully opaque. Thus, pixd is a 1 bpp representation of a stencil, that can be used to paint over pixels of a backing image that are masked by the foreground in pixs.
Input: pixs (any depth)
fract (fade fraction in the alpha component)
invert (1 to photometrically invert pixs)
Return: pixd (32 bpp with alpha), or null on error
Notes: (1) This is a simple alpha layer generator, where typically white has maximum transparency and black has minimum. (2) If @invert == 1, generate the same alpha layer but invert the input image photometrically. This is useful for blending over dark images, where you want dark regions in pixs, such as text, to be lighter in the blended image. (3) The fade @fract gives the minimum transparency (i.e., maximum opacity). A small fraction is useful for adding a watermark to an image. (4) If pixs has a colormap, it is removed to rgb. (5) If pixs already has an alpha layer, it is overwritten.
| LEPT_DLL PIX* pixAddBlackOrWhiteBorder | ( | PIX * | pixs, |
| l_int32 | left, | ||
| l_int32 | right, | ||
| l_int32 | top, | ||
| l_int32 | bot, | ||
| l_int32 | op | ||
| ) |
Input: pixs (all depths; colormap ok)
left, right, top, bot (number of pixels added)
op (L_GET_BLACK_VAL, L_GET_WHITE_VAL)
Return: pixd (with the added exterior pixels), or null on error
Notes: (1) See pixGetBlackOrWhiteVal() for possible side effect (adding a color to a colormap). (2) The only complication is that pixs may have a colormap. There are two ways to add the black or white border: (a) As done here (simplest, most efficient) (b) l_int32 ws, hs, d; pixGetDimensions(pixs, &ws, &hs, &d); Pix *pixd = pixCreate(ws + left + right, hs + top + bot, d); PixColormap *cmap = pixGetColormap(pixs); if (cmap != NULL) pixSetColormap(pixd, pixcmapCopy(cmap)); pixSetBlackOrWhite(pixd, L_SET_WHITE); /* uses cmap */ pixRasterop(pixd, left, top, ws, hs, PIX_SRC, pixs, 0, 0);
Input: pixs (all depths; colormap ok)
npix (number of pixels to be added to each side)
val (value of added border pixels)
Return: pixd (with the added exterior pixels), or null on error
Notes: (1) See pixGetBlackOrWhiteVal() for values of black and white pixels.
| LEPT_DLL PIX* pixAddBorderGeneral | ( | PIX * | pixs, |
| l_int32 | left, | ||
| l_int32 | right, | ||
| l_int32 | top, | ||
| l_int32 | bot, | ||
| l_uint32 | val | ||
| ) |
Input: pixs (all depths; colormap ok)
left, right, top, bot (number of pixels added)
val (value of added border pixels)
Return: pixd (with the added exterior pixels), or null on error
Notes: (1) For binary images: white: val = 0 black: val = 1 For grayscale images: white: val = 2 ** d - 1 black: val = 0 For rgb color images: white: val = 0xffffff00 black: val = 0 For colormapped images, set val to the appropriate colormap index. (2) If the added border is either black or white, you can use pixAddBlackOrWhiteBorder() The black and white values for all images can be found with pixGetBlackOrWhiteVal() which, if pixs is cmapped, may add an entry to the colormap. Alternatively, if pixs has a colormap, you can find the index of the pixel whose intensity is closest to white or black: white: pixcmapGetRankIntensity(cmap, 1.0, &index); black: pixcmapGetRankIntensity(cmap, 0.0, &index); and use that for val.
Input: pixs (8, 16 or 32 bpp)
val (amount to add to each pixel)
Return: 0 if OK, 1 on error
Notes: (1) In-place operation. (2) No clipping for 32 bpp. (3) For 8 and 16 bpp, if val > 0 the result is clipped to 0xff and 0xffff, respectively. (4) For 8 and 16 bpp, if val < 0 the result is clipped to 0.
| LEPT_DLL PIX* pixAddContinuedBorder | ( | PIX * | pixs, |
| l_int32 | left, | ||
| l_int32 | right, | ||
| l_int32 | top, | ||
| l_int32 | bot | ||
| ) |
Input: pixs
left, right, top, bot (pixels on each side to be added)
Return: pixd, or null on error
Notes: (1) This adds pixels on each side whose values are equal to the value on the closest boundary pixel.
Input: pixs (8 bpp gray or 32 bpp rgb; no colormap)
stdev (of noise)
Return: pixd (8 or 32 bpp), or null on error
Notes: (1) This adds noise to each pixel, taken from a normal distribution with zero mean and specified standard deviation.
Input: pixd (<optional>; this can be null, equal to pixs1, or
different from pixs1)
pixs1 (can be == to pixd)
pixs2
Return: pixd always
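Notes: (1) Arithmetic addition of two 8, 16 or 32 bpp images. (2) For 8 and 16 bpp, we do explicit clipping to 0xff and 0xffff, respectively. (3) Alignment is to UL corner. (4) There are 3 cases. The result can go to a new dest, in-place to pixs1, or to an existing input dest: (a) pixd == null: (pixs1 + pixs2) --> new pixd; (b) pixd == pixs1: (pixs1 + pixs2) --> pixs1 (in-place); (c) pixd != pixs1: (pixs1 + pixs2) --> input pixd.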
Input: pixs (8 bpp) Return: 0 if OK, 1 on error
Notes: (1) If pixs has a colormap, this is a no-op.
Input: pixs (8 bpp) Return: 0 if OK, 1 on error
Notes: (1) This generates a colormapped version of the input image that has the same number of colormap entries as the input image has unique gray levels.
| LEPT_DLL PIX* pixAddMirroredBorder | ( | PIX * | pixs, |
| l_int32 | left, | ||
| l_int32 | right, | ||
| l_int32 | top, | ||
| l_int32 | bot | ||
| ) |
Input: pixs (all depths; colormap ok)
left, right, top, bot (number of pixels added)
Return: pixd, or null on error
Notes: (1) This applies what is effectively mirror boundary conditions. For the added border pixels in pixd, the pixels in pixs near the border are mirror-copied into the border region. (2) This is useful for avoiding special operations near boundaries when doing image processing operations such as rank filters and convolution. In use, one first adds mirrored pixels to each side of the image. The number of pixels added on each side is half the filter dimension. Then the image processing operations proceed over a region equal to the size of the original image, and write directly into a dest pix of the same size as pixs. (3) The general pixRasterop() is used for an in-place operation here because there is no overlap between the src and dest rectangles.
| LEPT_DLL PIX* pixAddMixedBorder | ( | PIX * | pixs, |
| l_int32 | left, | ||
| l_int32 | right, | ||
| l_int32 | top, | ||
| l_int32 | bot | ||
| ) |
Input: pixs (all depths; colormap ok)
left, right, top, bot (number of pixels added)
Return: pixd, or null on error
Notes: (1) This applies mirrored boundary conditions horizontally and repeated b.c. vertically. (2) It is specifically used for avoiding special operations near boundaries when convolving a hue-saturation histogram with a given window size. The repeated b.c. are used vertically for hue, and the mirrored b.c. are used horizontally for saturation. The number of pixels added on each side is approximately (but not quite) half the filter dimension. The image processing operations can then proceed over a region equal to the size of the original image, and write directly into a dest pix of the same size as pixs. (3) The general pixRasterop() can be used for an in-place operation here because there is no overlap between the src and dest rectangles.
| LEPT_DLL PIX* pixAddRepeatedBorder | ( | PIX * | pixs, |
| l_int32 | left, | ||
| l_int32 | right, | ||
| l_int32 | top, | ||
| l_int32 | bot | ||
| ) |
Input: pixs (all depths; colormap ok)
left, right, top, bot (number of pixels added)
Return: pixd, or null on error
Notes: (1) This applies a repeated border, as if the central part of the image is tiled over the plane. So, for example, the pixels in the left border come from the right side of the image. (2) The general pixRasterop() is used for an in-place operation here because there is no overlap between the src and dest rectangles.
Input: pixs1, pixs2 (32 bpp RGB, or colormapped) Return: pixd, or null on error
Notes: (1) Clips computation to the minimum size, aligning the UL corners. (2) Removes any colormap to RGB, and ignores the LSB of each pixel word (the alpha channel). (3) Adds each component value, pixelwise, clipping to 255. (4) This is useful to combine two images where most of the pixels are essentially black, such as in pixPerceptualDiff().
| LEPT_DLL PIX* pixAddSingleTextblock | ( | PIX * | pixs, |
| L_BMF * | bmf, | ||
| const char * | textstr, | ||
| l_uint32 | val, | ||
| l_int32 | location, | ||
| l_int32 * | poverflow | ||
| ) |
Input: pixs (input pix; colormap ok)
bmf (bitmap font data)
textstr (<optional> text string to be added)
val (color to set the text)
location (L_ADD_ABOVE, L_ADD_AT_TOP, L_ADD_AT_BOT, L_ADD_BELOW)
&overflow (<optional return> 1 if text overflows
allocated region and is clipped; 0 otherwise)
Return: pixd (new pix with rendered text), or either a copy
or null on error
Notes: (1) This function paints a set of lines of text over an image. If @location is L_ADD_ABOVE or L_ADD_BELOW, the pix size is expanded with a border and rendered over the border. (2) @val is the pixel value to be painted through the font mask. It should be chosen to agree with the depth of pixs. If it is out of bounds, an intermediate value is chosen. For RGB, use hex notation: 0xRRGGBB00, where RR is the hex representation of the red intensity, etc. (3) If textstr == NULL, use the text field in the pix. (4) If there is a colormap, this does the best it can to use the requested color, or something similar to it. (5) Typical usage is for labelling a pix with some text data.
Input: pix
textstring
Return: 0 if OK, 1 on error
Notes: (1) This adds the new textstring to any existing text. (2) Either or both the existing text and the new text string can be null.
| LEPT_DLL PIX* pixAddTextlines | ( | PIX * | pixs, |
| L_BMF * | bmf, | ||
| const char * | textstr, | ||
| l_uint32 | val, | ||
| l_int32 | location | ||
| ) |
Input: pixs (input pix; colormap ok)
bmf (bitmap font data)
textstr (<optional> text string to be added)
val (color to set the text)
location (L_ADD_ABOVE, L_ADD_BELOW, L_ADD_LEFT, L_ADD_RIGHT)
Return: pixd (new pix with rendered text), or either a copy
or null on error
Notes: (1) This function expands an image as required to paint one or more lines of text adjacent to the image. If @bmf == NULL, this returns a copy. If above or below, the lines are centered with respect to the image; if left or right, they are left justified. (2) @val is the pixel value to be painted through the font mask. It should be chosen to agree with the depth of pixs. If it is out of bounds, an intermediate value is chosen. For RGB, use hex notation: 0xRRGGBB00, where RR is the hex representation of the red intensity, etc. (3) If textstr == NULL, use the text field in the pix. The text field contains one or more "lines" of text, where newlines are used as line separators. (4) If there is a colormap, this does the best it can to use the requested color, or something similar to it. (5) Typical usage is for labelling a pix with some text data.
Input: pixs (1 bpp pix from which components are added; in-place)
pixa (of connected components, some of which will be put
into pixs)
na (numa indicator: add components corresponding to 1s)
Return: 0 if OK, 1 on error
Notes: (1) This complements pixRemoveWithIndicator(). Here, the selected components are added to pixs.
Input: &pixa (<can be nulled>) Return: void
Notes: (1) Decrements the ref count and, if 0, destroys the pixa. (2) Always nulls the input ptr.
Input: pixa
w, h (if set to 0, determines the size from the
b.b. of the components in pixa)
Return: pix, or null on error
Notes: (1) This uses the boxes to place each pix in the rendered composite. (2) Set w = h = 0 to use the b.b. of the components to determine the size of the returned pix. (3) Uses the first pix in pixa to determine the depth. (4) The background is written "white". On 1 bpp, each successive pix is "painted" (adding foreground), whereas for grayscale or color each successive pix is blitted with just the src. (5) If the pixa is empty, returns an empty 1 bpp pix.
| LEPT_DLL PIX* pixaDisplayLinearly | ( | PIXA * | pixas, |
| l_int32 | direction, | ||
| l_float32 | scalefactor, | ||
| l_int32 | background, | ||
| l_int32 | spacing, | ||
| l_int32 | border, | ||
| BOXA ** | pboxa | ||
| ) |
Input: pixa
direction (L_HORIZ or L_VERT)
scalefactor (applied to every pix; use 1.0 for no scaling)
background (0 for white, 1 for black; this is the color
of the spacing between the images)
spacing (between images, and on outside)
border (width of black border added to each image;
use 0 for no border)
&boxa (<optional return> location of images in output pix)
Return: pix of composite images, or null on error
Notes: (1) This puts each pix, sequentially, in a line, either horizontally or vertically. (2) If any pix has a colormap, all pix are rendered in rgb. (3) The boxa gives the location of each image.
Input: pixa
w, h (if set to 0, determines the size from the
b.b. of the components in pixa)
color (background color to use)
Return: pix, or null on error
Notes: (1) This uses the boxes to place each pix in the rendered composite. (2) Set w = h = 0 to use the b.b. of the components to determine the size of the returned pix. (3) If any pix in @pixa are colormapped, or if the pix have different depths, it returns a 32 bpp pix. Otherwise, the depth of the returned pix equals that of the pix in @pixa. (4) If the pixa is empty, return null.
| LEPT_DLL PIX* pixaDisplayOnLattice | ( | PIXA * | pixa, |
| l_int32 | cellw, | ||
| l_int32 | cellh, | ||
| l_int32 * | pncols, | ||
| BOXA ** | pboxa | ||
| ) |
Input: pixa
cellw (lattice cell width)
cellh (lattice cell height)
&ncols (<optional return> number of columns in output lattice)
&boxa (<optional return> location of images in lattice)
Return: pix of composite images, or null on error
Notes: (1) This places each pix sequentially on a regular lattice in the rendered composite. If a pix is too large to fit in the allocated lattice space, it is not rendered. (2) If any pix has a colormap, all pix are rendered in rgb. (3) This is useful when putting bitmaps of components, such as characters, into a single image. (4) The boxa gives the location of each image. The UL corner of each image is on a lattice cell corner. Omitted images (due to size) are assigned an invalid width and height of 0.
Input: pixa (of 1 bpp components, with boxa)
w, h (if set to 0, determines the size from the
b.b. of the components in pixa)
Return: pix (8 bpp, cmapped, with random colors on the components),
or null on error
Notes: (1) This uses the boxes to place each pix in the rendered composite. (2) By default, the background color is: black, cmap index 0. This can be changed by pixcmapResetColor()
| LEPT_DLL PIX* pixaDisplayTiled | ( | PIXA * | pixa, |
| l_int32 | maxwidth, | ||
| l_int32 | background, | ||
| l_int32 | spacing | ||
| ) |
Input: pixa
maxwidth (of output image)
background (0 for white, 1 for black)
spacing
Return: pix of tiled images, or null on error
Notes: (1) This renders a pixa to a single image of width not to exceed maxwidth, with background color either white or black, and with each subimage spaced on a regular lattice. (2) The lattice size is determined from the largest width and height, separately, of all pix in the pixa. (3) All pix in the pixa must be of equal depth. (4) If any pix has a colormap, all pix are rendered in rgb. (5) Careful: because no components are omitted, this is dangerous if there are thousands of small components and one or more very large ones, because the size of the resulting pix can be huge!
| LEPT_DLL PIX* pixaDisplayTiledAndScaled | ( | PIXA * | pixa, |
| l_int32 | outdepth, | ||
| l_int32 | tilewidth, | ||
| l_int32 | ncols, | ||
| l_int32 | background, | ||
| l_int32 | spacing, | ||
| l_int32 | border | ||
| ) |
Input: pixa
outdepth (output depth: 1, 8 or 32 bpp)
tilewidth (each pix is scaled to this width)
ncols (number of tiles in each row)
background (0 for white, 1 for black; this is the color
of the spacing between the images)
spacing (between images, and on outside)
border (width of additional black border on each image;
use 0 for no border)
Return: pix of tiled images, or null on error
Notes: (1) This can be used to tile a number of renderings of an image that are at different scales and depths. (2) Each image, after scaling and optionally adding the black border, has width 'tilewidth'. Thus, the border does not affect the spacing between the image tiles. The maximum allowed border width is tilewidth / 5.
| LEPT_DLL PIX* pixaDisplayTiledByIndex | ( | PIXA * | pixa, |
| NUMA * | na, | ||
| l_int32 | width, | ||
| l_int32 | spacing, | ||
| l_int32 | border, | ||
| l_int32 | fontsize, | ||
| l_uint32 | textcolor | ||
| ) |
Input: pixa
numa (with indices corresponding to the pix in pixa)
width (each pix is scaled to this width)
spacing (between images, and on outside)
border (width of black border added to each image;
use 0 for no border)
fontsize (4, 6, ... 20)
textcolor (0xrrggbb00)
Return: pixd (of tiled images), or null on error
Notes: (1) This renders a pixa to a single image with white background color, where the pix are placed in columns given by the index value in the numa. Each pix is separated by @spacing from the adjacent ones, and an optional border is placed around them. (2) Up to 127 chars of text in the pix text field are rendered below each pix. Use newlines in the text field to write the text in multiple lines that fit within the pix width. (3) To avoid having empty columns, if there are N different index values, they should be in [0 ... N-1]. (4) All pix are converted to 32 bpp.
| LEPT_DLL PIX* pixaDisplayTiledInRows | ( | PIXA * | pixa, |
| l_int32 | outdepth, | ||
| l_int32 | maxwidth, | ||
| l_float32 | scalefactor, | ||
| l_int32 | background, | ||
| l_int32 | spacing, | ||
| l_int32 | border | ||
| ) |
Input: pixa
outdepth (output depth: 1, 8 or 32 bpp)
maxwidth (of output image)
scalefactor (applied to every pix; use 1.0 for no scaling)
background (0 for white, 1 for black; this is the color
of the spacing between the images)
spacing (between images, and on outside)
border (width of black border added to each image;
use 0 for no border)
Return: pixd (of tiled images), or null on error
Notes: (1) This renders a pixa to a single image of width not to exceed maxwidth, with background color either white or black, and with each row tiled such that the top of each pix is aligned and separated by 'spacing' from the next one. A black border can be added to each pix. (2) All pix are converted to outdepth; existing colormaps are removed. (3) This does a reasonably spacewise-efficient job of laying out the individual pix images into a tiled composite. (4) A serialized boxa giving the location in pixd of each input pix (without added border) is stored in the text string of pixd. This allows, e.g., regeneration of a pixa from pixd, using pixaCreateFromBoxa(). If there is no scaling and the depth of each input pix in the pixa is the same, this tiling operation can be inverted using the boxa (except for loss of text in each of the input pix): pix1 = pixaDisplayTiledInRows(pixa1, 1, 1500, 1.0, 0, 30, 0); char *boxatxt = pixGetText(pix1); boxa1 = boxaReadMem((l_uint8 *)boxatxt, strlen(boxatxt)); pixa2 = pixaCreateFromBoxa(pix1, boxa1, NULL);
| LEPT_DLL PIX* pixaDisplayTiledWithText | ( | PIXA * | pixa, |
| l_int32 | maxwidth, | ||
| l_float32 | scalefactor, | ||
| l_int32 | spacing, | ||
| l_int32 | border, | ||
| l_int32 | fontsize, | ||
| l_uint32 | textcolor | ||
| ) |
Input: pixa
maxwidth (of output image)
scalefactor (applied to every pix; use 1.0 for no scaling)
spacing (between images, and on outside)
border (width of black border added to each image;
use 0 for no border)
fontsize (4, 6, ... 20)
textcolor (0xrrggbb00)
Return: pixd (of tiled images), or null on error
Notes: (1) This is a version of pixaDisplayTiledInRows() that prints, below each pix, the text in the pix text field. Up to 127 chars of text in the pix text field are rendered below each pix. (2) It renders a pixa to a single image of width not to exceed @maxwidth, with white background color, with each row tiled such that the top of each pix is aligned and separated by @spacing from the next one. (3) All pix are converted to 32 bpp. (4) This does a reasonably spacewise-efficient job of laying out the individual pix images into a tiled composite.
| LEPT_DLL PIX* pixaDisplayUnsplit | ( | PIXA * | pixa, |
| l_int32 | nx, | ||
| l_int32 | ny, | ||
| l_int32 | borderwidth, | ||
| l_uint32 | bordercolor | ||
| ) |
Input: pixa
nx (number of mosaic cells horizontally)
ny (number of mosaic cells vertically)
borderwidth (of added border on all sides)
bordercolor (in our RGBA format: 0xrrggbbaa)
Return: pix of tiled images, or null on error
Notes: (1) This is a logical inverse of pixaSplitPix(). It constructs a pix from a mosaic of tiles, all of equal size. (2) For added generality, a border of arbitrary color can be added to each of the tiles. (3) In use, pixa will typically have either been generated from pixaSplitPix() or will be derived from a pixa that was so generated. (4) All pix in the pixa must be of equal depth, and, if colormapped, have the same colormap.
| LEPT_DLL l_int32 pixaEqual | ( | PIXA * | pixa1, |
| PIXA * | pixa2, | ||
| l_int32 | maxdist, | ||
| NUMA ** | pnaindex, | ||
| l_int32 * | psame | ||
| ) |
Input: pixa1
pixa2
maxdist
&naindex (<optional return> index array of correspondences)
&same (<return> 1 if equal; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) The two pixa are the "same" if they contain the same boxa and the same ordered set of pix. However, if they have boxa, the pix in each pixa can differ in ordering by an amount given by the parameter @maxdist. If they don't have a boxa, the @maxdist parameter is ignored, and the ordering must be identical. (2) This applies only to boxa geometry, pixels and ordering; other fields in the pix are ignored. (3) naindex[i] gives the position of the box in pixa2 that corresponds to box i in pixa1. It is only returned if the pixa have boxa and the boxa are equal. (4) In situations where the ordering is very different, so that a large @maxdist is required for "equality", this should be implemented with a hash function for efficiency.
Input: pixa Return: 0 if OK; 1 on error
Notes: (1) If necessary, reallocs new pixa and boxa ptrs arrays to @size. The pixa and boxa ptr arrays must always be equal in size.
| LEPT_DLL PIXA* pixaExtendIterative | ( | PIXA * | pixas, |
| l_int32 | type, | ||
| l_int32 | niters, | ||
| SEL * | sel, | ||
| l_int32 | include | ||
| ) |
Input: pixas
type (L_MORPH_DILATE, L_MORPH_ERODE)
niters
sel (used for dilation, erosion); uses 2x2 if null
include (1 to include a copy of the input pixas in pixad;
0 to omit)
Return: pixad (of derived pix, using all iterations), or null on error
Notes: (1) This dilates or erodes every pix in @pixas, iteratively, using the input Sel (or, if null, a 2x2 Sel by default), and puts the results in @pixad. (2) If @niters <= 0, this is a no-op; it returns a clone of pixas. (3) If @include = 1, the output @pixad contains all the pix in @pixas. Otherwise, it doesn't, but pixaJoin() can be used later to join pixas with pixad.
pixaExtractColumnFromEachPix()
Input: pixa (of identically sized, 8 bpp; not cmapped)
col (column index)
pixd (pix into which each column is inserted)
Return: 0 if OK, 1 on error
Input: pixs (all depths; colormap ok)
vc (vector of 6 coefficients for affine transformation)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error
Notes: (1) Brings in either black or white pixels from the boundary (2) Removes any existing colormap, if necessary, before transforming
Input: pixs (32 bpp)
vc (vector of 6 coefficients for affine transformation)
colorval (e.g., 0 to bring in BLACK, 0xffffff00 for WHITE)
Return: pixd, or null on error
Input: pixs (8 bpp)
vc (vector of 6 coefficients for affine transformation)
grayval (0 to bring in BLACK, 255 for WHITE)
Return: pixd, or null on error
Input: pixs (all depths; colormap ok)
ptad (3 pts of final coordinate space)
ptas (3 pts of initial coordinate space)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error
Notes: (1) Brings in either black or white pixels from the boundary (2) Removes any existing colormap, if necessary, before transforming
Input: pixs (32 bpp)
ptad (3 pts of final coordinate space)
ptas (3 pts of initial coordinate space)
colorval (e.g., 0 to bring in BLACK, 0xffffff00 for WHITE)
Return: pixd, or null on error
Input: pixs (8 bpp)
ptad (3 pts of final coordinate space)
ptas (3 pts of initial coordinate space)
grayval (0 to bring in BLACK, 255 for WHITE)
Return: pixd, or null on error
| LEPT_DLL PIX* pixAffinePtaWithAlpha | ( | PIX * | pixs, |
| PTA * | ptad, | ||
| PTA * | ptas, | ||
| PIX * | pixg, | ||
| l_float32 | fract, | ||
| l_int32 | border | ||
| ) |
Input: pixs (32 bpp rgb)
ptad (3 pts of final coordinate space)
ptas (3 pts of initial coordinate space)
pixg (<optional> 8 bpp, can be null)
fract (between 0.0 and 1.0, with 0.0 fully transparent
and 1.0 fully opaque)
border (of pixels added to capture transformed source pixels)
Return: pixd, or null on error
Notes: (1) The alpha channel is transformed separately from pixs, and aligns with it, being fully transparent outside the boundary of the transformed pixs. For pixels that are fully transparent, a blending function like pixBlendWithGrayMask() will give zero weight to corresponding pixels in pixs. (2) If pixg is NULL, it is generated as an alpha layer that is partially opaque, using @fract. Otherwise, it is cropped to pixs if required and @fract is ignored. The alpha channel in pixs is never used. (3) Colormaps are removed. (4) When pixs is transformed, it doesn't matter what color is brought in because the alpha channel will be transparent (0) there. (5) To avoid losing source pixels in the destination, it may be necessary to add a border to the source pix before doing the affine transformation. This can be any non-negative number. (6) The input @ptad and @ptas are in a coordinate space before the border is added. Internally, we compensate for this before doing the affine transform on the image after the border is added. (7) The default setting for the border values in the alpha channel is 0 (transparent) for the outermost ring of pixels and (0.5 * fract * 255) for the second ring. When blended over a second image, this (a) shrinks the visible image to make a clean overlap edge with an image below, and (b) softens the edges by weakening the aliasing there. Use l_setAlphaMaskBorder() to change these values.
Input: pixs (all depths)
vc (vector of 6 coefficients for affine transformation)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error
Notes: (1) Brings in either black or white pixels from the boundary. (2) Retains colormap, which you can do for a sampled transform. (3) For 8 or 32 bpp, much better quality is obtained by the somewhat slower pixAffine(). See that function for relative timings between sampled and interpolated.
Input: pixs (all depths)
ptad (3 pts of final coordinate space)
ptas (3 pts of initial coordinate space)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error
Notes: (1) Brings in either black or white pixels from the boundary. (2) Retains colormap, which you can do for a sampled transform. (3) The 3 points must not be collinear. (4) The order of the 3 points is arbitrary; however, to compare with the sequential transform they must be in these locations and in this order: origin, x-axis, y-axis. (5) For 1 bpp images, this has much better quality results than pixAffineSequential(), particularly for text. It is about 3x slower, but does not require additional border pixels. The poor quality of pixAffineSequential() is due to repeated quantized transforms. It is strongly recommended that pixAffineSampled() be used for 1 bpp images. (6) For 8 or 32 bpp, much better quality is obtained by the somewhat slower pixAffinePta(). See that function for relative timings between sampled and interpolated. (7) To repeat, use of the sequential transform, pixAffineSequential(), for any images, is discouraged.
Input: pixs
ptad (3 pts of final coordinate space)
ptas (3 pts of initial coordinate space)
bw (pixels of additional border width during computation)
bh (pixels of additional border height during computation)
Return: pixd, or null on error
Notes: (1) The 3 pts must not be collinear. (2) The 3 pts must be given in this order: origin, a location along the x-axis, a location along the y-axis.
Input: pixa (of 1 bpp pix) Return: na (of area fractions for each pix), or null on error
Notes: (1) This is typically used for a pixa consisting of 1 bpp connected components.
Input: pixa (of 1 bpp pix)
pixm (mask image)
debug (1 for output, 0 to suppress)
Return: na (of ratio masked/total fractions for each pix),
or null on error
Notes: (1) This is typically used for a pixa consisting of 1 bpp connected components, which has an associated boxa giving the location of the components relative to the mask origin. (2) The debug flag displays in green and red the masked and unmasked parts of the image from which pixa was derived.
Input: pixa
&naw (<optional return> numa of pix widths)
&nah (<optional return> numa of pix heights)
Return: 0 if OK, 1 on error
Input: pixa (of 1 bpp pix)
Return: na (of fg perimeter/(2*(w+h)) ratio for each pix),
or null on error
Notes: (1) This is typically used for a pixa consisting of 1 bpp connected components. (2) This has a minimum value for a circle of pi/4; a value for a rectangle component of approx. 1.0; and a value much larger than 1.0 for a component with a highly irregular boundary.
Input: pixa (of 1 bpp pix) Return: na (of perimeter/area ratio for each pix), or null on error
Notes: (1) This is typically used for a pixa consisting of 1 bpp connected components.
Input: pixa (of 1 bpp pix) Return: na (of width*height products for each pix), or null on error
Notes: (1) This is typically used for a pixa consisting of 1 bpp connected components.
Input: pixa (of 1 bpp pix) Return: na (of width/height ratios for each pix), or null on error
Notes: (1) This is typically used for a pixa consisting of 1 bpp connected components.
| LEPT_DLL PIXA* pixaGenerateFont | ( | PIX * | pixs, |
| l_int32 | fontsize, | ||
| l_int32 * | pbl0, | ||
| l_int32 * | pbl1, | ||
| l_int32 * | pbl2 | ||
| ) |
Input: pix (of 95 characters in 3 rows)
fontsize (4, 6, 8, ... , 20, in pts at 300 ppi)
&bl0 (<return> baseline of row 1)
&bl1 (<return> baseline of row 2)
&bl2 (<return> baseline of row 3)
Return: pixa of font bitmaps for 95 characters, or null on error
Notes: (1) This does all the work. See pixaGenerateFontFromFile() for an overview. (2) The pix is for one of the 9 fonts. @fontsize is only used here for debugging.
| LEPT_DLL PIXA* pixaGenerateFontFromFile | ( | const char * | dir, |
| l_int32 | fontsize, | ||
| l_int32 * | pbl0, | ||
| l_int32 * | pbl1, | ||
| l_int32 * | pbl2 | ||
| ) |
Input: dir (directory holding image of character set)
fontsize (4, 6, 8, ... , 20, in pts at 300 ppi)
&bl0 (<return> baseline of row 1)
&bl1 (<return> baseline of row 2)
&bl2 (<return> baseline of row 3)
Return: pixa of font bitmaps for 95 characters, or null on error
These font generation functions use 9 sets, each with bitmaps of 94 ascii characters, all in Palatino-Roman font. Each input bitmap has 3 rows of characters. The range of ascii values in each row is as follows: row 0: 32-57 (32 is a space) row 1: 58-91 (92, '\', is not represented in this font) row 2: 93-126 We LR flip the '/' char to generate a bitmap for the missing '\' character, so that we have representations of all 95 printable chars.
Typically, use pixaGetFont() to generate the character bitmaps in memory for a bmf. This will simply access the bitmap files in a serialized pixa that were produced in prog/genfonts.c using this function.
| LEPT_DLL PIXA* pixaGenerateFontFromString | ( | l_int32 | fontsize, |
| l_int32 * | pbl0, | ||
| l_int32 * | pbl1, | ||
| l_int32 * | pbl2 | ||
| ) |
Input: fontsize (4, 6, 8, ... , 20, in pts at 300 ppi)
&bl0 (<return> baseline of row 1)
&bl1 (<return> baseline of row 2)
&bl2 (<return> baseline of row 3)
Return: pixa of font bitmaps for 95 characters, or null on error
Notes: (1) See pixaGenerateFontFromFile() for details.
Input: pixa (of identically sized, 8 bpp pix; not cmapped)
type (L_MEAN_ABSVAL, L_MEDIAN_VAL, L_MODE_VAL, L_MODE_COUNT)
nbins (of histogram for median and mode; ignored for mean)
thresh (on histogram for mode val; ignored for all other types)
Return: pix (with pixelwise aligned stats), or null on error.
Notes: (1) Each pixel in the returned pix represents an average (or median, or mode) over the corresponding pixels in each pix in the pixa. (2) The @thresh parameter works with L_MODE_VAL only, and sets a minimum occupancy of the mode bin. If the occupancy of the mode bin is less than @thresh, the mode value is returned as 0. To always return the actual mode value, set @thresh = 0. See pixGetRowStats().
Input: pixa
index (to the index-th pix)
accesstype (L_COPY or L_CLONE)
Return: box (if null, not automatically an error), or null on error
Notes: (1) There is always a boxa with a pixa, and it is initialized so that each box ptr is NULL. (2) In general, we expect that there is either a box associated with each pix, or no boxes at all in the boxa. (3) Having no boxes is thus not an automatic error. Whether it is an actual error is determined by the calling program. If the caller expects to get a box, it is an error; see, e.g., pixaGetBoxGeometry().
Input: pixa
accesstype (L_COPY, L_CLONE, L_COPY_CLONE)
Return: boxa, or null on error
Input: pixa Return: count, or 0 on error
| LEPT_DLL l_int32 pixaGetBoxGeometry | ( | PIXA * | pixa, |
| l_int32 | index, | ||
| l_int32 * | px, | ||
| l_int32 * | py, | ||
| l_int32 * | pw, | ||
| l_int32 * | ph | ||
| ) |
Input: pixa
index (to the index-th box)
&x, &y, &w, &h (<optional return>; each can be null)
Return: 0 if OK, 1 on error
Input: pixa Return: count, or 0 if no pixa
Input: pixa
&maxdepth (<optional return> max pixel depth of pix in pixa)
&same (<optional return> true if all depths are equal)
Return: 0 if OK; 1 on error
| LEPT_DLL PIXA* pixaGetFont | ( | const char * | dir, |
| l_int32 | fontsize, | ||
| l_int32 * | pbl0, | ||
| l_int32 * | pbl1, | ||
| l_int32 * | pbl2 | ||
| ) |
Input: dir (directory holding pixa of character set)
fontsize (4, 6, 8, ... , 20)
&bl0 (<return> baseline of row 1)
&bl1 (<return> baseline of row 2)
&bl2 (<return> baseline of row 3)
Return: pixa of font bitmaps for 95 characters, or null on error
Notes: (1) This reads a pre-computed pixa file with the 95 ascii chars.
Input: pixa (of pix that all have the same depth)
&size (<optional return> number of pix in the pixa)
Return: array of array of line ptrs, or null on error
Notes: (1) See pixGetLinePtrs() for details. (2) It is best if all pix in the pixa are the same size. The size of each line ptr array is equal to the height of the pix that it refers to. (3) This is an array of arrays. To destroy it: for (i = 0; i < size; i++) LEPT_FREE(lineset[i]); LEPT_FREE(lineset);
Input: pixa
index (to the index-th pix)
accesstype (L_COPY or L_CLONE)
Return: pix, or null on error
Input: pixa Return: pix array, or null on error
Notes: (1) This returns a ptr to the actual array. The array is owned by the pixa, so it must not be destroyed. (2) The caller should always check if the return value is NULL before accessing any of the pix ptrs in this array!
| LEPT_DLL l_int32 pixaGetPixDimensions | ( | PIXA * | pixa, |
| l_int32 | index, | ||
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pd | ||
| ) |
Input: pixa
index (to the index-th pix)
&w, &h, &d (<optional return>; each can be null)
Return: 0 if OK, 1 on error
Input: pixa
&depth (<return> depth required to render if all
colormaps are removed)
Return: 0 if OK; 1 on error
Input: pixa
&hascolor (<return> 1 if any pix is rgb or has
a colormap with color; 0 otherwise)
Return: 0 if OK; 1 on error
Input: pixa (typically empty)
pix (<optional> to be replicated into the entire pixa ptr array)
box (<optional> to be replicated into the entire boxa ptr array)
Return: 0 if OK, 1 on error
Notes: (1) This initializes a pixa by filling up the entire pix ptr array with copies of @pix. If @pix == NULL, we use a tiny placeholder pix (w = h = d = 1). Any existing pix are destroyed. It also optionally fills the boxa with copies of @box. After this operation, the numbers of pix and (optionally) boxes are equal to the number of allocated ptrs. (2) Note that we use pixaReplacePix() instead of pixaInsertPix(). They both have the same effect when inserting into a NULL ptr in the pixa ptr array. (3) If the boxa is not initialized (i.e., filled with boxes), later insertion of boxes will cause an error, because the 'n' field is 0. (4) Example usage. This function is useful to prepare for a random insertion (or replacement) of pix into a pixa. To randomly insert pix into a pixa, without boxes, up to some index "max": Pixa *pixa = pixaCreate(max); pixaInitFull(pixa, NULL, NULL); An existing pixa with a smaller ptr array can also be reused: pixaExtendArrayToSize(pixa, max); pixaInitFull(pixa, NULL, NULL); The initialization allows the pixa to always be properly filled, even if all pix (and boxes) are not later replaced.
Input: pixa
index (at which pix is to be inserted)
pixs (new pix to be inserted)
box (<optional> new box to be inserted)
Return: 0 if OK, 1 on error
Notes: (1) This shifts pixa[i] --> pixa[i + 1] for all i >= index, and then inserts at pixa[index]. (2) To insert at the beginning of the array, set index = 0. (3) It should not be used repeatedly on large arrays, because the function is O(n). (4) To append a pix to a pixa, it's easier to use pixaAddPix().
Input: pixa
&fullpa (<optional return> 1 if pixa is full)
&fullba (<optional return> 1 if boxa is full)
Return: 0 if OK, 1 on error
Notes: (1) A pixa is "full" if the array of pix is fully occupied from index 0 to index (pixa->n - 1).
Input: pixad (dest pixa; add to this one)
pixas (<optional> source pixa; add from this one)
istart (starting index in pixas)
iend (ending index in pixas; use -1 to cat all)
Return: 0 if OK, 1 on error
Notes: (1) This appends a clone of each indicated pix in pixas to pixad. (2) istart < 0 is taken to mean 'read from the start' (istart = 0) (3) iend < 0 means 'read to the end' (4) If pixas is NULL or contains no pix, this is a no-op.
Input: pixs (32 bpp rgba, with alpha)
color (32 bit color in 0xrrggbb00 format)
Return: pixd (32 bpp rgb: pixs blended over uniform color @color),
a clone of pixs if no alpha, and null on error
Notes: (1) This is a convenience function that renders 32 bpp RGBA images (with an alpha channel) over a uniform background of value @color. To render over a white background, use @color = 0xffffff00. The result is an RGB image. (2) If pixs does not have an alpha channel, it returns a clone of pixs.
Input: pix (32 bpp, spp == 4)
&opaque (<return> 1 if spp == 4 and all alpha component
values are 255 (opaque); 0 otherwise)
Return: 0 if OK, 1 on error
Notes:
(1) On error, opaque is returned as 0 (FALSE).
| LEPT_DLL NUMA* pixaMakeSizeIndicator | ( | PIXA * | pixa, |
| l_int32 | width, | ||
| l_int32 | height, | ||
| l_int32 | type, | ||
| l_int32 | relation | ||
| ) |
Input: pixa
width, height (threshold dimensions)
type (L_SELECT_WIDTH, L_SELECT_HEIGHT,
L_SELECT_IF_EITHER, L_SELECT_IF_BOTH)
relation (L_SELECT_IF_LT, L_SELECT_IF_GT,
L_SELECT_IF_LTE, L_SELECT_IF_GTE)
Return: na (indicator array), or null on error
Notes: (1) The args specify constraints on the size of the components that are kept. (2) If the selection type is L_SELECT_WIDTH, the input height is ignored, and v.v. (3) To keep small components, use relation = L_SELECT_IF_LT or L_SELECT_IF_LTE. To keep large components, use relation = L_SELECT_IF_GT or L_SELECT_IF_GTE.
| LEPT_DLL PIXA* pixaMorphSequenceByComponent | ( | PIXA * | pixas, |
| const char * | sequence, | ||
| l_int32 | minw, | ||
| l_int32 | minh | ||
| ) |
pixaMorphSequenceByComponent()
Input: pixas (of 1 bpp pix)
sequence (string specifying sequence)
minw (minimum width to consider; use 0 or 1 for any width)
minh (minimum height to consider; use 0 or 1 for any height)
Return: pixad, or null on error
Notes: (1) See pixMorphSequence() for composing operation sequences. (2) This operates separately on each c.c. in the input pixa. (3) You can specify that the width and/or height must equal or exceed a minimum size for the operation to take place. (4) The input pixa should have a boxa giving the locations of the pix components.
| LEPT_DLL PIXA* pixaMorphSequenceByRegion | ( | PIX * | pixs, |
| PIXA * | pixam, | ||
| const char * | sequence, | ||
| l_int32 | minw, | ||
| l_int32 | minh | ||
| ) |
Input: pixs (1 bpp)
pixam (of 1 bpp mask elements)
sequence (string specifying sequence)
minw (minimum width to consider; use 0 or 1 for any width)
minh (minimum height to consider; use 0 or 1 for any height)
Return: pixad, or null on error
Notes: (1) See pixMorphSequence() for composing operation sequences. (2) This operates separately on each region in the input pixs defined by the components in pixam. (3) You can specify that the width and/or height of a mask component must equal or exceed a minimum size for the operation to take place. (4) The input pixam should have a boxa giving the locations of the regions in pixs.
Input: pixd (<optional>; this can be null, equal to pixs1,
different from pixs1)
pixs1 (can be == pixd)
pixs2 (must be != pixd)
Return: pixd always
Notes: (1) This gives the intersection of two images with equal depth, aligning them to the UL corner. pixs1 and pixs2 need not have the same width and height. (2) There are 3 cases: (a) pixd == null, (src1 & src2) --> new pixd (b) pixd == pixs1, (src1 & src2) --> src1 (in-place) (c) pixd != pixs1, (src1 & src2) --> input pixd (3) For clarity, if the case is known, use these patterns: (a) pixd = pixAnd(NULL, pixs1, pixs2); (b) pixAnd(pixs1, pixs1, pixs2); (c) pixAnd(pixd, pixs1, pixs2); (4) The size of the result is determined by pixs1. (5) The depths of pixs1 and pixs2 must be equal. (6) Note carefully that the order of pixs1 and pixs2 only matters for the in-place case. For in-place, you must have pixd == pixs1. Setting pixd == pixs2 gives an incorrect result: the copy puts pixs1 image data in pixs2, and the rasterop is then between pixs2 and pixs2 (a no-op).
pixApplyInvBackgroundGrayMap()
Input: pixs (8 bpp grayscale; no colormap)
pixm (16 bpp, inverse background map)
sx (tile width in pixels)
sy (tile height in pixels)
Return: pixd (8 bpp), or null on error
| LEPT_DLL PIX* pixApplyInvBackgroundRGBMap | ( | PIX * | pixs, |
| PIX * | pixmr, | ||
| PIX * | pixmg, | ||
| PIX * | pixmb, | ||
| l_int32 | sx, | ||
| l_int32 | sy | ||
| ) |
Input: pixs (32 bpp rgb)
pixmr (16 bpp, red inverse background map)
pixmg (16 bpp, green inverse background map)
pixmb (16 bpp, blue inverse background map)
sx (tile width in pixels)
sy (tile height in pixels)
Return: pixd (32 bpp rgb), or null on error
Input: pixs (8 bpp grayscale; not colormapped)
pixth (8 bpp array of local thresholds)
redfactor ( ... )
Return: pixd (1 bpp, thresholded image), or null on error
Input: pixs (8 bpp)
pixg (8 bpp, variable map)
target (typ. 128 for threshold)
Return: pixd (8 bpp), or null on error
Notes: (1) Suppose you have an image that you want to transform based on some photometric measurement at each point, such as the threshold value for binarization. Representing the photometric measurement as an image pixg, you can threshold the input image using pixVarThresholdToBinary(). Alternatively, you can map the input image pointwise so that the threshold over the entire image becomes a constant, such as 128. For example, if a pixel in pixg is 150 and the target is 128, the corresponding pixel in pixs is mapped linearly to a value (128/150) of the input value. If the resulting mapped image pixd were then thresholded at 128, you would obtain the same result as a direct binarization using pixg with pixVarThresholdToBinary(). (2) The sizes of pixs and pixg must be equal.
Input: filename Return: pixa, or null on error
Notes: (1) The pix are stored in the file as png. If the png library is not linked, this will fail.
Input: dirname
substr (<optional> substring filter on filenames; can be null)
Return: pixa, or null on error
Notes: (1) @dirname is the full path for the directory. (2) @substr is the part of the file name (excluding the directory) that is to be matched. All matching filenames are read into the Pixa. If substr is NULL, all filenames are read into the Pixa.
Input: sarray (full pathnames for all files) Return: pixa, or null on error
Input: stream Return: pixa, or null on error
Notes: (1) The pix are stored in the file as png. If the png library is not linked, this will fail.
Input: pixa
index (of pix to be removed)
Return: 0 if OK, 1 on error
Notes: (1) This shifts pixa[i] --> pixa[i - 1] for all i > index. (2) It should not be used repeatedly on large arrays, because the function is O(n). (3) The corresponding box is removed as well, if it exists.
Input: pixa
index (of pix to be removed)
&pix (<optional return> removed pix)
&box (<optional return> removed box)
Return: 0 if OK, 1 on error
Notes: (1) This shifts pixa[i] --> pixa[i - 1] for all i > index. (2) It should not be used repeatedly on large arrays, because the function is O(n). (3) The corresponding box is removed as well, if it exists. (4) The removed pix and box can either be retained or destroyed.
Input: pixs (<optional> 1 bpp pix)
pixa (of 1 bpp connected components, one of which will
be rendered in pixs, with its origin determined
by the associated box.)
index (of component to be rendered)
Return: pixd, or null on error
Notes: (1) If pixs is null, this generates an empty pix of a size determined by union of the component bounding boxes, and including the origin. (2) The selected component is blitted into pixs.
Input: pixa
index (to the index-th pix)
pix (insert to replace existing one)
box (<optional> insert to replace existing)
Return: 0 if OK, 1 on error
Notes: (1) In-place replacement of one pix. (2) The previous pix at that location is destroyed.
Input: pixas
rotation (0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg;
all rotations are clockwise)
Return: pixad, or null on error
Notes: (1) Rotates each pix in the pixa. Rotates and saves the boxes in the boxa if the boxa is full.
Input: pixas
wd (target width; use 0 if using height as target)
hd (target height; use 0 if using width as target)
Return: pixad, or null on error
Notes: (1) See pixaaScaleToSize()
| LEPT_DLL PIXA* pixaSelectByAreaFraction | ( | PIXA * | pixas, |
| l_float32 | thresh, | ||
| l_int32 | type, | ||
| l_int32 * | pchanged | ||
| ) |
Input: pixas
thresh (threshold ratio of fg pixels to (w * h))
type (L_SELECT_IF_LT, L_SELECT_IF_GT,
L_SELECT_IF_LTE, L_SELECT_IF_GTE)
&changed (<optional return> 1 if changed; 0 if clone returned)
Return: pixad, or null on error
Notes: (1) Returns a pixa clone if no components are removed. (2) Uses pix and box clones in the new pixa. (3) This filters components based on the fraction of fg pixels of the component in its bounding box. (4) Use L_SELECT_IF_LT or L_SELECT_IF_LTE to save components with less than the threshold fraction of foreground, and L_SELECT_IF_GT or L_SELECT_IF_GTE to remove them.
| LEPT_DLL PIXA* pixaSelectByPerimSizeRatio | ( | PIXA * | pixas, |
| l_float32 | thresh, | ||
| l_int32 | type, | ||
| l_int32 * | pchanged | ||
| ) |
Input: pixas
thresh (threshold ratio of fg boundary to b.b. circumference)
type (L_SELECT_IF_LT, L_SELECT_IF_GT,
L_SELECT_IF_LTE, L_SELECT_IF_GTE)
&changed (<optional return> 1 if changed; 0 if clone returned)
Return: pixad, or null on error
Notes: (1) Returns a pixa clone if no components are removed. (2) Uses pix and box clones in the new pixa. (3) See pixSelectByPerimSizeRatio().
| LEPT_DLL PIXA* pixaSelectByPerimToAreaRatio | ( | PIXA * | pixas, |
| l_float32 | thresh, | ||
| l_int32 | type, | ||
| l_int32 * | pchanged | ||
| ) |
pixaSelectByPerimToAreaRatio()
Input: pixas
thresh (threshold ratio of fg boundary to fg pixels)
type (L_SELECT_IF_LT, L_SELECT_IF_GT,
L_SELECT_IF_LTE, L_SELECT_IF_GTE)
&changed (<optional return> 1 if changed; 0 if clone returned)
Return: pixad, or null on error
Notes: (1) Returns a pixa clone if no components are removed. (2) Uses pix and box clones in the new pixa. (3) See pixSelectByPerimToAreaRatio().
| LEPT_DLL PIXA* pixaSelectBySize | ( | PIXA * | pixas, |
| l_int32 | width, | ||
| l_int32 | height, | ||
| l_int32 | type, | ||
| l_int32 | relation, | ||
| l_int32 * | pchanged | ||
| ) |
Input: pixas
width, height (threshold dimensions)
type (L_SELECT_WIDTH, L_SELECT_HEIGHT,
L_SELECT_IF_EITHER, L_SELECT_IF_BOTH)
relation (L_SELECT_IF_LT, L_SELECT_IF_GT,
L_SELECT_IF_LTE, L_SELECT_IF_GTE)
&changed (<optional return> 1 if changed; 0 otherwise)
Return: pixad, or null on error
Notes: (1) The args specify constraints on the size of the components that are kept. (2) Uses pix and box clones in the new pixa. (3) If the selection type is L_SELECT_WIDTH, the input height is ignored, and v.v. (4) To keep small components, use relation = L_SELECT_IF_LT or L_SELECT_IF_LTE. To keep large components, use relation = L_SELECT_IF_GT or L_SELECT_IF_GTE.
| LEPT_DLL PIXA* pixaSelectByWidthHeightRatio | ( | PIXA * | pixas, |
| l_float32 | thresh, | ||
| l_int32 | type, | ||
| l_int32 * | pchanged | ||
| ) |
pixaSelectByWidthHeightRatio()
Input: pixas
thresh (threshold ratio of width/height)
type (L_SELECT_IF_LT, L_SELECT_IF_GT,
L_SELECT_IF_LTE, L_SELECT_IF_GTE)
&changed (<optional return> 1 if changed; 0 if clone returned)
Return: pixad, or null on error
Notes: (1) Returns a pixa clone if no components are removed. (2) Uses pix and box clones in the new pixa. (3) This filters components based on the width-to-height ratio of each pix. (4) Use L_SELECT_IF_LT or L_SELECT_IF_LTE to save components with less than the threshold ratio, and L_SELECT_IF_GT or L_SELECT_IF_GTE to remove them.
Input: pixas
first (use 0 to select from the beginning)
last (use 0 to select to the end)
copyflag (L_COPY, L_CLONE)
Return: pixad, or null on error
Notes: (1) The copyflag specifies what we do with each pix from pixas. Specifically, L_CLONE inserts a clone into pixad of each selected pix from pixas.
Input: pixas
na (indicator numa)
&changed (<optional return> 1 if changed; 0 if clone returned)
Return: pixad, or null on error
Notes: (1) Returns a pixa clone if no components are removed. (2) Uses pix and box clones in the new pixa. (3) The indicator numa has values 0 (ignore) and 1 (accept). (4) If the source boxa is not fully populated, it is left empty in the dest pixa.
Input: pixas
str (string of indices into pixa, giving the pix to be selected)
&error (<optional return> 1 if any indices are invalid;
0 if all indices are valid)
Return: pixad, or null on error
Notes: (1) Returns a pixa with copies of selected pix. (2) Associated boxes are also copied, if fully populated.
Input: pixa
boxa
accesstype (L_INSERT, L_COPY, L_CLONE)
Return: 0 if OK, 1 on error
Notes: (1) This destroys the existing boxa in the pixa.
| LEPT_DLL l_int32 pixaSizeRange | ( | PIXA * | pixa, |
| l_int32 * | pminw, | ||
| l_int32 * | pminh, | ||
| l_int32 * | pmaxw, | ||
| l_int32 * | pmaxh | ||
| ) |
Input: pixa
&minw, &minh, &maxw, &maxh (<optional return> range of
dimensions of pix in the array)
Return: 0 if OK, 1 on error
| LEPT_DLL PIXA* pixaSort | ( | PIXA * | pixas, |
| l_int32 | sorttype, | ||
| l_int32 | sortorder, | ||
| NUMA ** | pnaindex, | ||
| l_int32 | copyflag | ||
| ) |
Input: pixas
sorttype (L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH,
L_SORT_BY_HEIGHT, L_SORT_BY_MIN_DIMENSION,
L_SORT_BY_MAX_DIMENSION, L_SORT_BY_PERIMETER,
L_SORT_BY_AREA, L_SORT_BY_ASPECT_RATIO)
sortorder (L_SORT_INCREASING, L_SORT_DECREASING)
&naindex (<optional return> index of sorted order into
original array)
copyflag (L_COPY, L_CLONE)
Return: pixad (sorted version of pixas), or null on error
Notes: (1) This sorts based on the data in the boxa. If the boxa count is not the same as the pixa count, this returns an error. (2) The copyflag refers to the pix and box copies that are inserted into the sorted pixa. These are either L_COPY or L_CLONE.
Input: pixas
naa (numaa that maps from the new pixaa to the input pixas)
copyflag (L_CLONE or L_COPY)
Return: paa (sorted), or null on error
Input: pixas
naindex (na that maps from the new pixa to the input pixa)
copyflag (L_COPY, L_CLONE)
Return: pixad (sorted), or null on error
| LEPT_DLL PIXA* pixaSplitPix | ( | PIX * | pixs, |
| l_int32 | nx, | ||
| l_int32 | ny, | ||
| l_int32 | borderwidth, | ||
| l_uint32 | bordercolor | ||
| ) |
Input: pixs (with individual components on a lattice)
nx (number of mosaic cells horizontally)
ny (number of mosaic cells vertically)
borderwidth (of added border on all sides)
bordercolor (in our RGBA format: 0xrrggbbaa)
Return: pixa, or null on error
Notes: (1) This is a variant on pixaCreateFromPix(), where we simply divide the image up into (approximately) equal subunits. If you want the subimages to have essentially the same aspect ratio as the input pix, use nx = ny. (2) If borderwidth is 0, we ignore the input bordercolor and redefine it to white. (3) The bordercolor is always used to initialize each tiled pix, so that if the src is clipped, the unblitted part will be this color. This avoids 1 pixel wide black stripes at the left and lower edges.
| LEPT_DLL l_int32 pixAssignToNearestColor | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| PIX * | pixm, | ||
| l_int32 | level, | ||
| l_int32 * | countarray | ||
| ) |
Input: pixd (8 bpp, colormapped)
pixs (32 bpp; 24-bit color)
pixm (<optional> 1 bpp)
level (of octcube used for finding nearest color in cmap)
countarray (<optional> ptr to array, in which we can store
the number of pixels found in each color in
the colormap in pixd)
Return: 0 if OK, 1 on error
Notes: (1) This is used in phase 2 of color segmentation, where pixs is the original input image to pixColorSegment(), and pixd is the colormapped image returned from pixColorSegmentCluster(). It is also used, with a mask, in phase 4. (2) This is an in-place operation. (3) The colormap in pixd is unchanged. (4) pixs and pixd must be the same size (w, h). (5) The selection mask pixm can be null. If it exists, it must be the same size as pixs and pixd, and only pixels corresponding to fg in pixm are assigned. Set to NULL if all pixels in pixd are to be assigned. (6) The countarray can be null. If it exists, it is pre-allocated and of a size at least equal to the size of the colormap in pixd. (7) This does a best-fit (non-greedy) assignment of pixels to existing clusters. Specifically, it assigns each pixel in pixd to the color index in the pixd colormap that has a color closest to the corresponding rgb pixel in pixs. (8) 'level' is the octcube level used to quickly find the nearest color in the colormap for each pixel. For color segmentation, this parameter is set to LEVEL_IN_OCTCUBE. (9) We build a mapping table from octcube to colormap index so that this function can run in a time (otherwise) independent of the number of colors in the colormap. This avoids a brute-force search for the closest colormap color to each pixel in the image.
Input: pix (8 or 16 bpp; no colormap)
box (<optional> clipping box for sum; can be null)
type (L_WHITE_IS_MAX, L_BLACK_IS_MAX)
Return: na of pixel averages by column, or null on error
Notes: (1) To resample for a bin size different from 1, use numaUniformSampling() on the result of this function. (2) If type == L_BLACK_IS_MAX, black pixels get the maximum value (0xff for 8 bpp, 0xffff for 16 bpp) and white get 0.
Input: pix (8 or 16 bpp; no colormap)
box (<optional> clipping box for sum; can be null)
type (L_WHITE_IS_MAX, L_BLACK_IS_MAX)
Return: na of pixel averages by row, or null on error
Notes: (1) To resample for a bin size different from 1, use numaUniformSampling() on the result of this function. (2) If type == L_BLACK_IS_MAX, black pixels get the maximum value (0xff for 8 bpp, 0xffff for 16 bpp) and white get 0.
Input: pix (1, 2, 4, 8 bpp; not cmapped)
box (<optional> if null, use entire image)
&ave (<return> average of pixel values in region)
Return: 0 if OK; 1 on error
| LEPT_DLL NUMA* pixAverageIntensityProfile | ( | PIX * | pixs, |
| l_float32 | fract, | ||
| l_int32 | dir, | ||
| l_int32 | first, | ||
| l_int32 | last, | ||
| l_int32 | factor1, | ||
| l_int32 | factor2 | ||
| ) |
| LEPT_DLL l_float32 pixAverageOnLine | ( | PIX * | pixs, |
| l_int32 | x1, | ||
| l_int32 | y1, | ||
| l_int32 | x2, | ||
| l_int32 | y2, | ||
| l_int32 | factor | ||
| ) |
Input: pixs (1 bpp or 8 bpp; no colormap)
x1, y1 (starting pt for line)
x2, y2 (end pt for line)
factor (sampling; >= 1)
Return: average of pixel values along line, or null on error.
Notes: (1) The line must be either horizontal or vertical, so either y1 == y2 (horizontal) or x1 == x2 (vertical). (2) If horizontal, x1 must be <= x2. If vertical, y1 must be <= y2. This can be used to characterize the intensity smoothness along a line. (3) Input end points are clipped to the pix.
Input: pixa
&maxdepth (<optional return> max depth of all pix)
Return: depth (return 0 if they're not all the same, or on error)
Notes: (1) It is considered to be an error if there are no pix.
Input: filename
pixa
Return: 0 if OK, 1 on error
Notes: (1) The pix are stored in the file as png. If the png library is not linked, this will fail.
| LEPT_DLL l_int32 pixaWriteCompressedToPS | ( | PIXA * | pixa, |
| const char * | fileout, | ||
| l_int32 | res, | ||
| l_int32 | level | ||
| ) |
Input: rootname
pixa
format (defined in imageio.h; see notes for default)
Return: 0 if OK; 1 on error
Notes: (1) Use @format = IFF_DEFAULT to decide the output format individually for each pix.
Input: stream (opened for "wb")
pixa
Return: 0 if OK, 1 on error
Notes: (1) The pix are stored in the file as png. If the png library is not linked, this will fail.
Input: stream
pixa
Return: 0 if OK, 1 on error.
Notes: (1) For each pix in the pixa, write out the pix dimensions, spp, text string (if it exists), and cmap info.
| LEPT_DLL PIX* pixBackgroundNorm | ( | PIX * | pixs, |
| PIX * | pixim, | ||
| PIX * | pixg, | ||
| l_int32 | sx, | ||
| l_int32 | sy, | ||
| l_int32 | thresh, | ||
| l_int32 | mincount, | ||
| l_int32 | bgval, | ||
| l_int32 | smoothx, | ||
| l_int32 | smoothy | ||
| ) |
Input: pixs (8 bpp grayscale or 32 bpp rgb)
pixim (<optional> 1 bpp 'image' mask; can be null)
pixg (<optional> 8 bpp grayscale version; can be null)
sx, sy (tile size in pixels)
thresh (threshold for determining foreground)
mincount (min threshold on counts in a tile)
bgval (target bg val; typ. > 128)
smoothx (half-width of block convolution kernel width)
smoothy (half-width of block convolution kernel height)
Return: pixd (8 bpp or 32 bpp rgb), or null on error
Notes: (1) This is a top-level interface for normalizing the image intensity by mapping the image so that the background is near the input value 'bgval'. (2) The input image is either grayscale or rgb. (3) For each component in the input image, the background value in each tile is estimated using the values in the tile that are not part of the foreground, where the foreground is determined by the input 'thresh' argument. (4) An optional binary mask can be specified, with the foreground pixels typically over image regions. The resulting background map values will be determined by surrounding pixels that are not under the mask foreground. The origin (0,0) of this mask is assumed to be aligned with the origin of the input image. This binary mask must not fully cover pixs, because then there will be no pixels in the input image available to compute the background. (5) An optional grayscale version of the input pixs can be supplied. The only reason to do this is if the input is RGB and this grayscale version can be used elsewhere. If the input is RGB and this is not supplied, it is made internally using only the green component, and destroyed after use. (6) The dimensions of the pixel tile (sx, sy) give the amount by which the map is reduced in size from the input image. (7) The threshold is used to binarize the input image, in order to locate the foreground components. If this is set too low, some actual foreground may be used to determine the maps; if set too high, there may not be enough background to determine the map values accurately. Typically, it's better to err by setting the threshold too high. (8) A 'mincount' threshold is a minimum count of pixels in a tile for which a background reading is made, in order for that pixel in the map to be valid. This number should perhaps be at least 1/3 the size of the tile. (9) A 'bgval' target background value for the normalized image. This should be at least 128. 
If set too close to 255, some clipping will occur in the result. (10) Two factors, 'smoothx' and 'smoothy', are input for smoothing the map. Each low-pass filter kernel dimension is 2 * (smoothing factor) + 1, so a value of 0 means no smoothing. A value of 1 or 2 is recommended.
| LEPT_DLL PIX* pixBackgroundNormFlex | ( | PIX * | pixs, |
| l_int32 | sx, | ||
| l_int32 | sy, | ||
| l_int32 | smoothx, | ||
| l_int32 | smoothy, | ||
| l_int32 | delta | ||
| ) |
Input: pixs (8 bpp grayscale; not colormapped)
sx, sy (desired tile dimensions; actual size may vary; use
values between 3 and 10)
smoothx, smoothy (half-width of convolution kernel applied to
threshold array: use values between 1 and 3)
delta (difference parameter in basin filling; use 0
to skip)
Return: pixd (8 bpp, background-normalized), or null on error
Notes: (1) This does adaptation flexibly to a quickly varying background. For that reason, all input parameters should be small. (2) sx and sy give the tile size; they should be in [5 - 7]. (3) The full width and height of the convolution kernel are (2 * smoothx + 1) and (2 * smoothy + 1). The half-widths smoothx and smoothy should be in [1 - 2]. (4) Basin filling is used to fill the large fg regions. The parameter @delta measures the height that the black background is raised from the local minima. By raising the background, it is possible to threshold the large fg regions to foreground. If @delta is too large, bg regions will be lifted, causing thickening of the fg regions. Use 0 to skip.
| LEPT_DLL l_int32 pixBackgroundNormGrayArray | ( | PIX * | pixs, |
| PIX * | pixim, | ||
| l_int32 | sx, | ||
| l_int32 | sy, | ||
| l_int32 | thresh, | ||
| l_int32 | mincount, | ||
| l_int32 | bgval, | ||
| l_int32 | smoothx, | ||
| l_int32 | smoothy, | ||
| PIX ** | ppixd | ||
| ) |
Input: pixs (8 bpp grayscale)
pixim (<optional> 1 bpp 'image' mask; can be null)
sx, sy (tile size in pixels)
thresh (threshold for determining foreground)
mincount (min threshold on counts in a tile)
bgval (target bg val; typ. > 128)
smoothx (half-width of block convolution kernel width)
smoothy (half-width of block convolution kernel height)
&pixd (<return> 16 bpp array of inverted background value)
Return: 0 if OK, 1 on error
Notes: (1) See notes in pixBackgroundNorm(). (2) This returns a 16 bpp pix that can be used by pixApplyInvBackgroundGrayMap() to generate a normalized version of the input pixs.
| LEPT_DLL l_int32 pixBackgroundNormGrayArrayMorph | ( | PIX * | pixs, |
| PIX * | pixim, | ||
| l_int32 | reduction, | ||
| l_int32 | size, | ||
| l_int32 | bgval, | ||
| PIX ** | ppixd | ||
| ) |
pixBackgroundNormGrayArrayMorph()
Input: pixs (8 bpp grayscale)
pixim (<optional> 1 bpp 'image' mask; can be null)
reduction (at which morph closings are done; between 2 and 16)
size (of square Sel for the closing; use an odd number)
bgval (target bg val; typ. > 128)
&pixd (<return> 16 bpp array of inverted background value)
Return: 0 if OK, 1 on error
Notes: (1) See notes in pixBackgroundNormMorph(). (2) This returns a 16 bpp pix that can be used by pixApplyInvBackgroundGrayMap() to generate a normalized version of the input pixs.
| LEPT_DLL PIX* pixBackgroundNormMorph | ( | PIX * | pixs, |
| PIX * | pixim, | ||
| l_int32 | reduction, | ||
| l_int32 | size, | ||
| l_int32 | bgval | ||
| ) |
Input: pixs (8 bpp grayscale or 32 bpp rgb)
pixim (<optional> 1 bpp 'image' mask; can be null)
reduction (at which morph closings are done; between 2 and 16)
size (of square Sel for the closing; use an odd number)
bgval (target bg val; typ. > 128)
Return: pixd (8 bpp), or null on error
Notes: (1) This is a top-level interface for normalizing the image intensity by mapping the image so that the background is near the input value 'bgval'. (2) The input image is either grayscale or rgb. (3) For each component in the input image, the background value is estimated using a grayscale closing; hence the 'Morph' in the function name. (4) An optional binary mask can be specified, with the foreground pixels typically over image regions. The resulting background map values will be determined by surrounding pixels that are not under the mask foreground. The origin (0,0) of this mask is assumed to be aligned with the origin of the input image. This binary mask must not fully cover pixs, because then there will be no pixels in the input image available to compute the background. (5) The map is computed at reduced size (given by 'reduction') from the input pixs and optional pixim. At this scale, pixs is closed to remove the background, using a square Sel of odd dimension. The product of reduction * size should be large enough to remove most of the text foreground. (6) No convolutional smoothing needs to be done on the map before inverting it. (7) A 'bgval' target background value for the normalized image. This should be at least 128. If set too close to 255, some clipping will occur in the result.
| LEPT_DLL l_int32 pixBackgroundNormRGBArrays | ( | PIX * | pixs, |
| PIX * | pixim, | ||
| PIX * | pixg, | ||
| l_int32 | sx, | ||
| l_int32 | sy, | ||
| l_int32 | thresh, | ||
| l_int32 | mincount, | ||
| l_int32 | bgval, | ||
| l_int32 | smoothx, | ||
| l_int32 | smoothy, | ||
| PIX ** | ppixr, | ||
| PIX ** | ppixg, | ||
| PIX ** | ppixb | ||
| ) |
Input: pixs (32 bpp rgb)
pixim (<optional> 1 bpp 'image' mask; can be null)
pixg (<optional> 8 bpp grayscale version; can be null)
sx, sy (tile size in pixels)
thresh (threshold for determining foreground)
mincount (min threshold on counts in a tile)
bgval (target bg val; typ. > 128)
smoothx (half-width of block convolution kernel width)
smoothy (half-width of block convolution kernel height)
&pixr (<return> 16 bpp array of inverted R background value)
&pixg (<return> 16 bpp array of inverted G background value)
&pixb (<return> 16 bpp array of inverted B background value)
Return: 0 if OK, 1 on error
Notes: (1) See notes in pixBackgroundNorm(). (2) This returns a set of three 16 bpp pix that can be used by pixApplyInvBackgroundGrayMap() to generate a normalized version of each component of the input pixs.
| LEPT_DLL l_int32 pixBackgroundNormRGBArraysMorph | ( | PIX * | pixs, |
| PIX * | pixim, | ||
| l_int32 | reduction, | ||
| l_int32 | size, | ||
| l_int32 | bgval, | ||
| PIX ** | ppixr, | ||
| PIX ** | ppixg, | ||
| PIX ** | ppixb | ||
| ) |
pixBackgroundNormRGBArraysMorph()
Input: pixs (32 bpp rgb)
pixim (<optional> 1 bpp 'image' mask; can be null)
reduction (at which morph closings are done; between 2 and 16)
size (of square Sel for the closing; use an odd number)
bgval (target bg val; typ. > 128)
&pixr (<return> 16 bpp array of inverted R background value)
&pixg (<return> 16 bpp array of inverted G background value)
&pixb (<return> 16 bpp array of inverted B background value)
Return: 0 if OK, 1 on error
Notes: (1) See notes in pixBackgroundNormMorph(). (2) This returns a set of three 16 bpp pix that can be used by pixApplyInvBackgroundGrayMap() to generate a normalized version of each component of the input pixs.
Input: pixs (8 bpp grayscale or 32 bpp rgb)
pixim (<optional> 1 bpp 'image' mask; can be null)
pixg (<optional> 8 bpp grayscale version; can be null)
Return: pixd (8 bpp or 32 bpp rgb), or null on error
Notes: (1) This is a simplified interface to pixBackgroundNorm(), where seven parameters are defaulted. (2) The input image is either grayscale or rgb. (3) See pixBackgroundNorm() for usage and function.
| LEPT_DLL l_int32 pixBestCorrelation | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| l_int32 | area1, | ||
| l_int32 | area2, | ||
| l_int32 | etransx, | ||
| l_int32 | etransy, | ||
| l_int32 | maxshift, | ||
| l_int32 * | tab8, | ||
| l_int32 * | pdelx, | ||
| l_int32 * | pdely, | ||
| l_float32 * | pscore, | ||
| l_int32 | debugflag | ||
| ) |
Input: pix1 (1 bpp)
pix2 (1 bpp)
area1 (number of on pixels in pix1)
area2 (number of on pixels in pix2)
etransx (estimated x translation of pix2 to align with pix1)
etransy (estimated y translation of pix2 to align with pix1)
maxshift (max x and y shift of pix2, around the estimated
alignment location, relative to pix1)
tab8 (<optional> sum tab for ON pixels in byte; can be NULL)
&delx (<optional return> best x shift of pix2 relative to pix1)
&dely (<optional return> best y shift of pix2 relative to pix1)
&score (<optional return> maximum score found; can be NULL)
debugflag (<= 0 to skip; positive to generate output.
The integer is used to label the debug image.)
Return: 0 if OK, 1 on error
Notes: (1) This maximizes the correlation score between two 1 bpp images, by starting with an estimate of the alignment (@etransx, @etransy) and computing the correlation around this. It optionally returns the shift (@delx, @dely) that maximizes the correlation score when pix2 is shifted by this amount relative to pix1. (2) Get the centroids of pix1 and pix2, using pixCentroid(), to compute (@etransx, @etransy). Get the areas using pixCountPixels(). (3) The centroid of pix2 is shifted with respect to the centroid of pix1 by all values between -maxshift and maxshift, and likewise for the y shifts. Therefore, the number of correlations computed is: (2 * maxshift + 1) * (2 * maxshift + 1) Consequently, if pix1 and pix2 are large, you should do this in a coarse-to-fine sequence. See the use of this function in pixCompareWithTranslation().
| LEPT_DLL PIX* pixBilateral | ( | PIX * | pixs, |
| l_float32 | spatial_stdev, | ||
| l_float32 | range_stdev, | ||
| l_int32 | ncomps, | ||
| l_int32 | reduction | ||
| ) |
Input: pixs (8 bpp gray or 32 bpp rgb, no colormap)
spatial_stdev (of gaussian kernel; in pixels, > 0.5)
range_stdev (of gaussian range kernel; > 5.0; typ. 50.0)
ncomps (number of intermediate sums J(k,x); in [4 ... 30])
reduction (1, 2 or 4)
Return: pixd (bilateral filtered image), or null on error
Notes: (1) This performs a relatively fast, separable bilateral filtering operation. The time is proportional to ncomps and varies inversely approximately as the cube of the reduction factor. See bilateral.h for algorithm details. (2) We impose minimum values for range_stdev and ncomps to avoid nasty artifacts when either are too small. We also impose a constraint on their product: ncomps * range_stdev >= 100. So for values of range_stdev >= 25, ncomps can be as small as 4. Here is a qualitative, intuitive explanation for this constraint. Call the difference in k values between the J(k) == 'delta', where 'delta' ~ 200 / ncomps Then this constraint is roughly equivalent to the condition: 'delta' < 2 * range_stdev Note that at an intensity difference of (2 * range_stdev), the range part of the kernel reduces the effect by the factor 0.14. This constraint requires that we have a sufficient number of PCBs (i.e., a small enough 'delta'), so that for any value of image intensity I, there exists a k (and a PCB, J(k)), such that |I - k| < range_stdev Any fewer PCBs and we don't have enough to support this condition. (3) The upper limit of 30 on ncomps is imposed because the gain in accuracy is not worth the extra computation. (4) The size of the gaussian kernel is twice the spatial_stdev on each side of the origin. The minimum value of spatial_stdev, 0.5, is required to have a finite sized spatial kernel. In practice, a much larger value is used. (5) Computation of the intermediate images goes inversely as the cube of the reduction factor. If you can use a reduction of 2 or 4, it is well-advised. (6) The range kernel is defined over the absolute value of pixel grayscale differences, and hence must have size 256 x 1. Values in the array represent the multiplying weight depending on the absolute gray value difference between the source pixel and the neighboring pixel, and should be monotonically decreasing. (7) Interesting observation. 
Run this on prog/fish24.jpg, with range_stdev = 60, ncomps = 6, and spatial_stdev = {10, 30, 50}. As spatial_stdev gets larger, we get the counter-intuitive result that the body of the red fish becomes less blurry.
Input: pixs (8 bpp gray or 32 bpp rgb)
spatial_kel (gaussian kernel)
range_kel (<optional> 256 x 1, monotonically decreasing)
Return: pixd (8 bpp bilateral filtered image)
Notes: (1) The spatial_kel is a conventional smoothing kernel, typically a 2-d Gaussian kernel or other block kernel. It can be either normalized or not, but must be everywhere positive. (2) The range_kel is defined over the absolute value of pixel grayscale differences, and hence must have size 256 x 1. Values in the array represent the multiplying weight for each gray value difference between the target pixel and center of the kernel, and should be monotonically decreasing. (3) If range_kel == NULL, a constant weight is applied regardless of the range value difference. This degenerates to a regular pixConvolve() with a normalized kernel.
| LEPT_DLL PIX* pixBilateralGray | ( | PIX * | pixs, |
| l_float32 | spatial_stdev, | ||
| l_float32 | range_stdev, | ||
| l_int32 | ncomps, | ||
| l_int32 | reduction | ||
| ) |
Input: pixs (8 bpp gray)
spatial_stdev (of gaussian kernel; in pixels, > 0.5)
range_stdev (of gaussian range kernel; > 5.0; typ. 50.0)
ncomps (number of intermediate sums J(k,x); in [4 ... 30])
reduction (1, 2 or 4)
Return: pixd (8 bpp bilateral filtered image), or null on error
Notes: (1) See pixBilateral() for constraints on the input parameters. (2) See pixBilateral() for algorithm details.
Input: pixs (8 bpp gray)
spatial_kel (gaussian kernel)
range_kel (<optional> 256 x 1, monotonically decreasing)
Return: pixd (8 bpp bilateral filtered image)
Notes: (1) See pixBilateralExact().
Input: pixs (all depths; colormap ok)
vc (vector of 8 coefficients for bilinear transformation)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error
Notes: (1) Brings in either black or white pixels from the boundary. (2) Removes any existing colormap, if necessary, before transforming.
Input: pixs (32 bpp)
vc (vector of 8 coefficients for bilinear transformation)
colorval (e.g., 0 to bring in BLACK, 0xffffff00 for WHITE)
Return: pixd, or null on error
Input: pixs (8 bpp)
vc (vector of 8 coefficients for bilinear transformation)
grayval (0 to bring in BLACK, 255 for WHITE)
Return: pixd, or null on error
Input: pixs (all depths; colormap ok)
ptad (4 pts of final coordinate space)
ptas (4 pts of initial coordinate space)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error
Notes: (1) Brings in either black or white pixels from the boundary. (2) Removes any existing colormap, if necessary, before transforming.
Input: pixs (32 bpp)
ptad (4 pts of final coordinate space)
ptas (4 pts of initial coordinate space)
colorval (e.g., 0 to bring in BLACK, 0xffffff00 for WHITE)
Return: pixd, or null on error
Input: pixs (8 bpp)
ptad (4 pts of final coordinate space)
ptas (4 pts of initial coordinate space)
grayval (0 to bring in BLACK, 255 for WHITE)
Return: pixd, or null on error
| LEPT_DLL PIX* pixBilinearPtaWithAlpha | ( | PIX * | pixs, |
| PTA * | ptad, | ||
| PTA * | ptas, | ||
| PIX * | pixg, | ||
| l_float32 | fract, | ||
| l_int32 | border | ||
| ) |
Input: pixs (32 bpp rgb)
ptad (4 pts of final coordinate space)
ptas (4 pts of initial coordinate space)
pixg (<optional> 8 bpp, can be null)
fract (between 0.0 and 1.0, with 0.0 fully transparent
and 1.0 fully opaque)
border (of pixels added to capture transformed source pixels)
Return: pixd, or null on error
Notes: (1) The alpha channel is transformed separately from pixs, and aligns with it, being fully transparent outside the boundary of the transformed pixs. For pixels that are fully transparent, a blending function like pixBlendWithGrayMask() will give zero weight to corresponding pixels in pixs. (2) If pixg is NULL, it is generated as an alpha layer that is partially opaque, using @fract. Otherwise, it is cropped to pixs if required and @fract is ignored. The alpha channel in pixs is never used. (3) Colormaps are removed. (4) When pixs is transformed, it doesn't matter what color is brought in because the alpha channel will be transparent (0) there. (5) To avoid losing source pixels in the destination, it may be necessary to add a border to the source pix before doing the bilinear transformation. This can be any non-negative number. (6) The input @ptad and @ptas are in a coordinate space before the border is added. Internally, we compensate for this before doing the bilinear transform on the image after the border is added. (7) The default setting for the border values in the alpha channel is 0 (transparent) for the outermost ring of pixels and (0.5 * fract * 255) for the second ring. When blended over a second image, this (a) shrinks the visible image to make a clean overlap edge with an image below, and (b) softens the edges by weakening the aliasing there. Use l_setAlphaMaskBorder() to change these values.
Input: pixs (all depths)
vc (vector of 8 coefficients for bilinear transformation)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error
Notes: (1) Brings in either black or white pixels from the boundary. (2) Retains colormap, which you can do for a sampled transform. (3) For 8 or 32 bpp, much better quality is obtained by the somewhat slower pixBilinear(). See that function for relative timings between sampled and interpolated.
Input: pixs (all depths)
ptad (4 pts of final coordinate space)
ptas (4 pts of initial coordinate space)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error
Notes: (1) Brings in either black or white pixels from the boundary. (2) Retains colormap, which you can do for a sampled transform. (3) No 3 of the 4 points may be collinear. (4) For 8 and 32 bpp pix, better quality is obtained by the somewhat slower pixBilinearPta(). See that function for relative timings between sampled and interpolated.
Input: pixs1 (blendee)
pixs2 (blender; typ. smaller)
x,y (origin (UL corner) of pixs2 relative to
the origin of pixs1; can be < 0)
fract (blending fraction)
Return: pixd (blended image), or null on error
Notes: (1) This is a simple top-level interface. For more flexibility, call directly into pixBlendMask(), etc.
| LEPT_DLL PIX* pixBlendBackgroundToColor | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| BOX * | box, | ||
| l_uint32 | color, | ||
| l_float32 | gamma, | ||
| l_int32 | minval, | ||
| l_int32 | maxval | ||
| ) |
Input: pixd (can be NULL or pixs)
pixs (32 bpp rgb)
box (region for blending; can be NULL)
color (32 bit color in 0xrrggbb00 format)
gamma, minval, maxval (args for grayscale TRC mapping)
Return: pixd always
Notes: (1) This in effect replaces light background pixels in pixs by the input color. It does it by alpha blending so that there are no visible artifacts from hard cutoffs. (2) If pixd == pixs, this is done in-place. (3) If box == NULL, this is performed on all of pixs. (4) The alpha component for blending is derived from pixs, by converting to grayscale and enhancing with a TRC. (5) The last three arguments specify the TRC operation. Suggested values are: @gamma = 0.3, @minval = 50, @maxval = 200. To skip the TRC, use @gamma == 1, @minval = 0, @maxval = 255. See pixGammaTRC() for details.
Input: pixs (any depth; can be cmapped)
boxa (of boxes, to blend/paint)
fract (of box color to use)
Return: pixd (32 bpp, with blend/painted boxes), or null on error
Notes: (1) pixs is converted to 32 bpp. (2) This differs from pixPaintBoxaRandom(), in that the colors here are blended with the color of pixs. (3) We use up to 254 different colors for painting the regions. (4) If boxes overlap, the final color depends only on the last rect that is used.
Input: pixs (2, 4 or 8 bpp, with colormap)
pixb (colormapped blender)
x, y (UL corner of blender relative to pixs)
sindex (colormap index of pixels in pixs to be changed)
Return: 0 if OK, 1 on error
Note: (1) This function combines two colormaps, and replaces the pixels in pixs that have a specified color value with those in pixb. (2) sindex must be in the existing colormap; otherwise an error is returned. In use, sindex will typically be the index for white (255, 255, 255). (3) Blender colors that already exist in the colormap are used; others are added. If any blender colors cannot be stored in the colormap, an error is returned. (4) In the implementation, a mapping is generated from each original blender colormap index to the corresponding index in the expanded colormap for pixs. Then for each pixel in pixs with value sindex, and which is covered by a blender pixel, the new index corresponding to the blender pixel is substituted for sindex.
| LEPT_DLL PIX* pixBlendColor | ( | PIX * | pixd, |
| PIX * | pixs1, | ||
| PIX * | pixs2, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_float32 | fract, | ||
| l_int32 | transparent, | ||
| l_uint32 | transpix | ||
| ) |
Input: pixd (<optional>; either NULL or equal to pixs1 for in-place)
pixs1 (blendee; depth > 1)
pixs2 (blender, any depth; typ. smaller in size than pixs1)
x,y (origin (UL corner) of pixs2 relative to
the origin of pixs1)
fract (blending fraction)
transparent (1 to use transparency; 0 otherwise)
transpix (pixel color in pixs2 that is to be transparent)
Return: pixd, or null on error
Notes: (1) For inplace operation (pixs1 must be 32 bpp), call it this way: pixBlendColor(pixs1, pixs1, pixs2, ...) (2) For generating a new pixd: pixd = pixBlendColor(NULL, pixs1, pixs2, ...) (3) If pixs2 is not 32 bpp rgb, it is converted. (4) Clipping of pixs2 to pixs1 is done in the inner pixel loop. (5) If pixs1 has a colormap, it is removed to generate a 32 bpp pix. (6) If pixs1 has depth < 32, it is unpacked to generate a 32 bpp pix. (7) If transparent = 0, the blending fraction (fract) is applied equally to all pixels. (8) If transparent = 1, all pixels of value transpix (typically either 0 or 0xffffff00) in pixs2 are transparent in the blend.
| LEPT_DLL PIX* pixBlendColorByChannel | ( | PIX * | pixd, |
| PIX * | pixs1, | ||
| PIX * | pixs2, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_float32 | rfract, | ||
| l_float32 | gfract, | ||
| l_float32 | bfract, | ||
| l_int32 | transparent, | ||
| l_uint32 | transpix | ||
| ) |
| LEPT_DLL PIX* pixBlendGray | ( | PIX * | pixd, |
| PIX * | pixs1, | ||
| PIX * | pixs2, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_float32 | fract, | ||
| l_int32 | type, | ||
| l_int32 | transparent, | ||
| l_uint32 | transpix | ||
| ) |
Input: pixd (<optional>; either NULL or equal to pixs1 for in-place)
pixs1 (blendee, depth > 1)
pixs2 (blender, any depth; typ. smaller in size than pixs1)
x,y (origin (UL corner) of pixs2 relative to
the origin of pixs1; can be < 0)
fract (blending fraction)
type (L_BLEND_GRAY, L_BLEND_GRAY_WITH_INVERSE)
transparent (1 to use transparency; 0 otherwise)
transpix (pixel grayval in pixs2 that is to be transparent)
Return: pixd if OK; pixs1 on error
Notes: (1) For inplace operation (pixs1 not cmapped), call it this way: pixBlendGray(pixs1, pixs1, pixs2, ...) (2) For generating a new pixd: pixd = pixBlendGray(NULL, pixs1, pixs2, ...) (3) Clipping of pixs2 to pixs1 is done in the inner pixel loop. (4) If pixs1 has a colormap, it is removed; otherwise, if pixs1 has depth < 8, it is unpacked to generate a 8 bpp pix. (5) If transparent = 0, the blending fraction (fract) is applied equally to all pixels. (6) If transparent = 1, all pixels of value transpix (typically either 0 or 0xff) in pixs2 are transparent in the blend. (7) After processing pixs1, it is either 8 bpp or 32 bpp:
| LEPT_DLL PIX* pixBlendGrayAdapt | ( | PIX * | pixd, |
| PIX * | pixs1, | ||
| PIX * | pixs2, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_float32 | fract, | ||
| l_int32 | shift | ||
| ) |
Input: pixd (<optional>; either NULL or equal to pixs1 for in-place)
pixs1 (blendee, depth > 1)
pixs2 (blender, any depth; typ. smaller in size than pixs1)
x,y (origin (UL corner) of pixs2 relative to
the origin of pixs1; can be < 0)
fract (blending fraction)
shift (>= 0 but <= 128: shift of zero blend value from
median source; use -1 for default value)
Return: pixd if OK; pixs1 on error
Notes: (1) For inplace operation (pixs1 not cmapped), call it this way: pixBlendGrayAdapt(pixs1, pixs1, pixs2, ...) For generating a new pixd: pixd = pixBlendGrayAdapt(NULL, pixs1, pixs2, ...) (2) Clipping of pixs2 to pixs1 is done in the inner pixel loop. (3) If pixs1 has a colormap, it is removed. (4) If pixs1 has depth < 8, it is unpacked to generate an 8 bpp pix. (5) This does a blend with inverse. Whereas in pixBlendGray(), the zero blend point is where the blendee pixel is 128, here the zero blend point is found adaptively, with respect to the median of the blendee region. If the median is < 128, the zero blend point is found from median + shift. Otherwise, if the median >= 128, the zero blend point is median - shift. The purpose of shifting the zero blend point away from the median is to prevent a situation in pixBlendGray() where the median is 128 and the blender is not visible. The default value of shift is 64. (6) After processing pixs1, it is either 8 bpp or 32 bpp:
| LEPT_DLL PIX* pixBlendGrayInverse | ( | PIX * | pixd, |
| PIX * | pixs1, | ||
| PIX * | pixs2, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_float32 | fract | ||
| ) |
Input: pixd (<optional>; either NULL or equal to pixs1 for in-place)
pixs1 (blendee, depth > 1)
pixs2 (blender, any depth; typ. smaller in size than pixs1)
x,y (origin (UL corner) of pixs2 relative to
the origin of pixs1; can be < 0)
fract (blending fraction)
Return: pixd if OK; pixs1 on error
Notes: (1) For inplace operation (pixs1 not cmapped), call it this way: pixBlendGrayInverse(pixs1, pixs1, pixs2, ...) (2) For generating a new pixd: pixd = pixBlendGrayInverse(NULL, pixs1, pixs2, ...) (3) Clipping of pixs2 to pixs1 is done in the inner pixel loop. (4) If pixs1 has a colormap, it is removed; otherwise if pixs1 has depth < 8, it is unpacked to generate a 8 bpp pix. (5) This is a no-nonsense blender. It changes the src1 pixel except when the src1 pixel is midlevel gray. Use fract == 1 for the most aggressive blending, where, if the gray pixel in pixs2 is 0, we get a complete inversion of the color of the src pixel in pixs1. (6) The basic logic is that each component transforms by: d --> c * d + (1 - c ) * (f * (1 - d) + d * (1 - f)) where c is the blender pixel from pixs2, f is @fract, c and d are normalized to [0...1] This has the property that for f == 0 (no blend) or c == 1 (white): d --> d For c == 0 (black) we get maximum inversion: d --> f * (1 - d) + d * (1 - f) [inversion by fraction f]
| LEPT_DLL PIX* pixBlendHardLight | ( | PIX * | pixd, |
| PIX * | pixs1, | ||
| PIX * | pixs2, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_float32 | fract | ||
| ) |
Input: pixs (32 bpp rgb)
box (<optional> in which all pixels will be blended)
val (blend value; 0xrrggbb00)
fract (fraction of color to be blended with each pixel in pixs)
Return: 0 if OK; 1 on error
Notes: (1) This is an in-place function. It blends the input color @val with the pixels in pixs in the specified rectangle. If no rectangle is specified, it blends over the entire image.
| LEPT_DLL PIX* pixBlendMask | ( | PIX * | pixd, |
| PIX * | pixs1, | ||
| PIX * | pixs2, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_float32 | fract, | ||
| l_int32 | type | ||
| ) |
Input: pixd (<optional>; either NULL or equal to pixs1 for in-place)
pixs1 (blendee, depth > 1)
pixs2 (blender, 1 bpp; typ. smaller in size than pixs1)
x,y (origin (UL corner) of pixs2 relative to
the origin of pixs1; can be < 0)
fract (blending fraction)
type (L_BLEND_WITH_INVERSE, L_BLEND_TO_WHITE, L_BLEND_TO_BLACK)
Return: pixd if OK; null on error
Notes: (1) Clipping of pixs2 to pixs1 is done in the inner pixel loop. (2) If pixs1 has a colormap, it is removed. (3) For inplace operation (pixs1 not cmapped), call it this way: pixBlendMask(pixs1, pixs1, pixs2, ...) (4) For generating a new pixd: pixd = pixBlendMask(NULL, pixs1, pixs2, ...) (5) Only call in-place if pixs1 does not have a colormap. (6) Invalid @fract defaults to 0.5 with a warning. Invalid @type defaults to L_BLEND_WITH_INVERSE with a warning.
Input: pixs1 (8 bpp gray, rgb, rgba or colormapped)
pixs2 (8 bpp gray, rgb, rgba or colormapped)
pixg (<optional> 8 bpp gray, for transparency of pixs2;
can be null)
x, y (UL corner of pixs2 and pixg with respect to pixs1)
Return: pixd (blended image), or null on error
Notes: (1) The result is 8 bpp grayscale if both pixs1 and pixs2 are 8 bpp gray. Otherwise, the result is 32 bpp rgb. (2) pixg is an 8 bpp transparency image, where 0 is transparent and 255 is opaque. It determines the transparency of pixs2 when applied over pixs1. It can be null if pixs2 is rgba, in which case we use the alpha component of pixs2. (3) If pixg exists, it need not be the same size as pixs2. However, we assume their UL corners are aligned with each other, and placed at the location (x, y) in pixs1. (4) The pixels in pixd are a combination of those in pixs1 and pixs2, where the amount from pixs2 is proportional to the value of the pixel (p) in pixg, and the amount from pixs1 is proportional to (255 - p). Thus pixg is a transparency image (usually called an alpha blender) where each pixel can be associated with a pixel in pixs2, and determines the amount of the pixs2 pixel in the final result. For example, if pixg is all 0, pixs2 is transparent and the result in pixd is simply pixs1. (5) A typical use is for the pixs2/pixg combination to be a small watermark that is applied to pixs1.
Input: pixs (8 bpp gray or 32 bpp rgb)
spatial_stdev (> 0.0)
range_stdev (> 0.0)
Return: pixd (8 bpp or 32 bpp bilateral filtered image)
Notes: (1) See pixBilateralExact(). This provides an interface using the standard deviations of the spatial and range filters. (2) The convolution window halfwidth is 2 * spatial_stdev, and the square filter size is 4 * spatial_stdev + 1. The kernel captures 95% of total energy. This is compensated by normalization. (3) The range_stdev is analogous to spatial_halfwidth in the grayscale domain [0...255], and determines how much damping of the smoothing operation is applied across edges. The larger this value is, the smaller the damping. The smaller the value, the more edge details are preserved. These approximations are useful for deciding the appropriate cutoff. kernel[1 * stdev] ~= 0.6 * kernel[0] kernel[2 * stdev] ~= 0.14 * kernel[0] kernel[3 * stdev] ~= 0.01 * kernel[0] If range_stdev is infinite there is no damping, and this becomes a conventional gaussian smoothing. This value does not affect the run time. (4) If range_stdev is negative or zero, the range kernel is ignored and this degenerates to a straight gaussian convolution. (5) This is very slow for large spatial filters. The time on a 3GHz pentium is roughly T = 1.2 * 10^-8 * (A * sh^2) sec where A = # of pixels, sh = spatial halfwidth of filter.
Input: pix (8 or 32 bpp; or 2, 4 or 8 bpp with colormap)
wc, hc (half width/height of convolution kernel)
Return: pixd, or null on error
Notes: (1) The full width and height of the convolution kernel are (2 * wc + 1) and (2 * hc + 1) (2) Returns a copy if both wc and hc are 0 (3) Require that w >= 2 * wc + 1 and h >= 2 * hc + 1, where (w,h) are the dimensions of pixs.
Input: pixs (1, 8 or 32 bpp) Return: accum pix (32 bpp), or null on error.
Notes: (1) The general recursion relation is a(i,j) = v(i,j) + a(i-1, j) + a(i, j-1) - a(i-1, j-1) For the first line, this reduces to the special case a(i,j) = v(i,j) + a(i, j-1) For the first column, the special case is a(i,j) = v(i,j) + a(i-1, j)
Input: pix (8 bpp)
accum pix (32 bpp; can be null)
wc, hc (half width/height of convolution kernel)
Return: pix (8 bpp), or null on error
Notes: (1) If accum pix is null, make one and destroy it before returning; otherwise, just use the input accum pix. (2) The full width and height of the convolution kernel are (2 * wc + 1) and (2 * hc + 1). (3) Returns a copy if both wc and hc are 0. (4) Require that w >= 2 * wc + 1 and h >= 2 * hc + 1, where (w,h) are the dimensions of pixs.
Input: pixs (8 bpp gray)
pixacc (32 bpp accum pix)
wc, hc (half width/height of convolution kernel)
Return: pixd, or null on error
Notes: (1) The full width and height of the convolution kernel are (2 * wc + 1) and (2 * hc + 1) (2) Assumes that the input pixs is padded with (wc + 1) pixels on left and right, and with (hc + 1) pixels on top and bottom. The returned pix has these stripped off; they are only used for computation. (3) Returns a copy if both wc and hc are 0 (4) Require that w > 2 * wc + 1 and h > 2 * hc + 1, where (w,h) are the dimensions of pixs.
pixBlockconvGrayUnnormalized()
Input: pixs (8 bpp)
wc, hc (half width/height of convolution kernel)
Return: pix (32 bpp; containing the convolution without normalizing
for the window size), or null on error
Notes: (1) The full width and height of the convolution kernel are (2 * wc + 1) and (2 * hc + 1). (2) Require that w >= 2 * wc + 1 and h >= 2 * hc + 1, where (w,h) are the dimensions of pixs. (3) Returns a copy if both wc and hc are 0. (4) Adds mirrored border to avoid treating the boundary pixels specially. Note that we add wc + 1 pixels to the left and wc to the right. The added width is 2 * wc + 1 pixels, and the particular choice simplifies the indexing in the loop. Likewise, add hc + 1 pixels to the top and hc to the bottom. (5) To get the normalized result, divide by the area of the convolution kernel: (2 * wc + 1) * (2 * hc + 1) Specifically, do this: pixc = pixBlockconvGrayUnnormalized(pixs, wc, hc); fract = 1. / ((2 * wc + 1) * (2 * hc + 1)); pixMultConstantGray(pixc, fract); pixd = pixGetRGBComponent(pixc, L_ALPHA_CHANNEL); (6) Unlike pixBlockconvGray(), this always computes the accumulation pix because its size is tied to wc and hc. (7) Compare this implementation with pixBlockconvGray(), where most of the code in blockconvLow() is special casing for efficiently handling the boundary. Here, the use of mirrored borders and destination indexing makes the implementation very simple.
Input: pix (8 or 32 bpp; or 2, 4 or 8 bpp with colormap)
wc, hc (half width/height of convolution kernel)
nx, ny (subdivision into tiles)
Return: pixd, or null on error
Notes: (1) The full width and height of the convolution kernel are (2 * wc + 1) and (2 * hc + 1) (2) Returns a copy if both wc and hc are 0 (3) Require that w >= 2 * wc + 1 and h >= 2 * hc + 1, where (w,h) are the dimensions of pixs. (4) For nx == ny == 1, this defaults to pixBlockconv(), which is typically about twice as fast, and gives nearly identical results as pixBlockconvGrayTile(). (5) If the tiles are too small, nx and/or ny are reduced a minimum amount so that the tiles are expanded to the smallest workable size in the problematic direction(s). (6) Why a tiled version? Three reasons: (a) Because the accumulator is a uint32, overflow can occur for an image with more than 16M pixels. (b) The accumulator array for 16M pixels is 64 MB; using tiles reduces the size of this array. (c) Each tile can be processed independently, in parallel, on a multicore processor.
Input: pixs (1 bpp)
accum pix (<optional> 32 bpp)
wc, hc (half width/height of block sum/rank kernel)
rank (between 0.0 and 1.0; 0.5 is median filter)
Return: pixd (1 bpp)
Notes: (1) The full width and height of the convolution kernel are (2 * wc + 1) and (2 * hc + 1) (2) This returns a pixd where each pixel is a 1 if the neighborhood of (2 * wc + 1) x (2 * hc + 1) pixels contains the rank fraction of 1 pixels. Otherwise, the returned pixel is 0. Note that the special case of rank = 0.0 is always satisfied, so the returned pixd has all pixels with value 1. (3) If accum pix is null, make one, use it, and destroy it before returning; otherwise, just use the input accum pix (4) If both wc and hc are 0, returns a copy unless rank == 0.0, in which case this returns an all-ones image. (5) Require that w >= 2 * wc + 1 and h >= 2 * hc + 1, where (w,h) are the dimensions of pixs.
Input: pixs (1 bpp)
accum pix (<optional> 32 bpp)
wc, hc (half width/height of block sum/rank kernel)
Return: pixd (8 bpp)
Notes: (1) If accum pix is null, make one and destroy it before returning; otherwise, just use the input accum pix (2) The full width and height of the convolution kernel are (2 * wc + 1) and (2 * hc + 1) (3) Use of wc = hc = 1, followed by pixInvert() on the 8 bpp result, gives a nice anti-aliased, and somewhat darkened, result on text. (4) Require that w >= 2 * wc + 1 and h >= 2 * hc + 1, where (w,h) are the dimensions of pixs. (5) Returns in each dest pixel the sum of all src pixels that are within a block of size of the kernel, centered on the dest pixel. This sum is the number of src ON pixels in the block at each location, normalized to 255 for a block containing all ON pixels. For pixels near the boundary, where the block is not entirely contained within the image, we then multiply by a second normalization factor that is greater than one, so that all results are normalized by the number of participating pixels within the block.
Input: pixs (8 bpp)
halfsize (of square over which neighbors are averaged)
accum pix (<optional> 32 bpp)
Return: pixd (1 bpp)
Notes: (1) The Census transform was invented by Ramin Zabih and John Woodfill ("Non-parametric local transforms for computing visual correspondence", Third European Conference on Computer Vision, Stockholm, Sweden, May 1994); see publications at http://www.cs.cornell.edu/~rdz/index.htm This compares each pixel against the average of its neighbors, in a square of odd dimension centered on the pixel. If the pixel is greater than the average of its neighbors, the output pixel value is 1; otherwise it is 0. (2) This can be used as an encoding for an image that is fairly robust against slow illumination changes, with applications in image comparison and mosaicing. (3) The size of the convolution kernel is (2 * halfsize + 1) on a side. The halfsize parameter must be >= 1. (4) If accum pix is null, make one, use it, and destroy it before returning; otherwise, just use the input accum pix
| LEPT_DLL l_int32 pixCentroid | ( | PIX * | pix, |
| l_int32 * | centtab, | ||
| l_int32 * | sumtab, | ||
| l_float32 * | pxave, | ||
| l_float32 * | pyave | ||
| ) |
Input: pix (1 or 8 bpp)
centtab (<optional> table for finding centroids; can be null)
sumtab (<optional> table for finding pixel sums; can be null)
&xave, &yave (<return> coordinates of centroid, relative to
the UL corner of the pix)
Return: 0 if OK, 1 on error
Notes: (1) Any table not passed in will be made internally and destroyed after use.
Input: pixs (8 bpp)
factor (subsampling; >= 1)
&cx (<return> x value of centroid)
&cy (<return> y value of centroid)
Return: 0 if OK, 1 on error
Notes: (1) This first does a photometric inversion (black = 255, white = 0). It then finds the centroid of the result. The inversion is done because white is usually background, so the centroid is computed based on the "foreground" gray pixels, and the darker the pixel, the more weight it is given.
Input: pix Return: output format, or 0 on error
Notes: (1) This should only be called if the requested format is IFF_DEFAULT. (2) If the pix wasn't read from a file, its input format value will be IFF_UNKNOWN, and in that case it is written out in a compressed but lossless format.
| LEPT_DLL PIX* pixCleanBackgroundToWhite | ( | PIX * | pixs, |
| PIX * | pixim, | ||
| PIX * | pixg, | ||
| l_float32 | gamma, | ||
| l_int32 | blackval, | ||
| l_int32 | whiteval | ||
| ) |
Input: pixs (8 bpp grayscale or 32 bpp rgb)
pixim (<optional> 1 bpp 'image' mask; can be null)
pixg (<optional> 8 bpp grayscale version; can be null)
gamma (gamma correction; must be > 0.0; typically ~1.0)
blackval (dark value to set to black (0))
whiteval (light value to set to white (255))
Return: pixd (8 bpp or 32 bpp rgb), or null on error
Notes: (1) This is a simplified interface for cleaning an image. For comparison, see pixAdaptThresholdToBinaryGen(). (2) The suggested default values for the input parameters are: gamma: 1.0 (reduce this to increase the contrast; e.g., for light text); blackval: 70 (a bit more than 60); whiteval: 190 (a bit less than 200)
Input: pix (8 bpp, no colormap)
lineptrs (ptrs to the beginning of each raster line of data)
Return: 0 if OK, 1 on error
Notes: (1) This must be called after processing that was initiated by pixSetupByteProcessing() has finished.
Input: pix (all depths; use cmapped with caution) Return: 0 if OK, 1 on error
Notes: (1) Clears all data to 0. For 1 bpp, this is white; for grayscale or color, this is black. (2) Caution: for colormapped pix, this sets the color to the first one in the colormap. Be sure that this is the intended color!
Input: pix (all depths; can be cmapped)
box (in which all pixels will be cleared)
Return: 0 if OK, 1 on error
Notes: (1) Clears all data in rect to 0. For 1 bpp, this is white; for grayscale or color, this is black. (2) Caution: for colormapped pix, this sets the color to the first one in the colormap. Be sure that this is the intended color!
Input: pix
(x,y) pixel coords
Return: 0 if OK; 1 on error.
| LEPT_DLL l_int32 pixClipBoxToEdges | ( | PIX * | pixs, |
| BOX * | boxs, | ||
| l_int32 | lowthresh, | ||
| l_int32 | highthresh, | ||
| l_int32 | maxwidth, | ||
| l_int32 | factor, | ||
| PIX ** | ppixd, | ||
| BOX ** | pboxd | ||
| ) |
Input: pixs (1 bpp)
boxs (<optional> ; use full image if null)
lowthresh (threshold to choose clipping location)
highthresh (threshold required to find an edge)
maxwidth (max allowed width between low and high thresh locs)
factor (sampling factor along pixel counting direction)
&pixd (<optional return> clipped pix returned)
&boxd (<optional return> bounding box)
Return: 0 if OK; 1 on error or if a fg edge is not found from
all four sides.
Notes: (1) At least one of {&pixd, &boxd} must be specified. (2) If there are no fg pixels, the returned ptrs are null. (3) This function attempts to locate rectangular "image" regions of high-density fg pixels, that have well-defined edges on the four sides. (4) Edges are searched for on each side, iterating in order from left, right, top and bottom. As each new edge is found, the search box is resized to use that location. Once an edge is found, it is held. If no more edges are found in one iteration, the search fails. (5) See pixScanForEdge() for usage of the thresholds and @maxwidth. (6) The thresholds must be at least 1, and the low threshold cannot be larger than the high threshold. (7) If the low and high thresholds are both 1, this is equivalent to pixClipBoxToForeground().
Input: pixs (1 bpp)
boxs (<optional> ; use full image if null)
&pixd (<optional return> clipped pix returned)
&boxd (<optional return> bounding box)
Return: 0 if OK; 1 on error or if there are no fg pixels
Notes: (1) At least one of {&pixd, &boxd} must be specified. (2) If there are no fg pixels, the returned ptrs are null. (3) Do not use &pixs for the 3rd arg or &boxs for the 4th arg; this will leak memory.
Input: pixs (1, 2, 4, 8, 16, 32 bpp; colormap ok)
pixm (clipping mask, 1 bpp)
x, y (origin of clipping mask relative to pixs)
outval (val to use for pixels that are outside the mask)
Return: pixd, (clipped pix) or null on error or if pixm doesn't
intersect pixs
Notes: (1) If pixs has a colormap, it is preserved in pixd. (2) The depth of pixd is the same as that of pixs. (3) If the depth of pixs is 1, use @outval = 0 for white background and 1 for black; otherwise, use the max value for white and 0 for black. If pixs has a colormap, the max value for @outval is 0xffffffff; otherwise, it is 2^d - 1. (4) When using 1 bpp pixs, this is a simple clip and blend operation. For example, if both pix1 and pix2 are black text on white background, and you want to OR the fg on the two images, let pixm be the inverse of pix2. Then the operation takes all of pix1 that's in the bg of pix2, and for the remainder (which are the pixels corresponding to the fg of the pix2), paint them black (1) in pix1. The function call looks like pixClipMasked(pix2, pixInvert(pix1, pix1), x, y, 1);
Input: pixs
box (requested clipping region; const)
&boxc (<optional return> actual box of clipped region)
Return: clipped pix, or null on error or if rectangle
doesn't intersect pixs
Notes:
This should be simple, but there are choices to be made. The box is defined relative to the pix coordinates. However, if the box is not contained within the pix, we have two choices:
(1) clip the box to the pix
(2) make a new pix equal to the full box dimensions,
but let rasterop do the clipping and positioning
of the src with respect to the dest
Choice (2) immediately brings up the problem of what pixel values to use that were not taken from the src. For example, on a grayscale image, do you want the pixels not taken from the src to be black or white or something else? To implement choice 2, one needs to specify the color of these extra pixels.
So we adopt (1), and clip the box first, if necessary, before making the dest pix and doing the rasterop. But there is another issue to consider. If you want to paste the clipped pix back into pixs, it must be properly aligned, and it is necessary to use the clipped box for alignment. Accordingly, this function has a third (optional) argument, which is the input box clipped to the src pix.
Input: pixs
boxa (requested clipping regions)
Return: pixa (consisting of requested regions), or null on error
Notes: (1) The returned pixa includes the actual regions clipped out from the input pixs.
Input: pixs (1 bpp)
&pixd (<optional return> clipped pix returned)
&box (<optional return> bounding box)
Return: 0 if OK; 1 on error or if there are no fg pixels
Notes: (1) At least one of {&pixd, &box} must be specified. (2) If there are no fg pixels, the returned ptrs are null.
Input: pix Return: same pix (ptr), or null on error
Notes: (1) A "clone" is simply a handle (ptr) to an existing pix. It is implemented because (a) images can be large and hence expensive to copy, and (b) extra handles to a data structure need to be made with a simple policy to avoid both double frees and memory leaks. Pix are reference counted. The side effect of pixClone() is an increase by 1 in the ref count. (2) The protocol to be used is: (a) Whenever you want a new handle to an existing image, call pixClone(), which just bumps a ref count. (b) Always call pixDestroy() on all handles. This decrements the ref count, nulls the handle, and only destroys the pix when pixDestroy() has been called on all handles.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
sel
Return: pixd
Notes: (1) Generic morphological closing, using hits in the Sel. (2) This implementation is a strict dual of the opening if symmetric boundary conditions are used (see notes at top of this file). (3) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (4) For clarity, if the case is known, use these patterns: (a) pixd = pixClose(NULL, pixs, ...); (b) pixClose(pixs, pixs, ...); (c) pixClose(pixd, pixs, ...); (5) The size of the result is determined by pixs.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd, or null on error
Notes: (1) Sel is a brick with all elements being hits (2) The origin is at (x, y) = (hsize/2, vsize/2) (3) Do separably if both hsize and vsize are > 1. (4) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (5) For clarity, if the case is known, use these patterns: (a) pixd = pixCloseBrick(NULL, pixs, ...); (b) pixCloseBrick(pixs, pixs, ...); (c) pixCloseBrick(pixd, pixs, ...); (6) The size of the result is determined by pixs.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd
Notes: (1) This is a 'safe' closing; we add an extra border of 32 OFF pixels for the standard asymmetric b.c. (2) These implement 2D brick Sels, using linear Sels generated with selaAddBasic(). (3) A brick Sel has hits for all elements. (4) The origin of the Sel is at (x, y) = (hsize/2, vsize/2) (5) Do separably if both hsize and vsize are > 1. (6) It is necessary that both horizontal and vertical Sels of the input size are defined in the basic sela. (7) Note that we must always set or clear the border pixels before each operation, depending on the b.c. (symmetric or asymmetric). (8) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (9) For clarity, if the case is known, use these patterns: (a) pixd = pixCloseBrickDwa(NULL, pixs, ...); (b) pixCloseBrickDwa(pixs, pixs, ...); (c) pixCloseBrickDwa(pixd, pixs, ...); (10) The size of the result is determined by pixs. (11) If either linear Sel is not found, this calls the appropriate decomposable function.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd, or null on error
Notes: (1) Sel is a brick with all elements being hits (2) The origin is at (x, y) = (hsize/2, vsize/2) (3) Do compositely for each dimension > 1. (4) Do separably if both hsize and vsize are > 1. (5) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (6) For clarity, if the case is known, use these patterns: (a) pixd = pixCloseCompBrick(NULL, pixs, ...); (b) pixCloseCompBrick(pixs, pixs, ...); (c) pixCloseCompBrick(pixd, pixs, ...); (7) The dimensions of the resulting image are determined by pixs. (8) CAUTION: both hsize and vsize are being decomposed. The decomposer chooses a product of sizes (call them 'terms') for each that is close to the input size, but not necessarily equal to it. It attempts to optimize: (a) for consistency with the input values: the product of terms is close to the input size (b) for efficiency of the operation: the sum of the terms is small; ideally about twice the square root of the input size. So, for example, if the input hsize = 37, which is a prime number, the decomposer will break this into two terms, 6 and 6, so that the net result is a closing with hsize = 36.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd
Notes: (1) This implements a separable composite safe closing with 2D brick Sels. (2) For efficiency, it may decompose each linear morphological operation into two (brick + comb). (3) A brick Sel has hits for all elements. (4) The origin of the Sel is at (x, y) = (hsize/2, vsize/2) (5) Do separably if both hsize and vsize are > 1. (6) It is necessary that both horizontal and vertical Sels of the input size are defined in the basic sela. (7) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (8) For clarity, if the case is known, use these patterns: (a) pixd = pixCloseCompBrickDwa(NULL, pixs, ...); (b) pixCloseCompBrickDwa(pixs, pixs, ...); (c) pixCloseCompBrickDwa(pixd, pixs, ...); (9) The size of pixd is determined by pixs. (10) CAUTION: both hsize and vsize are being decomposed. The decomposer chooses a product of sizes (call them 'terms') for each that is close to the input size, but not necessarily equal to it. It attempts to optimize: (a) for consistency with the input values: the product of terms is close to the input size (b) for efficiency of the operation: the sum of the terms is small; ideally about twice the square root of the input size. So, for example, if the input hsize = 37, which is a prime number, the decomposer will break this into two terms, 6 and 6, so that the net result is a closing with hsize = 36.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd
(1) There are three cases:
(a) pixd == null (result into new pixd)
(b) pixd == pixs (in-place; writes result back to pixs)
(c) pixd != pixs (puts result into existing pixd)
(2) There is no need to call this directly: pixCloseCompBrickDwa()
calls this function if either brick dimension exceeds 63.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
sel
Return: pixd
Notes: (1) Generalized morphological closing, using both hits and misses in the Sel. (2) This does a dilation using the hits, followed by a hit-miss transform. (3) This operation is a dual of the generalized opening. (4) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (5) For clarity, if the case is known, use these patterns: (a) pixd = pixCloseGeneralized(NULL, pixs, ...); (b) pixCloseGeneralized(pixs, pixs, ...); (c) pixCloseGeneralized(pixd, pixs, ...); (6) The size of the result is determined by pixs.
Input: pixs
hsize (of Sel; must be odd; origin implicitly in center)
vsize (ditto)
Return: pixd
Notes: (1) Sel is a brick with all elements being hits (2) If hsize = vsize = 1, just returns a copy.
Input: pixs (8 bpp, not cmapped)
hsize (1 or 3)
vsize (1 or 3)
Return: pixd, or null on error
Notes: (1) Special case for 1x3, 3x1 or 3x3 brick sel (all hits) (2) If hsize = vsize = 1, just returns a copy.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
sel
Return: pixd
Notes: (1) Generic morphological closing, using hits in the Sel. (2) If non-symmetric boundary conditions are used, this function adds a border of OFF pixels that is of sufficient size to avoid losing pixels from the dilation, and it removes the border after the operation is finished. It thus enforces a correct extensive result for closing. (3) If symmetric b.c. are used, it is not necessary to add and remove this border. (4) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (5) For clarity, if the case is known, use these patterns: (a) pixd = pixCloseSafe(NULL, pixs, ...); (b) pixCloseSafe(pixs, pixs, ...); (c) pixCloseSafe(pixd, pixs, ...); (6) The size of the result is determined by pixs.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd, or null on error
Notes: (1) Sel is a brick with all elements being hits (2) The origin is at (x, y) = (hsize/2, vsize/2) (3) Do separably if both hsize and vsize are > 1. (4) Safe closing adds a border of 0 pixels, of sufficient size so that all pixels in input image are processed within 32-bit words in the expanded image. As a result, there is no special processing for pixels near the boundary, and there are no boundary effects. The border is removed at the end. (5) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (6) For clarity, if the case is known, use these patterns: (a) pixd = pixCloseSafeBrick(NULL, pixs, ...); (b) pixCloseSafeBrick(pixs, pixs, ...); (c) pixCloseSafeBrick(pixd, pixs, ...); (7) The size of the result is determined by pixs.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd, or null on error
Notes: (1) Sel is a brick with all elements being hits (2) The origin is at (x, y) = (hsize/2, vsize/2) (3) Do compositely for each dimension > 1. (4) Do separably if both hsize and vsize are > 1. (5) Safe closing adds a border of 0 pixels, of sufficient size so that all pixels in input image are processed within 32-bit words in the expanded image. As a result, there is no special processing for pixels near the boundary, and there are no boundary effects. The border is removed at the end. (6) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (7) For clarity, if the case is known, use these patterns: (a) pixd = pixCloseSafeCompBrick(NULL, pixs, ...); (b) pixCloseSafeCompBrick(pixs, pixs, ...); (c) pixCloseSafeCompBrick(pixd, pixs, ...); (8) The dimensions of the resulting image are determined by pixs. (9) CAUTION: both hsize and vsize are being decomposed. The decomposer chooses a product of sizes (call them 'terms') for each that is close to the input size, but not necessarily equal to it. It attempts to optimize: (a) for consistency with the input values: the product of terms is close to the input size (b) for efficiency of the operation: the sum of the terms is small; ideally about twice the square root of the input size. So, for example, if the input hsize = 37, which is a prime number, the decomposer will break this into two terms, 6 and 6, so that the net result is a closing with hsize = 36.
Input: cmap
color (0 for black, 1 for white)
&index (<optional return> index of color; can be null)
Return: 0 if OK, 1 on error
Notes: (1) This only adds color if not already there. (2) The alpha component is 255 (opaque) (3) This sets index to the requested color. (4) If there is no room in the colormap, returns the index of the closest color.
Input: cmap
rval, gval, bval (colormap entry to be added; each number
is in range [0, ... 255])
Return: 0 if OK, 1 on error
Notes: (1) This always adds the color if there is room. (2) The alpha component is 255 (opaque)
| LEPT_DLL l_int32 pixcmapAddNearestColor | ( | PIXCMAP * | cmap, |
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_int32 * | pindex | ||
| ) |
Input: cmap
rval, gval, bval (colormap entry to be added; each number
is in range [0, ... 255])
&index (<return> index of color)
Return: 0 if OK, 1 on error
Notes: (1) This only adds color if not already there. (2) The alpha component is 255 (opaque) (3) If it's not in the colormap and there is no room to add another color, this returns the index of the nearest color.
| LEPT_DLL l_int32 pixcmapAddNewColor | ( | PIXCMAP * | cmap, |
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_int32 * | pindex | ||
| ) |
Input: cmap
rval, gval, bval (colormap entry to be added; each number
is in range [0, ... 255])
&index (<return> index of color)
Return: 0 if OK, 1 on error; 2 if unable to add color
Notes: (1) This only adds color if not already there. (2) The alpha component is 255 (opaque) (3) This returns the index of the new (or existing) color. (4) Returns 2 with a warning if unable to add this color; the caller should check the return value.
| LEPT_DLL l_int32 pixcmapAddRGBA | ( | PIXCMAP * | cmap, |
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_int32 | aval | ||
| ) |
Input: cmap
rval, gval, bval, aval (colormap entry to be added;
each number is in range [0, ... 255])
Return: 0 if OK, 1 on error
Notes: (1) This always adds the color if there is room.
Input: cmap Return: 0 if OK, 1 on error
Note: this removes the colors by setting the count to 0.
| LEPT_DLL PIXCMAP* pixcmapColorToGray | ( | PIXCMAP * | cmaps, |
| l_float32 | rwt, | ||
| l_float32 | gwt, | ||
| l_float32 | bwt | ||
| ) |
Input: cmap
rwt, gwt, bwt (non-negative; these should add to 1.0)
Return: cmap (gray), or null on error
Notes: (1) This creates a gray colormap from an arbitrary colormap. (2) In use, attach the output gray colormap to the pix (or a copy of it) that provided the input colormap.
Input: colormap
factor (generally between 0.0 (no enhancement)
and 1.0, but can be larger than 1.0)
Return: 0 if OK; 1 on error
Notes: (1) This is an in-place transform (2) See pixContrastTRC() and numaContrastTRC() in enhance.c for description and use of transform
Input: colormap Return: 0 if OK; 1 on error
Notes:
Input: colormap Return: 0 if OK; 1 on error
Notes:
Input: colormap Return: 0 if OK; 1 on error
Notes:
Input: data (binary serialized data)
ncolors (in colormap)
Return: hexdata (bracketed, space-separated ascii hex string),
or null on error.
Notes: (1) The number of bytes in @data is 3 * ncolors. (2) Output is in form: < r0g0b0 r1g1b1 ... rngnbn > where r0, g0, b0 ... are each 2 bytes of hex ascii (3) This is used in pdf files to express the colormap as an array in ascii (human-readable) format.
Input: colormap Return: 0 if OK; 1 on error
Notes:
Input: cmaps Return: cmapd, or null on error
Input: cmap
&ngray (<return> number of gray colors)
Return: 0 if OK, 1 on error
Notes: (1) This counts the unique gray colors, including black and white.
Input: depth (bpp, of pix) Return: cmap, or null on error
Input: d (depth of pix for this colormap; 1, 2, 4 or 8)
nlevels (valid in range [2, 2^d])
Return: cmap, or null on error
Notes: (1) Colormap has equally spaced gray color values from black (0, 0, 0) to white (255, 255, 255).
Input: depth (bpp, of pix; 2, 4 or 8)
hasblack (1 if the first color is black; 0 if no black)
haswhite (1 if the last color is white; 0 if no white)
Return: cmap, or null on error
Notes: (1) This sets up a colormap with random colors, where the first color is optionally black, the last color is optionally white, and the remaining colors are chosen randomly. (2) The number of randomly chosen colors is: 2^(depth) - haswhite - hasblack (3) Because rand() is seeded, it might disrupt otherwise deterministic results if also used elsewhere in a program. (4) rand() is not threadsafe, and will generate garbage if run on multiple threads at once – though garbage is generally what you want from a random number generator! (5) Modern rand()s have equal randomness in low and high order bits, but older ones don't. Here, we're just using rand() to choose colors for output.
pixcmapDeserializeFromMemory()
Input: data (binary string, 3 or 4 bytes per color)
cpc (components/color: 3 for rgb, 4 for rgba)
ncolors
Return: cmap, or null on error
Input: &cmap (<set to null>) Return: void
| LEPT_DLL l_int32 pixcmapGammaTRC | ( | PIXCMAP * | cmap, |
| l_float32 | gamma, | ||
| l_int32 | minval, | ||
| l_int32 | maxval | ||
| ) |
Input: colormap
gamma (gamma correction; must be > 0.0)
minval (input value that gives 0 for output; can be < 0)
maxval (input value that gives 255 for output; can be > 255)
Return: 0 if OK; 1 on error
Notes: (1) This is an in-place transform (2) See pixGammaTRC() and numaGammaTRC() in enhance.c for description and use of transform
| LEPT_DLL l_int32 pixcmapGetColor | ( | PIXCMAP * | cmap, |
| l_int32 | index, | ||
| l_int32 * | prval, | ||
| l_int32 * | pgval, | ||
| l_int32 * | pbval | ||
| ) |
Input: cmap
index
&rval, &gval, &bval (<return> each color value)
Return: 0 if OK, 1 if not accessible (caller should check)
Input: cmap
index
&val32 (<return> 32-bit rgb color value)
Return: 0 if OK, 1 if not accessible (caller should check)
Notes: (1) The returned alpha channel value is 255.
| LEPT_DLL l_int32 pixcmapGetComponentRange | ( | PIXCMAP * | cmap, |
| l_int32 | color, | ||
| l_int32 * | pminval, | ||
| l_int32 * | pmaxval | ||
| ) |
Input: cmap
color (L_SELECT_RED, L_SELECT_GREEN or L_SELECT_BLUE)
&minval (<optional return> minimum value of component)
&maxval (<optional return> maximum value of component)
Return: 0 if OK, 1 on error
Notes: (1) Returns, for the selected color component, the minimum and maximum values of that component found in the colormap.
Input: cmap Return: count, or 0 on error
Input: cmap Return: depth, or 0 on error
| LEPT_DLL l_int32 pixcmapGetExtremeValue | ( | PIXCMAP * | cmap, |
| l_int32 | type, | ||
| l_int32 * | prval, | ||
| l_int32 * | pgval, | ||
| l_int32 * | pbval | ||
| ) |
Input: cmap
type (L_SELECT_MIN or L_SELECT_MAX)
&rval (<optional return> red component)
&gval (<optional return> green component)
&bval (<optional return> blue component)
Return: 0 if OK, 1 on error
Notes: (1) Returns for selected components the extreme value (either min or max) of the color component that is found in the colormap.
Input: cmap Return: free entries, or 0 on error
| LEPT_DLL l_int32 pixcmapGetIndex | ( | PIXCMAP * | cmap, |
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_int32 * | pindex | ||
| ) |
Input: cmap
rval, gval, bval (colormap colors to search for; each number
is in range [0, ... 255])
&index (<return>)
Return: 0 if found, 1 if not found (caller must check)
Input: cmap
&mindepth (<return> minimum depth to support the colormap)
Return: 0 if OK, 1 on error
Notes: (1) On error, &mindepth is returned as 0.
Input: cmap
val (gray value to search for; in range [0, ... 255])
&index (<return> the index of the nearest color)
Return: 0 if OK, 1 on error (caller must check)
Notes: (1) This should be used on gray colormaps. It uses only the green value of the colormap. (2) Returns the index of the exact color if possible, otherwise the index of the color closest to the target color.
| LEPT_DLL l_int32 pixcmapGetNearestIndex | ( | PIXCMAP * | cmap, |
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_int32 * | pindex | ||
| ) |
Input: cmap
rval, gval, bval (colormap colors to search for; each number
is in range [0, ... 255])
&index (<return> the index of the nearest color)
Return: 0 if OK, 1 on error (caller must check)
Notes: (1) Returns the index of the exact color if possible, otherwise the index of the color closest to the target color. (2) Nearest color is that which is the least sum-of-squares distance from the target color.
Input: cmap
rankval (0.0 for darkest, 1.0 for lightest color)
&index (<return> the index into the colormap that
corresponds to the rank intensity color)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixcmapGetRGBA | ( | PIXCMAP * | cmap, |
| l_int32 | index, | ||
| l_int32 * | prval, | ||
| l_int32 * | pgval, | ||
| l_int32 * | pbval, | ||
| l_int32 * | paval | ||
| ) |
Input: cmap
index
&rval, &gval, &bval, &aval (<return> each color value)
Return: 0 if OK, 1 if not accessible (caller should check)
Input: cmap
index
&val32 (<return> 32-bit rgba color value)
Return: 0 if OK, 1 if not accessible (caller should check)
Input: color Return: cmap, or null on error
Notes: (1) This creates a colormap that maps from gray to a specific color. In the mapping, each component is faded to white, depending on the gray value. (2) In use, this is simply attached to a grayscale pix to give it the input color.
Input: cmap
&color (<return> TRUE if cmap has color; FALSE otherwise)
Return: 0 if OK, 1 on error
Input: cmap
&blackwhite (<return> TRUE if the cmap has only two colors:
black (0,0,0) and white (255,255,255))
Return: 0 if OK, 1 on error
Input: cmap
&opaque (<return> TRUE if fully opaque: all entries are 255)
Return: 0 if OK, 1 on error
Input: filename Return: cmap, or null on error
Input: stream Return: cmap, or null on error
| LEPT_DLL l_int32 pixcmapResetColor | ( | PIXCMAP * | cmap, |
| l_int32 | index, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval | ||
| ) |
Input: cmap
index
rval, gval, bval (colormap entry to be reset; each number
is in range [0, ... 255])
Return: 0 if OK, 1 if not accessible (caller should check)
Notes: (1) This resets the color of an entry that has already been set and included in the count of colors. (2) The alpha component is 255 (opaque)
| LEPT_DLL l_int32 pixcmapSerializeToMemory | ( | PIXCMAP * | cmap, |
| l_int32 | cpc, | ||
| l_int32 * | pncolors, | ||
| l_uint8 ** | pdata | ||
| ) |
Input: colormap
cpc (components/color: 3 for rgb, 4 for rgba)
&ncolors (<return> number of colors in table)
&data (<return> binary string, cpc bytes per color)
Return: 0 if OK; 1 on error
Notes: (1) When serializing to store in a pdf, use @cpc = 3.
Input: cmap
index
aval (in range [0, ... 255])
Return: 0 if OK, 1 on error
Notes: (1) This modifies the transparency of one entry in a colormap. The alpha component by default is 255 (opaque). This is used when extracting the colormap from a PNG file without decoding the image.
Input: cmap
setblack (0 for no operation; 1 to set darkest color to black)
setwhite (0 for no operation; 1 to set lightest color to white)
Return: 0 if OK, 1 on error
Input: colormap
srcval (source color: 0xrrggbb00)
dstval (target color: 0xrrggbb00)
Return: 0 if OK; 1 on error
Notes: (1) This is an in-place transform (2) It implements pixelShiftByComponent() for each color. The mapping is specified by srcval and dstval. (3) If a component decreases, the component in the colormap decreases by the same ratio. Likewise for increasing, except all ratios are taken with respect to the distance from 255.
Input: colormap
fraction (between -1.0 and +1.0)
Return: 0 if OK; 1 on error
Notes: (1) This is an in-place transform (2) It does a proportional shift of the intensity for each color. (3) If fraction < 0.0, it moves all colors towards (0,0,0). This darkens the image. If fraction > 0.0, it moves all colors towards (255,255,255). This fades the image. (4) The equivalent transform can be accomplished with pixcmapGammaTRC(), but it is considerably more difficult (see numaGammaTRC()).
| LEPT_DLL l_int32 pixcmapToArrays | ( | PIXCMAP * | cmap, |
| l_int32 ** | prmap, | ||
| l_int32 ** | pgmap, | ||
| l_int32 ** | pbmap, | ||
| l_int32 ** | pamap | ||
| ) |
Input: colormap
&rmap, &gmap, &bmap (<return> colormap arrays)
&amap (<optional return> alpha array)
Return: 0 if OK; 1 on error
Input: cmap
level (significant bits for each of RGB; valid in [1...6])
metric (L_MANHATTAN_DISTANCE, L_EUCLIDEAN_DISTANCE)
Return: tab[2**(3 * level)]
Notes: (1) This function is used to quickly find the colormap color that is closest to any rgb color. It is used to assign rgb colors to an existing colormap. It can be very expensive to search through the entire colormap for the closest color to each pixel. Instead, we first set up this table, which is populated by the colormap index nearest to each octcube color. Then we go through the image; for each pixel, do two table lookups: first to generate the octcube index from rgb and second to use this table to read out the colormap index. (2) Do a slight modification for white and black. For level = 4, each octcube size is 16. The center of the whitest octcube is at (248, 248, 248), which is closer to 242 than 255. Consequently, any gray color between 242 and 254 will be selected, even if white (255, 255, 255) exists. This is typically not optimal, because the original color was likely white. Therefore, if white exists in the colormap, use it for any rgb color that falls into the most white octcube. Do the similar thing for black. (3) Here are the actual function calls for quantizing to a specified colormap:
Input: colormap
&tab (<return> table of rgba values for the colormap)
&ncolors (<optional return> size of table)
Return: 0 if OK; 1 on error
| LEPT_DLL l_int32 pixcmapUsableColor | ( | PIXCMAP * | cmap, |
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_int32 * | pusable | ||
| ) |
Input: cmap
rval, gval, bval (colormap entry to be added; each number
is in range [0, ... 255])
&usable (<return> 1 if usable; 0 if not)
Return: 0 if OK, 1 on error
Notes: (1) This checks if the color already exists or if there is room to add it. It makes no change in the colormap.
Input: filename
cmap
Return: 0 if OK, 1 on error
Input: stream, cmap Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixColorContent | ( | PIX * | pixs, |
| l_int32 | rwhite, | ||
| l_int32 | gwhite, | ||
| l_int32 | bwhite, | ||
| l_int32 | mingray, | ||
| PIX ** | ppixr, | ||
| PIX ** | ppixg, | ||
| PIX ** | ppixb | ||
| ) |
Input: pixs (32 bpp rgb or 8 bpp colormapped)
rwhite, gwhite, bwhite (color value associated with white point)
mingray (min gray value for which color is measured)
&pixr (<optional return> 8 bpp red 'content')
&pixg (<optional return> 8 bpp green 'content')
&pixb (<optional return> 8 bpp blue 'content')
Return: 0 if OK, 1 on error
Notes: (1) This returns the color content in each component, which is a measure of the deviation from gray, and is defined as the difference between the component and the average of the other two components. See the discussion at the top of this file. (2) The three numbers (rwhite, gwhite and bwhite) can be thought of as the values in the image corresponding to white. They are used to compensate for an unbalanced color white point. They must either be all 0 or all non-zero. To turn this off, set them all to 0. (3) If the maximum component after white point correction, max(r,g,b), is less than mingray, all color components for that pixel are set to zero. Use mingray = 0 to turn off this filtering of dark pixels. (4) Therefore, use 0 for all four input parameters if the color magnitude is to be calculated without either white balance correction or dark filtering.
| LEPT_DLL l_int32 pixColorFraction | ( | PIX * | pixs, |
| l_int32 | darkthresh, | ||
| l_int32 | lightthresh, | ||
| l_int32 | diffthresh, | ||
| l_int32 | factor, | ||
| l_float32 * | ppixfract, | ||
| l_float32 * | pcolorfract | ||
| ) |
Input: pixs (32 bpp rgb)
darkthresh (threshold near black; if the lightest component
is below this, the pixel is not considered in
the statistics; typ. 20)
lightthresh (threshold near white; if the darkest component
is above this, the pixel is not considered in
the statistics; typ. 244)
diffthresh (thresh for the maximum difference between
component value; below this the pixel is not
considered to have sufficient color)
factor (subsampling factor)
&pixfract (<return> fraction of pixels in intermediate
brightness range that were considered
for color content)
&colorfract (<return> fraction of pixels that meet the
criterion for sufficient color; 0.0 on error)
Return: 0 if OK, 1 on error
Notes: (1) This function is asking the question: to what extent does the image appear to have color? The amount of color a pixel appears to have depends on both the deviation of the individual components from their average and on the average intensity itself. For example, the color will be much more obvious with a small deviation from white than the same deviation from black. (2) Any pixel that meets these three tests is considered a colorful pixel: (a) the lightest component must equal or exceed @darkthresh (b) the darkest component must not exceed @lightthresh (c) the max difference between components must equal or exceed @diffthresh. (3) The dark pixels are removed from consideration because they don't appear to have color. (4) The very lightest pixels are removed because if an image has a lot of "white", the color fraction will be artificially low, even if all the other pixels are colorful. (5) If pixfract is very small, there are few pixels that are neither black nor white. If colorfract is very small, the pixels that are neither black nor white have very little color content. The product 'pixfract * colorfract' gives the fraction of pixels with significant color content. (6) One use of this function is as a preprocessing step for median cut quantization (colorquant2.c), which does a very poor job splitting the color space into rectangular volume elements when all the pixels are near the diagonal of the color cube. For octree quantization of an image with only gray values, the 2^(level) octcubes on the diagonal are the only ones that can be occupied.
| LEPT_DLL l_int32 pixColorGray | ( | PIX * | pixs, |
| BOX * | box, | ||
| l_int32 | type, | ||
| l_int32 | thresh, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval | ||
| ) |
Input: pixs (8 bpp gray, rgb or colormapped image)
box (<optional> region in which to apply color; can be NULL)
type (L_PAINT_LIGHT, L_PAINT_DARK)
thresh (average value below/above which pixel is unchanged)
rval, gval, bval (new color to paint)
Return: 0 if OK; 1 on error
Notes: (1) This is an in-place operation; pixs is modified. If pixs is colormapped, the operation will add colors to the colormap. Otherwise, pixs will be converted to 32 bpp rgb if it is initially 8 bpp gray. (2) If type == L_PAINT_LIGHT, it colorizes non-black pixels, preserving antialiasing. If type == L_PAINT_DARK, it colorizes non-white pixels, preserving antialiasing. (3) If box is NULL, applies function to the entire image; otherwise, clips the operation to the intersection of the box and pix. (4) If colormapped, calls pixColorGrayCmap(), which applies the coloring algorithm only to pixels that are strictly gray. (5) For RGB, determines a "gray" value by averaging; then uses this value, plus the input rgb target, to generate the output pixel values. (6) thresh is only used for rgb; it is ignored for colormapped pix. If type == L_PAINT_LIGHT, use thresh = 0 if all pixels are to be colored (black pixels will be unaltered). In situations where there are a lot of black pixels, setting thresh > 0 will make the function considerably more efficient without affecting the final result. If type == L_PAINT_DARK, use thresh = 255 if all pixels are to be colored (white pixels will be unaltered). In situations where there are a lot of white pixels, setting thresh < 255 will make the function considerably more efficient without affecting the final result.
| LEPT_DLL l_int32 pixColorGrayCmap | ( | PIX * | pixs, |
| BOX * | box, | ||
| l_int32 | type, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval | ||
| ) |
Input: pixs (2, 4 or 8 bpp, with colormap)
box (<optional> region to set color; can be NULL)
type (L_PAINT_LIGHT, L_PAINT_DARK)
rval, gval, bval (target color)
Return: 0 if OK, 1 on error
Notes: (1) This is an in-place operation. (2) If type == L_PAINT_LIGHT, it colorizes non-black pixels, preserving antialiasing. If type == L_PAINT_DARK, it colorizes non-white pixels, preserving antialiasing. (3) box gives the region to apply color; if NULL, this colorizes the entire image. (4) If the cmap is only 2 or 4 bpp, pixs is converted in-place to an 8 bpp cmap. A 1 bpp cmap is not a valid input pix. (5) This can also be called through pixColorGray(). (6) This operation increases the colormap size by the number of different gray (non-black or non-white) colors in the input colormap. If there is not enough room in the colormap for this expansion, it returns 1 (error), and the caller should check the return value. (7) Using the darkness of each original pixel in the rect, it generates a new color (based on the input rgb values). If type == L_PAINT_LIGHT, the new color is a (generally) darkened-to-black version of the input rgb color, where the amount of darkening increases with the darkness of the original pixel color. If type == L_PAINT_DARK, the new color is a (generally) faded-to-white version of the input rgb color, where the amount of fading increases with the brightness of the original pixel color.
| LEPT_DLL PIX* pixColorGrayMasked | ( | PIX * | pixs, |
| PIX * | pixm, | ||
| l_int32 | type, | ||
| l_int32 | thresh, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval | ||
| ) |
Input: pixs (8 bpp gray, rgb or colormapped image)
pixm (1 bpp mask, through which to apply color)
type (L_PAINT_LIGHT, L_PAINT_DARK)
thresh (average value below/above which pixel is unchanged)
rval, gval, bval (new color to paint)
Return: pixd (colorized), or null on error
Notes: (1) This generates a new image, where some of the pixels under FG in the mask are colorized. (2) See pixColorGray() for usage with @type and @thresh. Note that @thresh is only used for rgb; it is ignored for colormapped images. In most cases, the mask will be over the darker parts and @type == L_PAINT_DARK. (3) If pixs is colormapped this calls pixColorGrayMaskedCmap(), which adds colors to the colormap for pixd; it only adds colors corresponding to strictly gray colors in the colormap. Otherwise, if pixs is 8 bpp gray, pixd will be 32 bpp rgb. (4) If pixs is 32 bpp rgb, for each pixel a "gray" value is found by averaging. This average is then used with the input rgb target to generate the output pixel values. (5) This can be used in conjunction with pixFindColorRegions() to add highlight color to a grayscale image.
| LEPT_DLL l_int32 pixColorGrayMaskedCmap | ( | PIX * | pixs, |
| PIX * | pixm, | ||
| l_int32 | type, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval | ||
| ) |
Input: pixs (8 bpp, with colormap)
pixm (1 bpp mask, through which to apply color)
type (L_PAINT_LIGHT, L_PAINT_DARK)
rval, gval, bval (target color)
Return: 0 if OK, 1 on error
Notes: (1) This is an in-place operation. (2) If type == L_PAINT_LIGHT, it colorizes non-black pixels, preserving antialiasing. If type == L_PAINT_DARK, it colorizes non-white pixels, preserving antialiasing. See pixColorGrayCmap() for details. (3) This increases the colormap size by the number of different gray (non-black or non-white) colors in the input colormap. If there is not enough room in the colormap for this expansion, it returns 1 (error).
| LEPT_DLL PIX* pixColorGrayRegions | ( | PIX * | pixs, |
| BOXA * | boxa, | ||
| l_int32 | type, | ||
| l_int32 | thresh, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval | ||
| ) |
Input: pixs (2, 4 or 8 bpp gray, rgb, or colormapped)
boxa (of regions in which to apply color)
type (L_PAINT_LIGHT, L_PAINT_DARK)
thresh (average value below/above which pixel is unchanged)
rval, gval, bval (new color to paint)
Return: pixd, or null on error
Notes: (1) This generates a new image, where some of the pixels in each box in the boxa are colorized. See pixColorGray() for usage with @type and @thresh. Note that @thresh is only used for rgb; it is ignored for colormapped images. (2) If the input image is colormapped, the new image will be 8 bpp colormapped if possible; otherwise, it will be converted to 32 bpp rgb. Only pixels that are strictly gray will be colorized. (3) If the input image is not colormapped, it is converted to rgb. A "gray" value for a pixel is determined by averaging the components, and the output rgb value is determined from this. (4) This can be used in conjunction with pixFindColorRegions() to add highlight color to a grayscale image.
| LEPT_DLL l_int32 pixColorGrayRegionsCmap | ( | PIX * | pixs, |
| BOXA * | boxa, | ||
| l_int32 | type, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval | ||
| ) |
Input: pixs (8 bpp, with colormap)
boxa (of regions in which to apply color)
type (L_PAINT_LIGHT, L_PAINT_DARK)
rval, gval, bval (target color)
Return: 0 if OK, 1 on error
Notes: (1) This is an in-place operation. (2) If type == L_PAINT_LIGHT, it colorizes non-black pixels, preserving antialiasing. If type == L_PAINT_DARK, it colorizes non-white pixels, preserving antialiasing. See pixColorGrayCmap() for details. (3) This can also be called through pixColorGrayRegions(). (4) This increases the colormap size by the number of different gray (non-black or non-white) colors in the selected regions of pixs. If there is not enough room in the colormap for this expansion, it returns 1 (error), and the caller should check the return value. (5) Because two boxes in the boxa can overlap, pixels that are colorized in the first box must be excluded in the second because their value exceeds the size of the map.
Input: pixs (8 bpp gray; 2, 4 or 8 bpp colormapped)
color (32 bit rgba pixel)
cmapflag (1 for result to have colormap; 0 for RGB)
Return: pixd (8 bpp colormapped or 32 bpp rgb), or null on error
Notes: (1) This applies the specific color to the grayscale image. (2) If pixs already has a colormap, it is removed to gray before colorizing.
| LEPT_DLL PIX* pixColorMagnitude | ( | PIX * | pixs, |
| l_int32 | rwhite, | ||
| l_int32 | gwhite, | ||
| l_int32 | bwhite, | ||
| l_int32 | type | ||
| ) |
Input: pixs (32 bpp rgb or 8 bpp colormapped)
rwhite, gwhite, bwhite (color value associated with white point)
type (chooses the method for calculating the color magnitude:
L_MAX_DIFF_FROM_AVERAGE_2, L_MAX_MIN_DIFF_FROM_2,
L_MAX_DIFF)
Return: pixd (8 bpp, amount of color in each source pixel),
or NULL on error
Notes: (1) For an RGB image, a gray pixel is one where all three components are equal. We define the amount of color in an RGB pixel as a function depending on the absolute value of the differences between the three color components. Consider the two largest of these differences. The pixel component in common to these two differences is the color farthest from the other two. The color magnitude in an RGB pixel can be taken as one of these three definitions: (a) The average of these two differences. This is the average distance from the two components that are nearest to each other to the third component. (b) The minimum value of these two differences. This is the intermediate value of the three distances between component values. Stated otherwise, it is the maximum over all components of the minimum distance from that component to the other two components. (c) The maximum difference between component values. (2) As an example, suppose that R and G are the closest in magnitude. Then the color is determined as either: (a) The average distance of B from these two: (|B - R| + |B - G|) / 2 (b) The minimum distance of B from these two: min(|B - R|, |B - G|). (c) The maximum distance of B from these two: max(|B - R|, |B - G|) (3) The three methods for choosing the color magnitude from the components are selected with these flags: (a) L_MAX_DIFF_FROM_AVERAGE_2 (b) L_MAX_MIN_DIFF_FROM_2 (c) L_MAX_DIFF (4) The three numbers (rwhite, gwhite and bwhite) can be thought of as the values in the image corresponding to white. They are used to compensate for an unbalanced color white point. They must either be all 0 or all non-zero. To turn this off, set them all to 0.
Input: pixs
type (L_MORPH_DILATE, L_MORPH_ERODE, L_MORPH_OPEN,
or L_MORPH_CLOSE)
hsize (of Sel; must be odd; origin implicitly in center)
vsize (ditto)
Return: pixd
Notes: (1) This does the morph operation on each component separately, and recombines the result. (2) Sel is a brick with all elements being hits. (3) If hsize = vsize = 1, just returns a copy.
| LEPT_DLL PIX* pixColorMorphSequence | ( | PIX * | pixs, |
| const char * | sequence, | ||
| l_int32 | dispsep, | ||
| l_int32 | dispy | ||
| ) |
Input: pixs
sequence (string specifying sequence)
dispsep (controls debug display of each result in the sequence:
0: no output
> 0: gives horizontal separation in pixels between
successive displays
< 0: pdf output; abs(dispsep) is used for naming)
dispy (if dispsep > 0, this gives the y-value of the
UL corner for display; otherwise it is ignored)
Return: pixd, or null on error
Notes: (1) This works on 32 bpp rgb images. (2) Each component is processed separately. (3) This runs a pipeline of operations; no branching is allowed. (4) This only uses brick SELs. (5) A new image is always produced; the input image is not changed. (6) This contains an interpreter, allowing sequences to be generated and run. (7) Sel sizes (width, height) must each be odd numbers. (8) The format of the sequence string is defined below. (9) Intermediate results can optionally be displayed. (10) The sequence string is formatted as follows:
| LEPT_DLL PIX* pixColorSegment | ( | PIX * | pixs, |
| l_int32 | maxdist, | ||
| l_int32 | maxcolors, | ||
| l_int32 | selsize, | ||
| l_int32 | finalcolors | ||
| ) |
Input: pixs (32 bpp; 24-bit color)
maxdist (max euclidean dist to existing cluster)
maxcolors (max number of colors allowed in first pass)
selsize (linear size of sel for closing to remove noise)
finalcolors (max number of final colors allowed after 4th pass)
Return: pixd (8 bit with colormap), or null on error
Color segmentation proceeds in four phases:
Phase 1: pixColorSegmentCluster() The image is traversed in raster order. Each pixel either becomes the representative for a new cluster or is assigned to an existing cluster. Assignment is greedy. The data is stored in a colormapped image. Three auxiliary arrays are used to hold the colors of the representative pixels, for fast lookup. The average color in each cluster is computed.
Phase 2. pixAssignToNearestColor() A second (non-greedy) clustering pass is performed, where each pixel is assigned to the nearest cluster (average). We also keep track of how many pixels are assigned to each cluster.
Phase 3. pixColorSegmentClean() For each cluster, starting with the largest, do a morphological closing to eliminate small components within larger ones.
Phase 4. pixColorSegmentRemoveColors() Eliminate all colors except the most populated 'finalcolors'. Then remove unused colors from the colormap, and reassign those pixels to the nearest remaining cluster, using the original pixel values.
Notes: (1) The goal is to generate a small number of colors. Typically this would be specified by 'finalcolors', a number that would be somewhere between 3 and 6. The parameter 'maxcolors' specifies the maximum number of colors generated in the first phase. This should be larger than finalcolors, perhaps twice as large. If more than 'maxcolors' are generated in the first phase using the input 'maxdist', the distance is repeatedly increased by a multiplicative factor until the condition is satisfied. The implicit relation between 'maxdist' and 'maxcolors' is thus adjusted programmatically. (2) As a very rough guideline, given a target value of 'finalcolors', here are approximate values of 'maxdist' and 'maxcolors' to start with:
finalcolors maxcolors maxdist
----------- --------- -------
3 6 100
4 8 90
5 10 75
6 12 60
For a given number of finalcolors, if you use too many maxcolors, the result will be noisy. If you use too few, the result will be a relatively poor assignment of colors.
Input: pixs (8 bpp, colormapped)
selsize (for closing)
countarray (ptr to array containing the number of pixels
found in each color in the colormap)
Return: 0 if OK, 1 on error
Notes: (1) This operation is in-place. (2) This is phase 3 of color segmentation. It is the first part of a two-step noise removal process. Colors with a large population are closed first; this operation absorbs small sets of intercalated pixels of a different color.
Input: pixs (32 bpp; 24-bit color)
maxdist (max euclidean dist to existing cluster)
maxcolors (max number of colors allowed in first pass)
Return: pixd (8 bit with colormap), or null on error
Notes: (1) This is phase 1. See description in pixColorSegment(). (2) Greedy unsupervised classification. If the limit 'maxcolors' is exceeded, the computation is repeated with a larger allowed cluster size. (3) On each successive iteration, 'maxdist' is increased by a constant factor. See comments in pixColorSegment() for a guideline on parameter selection. Note that the diagonal of the 8-bit rgb color cube is about 440, so for 'maxdist' = 440, you are guaranteed to get 1 color!
Input: pixd (8 bpp, colormapped)
pixs (32 bpp rgb, with initial pixel values)
finalcolors (max number of colors to retain)
Return: 0 if OK, 1 on error
Notes: (1) This operation is in-place. (2) This is phase 4 of color segmentation, and the second part of the 2-step noise removal. Only 'finalcolors' different colors are retained, with colors with smaller populations being replaced by the nearest color of the remaining colors. For highest accuracy, for pixels that are being replaced, we find the nearest colormap color to the original rgb color.
| LEPT_DLL l_int32 pixColorsForQuantization | ( | PIX * | pixs, |
| l_int32 | thresh, | ||
| l_int32 * | pncolors, | ||
| l_int32 * | piscolor, | ||
| l_int32 | debug | ||
| ) |
pixColorsForQuantization() Input: pixs (8 bpp gray or 32 bpp rgb; with or without colormap) thresh (binary threshold on edge gradient; 0 for default) &ncolors (<return> the number of colors found) &iscolor (<optional return> 1 if significant color is found; 0 otherwise. If pixs is 8 bpp, and does not have a colormap with color entries, this is 0) debug (1 to output masked image that is tested for colors; 0 otherwise) Return: 0 if OK, 1 on error.
Notes: (1) This function finds a measure of the number of colors that are found in low-gradient regions of an image. By its magnitude relative to some threshold (not specified in this function), it gives a good indication of whether quantization will generate posterization. This number is larger for images with regions of slowly varying intensity (if 8 bpp) or color (if rgb). Such images, if quantized, may require dithering to avoid posterization, and lossless compression is then expected to be poor. (2) If pixs has a colormap, the number of colors returned is the number in the colormap. (3) It is recommended that document images be reduced to a width of 800 pixels before applying this function. Then it can be expected that color detection will be fairly accurate and the number of colors will reflect both the content and the type of compression to be used. For less than 15 colors, there is unlikely to be a halftone image, and lossless quantization should give both a good visual result and better compression. (4) When using the default threshold on the gradient (15), images (both gray and rgb) where ncolors is greater than about 15 will compress poorly with either lossless compression or dithered quantization, and they may be posterized with non-dithered quantization. (5) For grayscale images, or images without significant color, this returns the number of significant gray levels in the low-gradient regions. The actual number of gray levels can be large due to jpeg compression noise in the background. (6) Similarly, for color images, the actual number of different (r,g,b) colors in the low-gradient regions (rather than the number of occupied level 4 octcubes) can be quite large, e.g., due to jpeg compression noise, even for regions that appear to be of a single color. By quantizing to level 4 octcubes, most of these superfluous colors are removed from the counting. (7) The image is tested for color. 
If there is very little color, it is thresholded to gray and the number of gray levels in the low gradient regions is found. If the image has color, the number of occupied level 4 octcubes is found. (8) The number of colors in the low-gradient regions increases monotonically with the threshold @thresh on the edge gradient. (9) Background: grayscale and color quantization is often useful to achieve highly compressed images with little visible distortion. However, gray or color washes (regions of low gradient) can defeat this approach to high compression. How can one determine if an image is expected to compress well using gray or color quantization? We use the fact that
Input: pixs (32 bpp rgb)
rfract (fractional shift in red component)
gfract (fractional shift in green component)
bfract (fractional shift in blue component)
Return: pixd, or null on error
Notes: (1) This allows independent fractional shifts of the r,g and b components. A positive shift pushes to saturation (255); a negative shift pushes toward 0 (black). (2) The effect can be imagined using a color wheel that consists (for our purposes) of these 6 colors, separated by 60 degrees: red, magenta, blue, cyan, green, yellow. (3) So, for example, a negative shift of the blue component (bfract < 0) could be accompanied by positive shifts of red and green to make an image more yellow. (4) Examples of limiting cases: rfract = 1 ==> r = 255; rfract = -1 ==> r = 0.
| LEPT_DLL l_int32 pixColumnStats | ( | PIX * | pixs, |
| BOX * | box, | ||
| NUMA ** | pnamean, | ||
| NUMA ** | pnamedian, | ||
| NUMA ** | pnamode, | ||
| NUMA ** | pnamodecount, | ||
| NUMA ** | pnavar, | ||
| NUMA ** | pnarootvar | ||
| ) |
Input: pixs (8 bpp; not cmapped)
box (<optional> clipping box; can be null)
&namean (<optional return> numa of mean values)
&namedian (<optional return> numa of median values)
&namode (<optional return> numa of mode intensity values)
&namodecount (<optional return> numa of mode counts)
&navar (<optional return> numa of variance)
&narootvar (<optional return> numa of square root of variance)
Return: 0 if OK, 1 on error
Notes: (1) This computes numas that represent row vectors of statistics, with each of its values derived from the corresponding col of a Pix. (2) Use NULL on input to prevent computation of any of the 6 numas. (3) Other functions that compute pixel column statistics are: pixCountPixelsByColumn() pixAverageByColumn() pixVarianceByColumn() pixGetColumnStats()
Input: pixd (1 bpp, 8 bpp gray or 32 bpp rgb; no cmap)
pixs (1 bpp, 8 bpp gray or 32 bpp rgb; no cmap)
pixm (<optional> 1 bpp mask; no operation if NULL)
Return: 0 if OK; 1 on error
Notes: (1) In-place operation; pixd is changed. (2) This sets each pixel in pixd that co-locates with an ON pixel in pixm to the corresponding value of pixs. (3) pixs and pixd must be the same depth and not colormapped. (4) All three input pix are aligned at the UL corner, and the operation is clipped to the intersection of all three images. (5) If pixm == NULL, it's a no-op. (6) Implementation: see notes in pixCombineMaskedGeneral(). For 8 bpp selective masking, you might guess that it would be faster to generate an 8 bpp version of pixm, using pixConvert1To8(NULL, pixm, 0, 255), and then use a general combine operation d = (d & ~m) | (s & m) on a word-by-word basis. Not always. The word-by-word combine takes a time that is independent of the mask data. If the mask is relatively sparse, the byte-check method is actually faster!
| LEPT_DLL l_int32 pixCombineMaskedGeneral | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| PIX * | pixm, | ||
| l_int32 | x, | ||
| l_int32 | y | ||
| ) |
Input: pixd (1 bpp, 8 bpp gray or 32 bpp rgb)
pixs (1 bpp, 8 bpp gray or 32 bpp rgb)
pixm (<optional> 1 bpp mask)
x, y (origin of pixs and pixm relative to pixd; can be negative)
Return: 0 if OK; 1 on error
Notes: (1) In-place operation; pixd is changed. (2) This is a generalized version of pixCombineMasked(), where the source and mask can be placed at the same (arbitrary) location relative to pixd. (3) pixs and pixd must be the same depth and not colormapped. (4) The UL corners of both pixs and pixm are aligned with the point (x, y) of pixd, and the operation is clipped to the intersection of all three images. (5) If pixm == NULL, it's a no-op. (6) Implementation. There are two ways to do these. In the first, we use rasterop, ORing the part of pixs under the mask with pixd (which has been appropriately cleared there first). In the second, the mask is used one pixel at a time to selectively replace pixels of pixd with those of pixs. Here, we use rasterop for 1 bpp and pixel-wise replacement for 8 and 32 bpp. To use rasterop for 8 bpp, for example, we must first generate an 8 bpp version of the mask. The code is simple:
Pix *pixm8 = pixConvert1To8(NULL, pixm, 0, 255); Pix *pixt = pixAnd(NULL, pixs, pixm8); pixRasterop(pixd, x, y, wmin, hmin, PIX_DST & PIX_NOT(PIX_SRC), pixm8, 0, 0); pixRasterop(pixd, x, y, wmin, hmin, PIX_SRC | PIX_DST, pixt, 0, 0); pixDestroy(&pixt); pixDestroy(&pixm8);
| LEPT_DLL l_int32 pixCompareBinary | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| l_int32 | comptype, | ||
| l_float32 * | pfract, | ||
| PIX ** | ppixdiff | ||
| ) |
Input: pix1 (1 bpp)
pix2 (1 bpp)
comptype (L_COMPARE_XOR, L_COMPARE_SUBTRACT)
&fract (<return> fraction of pixels that are different)
&pixdiff (<optional return> pix of difference)
Return: 0 if OK; 1 on error
Notes: (1) The two images are aligned at the UL corner, and do not need to be the same size. (2) If using L_COMPARE_SUBTRACT, pix2 is subtracted from pix1. (3) The total number of pixels is determined by pix1.
| LEPT_DLL l_int32 pixCompareGray | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| l_int32 | comptype, | ||
| l_int32 | plottype, | ||
| l_int32 * | psame, | ||
| l_float32 * | pdiff, | ||
| l_float32 * | prmsdiff, | ||
| PIX ** | ppixdiff | ||
| ) |
Input: pix1 (8 or 16 bpp, not cmapped)
pix2 (8 or 16 bpp, not cmapped)
comptype (L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF)
plottype (gplot plot output type, or 0 for no plot)
&same (<optional return> 1 if pixel values are identical)
&diff (<optional return> average difference)
&rmsdiff (<optional return> rms of difference)
&pixdiff (<optional return> pix of difference)
Return: 0 if OK; 1 on error
Notes: (1) See pixCompareGrayOrRGB() for details. (2) Use pixCompareGrayOrRGB() if the input pix are colormapped.
| LEPT_DLL l_int32 pixCompareGrayByHisto | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| BOX * | box1, | ||
| BOX * | box2, | ||
| l_float32 | minratio, | ||
| l_int32 | maxgray, | ||
| l_int32 | factor, | ||
| l_int32 | nx, | ||
| l_int32 | ny, | ||
| l_float32 * | pscore, | ||
| l_int32 | debugflag | ||
| ) |
Input: pix1, pix2 (any depth; colormap OK)
box1, box2 (<optional> region selected from each; can be null)
minratio (requiring sizes be compatible; <= 1.0)
maxgray (max value to keep in histo; >= 200, 255 to keep all)
factor (subsampling; >= 1)
nx, ny (number of subregions to use for histograms; e.g. 3x3)
&score (<return> similarity score of histograms)
debugflag (1 for debug output; 0 for no debugging)
Return: 0 if OK, 1 on error
Notes: (1) This function compares two grayscale photo regions. It can do it with a single histogram from each region, or with a set of (nx * ny) spatially aligned histograms. For both cases, align the regions using the centroid of the inverse image, and crop to the smallest of the two. (2) An initial filter gives @score = 0 if the ratio of widths and heights (smallest / largest) does not exceed a threshold @minratio. This must be between 0.5 and 1.0. If set at 1.0, both images must be exactly the same size. A typical value for @minratio is 0.9. (3) The lightest values in the histogram can be disregarded. Set @maxgray to the lightest value to be kept. For example, to eliminate white (255), set @maxgray = 254. @maxgray must be >= 200. (4) For an efficient representation of the histogram, normalize using a multiplicative factor so that the number in the maximum bucket is 255. It then takes 256 bytes to store. (5) When comparing the histograms of two regions:
| LEPT_DLL l_int32 pixCompareGrayOrRGB | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| l_int32 | comptype, | ||
| l_int32 | plottype, | ||
| l_int32 * | psame, | ||
| l_float32 * | pdiff, | ||
| l_float32 * | prmsdiff, | ||
| PIX ** | ppixdiff | ||
| ) |
Input: pix1 (8 or 16 bpp gray, 32 bpp rgb, or colormapped)
pix2 (8 or 16 bpp gray, 32 bpp rgb, or colormapped)
comptype (L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF)
plottype (gplot plot output type, or 0 for no plot)
&same (<optional return> 1 if pixel values are identical)
&diff (<optional return> average difference)
&rmsdiff (<optional return> rms of difference)
&pixdiff (<optional return> pix of difference)
Return: 0 if OK; 1 on error
Notes: (1) The two images are aligned at the UL corner, and do not need to be the same size. If they are not the same size, the comparison will be made over overlapping pixels. (2) If there is a colormap, it is removed and the result is either gray or RGB depending on the colormap. (3) If RGB, each component is compared separately. (4) If comptype is L_COMPARE_ABS_DIFF, pix2 is subtracted from pix1 and the absolute value is taken. (5) If comptype is L_COMPARE_SUBTRACT, pix2 is subtracted from pix1 and the result is clipped to 0. (6) The plot output types are specified in gplot.h. Use 0 if no difference plot is to be made. (7) If the images are pixelwise identical, no difference plot is made, even if requested. The result (TRUE or FALSE) is optionally returned in the parameter 'same'. (8) The average difference (either subtracting or absolute value) is optionally returned in the parameter 'diff'. (9) The RMS difference is optionally returned in the parameter 'rmsdiff'. For RGB, we return the average of the RMS differences for each of the components.
| LEPT_DLL l_int32 pixComparePhotoRegionsByHisto | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| BOX * | box1, | ||
| BOX * | box2, | ||
| l_float32 | minratio, | ||
| l_int32 | factor, | ||
| l_int32 | nx, | ||
| l_int32 | ny, | ||
| l_float32 * | pscore, | ||
| l_int32 | debugflag | ||
| ) |
pixComparePhotoRegionsByHisto()
Input: pix1, pix2 (any depth; colormap OK)
box1, box2 (<optional> photo regions from each; can be null)
minratio (requiring sizes be compatible; <= 1.0)
factor (subsampling; >= 1)
nx, ny (number of subregions to use for histograms; e.g. 3x3)
&score (<return> similarity score of histograms)
debugflag (1 for debug output; 0 for no debugging)
Return: 0 if OK, 1 on error
Notes: (1) This function compares two grayscale photo regions. If a box is given, the region is clipped; otherwise assume the entire images are photo regions. This is done with a set of (nx * ny) spatially aligned histograms, which are aligned using the centroid of the inverse image. (2) An initial filter gives @score = 0 if the ratio of widths and heights (smallest / largest) does not exceed a threshold @minratio. This must be between 0.5 and 1.0. If set at 1.0, both images must be exactly the same size. A typical value for @minratio is 0.9. (3) Because this function should not be used on text or line graphics, which can give false positive results (i.e., high scores for different images), filter the images using pixGenPhotoHistos(), which returns tiled histograms only if an image is not text and comparison is expected to work with histograms. If either image fails the test, the comparison returns a score of 0.0. (4) The white value counts in the histograms are removed; they are typically pixels that were padded to achieve alignment. (5) For an efficient representation of the histogram, normalize using a multiplicative factor so that the number in the maximum bucket is 255. It then takes 256 bytes to store. (6) When comparing the histograms of two regions, use the Earth Mover distance (EMD), with the histograms normalized so that the sum over bins is the same. Further normalize by dividing by 255, so that the result is in [0.0 ... 1.0]. (7) Get a similarity score S = 1.0 - k * D, where k is a constant, say in the range 5-10, and D is the normalized EMD. For multiple tiles, take the Min(S) to be the final score. Using aligned tiles gives protection against accidental similarity of the overall grayscale histograms. A small number of aligned tiles works well. (8) With debug on, you get a pdf that shows, for each tile, the images, histograms and score.
Input: pix1 (8 bpp gray or 32 bpp rgb, or colormapped)
pix2 (8 bpp gray or 32 bpp rgb, or colormapped)
factor (subsampling factor; use 0 or 1 for no subsampling)
Return: narank (numa of rank difference), or null on error
Notes: (1) This answers the question: if the pixel values in each component are compared by absolute difference, for any value of difference, what is the fraction of pixel pairs that have a difference of this magnitude or greater. For a difference of 0, the fraction is 1.0. In this sense, it is a mapping from pixel difference to rank order of difference. (2) The two images are aligned at the UL corner, and do not need to be the same size. If they are not the same size, the comparison will be made over overlapping pixels. (3) If there is a colormap, it is removed and the result is either gray or RGB depending on the colormap. (4) If RGB, pixel differences for each component are aggregated into a single histogram.
| LEPT_DLL l_int32 pixCompareRGB | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| l_int32 | comptype, | ||
| l_int32 | plottype, | ||
| l_int32 * | psame, | ||
| l_float32 * | pdiff, | ||
| l_float32 * | prmsdiff, | ||
| PIX ** | ppixdiff | ||
| ) |
Input: pix1 (32 bpp rgb)
pix2 (32 bpp rgb)
comptype (L_COMPARE_SUBTRACT, L_COMPARE_ABS_DIFF)
plottype (gplot plot output type, or 0 for no plot)
&same (<optional return> 1 if pixel values are identical)
&diff (<optional return> average difference)
&rmsdiff (<optional return> rms of difference)
&pixdiff (<optional return> pix of difference)
Return: 0 if OK; 1 on error
Notes: (1) See pixCompareGrayOrRGB() for details.
| LEPT_DLL l_int32 pixCompareTiled | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| l_int32 | sx, | ||
| l_int32 | sy, | ||
| l_int32 | type, | ||
| PIX ** | ppixdiff | ||
| ) |
Input: pix1 (8 bpp or 32 bpp rgb)
pix2 (8 bpp or 32 bpp rgb)
sx, sy (tile size; must be > 1)
type (L_MEAN_ABSVAL or L_ROOT_MEAN_SQUARE)
&pixdiff (<return> pix of difference)
Return: 0 if OK; 1 on error
Notes: (1) With L_MEAN_ABSVAL, we compute for each tile the average abs value of the pixel component difference between the two (aligned) images. With L_ROOT_MEAN_SQUARE, we compute instead the rms difference over all components. (2) The two input pix must be the same depth. Comparison is made using UL corner alignment. (3) For 32 bpp, the distance between corresponding tiles is found by averaging the measured difference over all three components of each pixel in the tile. (4) The result, pixdiff, contains one pixel for each source tile.
| LEPT_DLL l_int32 pixCompareWithTranslation | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| l_int32 | thresh, | ||
| l_int32 * | pdelx, | ||
| l_int32 * | pdely, | ||
| l_float32 * | pscore, | ||
| l_int32 | debugflag | ||
| ) |
Input: pix1, pix2 (any depth; colormap OK)
thresh (threshold for converting to 1 bpp)
&delx (<return> x translation on pix2 to align with pix1)
&dely (<return> y translation on pix2 to align with pix1)
&score (<return> correlation score at best alignment)
debugflag (1 for debug output; 0 for no debugging)
Return: 0 if OK, 1 on error
Notes: (1) This does a coarse-to-fine search for best translational alignment of two images, measured by a scoring function that is the correlation between the fg pixels. (2) The threshold is used if the images aren't 1 bpp. (3) With debug on, you get a pdf that shows, as a grayscale image, the score as a function of shift from the initial estimate, for each of the four levels. The shift is 0 at the center of the image. (4) With debug on, you also get a pdf that shows the difference at the best alignment between the two images, at each of the four levels. The red and green pixels show locations where one image has a fg pixel and the other doesn't. The black pixels are where both images have fg pixels, and white pixels are where neither image has fg pixels.
Input: filename
comptype (IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG)
Return: pixc, or null on error
Notes: (1) Use @comptype == IFF_DEFAULT to have the compression type automatically determined. (2) If the comptype is invalid for this file, the default will be substituted.
Input: pix
comptype (IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG)
Return: pixc, or null on error
Notes: (1) Use @comptype == IFF_DEFAULT to have the compression type automatically determined.
Input: data (compressed string)
size (number of bytes)
copyflag (L_INSERT or L_COPY)
Return: pixc, or null on error
Notes: (1) This works when the compressed string is png, jpeg or tiffg4. (2) The copyflag determines if the data in the new Pixcomp is a copy of the input data.
| LEPT_DLL l_int32 pixcompDetermineFormat | ( | l_int32 | comptype, |
| l_int32 | d, | ||
| l_int32 | cmapflag, | ||
| l_int32 * | pformat | ||
| ) |
Input: comptype (IFF_DEFAULT, IFF_TIFF_G4, IFF_PNG, IFF_JFIF_JPEG)
d (pix depth)
cmapflag (1 if pix to be compressed as a colormap; 0 otherwise)
&format (<return> IFF_TIFF_G4, IFF_PNG or IFF_JFIF_JPEG)
Return: 0 if OK; 1 on error
Notes: (1) This determines the best format for a pix, given both the request (@comptype) and the image characteristics. (2) If @comptype == IFF_DEFAULT, this does not necessarily result in png encoding. Instead, it returns one of the three formats that is both valid and most likely to give best compression. (3) If the pix cannot be compressed by the input value of @comptype, this selects IFF_PNG, which can compress all pix.
Input: pixc
&w, &h, &d (<optional return>)
Return: 0 if OK, 1 on error
| LEPT_DLL FPIX* pixComponentFunction | ( | PIX * | pix, |
| l_float32 | rnum, | ||
| l_float32 | gnum, | ||
| l_float32 | bnum, | ||
| l_float32 | rdenom, | ||
| l_float32 | gdenom, | ||
| l_float32 | bdenom | ||
| ) |
Input: pix (32 bpp rgb)
rnum, gnum, bnum (coefficients for numerator)
rdenom, gdenom, bdenom (coefficients for denominator)
Return: fpixd, or null on error
Notes: (1) This stores a function of the component values of each input pixel in @fpixd. (2) The function is a ratio of linear combinations of component values. There are two special cases for denominator coefficients: (a) The denominator is 1.0: input 0 for all denominator coefficients (b) Only one component is used in the denominator: input 1.0 for that denominator component and 0.0 for the other two. (3) If the denominator is 0, multiply by an arbitrary number that is much larger than 1. Choose 256 "arbitrarily".
Input: fp (file stream)
pixc
text (<optional> identifying string; can be null)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixConformsToRectangle | ( | PIX * | pixs, |
| BOX * | box, | ||
| l_int32 | dist, | ||
| l_int32 * | pconforms | ||
| ) |
Input: pixs (1 bpp)
box (<optional> if null, use the entire pixs)
dist (max distance allowed between bounding box and nearest
foreground pixel within it)
&conforms (<return> 0 (false) if not conforming;
1 (true) if conforming)
Return: 0 if OK, 1 on error
Notes: (1) There are several ways to test if a connected component has an essentially rectangular boundary, such as: a. Fraction of fill into the bounding box b. Max-min distance of fg pixel from periphery of bounding box c. Max depth of bg intrusions into component within bounding box The weakness of (a) is that it is highly sensitive to holes within the c.c. The weakness of (b) is that it can have arbitrarily large intrusions into the c.c. Method (c) tests the integrity of the outer boundary of the c.c., with respect to the enclosing bounding box, so we use it. (2) This tests if the connected component within the box conforms to the box at all points on the periphery within @dist. Inside, at a distance from the box boundary that is greater than @dist, we don't care about the pixels in the c.c. (3) We can think of the conforming condition as follows: No pixel inside a distance @dist from the boundary can connect to the boundary through a path through the bg. To implement this, we need to do a flood fill. We can go either from inside toward the boundary, or the other direction. It's easiest to fill from the boundary, and then verify that there are no filled pixels farther than @dist from the boundary.
Input: pixs (1 bpp)
&pixa (<optional return> pixa of each c.c.)
connectivity (4 or 8)
Return: boxa, or null on error
Notes: (1) This is the top-level call for getting bounding boxes or a pixa of the components, and it can be used instead of either pixConnCompBB() or pixConnCompPixa(), respectively.
Input: pixs (1 bpp)
connect (connectivity: 4 or 8)
Return: pixd (32 bpp, 1 spp), or null on error
Notes: (1) The pixel values in pixd label the area of the fg component to which the pixel belongs. Pixels in the bg are labelled 0. (2) For purposes of visualization, the output can be converted to 8 bpp, using pixConvert32To8() or pixMaxDynamicRange().
Input: pixs (1 bpp)
connectivity (4 or 8)
Return: boxa, or null on error
Notes: (1) Finds bounding boxes of 4- or 8-connected components in a binary image. (2) This works on a copy of the input pix. The c.c. are located in raster order and erased one at a time. In the process, the b.b. is computed and saved.
| LEPT_DLL l_int32 pixConnCompIncrAdd | ( | PIX * | pixs, |
| PTAA * | ptaa, | ||
| l_int32 * | pncc, | ||
| l_float32 | x, | ||
| l_float32 | y, | ||
| l_int32 | debug | ||
| ) |
Input: pixs (32 bpp, with pixels labelled by c.c.)
ptaa (with each pta of pixel locations indexed by c.c.)
&ncc (number of c.c)
x,y (location of added pixel)
debug (0 for no output; otherwise output whenever
debug <= nvals, up to debug == 3)
Return: -1 if nothing happens; 0 if a pixel is added; 1 on error
Notes: (1) This adds a pixel and updates the labelled connected components. Before calling this function, initialize the process using pixConnCompIncrInit(). (2) As a result of adding a pixel, one of the following can happen, depending on the number of neighbors with non-zero value: (a) nothing: the pixel is already a member of a c.c. (b) no neighbors: a new component is added, increasing the number of c.c. (c) one neighbor: the pixel is added to an existing c.c. (d) more than one neighbor: the added pixel causes joining of two or more c.c., reducing the number of c.c. A maximum of 4 c.c. can be joined. (3) When two c.c. are joined, the pixels in the larger index are relabelled to those of the smaller in pixs, and their locations are transferred to the pta with the smaller index in the ptaa. The pta corresponding to the larger index is then deleted. (4) This is an efficient implementation of a "union-find" operation, which supports the generation and merging of disjoint sets of pixels. This function can be called about 1.3 million times per second.
| LEPT_DLL l_int32 pixConnCompIncrInit | ( | PIX * | pixs, |
| l_int32 | conn, | ||
| PIX ** | ppixd, | ||
| PTAA ** | pptaa, | ||
| l_int32 * | pncc | ||
| ) |
Input: pixs (1 bpp)
conn (connectivity: 4 or 8)
&pixd (<return> 32 bpp, with c.c. labelled)
&ptaa (<return> with pixel locations indexed by c.c.)
&ncc (<return> initial number of c.c.)
Return: 0 if OK, 1 on error
Notes: (1) This labels the connected components in a 1 bpp pix, and additionally sets up a ptaa that lists the locations of pixels in each of the components. (2) It can be used to initialize the output image and arrays for an application that maintains information about connected components incrementally as pixels are added. (3) pixs can be empty or have some foreground pixels. (4) The connectivity is stored in pixd->special. (5) Always initialize with the first pta in ptaa being empty and representing the background value (index 0) in the pix.
Input: pixs (1 bpp)
&pixa (<return> pixa of each c.c.)
connectivity (4 or 8)
Return: boxa, or null on error
Notes: (1) This finds bounding boxes of 4- or 8-connected components in a binary image, and saves images of each c.c. in a pixa array. (2) It sets up 2 temporary pix, and for each c.c. that is located in raster order, it erases the c.c. from one pix, then uses the b.b. to extract the c.c. from the two pix using an XOR, and finally erases the c.c. from the second pix. (3) A clone of the returned boxa (where all boxes in the array are clones) is inserted into the pixa. (4) If the input is valid, this always returns a boxa and a pixa. If pixs is empty, the boxa and pixa will be empty.
Input: pixs (1 bpp)
connect (connectivity: 4 or 8)
depth (of pixd: 8 or 16 bpp; use 0 for auto determination)
Return: pixd (8, 16 or 32 bpp), or null on error
Notes: (1) pixd is 8, 16 or 32 bpp, and the pixel values label the fg component, starting with 1. Pixels in the bg are labelled 0. (2) If @depth = 0, the depth of pixd is 8 if the number of c.c. is less than 254, 16 if the number of c.c. is less than 0xfffe, and 32 otherwise. (3) If @depth = 8, the assigned label for the n-th component is 1 + n % 254. We use mod 254 because 0 is uniquely assigned to black: e.g., see pixcmapCreateRandom(). Likewise, if @depth = 16, the assigned label uses mod(2^16 - 2), and if @depth = 32, no mod is taken.
| LEPT_DLL PIX* pixContrastNorm | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| l_int32 | sx, | ||
| l_int32 | sy, | ||
| l_int32 | mindiff, | ||
| l_int32 | smoothx, | ||
| l_int32 | smoothy | ||
| ) |
Input: pixd (<optional> 8 bpp; null or equal to pixs)
pixs (8 bpp grayscale; not colormapped)
sx, sy (tile dimensions)
mindiff (minimum difference to accept as valid)
smoothx, smoothy (half-width of convolution kernel applied to
min and max arrays: use 0 for no smoothing)
Return: pixd always
Notes: (1) This function adaptively attempts to expand the contrast to the full dynamic range in each tile. If the contrast in a tile is smaller than @mindiff, it uses the min and max pixel values from neighboring tiles. It also can use convolution to smooth the min and max values from neighboring tiles. After all that processing, it is possible that the actual pixel values in the tile are outside the computed [min ... max] range for local contrast normalization. Such pixels are taken to be at either 0 (if below the min) or 255 (if above the max). (2) pixd can be equal to pixs (in-place operation) or null (makes a new pixd). (3) sx and sy give the tile size; they are typically at least 20. (4) mindiff is used to eliminate results for tiles where it is likely that either fg or bg is missing. A value around 50 or more is reasonable. (5) The full width and height of the convolution kernel are (2 * smoothx + 1) and (2 * smoothy + 1). Some smoothing is typically useful, and we limit the smoothing half-widths to the range from 0 to 8. (6) A linear TRC (gamma = 1.0) is applied to increase the contrast in each tile. The result can subsequently be globally corrected, by applying pixGammaTRC() with arbitrary values of gamma and the 0 and 255 points of the mapping.
Input: pixd (<optional> null or equal to pixs)
pixs (8 or 32 bpp; or 2, 4 or 8 bpp with colormap)
factor (0.0 is no enhancement)
Return: pixd always
Notes: (1) pixd must either be null or equal to pixs. For in-place operation, set pixd == pixs: pixContrastTRC(pixs, pixs, ...); To get a new image, set pixd == null: pixd = pixContrastTRC(NULL, pixs, ...); (2) If pixs is colormapped, the colormap is transformed, either in-place or in a copy of pixs. (3) Contrast is enhanced by mapping each color component using an atan function with maximum slope at 127. Pixels below 127 are lowered in intensity and pixels above 127 are increased. (4) The useful range for the contrast factor is scaled to be in (0.0 to 1.0), but larger values can also be used. (5) If factor == 0.0, no enhancement is performed; return a copy unless in-place, in which case this is a no-op. (6) For color images that are not colormapped, the mapping is applied to each component.
Input: pixd (<optional> null or equal to pixs)
pixs (8 or 32 bpp; or 2, 4 or 8 bpp with colormap)
pixm (<optional> null or 1 bpp)
factor (0.0 is no enhancement)
Return: pixd always
Notes: (1) Same as pixContrastTRC() except mapping is optionally over a subset of pixels described by pixm. (2) Masking does not work for colormapped images. (3) See pixContrastTRC() for details on how to use the parameters.
Input: pixs (16 bpp)
type (L_LS_BYTE, L_MS_BYTE, L_CLIP_TO_FF)
Return: pixd (8 bpp), or null on error
Notes: (1) For each dest pixel, use either the LSB, the MSB, or the min(val, 255) for each 16-bit src pixel.
Input: pixd (<optional> 16 bpp, can be null)
pixs (1 bpp)
val0 (16 bit value to be used for 0s in pixs)
val1 (16 bit value to be used for 1s in pixs)
Return: pixd (16 bpp)
Notes: (1) If pixd is null, a new pix is made. (2) If pixd is not null, it must be of equal width and height as pixs. It is always returned.
Input: pixd (<optional> 2 bpp, can be null)
pixs (1 bpp)
val0 (2 bit value to be used for 0s in pixs)
val1 (2 bit value to be used for 1s in pixs)
Return: pixd (2 bpp)
Notes: (1) If pixd is null, a new pix is made. (2) If pixd is not null, it must be of equal width and height as pixs. It is always returned. (3) A simple unpacking might use val0 = 0 and val1 = 3. (4) If you want a colormapped pixd, use pixConvert1To2Cmap().
Input: pixs (1 bpp) Return: pixd (2 bpp, cmapped)
Notes: (1) Input 0 is mapped to (255, 255, 255); 1 is mapped to (0, 0, 0)
Input: pixd (<optional> 32 bpp, can be null)
pixs (1 bpp)
val0 (32 bit value to be used for 0s in pixs)
val1 (32 bit value to be used for 1s in pixs)
Return: pixd (32 bpp)
Notes: (1) If pixd is null, a new pix is made. (2) If pixd is not null, it must be of equal width and height as pixs. It is always returned.
Input: pixd (<optional> 4 bpp, can be null)
pixs (1 bpp)
val0 (4 bit value to be used for 0s in pixs)
val1 (4 bit value to be used for 1s in pixs)
Return: pixd (4 bpp)
Notes: (1) If pixd is null, a new pix is made. (2) If pixd is not null, it must be of equal width and height as pixs. It is always returned. (3) A simple unpacking might use val0 = 0 and val1 = 15, or v.v. (4) If you want a colormapped pixd, use pixConvert1To4Cmap().
Input: pixs (1 bpp) Return: pixd (4 bpp, cmapped)
Notes: (1) Input 0 is mapped to (255, 255, 255); 1 is mapped to (0, 0, 0)
Input: pixd (<optional> 8 bpp, can be null)
pixs (1 bpp)
val0 (8 bit value to be used for 0s in pixs)
val1 (8 bit value to be used for 1s in pixs)
Return: pixd (8 bpp)
Notes: (1) If pixd is null, a new pix is made. (2) If pixd is not null, it must be of equal width and height as pixs. It is always returned. (3) A simple unpacking might use val0 = 0 and val1 = 255, or v.v. (4) To have a colormap associated with the 8 bpp pixd, use pixConvert1To8Cmap().
Input: pixs (1 bpp) Return: pixd (8 bpp, cmapped)
Notes: (1) Input 0 is mapped to (255, 255, 255); 1 is mapped to (0, 0, 0)
Input: pixs (24 bpp rgb) Return: pixd (32 bpp rgb), or null on error
Notes: (1) 24 bpp rgb pix are not supported in leptonica, except for a small number of formatted write operations. The data is a byte array, with pixels in order r,g,b, and padded to 32 bit boundaries in each line. (2) Because 24 bpp rgb pix are conveniently generated by programs such as xpdf (which has SplashBitmaps that store the raster data in consecutive 24-bit rgb pixels), it is useful to provide 24 bpp pix that simply incorporate that data. The only things we can do with these are: (a) write them to file in png, jpeg, tiff and pnm (b) interconvert between 24 and 32 bpp in memory (for testing).
| LEPT_DLL PIX* pixConvert2To8 | ( | PIX * | pixs, |
| l_uint8 | val0, | ||
| l_uint8 | val1, | ||
| l_uint8 | val2, | ||
| l_uint8 | val3, | ||
| l_int32 | cmapflag | ||
| ) |
Input: pixs (2 bpp)
val0 (8 bit value to be used for 00 in pixs)
val1 (8 bit value to be used for 01 in pixs)
val2 (8 bit value to be used for 10 in pixs)
val3 (8 bit value to be used for 11 in pixs)
cmapflag (TRUE if pixd is to have a colormap; FALSE otherwise)
Return: pixd (8 bpp), or null on error
Notes:
Input: pixs (32 bpp, single component)
type (L_LS_TWO_BYTES, L_MS_TWO_BYTES, L_CLIP_TO_FFFF)
Return: pixd (16 bpp), or null on error
Notes: (1) The data in pixs is typically used for labelling. It is an array of l_uint32 values, not rgb or rgba.
Input: pixs (32 bpp rgb) Return: pixd (24 bpp rgb), or null on error
Notes: (1) See pixConvert24To32().
Input: pixs (32 bpp, single component)
type16 (L_LS_TWO_BYTES, L_MS_TWO_BYTES, L_CLIP_TO_FFFF)
type8 (L_LS_BYTE, L_MS_BYTE, L_CLIP_TO_FF)
Return: pixd (8 bpp), or null on error
Input: pixs (4 bpp)
cmapflag (TRUE if pixd is to have a colormap; FALSE otherwise)
Return: pixd (8 bpp), or null on error
Notes:
Input: pixs (8 bpp; colormap removed to gray)
leftshift (number of bits: 0 is no shift;
8 replicates in MSB and LSB of dest)
Return: pixd (16 bpp), or null on error
Notes: (1) For left shift of 8, the 8 bit value is replicated in both the MSB and the LSB of the pixels in pixd. That way, we get proportional mapping, with a correct map from 8 bpp white (0xff) to 16 bpp white (0xffff).
Input: pix (8 bpp) Return: 32 bpp rgb pix, or null on error
Notes: (1) If there is no colormap, replicates the gray value into the 3 MSB of the dest pixel. (2) Implicit assumption about RGB component ordering.
| LEPT_DLL PIX* pixConvertColorToSubpixelRGB | ( | PIX * | pixs, |
| l_float32 | scalex, | ||
| l_float32 | scaley, | ||
| l_int32 | order | ||
| ) |
pixConvertColorToSubpixelRGB()
Input: pixs (32 bpp or colormapped)
scalex, scaley
order (of subpixel rgb color components in composition of pixd:
L_SUBPIXEL_ORDER_RGB, L_SUBPIXEL_ORDER_BGR,
L_SUBPIXEL_ORDER_VRGB, L_SUBPIXEL_ORDER_VBGR)
Return: pixd (32 bpp), or null on error
Notes: (1) If pixs has a colormap, it is removed to 32 bpp rgb. If the colormap has no color, pixConvertGrayToSubpixelRGB() should be called instead, because it will give the same result more efficiently. The function pixConvertToSubpixelRGB() will do the best thing for all cases. (2) For horizontal subpixel splitting, the input rgb image is rescaled by @scaley vertically and by 3.0 times @scalex horizontally. Then for each horizontal triplet of pixels, the r component of the final pixel is selected from the r component of the appropriate pixel in the triplet, and likewise for g and b. Vertical subpixel splitting is handled similarly.
Input: pixs (1, 2, 4, 8, 16, 32 bpp) Return: pixd (1, 8, or 32 bpp), or null on error
Notes: (1) For wrapping in PostScript, we convert pixs to 1 bpp, 8 bpp (gray) and 32 bpp (RGB color). (2) Colormaps are removed. For pixs with colormaps, the images are converted to either 8 bpp gray or 32 bpp RGB, depending on whether the colormap has color content. (3) Images without colormaps, that are not 1 bpp or 32 bpp, are converted to 8 bpp gray.
Input: pixs (2, 4 or 8 bpp grayscale) Return: pixd (2, 4 or 8 bpp with colormap), or null on error
Notes: (1) This is a simple interface for adding a colormap to a 2, 4 or 8 bpp grayscale image without causing any quantization. There is some similarity to operations in grayquant.c, such as pixThresholdOn8bpp(), where the emphasis is on quantization with an arbitrary number of levels, and a colormap is an option. (2) Returns a copy if pixs already has a colormap. (3) For 8 bpp src, this is a lossless transformation. (4) For 2 and 4 bpp src, this generates a colormap that assumes full coverage of the gray space, with equally spaced levels: 4 levels for d = 2 and 16 levels for d = 4. (5) In all cases, the depth of the dest is the same as the src.
Input: pixs (8 bpp grayscale)
mindepth (of pixd; valid values are 2, 4 and 8)
Return: pixd (2, 4 or 8 bpp with colormap), or null on error
Notes: (1) Returns a copy if pixs already has a colormap. (2) This is a lossless transformation; there is no quantization. We compute the number of different gray values in pixs, and construct a colormap that has exactly these values. (3) 'mindepth' is the minimum depth of pixd. If mindepth == 8, pixd will always be 8 bpp. Let the number of different gray values in pixs be ngray. If mindepth == 4, we attempt to save pixd as a 4 bpp image, but if ngray > 16, pixd must be 8 bpp. Likewise, if mindepth == 2, the depth of pixd will be 2 if ngray <= 4 and 4 if ngray > 4 but <= 16.
Input: pixs (8 or 16 bpp grayscale)
gamma factor (0.0 or 1.0 for default; > 1.0 for brighter;
2.0 is quite nice)
Return: pixd (8 bpp with colormap), or null on error
Notes: (1) For 8 bpp input, this simply adds a colormap to the input image. (2) For 16 bpp input, it first converts to 8 bpp, using the MSB, and then adds the colormap. (3) The colormap is modeled after the Matlab "jet" configuration.
| LEPT_DLL PIX* pixConvertGrayToSubpixelRGB | ( | PIX * | pixs, |
| l_float32 | scalex, | ||
| l_float32 | scaley, | ||
| l_int32 | order | ||
| ) |
Input: pixs (8 bpp or colormapped)
scalex, scaley
order (of subpixel rgb color components in composition of pixd:
L_SUBPIXEL_ORDER_RGB, L_SUBPIXEL_ORDER_BGR,
L_SUBPIXEL_ORDER_VRGB, L_SUBPIXEL_ORDER_VBGR)
Return: pixd (32 bpp), or null on error
Notes: (1) If pixs has a colormap, it is removed to 8 bpp. (2) For horizontal subpixel splitting, the input gray image is rescaled by @scaley vertically and by 3.0 times @scalex horizontally. Then each horizontal triplet of pixels is mapped back to a single rgb pixel, with the r, g and b values being assigned from the triplet of gray values. Similar operations are used for vertical subpixel splitting. (3) This is a form of subpixel rendering that tends to give the resulting text a sharper and somewhat chromatic display. For horizontal subpixel splitting, the observable difference between @order=L_SUBPIXEL_ORDER_RGB and @order=L_SUBPIXEL_ORDER_BGR is reduced by optical diffusers in the display that make the pixel color appear to emerge from the entire pixel.
Input: pixd (can be NULL; if not NULL, must == pixs)
pixs
Return: pixd always
Notes: (1) For pixs = pixd, this is in-place; otherwise pixd must be NULL. (2) The user takes responsibility for making sure that pixs is in our HSV space. The definition of our HSV space is given in convertRGBToHSV(). (3) The h, s and v values are stored in the same places as the r, g and b values, respectively. Here, they are explicitly placed in the 3 MS bytes in the pixel.
Input: pixs (1, 2, 4, 8 bpp, not cmapped)
d (destination depth: 2, 4 or 8)
Return: pixd (2, 4 or 8 bpp), or null on error
Notes: (1) This is a lossless unpacking (depth-increasing) conversion. If ds is the depth of pixs, then
Input: pixs (32 bpp rgb)
ditherflag (1 to dither, 0 otherwise)
Return: pixd (2, 4 or 8 bpp with colormap), or null on error
Notes: (1) This function has two relatively simple modes of color quantization: (a) If the image is made orthographically and has not more than 256 'colors' at the level 4 octcube leaves, it is quantized nearly exactly. The ditherflag is ignored. (b) Most natural images have more than 256 different colors; in that case we use adaptive octree quantization, with dithering if requested. (2) If there are not more than 256 occupied level 4 octcubes, the color in the colormap that represents all pixels in one of those octcubes is given by the first pixel that falls into that octcube. (3) If there are more than 256 colors, we use adaptive octree color quantization. (4) Dithering gives better visual results on images where there is a color wash (a slow variation of color), but it is about twice as slow and results in significantly larger files when losslessly compressed (e.g., into png).
Input: pix (32 bpp RGB)
rwt, gwt, bwt (non-negative; these should add to 1.0,
or use 0.0 for default)
Return: 8 bpp pix, or null on error
Notes: (1) Use a weighted average of the RGB values.
Input: pix (32 bpp RGB) Return: 8 bpp pix, or null on error
Notes: (1) This function should be used if speed of conversion is paramount, and the green channel can be used as a fair representative of the RGB intensity. It is several times faster than pixConvertRGBToGray(). (2) To combine RGB to gray conversion with subsampling, use pixScaleRGBToGrayFast() instead.
Input: pix (32 bpp RGB)
type (L_CHOOSE_MIN, L_CHOOSE_MAX or L_CHOOSE_MAX_MIN_DIFF)
Return: 8 bpp pix, or null on error
Notes: (1) This chooses the min, the max, or the difference between the max and the min, of the three RGB sample values.
Input: pixs (32 bpp rgb)
refval (between 1 and 255; typ. less than 128)
Return: pixd (8 bpp), or null on error
Notes: (1) This returns the max component value, boosted by the saturation. The maximum boost occurs where the maximum component value is equal to some reference value. This particular weighting is due to Dany Qumsiyeh. (2) For gray pixels (zero saturation), this returns the intensity of any component. (3) For fully saturated pixels ('fullsat'), this rises linearly with the max value and has a slope equal to 255 divided by the reference value; for a max value greater than the reference value, it is clipped to 255. (4) For saturation values in between, the output is a linear combination of (2) and (3), weighted by saturation. It falls between these two curves, and does not exceed 255. (5) This can be useful for distinguishing an object that has nonzero saturation from a gray background. For this, the refval should be chosen near the expected value of the background, to achieve maximum saturation boost there.
Input: pixd (can be NULL; if not NULL, must == pixs)
pixs
Return: pixd always
Notes: (1) For pixs = pixd, this is in-place; otherwise pixd must be NULL. (2) The definition of our HSV space is given in convertRGBToHSV(). (3) The h, s and v values are stored in the same places as the r, g and b values, respectively. Here, they are explicitly placed in the 3 MS bytes in the pixel. (4) Normalizing to 1 and considering the r,g,b components, a simple way to understand the HSV space is:
Input: pixs (32 bpp RGB or 8 bpp with colormap) Return: pixd (8 bpp hue of HSV), or null on error
Notes: (1) The conversion to HSV hue is in-lined here. (2) If there is a colormap, it is removed. (3) If you just want the hue component, this does it at about 10 Mpixels/sec/GHz, which is about 2x faster than using pixConvertRGBToHSV()
Input: pixs (rgb) Return: fpixa (lab)
Notes: (1) The [l,a,b] values are stored as float values in three fpix that are returned in a fpixa.
Input: pix (32 bpp RGB) Return: 8 bpp pix, or null on error
Notes: (1) Use a standard luminance conversion.
Input: pixs (32 bpp RGB or 8 bpp with colormap) Return: pixd (8 bpp sat of HSV), or null on error
Notes: (1) The conversion to HSV sat is in-lined here. (2) If there is a colormap, it is removed. (3) If you just want the saturation component, this does it at about 12 Mpixels/sec/GHz.
Input: pixs (32 bpp RGB or 8 bpp with colormap) Return: pixd (8 bpp max component intensity of HSV), or null on error
Notes: (1) The conversion to HSV value (the max component) is in-lined here. (2) If there is a colormap, it is removed. (3) If you just want the value component, this does it at about 35 Mpixels/sec/GHz.
Input: pixs (rgb) Return: fpixa (xyz)
Notes: (1) The [x,y,z] values are stored as float values in three fpix that are returned in a fpixa. (2) The XYZ color space was defined in 1931 as a reference model that simulates human color perception. When Y is taken as luminance, the values of X and Z constitute a color plane representing all the hues that can be perceived. This gamut of colors is larger than the gamuts that can be displayed or printed. For example, although all rgb values map to XYZ, the converse is not true. (3) The value of the coefficients depends on the illuminant. We use coefficients for converting sRGB under D65 (the spectrum from a 6500 degree K black body; an approximation to daylight color). See, e.g., http://www.cs.rit.edu/~ncs/color/t_convert.html For more general information on color transforms, see: http://www.brucelindbloom.com/ http://user.engineering.uiowa.edu/~aip/Misc/ColorFAQ.html http://en.wikipedia.org/wiki/CIE_1931_color_space
Input: pixd (can be NULL; if not NULL, must == pixs)
pixs
Return: pixd always
Notes: (1) For pixs = pixd, this is in-place; otherwise pixd must be NULL. (2) The Y, U and V values are stored in the same places as the r, g and b values, respectively. Here, they are explicitly placed in the 3 MS bytes in the pixel. (3) Normalizing to 1 and considering the r,g,b components, a simple way to understand the YUV space is:
Input: pixs (1, 2, 4, 8, 16 or 32 bpp)
threshold (for final binarization, relative to 8 bpp)
Return: pixd (1 bpp), or null on error
Notes: (1) This is a top-level function, with simple default values used in pixConvertTo8() if unpacking is necessary. (2) Any existing colormap is removed. (3) If the input image has 1 bpp and no colormap, the operation is lossless and a copy is returned.
Input: pixs (1, 8 bpp) Return: pixd (16 bpp), or null on error
Usage: Top-level function, with simple default values for unpacking. 1 bpp: val0 = 0xffff, val1 = 0 8 bpp: replicates the 8 bit value in both the MSB and LSB of the 16 bit pixel.
Input: pixs (1, 2, 4, 8, 16 or 32 bpp)
factor (subsampling factor; integer >= 1)
threshold (for final binarization, relative to 8 bpp)
Return: pixd (1 bpp), or null on error
Notes: (1) This is a quick and dirty, top-level converter. (2) See pixConvertTo1() for default values.
Input: pixs (1, 2, 4, 8, 16 or 32 bpp) Return: pixd (32 bpp), or null on error
Usage: Top-level function, with simple default values for unpacking. 1 bpp: val0 = 255, val1 = 0 and then replication into R, G and B components 2 bpp: if colormapped, use the colormap values; otherwise, use val0 = 0, val1 = 0x55, val2 = 0xaa, val3 = 255 and replicate gray into R, G and B components 4 bpp: if colormapped, use the colormap values; otherwise, replicate 2 nybs into a byte, and then into R,G,B components 8 bpp: if colormapped, use the colormap values; otherwise, replicate gray values into R, G and B components 16 bpp: replicate MSB into R, G and B components 24 bpp: unpack the pixels, maintaining word alignment on each scanline 32 bpp: makes a copy
Notes: (1) Never returns a clone of pixs. (2) Implicit assumption about RGB component ordering.
Input: pixs (1, 2, 4, 8, 16 or 32 bpp)
factor (subsampling factor; integer >= 1)
Return: pixd (32 bpp), or null on error
Notes: (1) This is a fast, quick/dirty, top-level converter. (2) See pixConvertTo32() for default values.
Input: pixs (1, 2, 4, 8, 16 or 32 bpp)
cmapflag (TRUE if pixd is to have a colormap; FALSE otherwise)
Return: pixd (8 bpp), or null on error
Notes: (1) This is a top-level function, with simple default values for unpacking. (2) The result, pixd, is made with a colormap if specified. It is always a new image – never a clone. For example, if d == 8, and cmapflag matches the existence of a cmap in pixs, the operation is lossless and it returns a copy. (3) The default values used are:
Input: pixs (1, 2, 4, 8, 16 or 32 bpp)
factor (subsampling factor; integer >= 1)
cmapflag (TRUE if pixd is to have a colormap; FALSE otherwise)
Return: pixd (8 bpp), or null on error
Notes: (1) This is a fast, quick/dirty, top-level converter. (2) See pixConvertTo8() for default values.
Input: pixs (1, 2, 4, 8, 16 or 32 bpp)
dither (1 to dither if necessary; 0 otherwise)
Return: pixd (8 bpp, cmapped), or null on error
Notes: (1) This is a top-level function, with simple default values for unpacking. (2) The result, pixd, is always made with a colormap. (3) If d == 8, the operation is lossless and it returns a copy. (4) The default values used for increasing depth are:
Input: pixs (1, 2, 4, 8, 16, with or without colormap; or 32 bpp rgb)
copyflag (use 0 to return clone if pixs does not need to
be changed; 1 to return a copy in those situations)
warnflag (1 to issue warning if colormap is removed; else 0)
Return: pixd (8 bpp grayscale or 32 bpp rgb), or null on error
Notes: (1) If there is a colormap, the colormap is removed to 8 or 32 bpp, depending on whether the colors in the colormap are all gray. (2) If the input is either rgb or 8 bpp without a colormap, this returns either a clone or a copy, depending on @copyflag. (3) Otherwise, the pix is converted to 8 bpp grayscale. In all cases, pixd does not have a colormap.
Input: pix (1, 2, 4, 8, 16 or 32 bpp)
ncomps (number of components: 3 for RGB, 1 otherwise)
Return: dpix, or null on error
Notes: (1) If colormapped, remove to grayscale. (2) If 32 bpp and @ncomps == 3, this is RGB; convert to luminance. In all other cases the src image is treated as having a single component of pixel values.
Input: pix (1, 2, 4, 8, 16 or 32 bpp)
ncomps (number of components: 3 for RGB, 1 otherwise)
Return: fpix, or null on error
Notes: (1) If colormapped, remove to grayscale. (2) If 32 bpp and @ncomps == 3, this is RGB; convert to luminance. In all other cases the src image is treated as having a single component of pixel values.
| LEPT_DLL l_int32 pixConvertToPdf | ( | PIX * | pix, |
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | fileout, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | res, | ||
| const char * | title, | ||
| L_PDF_DATA ** | plpd, | ||
| l_int32 | position | ||
| ) |
Input: pix
type (L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE)
quality (used for JPEG only; 0 for default (75))
fileout (output pdf file; only required on last image on page)
x, y (location of lower-left corner of image, in pixels,
relative to the PostScript origin (0,0) at
the lower-left corner of the page)
res (override the resolution of the input image, in ppi;
use 0 to respect the resolution embedded in the input)
title (<optional> pdf title)
&lpd (ptr to lpd, which is created on the first invocation
and returned until last image is processed)
position (in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE,
L_LAST_IMAGE)
Return: 0 if OK, 1 on error
Notes: (1) If @res == 0 and the input resolution field is 0, this will use DEFAULT_INPUT_RES. (2) This only writes data to fileout if it is the last image to be written on the page. (3) See comments in convertToPdf().
| LEPT_DLL l_int32 pixConvertToPdfData | ( | PIX * | pix, |
| l_int32 | type, | ||
| l_int32 | quality, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | res, | ||
| const char * | title, | ||
| L_PDF_DATA ** | plpd, | ||
| l_int32 | position | ||
| ) |
Input: pix (all depths; cmap OK)
type (L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE)
quality (used for JPEG only; 0 for default (75))
&data (<return> pdf array)
&nbytes (<return> number of bytes in pdf array)
x, y (location of lower-left corner of image, in pixels,
relative to the PostScript origin (0,0) at
the lower-left corner of the page)
res (override the resolution of the input image, in ppi;
use 0 to respect the resolution embedded in the input)
title (<optional> pdf title)
&lpd (ptr to lpd, which is created on the first invocation
and returned until last image is processed)
position (in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE,
L_LAST_IMAGE)
Return: 0 if OK, 1 on error
Notes: (1) If @res == 0 and the input resolution field is 0, this will use DEFAULT_INPUT_RES. (2) This only writes @data if it is the last image to be written on the page. (3) See comments in convertToPdf().
| LEPT_DLL l_int32 pixConvertToPdfDataSegmented | ( | PIX * | pixs, |
| l_int32 | res, | ||
| l_int32 | type, | ||
| l_int32 | thresh, | ||
| BOXA * | boxa, | ||
| l_int32 | quality, | ||
| l_float32 | scalefactor, | ||
| const char * | title, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
pixConvertToPdfDataSegmented()
Input: pixs (any depth, cmap OK)
res (input image resolution; typ. 300 ppi; use 0 for default)
type (compression type for non-image regions; the
image regions are always compressed with L_JPEG_ENCODE)
thresh (used for converting gray --> 1 bpp with L_G4_ENCODE)
boxa (<optional> of image regions; can be null)
quality (used for jpeg image regions; 0 for default)
scalefactor (used for jpeg regions; must be <= 1.0)
title (<optional> pdf title; typically taken from the
input file for the pix)
&data (<return> pdf data in memory)
&nbytes (<return> number of bytes in pdf data)
Return: 0 if OK, 1 on error
Notes: (1) See convertToPdfSegmented() for details.
| LEPT_DLL l_int32 pixConvertToPdfSegmented | ( | PIX * | pixs, |
| l_int32 | res, | ||
| l_int32 | type, | ||
| l_int32 | thresh, | ||
| BOXA * | boxa, | ||
| l_int32 | quality, | ||
| l_float32 | scalefactor, | ||
| const char * | title, | ||
| const char * | fileout | ||
| ) |
Input: pixs (any depth, cmap OK)
res (input image resolution; typ. 300 ppi; use 0 for default)
type (compression type for non-image regions; the
image regions are always compressed with L_JPEG_ENCODE)
thresh (used for converting gray --> 1 bpp with L_G4_ENCODE)
boxa (<optional> of image regions; can be null)
quality (used for jpeg image regions; 0 for default)
scalefactor (used for jpeg regions; must be <= 1.0)
title (<optional> pdf title; typically taken from the
input file for the pix)
fileout (output pdf file)
Return: 0 if OK, 1 on error
Notes: (1) See convertToPdfSegmented() for details.
| LEPT_DLL PIX* pixConvertToSubpixelRGB | ( | PIX * | pixs, |
| l_float32 | scalex, | ||
| l_float32 | scaley, | ||
| l_int32 | order | ||
| ) |
Input: pixs (8 bpp grayscale, 32 bpp rgb, or colormapped)
scalex, scaley (anisotropic scaling permitted between
source and destination)
order (of subpixel rgb color components in composition of pixd:
L_SUBPIXEL_ORDER_RGB, L_SUBPIXEL_ORDER_BGR,
L_SUBPIXEL_ORDER_VRGB, L_SUBPIXEL_ORDER_VBGR)
Return: pixd (32 bpp), or null on error
Notes: (1) If pixs has a colormap, it is removed based on its contents to either 8 bpp gray or rgb. (2) For horizontal subpixel splitting, the input image is rescaled by @scaley vertically and by 3.0 times @scalex horizontally. Then each horizontal triplet of pixels is mapped back to a single rgb pixel, with the r, g and b values being assigned based on the pixel triplet. For gray triplets, the r, g, and b values are set equal to the three gray values. For color triplets, the r, g and b values are set equal to the components from the appropriate subpixel. Vertical subpixel splitting is handled similarly. (3) See pixConvertGrayToSubpixelRGB() and pixConvertColorToSubpixelRGB() for further details.
Input: pixd (can be NULL; if not NULL, must == pixs)
pixs
Return: pixd always
Notes: (1) For pixs = pixd, this is in-place; otherwise pixd must be NULL. (2) The user takes responsibility for making sure that pixs is in YUV space. (3) The Y, U and V values are stored in the same places as the r, g and b values, respectively. Here, they are explicitly placed in the 3 MS bytes in the pixel.
Input: pixs (8, 16, 32 bpp; no colormap)
kernel
outdepth (of pixd: 8, 16 or 32)
normflag (1 to normalize kernel to unit sum; 0 otherwise)
Return: pixd (8, 16 or 32 bpp)
Notes: (1) This gives a convolution with an arbitrary kernel. (2) The input pixs must have only one sample/pixel. To do a convolution on an RGB image, use pixConvolveRGB(). (3) The parameter @outdepth determines the depth of the result. If the kernel is normalized to unit sum, the output values can never exceed 255, so an output depth of 8 bpp is sufficient. If the kernel is not normalized, it may be necessary to use 16 or 32 bpp output to avoid overflow. (4) If normflag == 1, the result is normalized by scaling all kernel values for a unit sum. If the sum of kernel values is very close to zero, the kernel can not be normalized and the convolution will not be performed. A warning is issued. (5) The kernel values can be positive or negative, but the result for the convolution can only be stored as a positive number. Consequently, if it goes negative, the choices are to clip to 0 or take the absolute value. We're choosing to take the absolute value. (Another possibility would be to output a second unsigned image for the negative values.) If you want to get a clipped result, or to keep the negative values in the result, use fpixConvolve(), with the converters in fpix2.c between pix and fpix. (6) This uses a mirrored border to avoid special casing on the boundaries. (7) To get a subsampled output, call l_setConvolveSampling(). The time to make a subsampled output is reduced by the product of the sampling factors. (8) The function is slow, running at about 12 machine cycles for each pixel-op in the convolution. For example, with a 3 GHz cpu, a 1 Mpixel grayscale image, and a kernel with (sx * sy) = 25 elements, the convolution takes about 100 msec.
Input: pixs (32 bpp rgb)
kernel
Return: pixd (32 bpp rgb)
Notes: (1) This gives a convolution on an RGB image using an arbitrary kernel (which we normalize to keep each component within the range [0 ... 255]). (2) The input pixs must be RGB. (3) The kernel values can be positive or negative, but the result for the convolution can only be stored as a positive number. Consequently, if it goes negative, we clip the result to 0. (4) To get a subsampled output, call l_setConvolveSampling(). The time to make a subsampled output is reduced by the product of the sampling factors. (5) This uses a mirrored border to avoid special casing on the boundaries.
Input: pixs (32 bpp rgb)
kelx (x-dependent kernel)
kely (y-dependent kernel)
Return: pixd (32 bpp rgb)
Notes: (1) This does a convolution on an RGB image using a separable kernel that is a sequence of convolutions in x and y. The two one-dimensional kernel components must be input separately; the full kernel is the product of these components. The support for the full kernel is thus a rectangular region. (2) The kernel values can be positive or negative, but the result for the convolution can only be stored as a positive number. Consequently, if it goes negative, we clip the result to 0. (3) To get a subsampled output, call l_setConvolveSampling(). The time to make a subsampled output is reduced by the product of the sampling factors. (4) This uses a mirrored border to avoid special casing on the boundaries.
| LEPT_DLL PIX* pixConvolveSep | ( | PIX * | pixs, |
| L_KERNEL * | kelx, | ||
| L_KERNEL * | kely, | ||
| l_int32 | outdepth, | ||
| l_int32 | normflag | ||
| ) |
Input: pixs (8, 16, 32 bpp; no colormap)
kelx (x-dependent kernel)
kely (y-dependent kernel)
outdepth (of pixd: 8, 16 or 32)
normflag (1 to normalize kernel to unit sum; 0 otherwise)
Return: pixd (8, 16 or 32 bpp)
Notes: (1) This does a convolution with a separable kernel that is a sequence of convolutions in x and y. The two one-dimensional kernel components must be input separately; the full kernel is the product of these components. The support for the full kernel is thus a rectangular region. (2) The input pixs must have only one sample/pixel. To do a convolution on an RGB image, use pixConvolveSepRGB(). (3) The parameter @outdepth determines the depth of the result. If the kernel is normalized to unit sum, the output values can never exceed 255, so an output depth of 8 bpp is sufficient. If the kernel is not normalized, it may be necessary to use 16 or 32 bpp output to avoid overflow. (4) The @normflag parameter is used as in pixConvolve(). (5) The kernel values can be positive or negative, but the result for the convolution can only be stored as a positive number. Consequently, if it goes negative, the choices are to clip to 0 or take the absolute value. We're choosing the former for now. Another possibility would be to output a second unsigned image for the negative values. (6) Warning: if you use l_setConvolveSampling() to get a subsampled output, and the sampling factor is larger than the kernel half-width, it is faster to use the non-separable version pixConvolve(). This is because the first convolution here must be done on every raster line, regardless of the vertical sampling factor. If the sampling factor is smaller than kernel half-width, it's faster to use the separable convolution. (7) This uses mirrored borders to avoid special casing on the boundaries.
| LEPT_DLL PIX* pixConvolveWithBias | ( | PIX * | pixs, |
| L_KERNEL * | kel1, | ||
| L_KERNEL * | kel2, | ||
| l_int32 | force8, | ||
| l_int32 * | pbias | ||
| ) |
Input: pixs (8 bpp; no colormap)
kel1
kel2 (can be null; use if separable)
force8 (if 1, force output to 8 bpp; otherwise, determine
output depth by the dynamic range of pixel values)
&bias (<return> applied bias)
Return: pixd (8 or 16 bpp)
Notes: (1) This does a convolution with either a single kernel or a pair of separable kernels, and automatically applies whatever bias (shift) is required so that the resulting pixel values are non-negative. (2) The kernel is always normalized. If there are no negative values in the kernel, a standard normalized convolution is performed, with 8 bpp output. If the sum of kernel values is very close to zero, the kernel can not be normalized and the convolution will not be performed. An error message results. (3) If there are negative values in the kernel, the pix is converted to an fpix, the convolution is done on the fpix, and a bias (shift) may need to be applied. (4) If force8 == TRUE and the range of values after the convolution is > 255, the output values will be scaled to fit in [0 ... 255]. If force8 == FALSE, the output will be either 8 or 16 bpp, to accommodate the dynamic range of output values without scaling.
Input: pixd (<optional>; can be null, or equal to pixs,
or different from pixs)
pixs
Return: pixd, or null on error
Notes: (1) There are three cases: (a) pixd == null (makes a new pix; refcount = 1) (b) pixd == pixs (no-op) (c) pixd != pixs (data copy; no change in refcount) If the refcount of pixd > 1, case (c) will side-effect these handles. (2) The general pattern of use is: pixd = pixCopy(pixd, pixs); This will work for all three cases. For clarity when the case is known, you can use: (a) pixd = pixCopy(NULL, pixs); (c) pixCopy(pixd, pixs); (3) For case (c), we check if pixs and pixd are the same size (w,h,d). If so, the data is copied directly. Otherwise, the data is reallocated to the correct size and the copy proceeds. The refcount of pixd is unchanged. (4) This operation, like all others that may involve a pre-existing pixd, will side-effect any existing clones of pixd.
| LEPT_DLL PIX* pixCopyBorder | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| l_int32 | left, | ||
| l_int32 | right, | ||
| l_int32 | top, | ||
| l_int32 | bot | ||
| ) |
Input: pixd (all depths; colormap ok; can be NULL)
pixs (same depth and size as pixd)
left, right, top, bot (number of pixels to copy)
Return: pixd, or null on error if pixd is not defined
Notes: (1) pixd can be null, but otherwise it must be the same size and depth as pixs. Always returns pixd. (2) This is useful in situations where by setting a few border pixels we can avoid having to copy all pixels in pixs into pixd as an initialization step for some operation.
Input: src and dest Pix Return: 0 if OK, 1 on error
Notes: (1) This always destroys any colormap in pixd (except if the operation is a no-op).
Input: pixd
pixs
Return: 0 if OK, 1 on error
Input: pixd (32 bpp)
pixs (32 bpp)
comp (one of the set: {COLOR_RED, COLOR_GREEN,
COLOR_BLUE, L_ALPHA_CHANNEL})
Return: 0 if OK; 1 on error
Notes: (1) The two images are registered to the UL corner. The sizes are usually the same, and a warning is issued if they differ.
Input: pixd
pixs
Return: 0 if OK, 1 on error
Input: pix1 (1 bpp)
pix2 (1 bpp)
&val (<return> correlation)
Return: 0 if OK; 1 on error
Notes: (1) The correlation is a number between 0.0 and 1.0, based on foreground similarity: $\text{correlation} = \dfrac{(|1 \text{ AND } 2|)^2}{|1| \cdot |2|}$ where |x| is the count of foreground pixels in image x. If the images are identical, this is 1.0. If they have no fg pixels in common, this is 0.0. If one or both images have no fg pixels, the correlation is 0.0. (2) Typically the two images are of equal size, but this is not enforced. Instead, the UL corners are aligned.
| LEPT_DLL l_int32 pixCorrelationScore | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| l_int32 | area1, | ||
| l_int32 | area2, | ||
| l_float32 | delx, | ||
| l_float32 | dely, | ||
| l_int32 | maxdiffw, | ||
| l_int32 | maxdiffh, | ||
| l_int32 * | tab, | ||
| l_float32 * | pscore | ||
| ) |
Input: pix1 (test pix, 1 bpp)
pix2 (exemplar pix, 1 bpp)
area1 (number of on pixels in pix1)
area2 (number of on pixels in pix2)
delx (x comp of centroid difference)
dely (y comp of centroid difference)
maxdiffw (max width difference of pix1 and pix2)
maxdiffh (max height difference of pix1 and pix2)
tab (sum tab for byte)
&score (<return> correlation score)
Return: 0 if OK, 1 on error
Note: we check first that the two pix are roughly the same size. For jbclass (jbig2) applications at roughly 300 ppi, maxdiffw and maxdiffh should be at least 2.
Only if they meet that criterion do we compare the bitmaps. The centroid difference is used to align the two images to the nearest integer for the correlation.
The correlation score is the ratio of the square of the number of pixels in the AND of the two bitmaps to the product of the number of ON pixels in each. Denote the number of ON pixels in pix1 by |1|, the number in pix2 by |2|, and the number in the AND of pix1 and pix2 by |1 & 2|. The correlation score is then (|1 & 2|)**2 / (|1|*|2|).
This score is compared with an input threshold, which can be modified depending on the weight of the template. The modified threshold is thresh + (1.0 - thresh) * weight * R where weight is a fixed input factor between 0.0 and 1.0 R = |2| / area(2) and area(2) is the total number of pixels in 2 (i.e., width x height).
To understand why a weight factor is useful, consider what happens with thick, sans-serif characters that look similar and have a value of R near 1. Different characters can have a high correlation value, and the classifier will make incorrect substitutions. The weight factor raises the threshold for these characters.
Yet another approach to reduce such substitutions is to run the classifier in a non-greedy way, matching to the template with the highest score, not the first template with a score satisfying the matching constraint. However, this is not particularly effective.
The implementation here gives the same result as in pixCorrelationScoreSimple(), where a temporary Pix is made to hold the AND and implementation uses rasterop: pixt = pixCreateTemplate(pix1); pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix2, 0, 0); pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC & PIX_DST, pix1, 0, 0); pixCountPixels(pixt, &count, tab); pixDestroy(&pixt); However, here it is done in a streaming fashion, counting as it goes, and touching memory exactly once, giving a 3-4x speedup over the simple implementation. This very fast correlation matcher was contributed by William Rucklidge.
| LEPT_DLL l_int32 pixCorrelationScoreShifted | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| l_int32 | area1, | ||
| l_int32 | area2, | ||
| l_int32 | delx, | ||
| l_int32 | dely, | ||
| l_int32 * | tab, | ||
| l_float32 * | pscore | ||
| ) |
Input: pix1 (1 bpp)
pix2 (1 bpp)
area1 (number of on pixels in pix1)
area2 (number of on pixels in pix2)
delx (x translation of pix2 relative to pix1)
dely (y translation of pix2 relative to pix1)
tab (sum tab for byte)
&score (<return> correlation score)
Return: 0 if OK, 1 on error
Notes: (1) This finds the correlation between two 1 bpp images, when pix2 is shifted by (delx, dely) with respect to pix1. (2) This is implemented by starting with a copy of pix1 and ANDing its pixels with those of a shifted pix2. (3) Get the pixel counts for area1 and area2 using pixCountPixels(). (4) A good estimate for a shift that would maximize the correlation is to align the centroids (cx1, cy1; cx2, cy2), giving the relative translations etransx = cx1 - cx2 and etransy = cy1 - cy2. Typically delx is chosen to be near etransx; ditto for dely. This function is used in pixBestCorrelation(), where the translations delx and dely are varied to find the best alignment. (5) We do not check the sizes of pix1 and pix2, because they should be comparable.
| LEPT_DLL l_int32 pixCorrelationScoreSimple | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| l_int32 | area1, | ||
| l_int32 | area2, | ||
| l_float32 | delx, | ||
| l_float32 | dely, | ||
| l_int32 | maxdiffw, | ||
| l_int32 | maxdiffh, | ||
| l_int32 * | tab, | ||
| l_float32 * | pscore | ||
| ) |
Input: pix1 (test pix, 1 bpp)
pix2 (exemplar pix, 1 bpp)
area1 (number of on pixels in pix1)
area2 (number of on pixels in pix2)
delx (x comp of centroid difference)
dely (y comp of centroid difference)
maxdiffw (max width difference of pix1 and pix2)
maxdiffh (max height difference of pix1 and pix2)
tab (sum tab for byte)
&score (<return> correlation score, in range [0.0 ... 1.0])
Return: 0 if OK, 1 on error
Notes: (1) This calculates exactly the same value as pixCorrelationScore(). It is 2-3x slower, but much simpler to understand. (2) The returned correlation score is 0.0 if the width or height difference exceeds @maxdiffw or @maxdiffh, respectively.
| LEPT_DLL l_int32 pixCorrelationScoreThresholded | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| l_int32 | area1, | ||
| l_int32 | area2, | ||
| l_float32 | delx, | ||
| l_float32 | dely, | ||
| l_int32 | maxdiffw, | ||
| l_int32 | maxdiffh, | ||
| l_int32 * | tab, | ||
| l_int32 * | downcount, | ||
| l_float32 | score_threshold | ||
| ) |
pixCorrelationScoreThresholded()
Input: pix1 (test pix, 1 bpp)
pix2 (exemplar pix, 1 bpp)
area1 (number of on pixels in pix1)
area2 (number of on pixels in pix2)
delx (x comp of centroid difference)
dely (y comp of centroid difference)
maxdiffw (max width difference of pix1 and pix2)
maxdiffh (max height difference of pix1 and pix2)
tab (sum tab for byte)
downcount (count of 1 pixels below each row of pix1)
score_threshold
Return: whether the correlation score is >= score_threshold
Note: we check first that the two pix are roughly the same size. Only if they meet that criterion do we compare the bitmaps. The centroid difference is used to align the two images to the nearest integer for the correlation.
The correlation score is the ratio of the square of the number of pixels in the AND of the two bitmaps to the product of the number of ON pixels in each. Denote the number of ON pixels in pix1 by |1|, the number in pix2 by |2|, and the number in the AND of pix1 and pix2 by |1 & 2|. The correlation score is then (|1 & 2|)**2 / (|1|*|2|).
This score is compared with an input threshold, which can be modified depending on the weight of the template. The modified threshold is thresh + (1.0 - thresh) * weight * R where weight is a fixed input factor between 0.0 and 1.0 R = |2| / area(2) and area(2) is the total number of pixels in 2 (i.e., width x height).
To understand why a weight factor is useful, consider what happens with thick, sans-serif characters that look similar and have a value of R near 1. Different characters can have a high correlation value, and the classifier will make incorrect substitutions. The weight factor raises the threshold for these characters.
Yet another approach to reduce such substitutions is to run the classifier in a non-greedy way, matching to the template with the highest score, not the first template with a score satisfying the matching constraint. However, this is not particularly effective.
This very fast correlation matcher was contributed by William Rucklidge.
| LEPT_DLL l_int32 pixCountArbInRect | ( | PIX * | pixs, |
| BOX * | box, | ||
| l_int32 | val, | ||
| l_int32 | factor, | ||
| l_int32 * | pcount | ||
| ) |
Input: pixs (8 bpp, or colormapped)
box (<optional> box over which count is made;
use entire image if null)
val (pixel value to count)
factor (subsampling factor; integer >= 1)
&count (<return> count; estimate it if factor > 1)
Return: 0 if OK, 1 on error
Notes: (1) If pixs is cmapped, @val is compared to the colormap index; otherwise, @val is compared to the grayscale value. (2) Set the subsampling @factor > 1 to reduce the amount of computation. If @factor > 1, multiply the count by @factor * @factor.
Input: pix (1 bpp)
box (<optional> clipping box for count; can be null)
Return: na of number of ON pixels by column, or null on error
Notes: (1) To resample for a bin size different from 1, use numaUniformSampling() on the result of this function.
Input: pix (1 bpp)
box (<optional> clipping box for count; can be null)
Return: na of number of ON pixels by row, or null on error
Notes: (1) To resample for a bin size different from 1, use numaUniformSampling() on the result of this function.
Input: pixs (1 bpp)
connectivity (4 or 8)
&count (<return> number of connected components)
Return: 0 if OK, 1 on error
Notes: (1) This is the top-level call for getting the number of 4- or 8-connected components in a 1 bpp image. (2) It works on a copy of the input pix. The c.c. are located in raster order and erased one at a time.
Input: pix (1 bpp)
&count (<return> count of ON pixels)
tab8 (<optional> 8-bit pixel lookup table)
Return: 0 if OK; 1 on error
Input: pix (1 bpp) Return: na of counts in each column, or null on error
Input: pix (1 bpp)
tab8 (<optional> 8-bit pixel lookup table)
Return: na of counts, or null on error
Input: pix (1 bpp)
row number
&count (<return> sum of ON pixels in raster line)
tab8 (<optional> 8-bit pixel lookup table)
Return: 0 if OK; 1 on error
Input: width, height, depth
Return: pixd (with data allocated and initialized to 0),
or null on error
Input: pixc Return: pix, or null on error
Input: width, height, depth Return: pixd (with no data allocated), or null on error
Notes: (1) It is assumed that all 32 bit pix have 3 spp. If there is a valid alpha channel, this will be set to 4 spp later. (2) If the number of bytes to be allocated is larger than the maximum value in an int32, we can get overflow, resulting in a smaller amount of memory actually being allocated. Later, an attempt to access memory that wasn't allocated will cause a crash. So to avoid crashing a program (or worse) with bad (or malicious) input, this is where we limit the requested allocation of image data in a typesafe way.
Input: width, height, depth
Return: pixd (with data allocated but not initialized),
or null on error
Notes: (1) Must set pad bits to avoid reading uninitialized data, because some optimized routines (e.g., pixConnComp()) read from pad bits.
Input: 8 bpp red pix
8 bpp green pix
8 bpp blue pix
Return: 32 bpp pix, interleaved with 4 samples/pixel,
or null on error
Notes: (1) the 4th byte, sometimes called the "alpha channel", and which is often used for blending between different images, is left with 0 value. (2) see Note (4) in pix.h for details on storage of 8-bit samples within each 32-bit word. (3) This implementation, setting the r, g and b components sequentially, is much faster than setting them in parallel by constructing an RGB dest pixel and writing it to dest. The reason is there are many more cache misses when reading from 3 input images simultaneously.
| LEPT_DLL l_int32 pixCropAlignedToCentroid | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| l_int32 | factor, | ||
| BOX ** | pbox1, | ||
| BOX ** | pbox2 | ||
| ) |
Input: pix1, pix2 (any depth; colormap OK)
factor (subsampling; >= 1)
&box1 (<return> crop box for pix1)
&box2 (<return> crop box for pix2)
Return: 0 if OK, 1 on error
Notes: (1) This finds the maximum crop boxes for two 8 bpp images when the centroids of their photometric inverses are aligned. Black pixels have weight 255; white pixels have weight 0.
Input: pixs1 (any depth, colormap OK)
pixs2 (any depth, colormap OK)
&pixd1 (<return> may be a clone)
&pixd2 (<return> may be a clone)
Return: 0 if OK, 1 on error
Notes: (1) This resizes pixs1 and/or pixs2 by cropping at the right and bottom, so that they're the same size. (2) If a pix doesn't need to be cropped, a clone is returned. (3) Note: the images are implicitly aligned to the UL corner.
Input: pixs (any depth, colormap OK)
w, h (max dimensions of cropped image)
Return: pixd (cropped if necessary) or null on error.
Notes: (1) If either w or h is smaller than the corresponding dimension of pixs, this returns a cropped image; otherwise it returns a clone of pixs.
| LEPT_DLL l_int32 pixDecideIfPhotoImage | ( | PIX * | pix, |
| l_int32 | factor, | ||
| l_int32 | nx, | ||
| l_int32 | ny, | ||
| l_float32 | thresh, | ||
| NUMAA ** | pnaa, | ||
| PIXA * | pixadebug | ||
| ) |
Input: pix (8 bpp, centroid in center)
factor (subsampling for histograms; >= 1)
nx, ny (number of subregions to use for histograms)
thresh (threshold for photo/text; use 0 for default)
&naa (<return> array of normalized histograms)
pixadebug (<optional> use only for debug output)
Return: 0 if OK, 1 on error
Notes: (1) The input image must be 8 bpp (no colormap), and padded with white pixels so the centroid of photo-inverted pixels is at the center of the image. (2) If the pix is not almost certainly a photoimage, the returned histograms (@naa) are null. (3) If histograms are generated, the white (255) count is set to 0. This removes all pixel values above 230, including white padding from the centroid matching operation, from consideration. The resulting histograms are then normalized so the maximum count is 255. (4) Default for @thresh is 1.3; this seems sufficiently conservative. (5) Use @pixadebug == NULL unless debug output is requested.
Input: pixs (any depth)
box (<optional> if null, use entire pixs)
&istext (<return> 1 if text; 0 if photo; -1 if not determined)
pixadb (<optional> pre-allocated, for showing intermediate
computation; use null to skip)
Return: 0 if OK, 1 on error
Notes: (1) It is assumed that pixs has the correct resolution set. If the resolution is 0, we set to 300 and issue a warning. (2) If necessary, the image is scaled to 300 ppi; most of the processing is done at this resolution. (3) Text is assumed to be in horizontal lines. (4) Because thin vertical lines are removed before filtering for text lines, this should identify tables as text. (5) If @box is null and pixs contains both text lines and line art, this function might return @istext == true. (6) If the input pixs is empty, or for some other reason the result can not be determined, return -1. (7) For debug output, input a pre-allocated pixa.
Input: data (serialized data in memory)
nbytes (number of bytes in data string)
Return: pix, or NULL on error
Notes: (1) See pixSerializeToMemory() for the binary format. (2) Note the image size limits.
Input: pixs (any depth)
redsearch (for binary search: reduction factor = 1, 2 or 4;
use 0 for default)
Return: pixd (deskewed pix), or null on error
Notes: (1) This binarizes if necessary and finds the skew angle. If the angle is large enough and there is sufficient confidence, it returns a deskewed image; otherwise, it returns a clone.
| LEPT_DLL PIX* pixDeskewBarcode | ( | PIX * | pixs, |
| PIX * | pixb, | ||
| BOX * | box, | ||
| l_int32 | margin, | ||
| l_int32 | threshold, | ||
| l_float32 * | pangle, | ||
| l_float32 * | pconf | ||
| ) |
Input: pixs (input image; 8 bpp)
pixb (binarized edge-filtered input image)
box (identified region containing barcode)
margin (of extra pixels around box to extract)
threshold (for binarization; ~20)
&angle (<optional return> in degrees, clockwise is positive)
&conf (<optional return> confidence)
Return: pixd (deskewed barcode), or null on error
Note: (1) The (optional) angle returned is the angle in degrees (cw positive) necessary to rotate the image so that it is deskewed.
| LEPT_DLL PIX* pixDeskewGeneral | ( | PIX * | pixs, |
| l_int32 | redsweep, | ||
| l_float32 | sweeprange, | ||
| l_float32 | sweepdelta, | ||
| l_int32 | redsearch, | ||
| l_int32 | thresh, | ||
| l_float32 * | pangle, | ||
| l_float32 * | pconf | ||
| ) |
Input: pixs (any depth)
redsweep (for linear search: reduction factor = 1, 2 or 4;
use 0 for default)
sweeprange (in degrees in each direction from 0;
use 0.0 for default)
sweepdelta (in degrees; use 0.0 for default)
redsearch (for binary search: reduction factor = 1, 2 or 4;
use 0 for default)
thresh (for binarizing the image; use 0 for default)
&angle (<optional return> angle required to deskew,
in degrees; use NULL to skip)
&conf (<optional return> conf value is ratio
of max/min scores; use NULL to skip)
Return: pixd (deskewed pix), or null on error
Notes: (1) This binarizes if necessary and finds the skew angle. If the angle is large enough and there is sufficient confidence, it returns a deskewed image; otherwise, it returns a clone.
| LEPT_DLL PIX* pixDeskewLocal | ( | PIX * | pixs, |
| l_int32 | nslices, | ||
| l_int32 | redsweep, | ||
| l_int32 | redsearch, | ||
| l_float32 | sweeprange, | ||
| l_float32 | sweepdelta, | ||
| l_float32 | minbsdelta | ||
| ) |
Input: pixs
nslices (the number of horizontal overlapping slices; must
be larger than 1 and not exceed 20; use 0 for default)
redsweep (sweep reduction factor: 1, 2, 4 or 8;
use 0 for default value)
redsearch (search reduction factor: 1, 2, 4 or 8, and
not larger than redsweep; use 0 for default value)
sweeprange (half the full range, assumed about 0; in degrees;
use 0.0 for default value)
sweepdelta (angle increment of sweep; in degrees;
use 0.0 for default value)
minbsdelta (min binary search increment angle; in degrees;
use 0.0 for default value)
Return: pixd, or null on error
Notes: (1) This function allows deskew of a page whose skew changes approximately linearly with vertical position. It uses a projective transform that in effect does a differential shear about the LHS of the page, and makes all text lines horizontal. (2) The origin of the keystoning can be either a cheap document feeder that rotates the page as it is passed through, or a camera image taken from either the left or right side of the vertical. (3) The image transformation is a projective warping, not a rotation. Apart from this function, the text lines must be properly aligned vertically with respect to each other. This can be done by pre-processing the page; e.g., by rotating or horizontally shearing it. Typically, this can be achieved by vertically aligning the page edge.
Input: &pix <will be nulled> Return: void
Notes: (1) Decrements the ref count and, if 0, destroys the pix. (2) Always nulls the input ptr.
Input: pix Return: 0 if OK, 1 on error
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
sel
Return: pixd
Notes: (1) This dilates src using hits in Sel. (2) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (3) For clarity, if the case is known, use these patterns: (a) pixd = pixDilate(NULL, pixs, ...); (b) pixDilate(pixs, pixs, ...); (c) pixDilate(pixd, pixs, ...); (4) The size of the result is determined by pixs.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd
Notes: (1) Sel is a brick with all elements being hits (2) The origin is at (x, y) = (hsize/2, vsize/2) (3) Do separably if both hsize and vsize are > 1. (4) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (5) For clarity, if the case is known, use these patterns: (a) pixd = pixDilateBrick(NULL, pixs, ...); (b) pixDilateBrick(pixs, pixs, ...); (c) pixDilateBrick(pixd, pixs, ...); (6) The size of the result is determined by pixs.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd
Notes: (1) These implement 2D brick Sels, using linear Sels generated with selaAddBasic(). (2) A brick Sel has hits for all elements. (3) The origin of the Sel is at (x, y) = (hsize/2, vsize/2) (4) Do separably if both hsize and vsize are > 1. (5) It is necessary that both horizontal and vertical Sels of the input size are defined in the basic sela. (6) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (7) For clarity, if the case is known, use these patterns: (a) pixd = pixDilateBrickDwa(NULL, pixs, ...); (b) pixDilateBrickDwa(pixs, pixs, ...); (c) pixDilateBrickDwa(pixd, pixs, ...); (8) The size of pixd is determined by pixs. (9) If either linear Sel is not found, this calls the appropriate decomposable function.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd, or null on error
Notes: (1) Sel is a brick with all elements being hits (2) The origin is at (x, y) = (hsize/2, vsize/2) (3) Do compositely for each dimension > 1. (4) Do separably if both hsize and vsize are > 1. (5) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (6) For clarity, if the case is known, use these patterns: (a) pixd = pixDilateCompBrick(NULL, pixs, ...); (b) pixDilateCompBrick(pixs, pixs, ...); (c) pixDilateCompBrick(pixd, pixs, ...); (7) The dimensions of the resulting image are determined by pixs. (8) CAUTION: both hsize and vsize are being decomposed. The decomposer chooses a product of sizes (call them 'terms') for each that is close to the input size, but not necessarily equal to it. It attempts to optimize: (a) for consistency with the input values: the product of terms is close to the input size (b) for efficiency of the operation: the sum of the terms is small; ideally about twice the square root of the input size. So, for example, if the input hsize = 37, which is a prime number, the decomposer will break this into two terms, 6 and 6, so that the net result is a dilation with hsize = 36.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd
Notes: (1) These implement a separable composite dilation with 2D brick Sels. (2) For efficiency, it may decompose each linear morphological operation into two (brick + comb). (3) A brick Sel has hits for all elements. (4) The origin of the Sel is at (x, y) = (hsize/2, vsize/2) (5) Do separably if both hsize and vsize are > 1. (6) It is necessary that both horizontal and vertical Sels of the input size are defined in the basic sela. (7) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (8) For clarity, if the case is known, use these patterns: (a) pixd = pixDilateCompBrickDwa(NULL, pixs, ...); (b) pixDilateCompBrickDwa(pixs, pixs, ...); (c) pixDilateCompBrickDwa(pixd, pixs, ...); (9) The size of pixd is determined by pixs. (10) CAUTION: both hsize and vsize are being decomposed. The decomposer chooses a product of sizes (call them 'terms') for each that is close to the input size, but not necessarily equal to it. It attempts to optimize: (a) for consistency with the input values: the product of terms is close to the input size (b) for efficiency of the operation: the sum of the terms is small; ideally about twice the square root of the input size. So, for example, if the input hsize = 37, which is a prime number, the decomposer will break this into two terms, 6 and 6, so that the net result is a dilation with hsize = 36.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd
Notes: (1) Ankur Jain suggested and implemented extending the composite DWA operations beyond the 63 pixel limit. This is a simplified and approximate implementation of the extension. This allows arbitrary Dwa morph operations using brick Sels, by decomposing the horizontal and vertical dilations into a sequence of 63-element dilations plus a dilation of size between 3 and 62. (2) The 63-element dilations are exact, whereas the extra dilation is approximate, because the underlying decomposition is in pixDilateCompBrickDwa(). See there for further details. (3) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (4) There is no need to call this directly: pixDilateCompBrickDwa() calls this function if either brick dimension exceeds 63.
Input: pixs
hsize (of Sel; must be odd; origin implicitly in center)
vsize (ditto)
Return: pixd
Notes: (1) Sel is a brick with all elements being hits (2) If hsize = vsize = 1, just returns a copy.
Input: pixs (8 bpp, not cmapped)
hsize (1 or 3)
vsize (1 or 3)
Return: pixd, or null on error
Notes: (1) Special case for 1x3, 3x1 or 3x3 brick sel (all hits) (2) If hsize = vsize = 1, just returns a copy.
Input: pix (1, 2, 4, 8, 16, 32 bpp)
x, y (location of display frame on the screen)
Return: 0 if OK; 1 on error
Notes: (1) This displays the image using xzgv, xli or xv on Unix, or i_view on Windows. The display program must be on your $PATH variable. It is chosen by setting the global var_DISPLAY_PROG, using l_chooseDisplayProg(). Default on Unix is xzgv. (2) Images with dimensions larger than MAX_DISPLAY_WIDTH or MAX_DISPLAY_HEIGHT are downscaled to fit those constraints. This is particularly important for displaying 1 bpp images with xv, because xv automatically downscales large images by subsampling, which looks poor. For 1 bpp, we use scale-to-gray to get decent-looking anti-aliased images. In all cases, we write a temporary file to /tmp/lept/disp, that is read by the display program. (3) For spp == 4, we call pixDisplayLayersRGBA() to show 3 versions of the image: the image with a fully opaque alpha, the alpha, and the image as it would appear with a white background. (4) Note: this function uses a static internal variable to number output files written by a single process. Behavior with a shared library may be unpredictable.
| LEPT_DLL PIX* pixDisplayColorArray | ( | l_uint32 * | carray, |
| l_int32 | ncolors, | ||
| l_int32 | side, | ||
| l_int32 | ncols, | ||
| l_int32 | fontsize | ||
| ) |
Input: carray (array of colors: 0xrrggbb00)
ncolors (size of array)
side (size of each color square; suggest 200)
ncols (number of columns in output color matrix)
fontsize (to label each square with text. Valid set is
{4,6,8,10,12,14,16,18,20}. Use 0 to disable.)
Return: pixd (color array), or null on error
Input: pix1 (1 bpp)
pix2 (1 bpp)
Return: pixd (4 bpp cmapped), or null on error
Notes: (1) This gives a color representation of the difference between pix1 and pix2. The color difference depends on the order. The pixels in pixd have 4 colors:
| LEPT_DLL PIX* pixDisplayHitMissSel | ( | PIX * | pixs, |
| SEL * | sel, | ||
| l_int32 | scalefactor, | ||
| l_uint32 | hitcolor, | ||
| l_uint32 | misscolor | ||
| ) |
Input: pixs (1 bpp)
sel (hit-miss in general)
scalefactor (an integer >= 1; use 0 for default)
hitcolor (RGB0 color for center of hit pixels)
misscolor (RGB0 color for center of miss pixels)
Return: pixd (RGB showing both pixs and sel), or null on error
Notes: (1) We don't allow scalefactor to be larger than MAX_SEL_SCALEFACTOR (2) The colors are conveniently given as 4 bytes in hex format, such as 0xff008800. The least significant byte is ignored.
Input: pixs (cmap or 32 bpp rgba)
val (32 bit unsigned color to use as background)
maxw (max output image width; 0 for no scaling)
Return: pixd (showing various image views), or null on error
Notes: (1) Use @val == 0xffffff00 for white background. (2) Three views are given:
| LEPT_DLL PIX* pixDisplayMatchedPattern | ( | PIX * | pixs, |
| PIX * | pixp, | ||
| PIX * | pixe, | ||
| l_int32 | x0, | ||
| l_int32 | y0, | ||
| l_uint32 | color, | ||
| l_float32 | scale, | ||
| l_int32 | nlevels | ||
| ) |
Input: pixs (input image, 1 bpp)
pixp (pattern to be removed from image, 1 bpp)
pixe (image after erosion by Sel that approximates pixp, 1 bpp)
x0, y0 (center of Sel)
color (to paint the matched patterns; 0xrrggbb00)
scale (reduction factor for output pixd)
nlevels (if scale < 1.0, threshold to this number of levels)
Return: pixd (4 bpp, colormapped), or null on error
Notes: (1) A 4 bpp colormapped image is generated. (2) If scale <= 1.0, do scale to gray for the output, and threshold to nlevels of gray. (3) You can use various functions in selgen to create a Sel that will generate pixe from pixs. (4) This function is applied after pixe has been computed. It finds the centroid of each c.c., and colors the output pixels using pixp (appropriately aligned) as a stencil. Alignment is done using the origin of the Sel and the centroid of the eroded image to place the stencil pixp.
Input: filepattern Return: 0 if OK; 1 on error
Notes: (1) This allows display of multiple images using gthumb on unix and i_view32 on windows. The @filepattern is a regular expression that is expanded by the shell. (2) _fullpath automatically changes '/' to '\' if necessary.
Input: pixd (can be same as pixs or null; 32 bpp if in-place)
pixs (1, 2, 4, 8, 16 or 32 bpp)
pta (of path to be plotted)
Return: pixd (32 bpp RGB version of pixs, with path in green).
Notes: (1) To write on an existing pixs, pixs must be 32 bpp and call with pixd == pixs: pixDisplayPta(pixs, pixs, pta); To write to a new pix, use pixd == NULL and call: pixd = pixDisplayPta(NULL, pixs, pta); (2) On error, returns pixd to avoid losing pixs if called as pixs = pixDisplayPta(pixs, pixs, pta);
Input: pixs (1, 2, 4, 8, 16 or 32 bpp)
ptaa (array of paths to be plotted)
Return: pixd (32 bpp RGB version of pixs, with paths plotted
in different colors), or null on error
| LEPT_DLL PIX* pixDisplayPtaaPattern | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| PTAA * | ptaa, | ||
| PIX * | pixp, | ||
| l_int32 | cx, | ||
| l_int32 | cy | ||
| ) |
Input: pixd (32 bpp)
pixs (1, 2, 4, 8, 16 or 32 bpp; 32 bpp if in place)
ptaa (giving locations at which the pattern is displayed)
pixp (1 bpp pattern to be placed such that its reference
point co-locates with each point in pta)
cx, cy (reference point in pattern)
Return: pixd (32 bpp RGB version of pixs).
Notes: (1) To write on an existing pixs, pixs must be 32 bpp and call with pixd == pixs: pixDisplayPtaaPattern(pixs, pixs, ptaa, ...); To write to a new pix, use pixd == NULL and call: pixd = pixDisplayPtaaPattern(NULL, pixs, ptaa, ...); (2) Puts a random color on each pattern associated with a pta. (3) On error, returns pixd to avoid losing pixs if called as pixs = pixDisplayPtaaPattern(pixs, pixs, ptaa, ...); (4) A typical pattern to be used is a circle, generated with generatePtaFilledCircle()
| LEPT_DLL PIX* pixDisplayPtaPattern | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| PTA * | pta, | ||
| PIX * | pixp, | ||
| l_int32 | cx, | ||
| l_int32 | cy, | ||
| l_uint32 | color | ||
| ) |
Input: pixd (can be same as pixs or null; 32 bpp if in-place)
pixs (1, 2, 4, 8, 16 or 32 bpp)
pta (giving locations at which the pattern is displayed)
pixp (1 bpp pattern to be placed such that its reference
point co-locates with each point in pta)
cx, cy (reference point in pattern)
color (in 0xrrggbb00 format)
Return: pixd (32 bpp RGB version of pixs).
Notes: (1) To write on an existing pixs, pixs must be 32 bpp and call with pixd == pixs: pixDisplayPtaPattern(pixs, pixs, pta, ...); To write to a new pix, use pixd == NULL and call: pixd = pixDisplayPtaPattern(NULL, pixs, pta, ...); (2) On error, returns pixd to avoid losing pixs if called as pixs = pixDisplayPtaPattern(pixs, pixs, pta, ...); (3) A typical pattern to be used is a circle, generated with generatePtaFilledCircle()
| LEPT_DLL l_int32 pixDisplayWithTitle | ( | PIX * | pixs, |
| l_int32 | x, | ||
| l_int32 | y, | ||
| const char * | title, | ||
| l_int32 | dispflag | ||
| ) |
Input: pix (1, 2, 4, 8, 16, 32 bpp)
x, y (location of display frame)
title (<optional> on frame; can be NULL)
dispflag (1 to write, else disabled)
Return: 0 if OK; 1 on error
Notes: (1) See notes for pixDisplay(). (2) This displays the image if dispflag == 1.
Input: pix (1, 2, 4, 8, 16, 32 bpp)
reduction (-1 to reset/erase; 0 to disable;
otherwise this is a reduction factor)
Return: 0 if OK; 1 on error
Notes: (1) This defaults to jpeg output for pix that are 32 bpp or 8 bpp without a colormap. If you want to write all images losslessly, use format == IFF_PNG in pixDisplayWriteFormat(). (2) See pixDisplayWriteFormat() for usage details.
Input: pix (1, 2, 4, 8, 16, 32 bpp)
reduction (-1 to reset/erase; 0 to disable;
otherwise this is a reduction factor)
format (IFF_PNG or IFF_JFIF_JPEG)
Return: 0 if OK; 1 on error
Notes: (1) This writes files if reduction > 0. These can be displayed using pixDisplayMultiple("/tmp/lept/display/file*"); (2) All previously written files can be erased by calling with reduction < 0; the value of pixs is ignored. (3) If reduction > 1 and depth == 1, this does a scale-to-gray reduction. (4) This function uses a static internal variable to number output files written by a single process. Behavior with a shared library may be unpredictable. (5) Output file format is as follows: format == IFF_JFIF_JPEG: png if d < 8 or d == 16 or if the output pix has a colormap. Otherwise, output is jpg. format == IFF_PNG: png (lossless) on all images. (6) For 16 bpp, the choice of full dynamic range with log scale is the best for displaying these images. Alternative outputs are pix8 = pixMaxDynamicRange(pixt, L_LINEAR_SCALE); pix8 = pixConvert16To8(pixt, 0); // low order byte pix8 = pixConvert16To8(pixt, 1); // high order byte
| LEPT_DLL PIX* pixDistanceFunction | ( | PIX * | pixs, |
| l_int32 | connectivity, | ||
| l_int32 | outdepth, | ||
| l_int32 | boundcond | ||
| ) |
Input: pixs (1 bpp source)
connectivity (4 or 8)
outdepth (8 or 16 bits for pixd)
boundcond (L_BOUNDARY_BG, L_BOUNDARY_FG)
Return: pixd, or null on error
Notes: (1) This computes the distance of each pixel from the nearest background pixel. All bg pixels therefore have a distance of 0, and the fg pixel distances increase linearly from 1 at the boundary. It can also be used to compute the distance of each pixel from the nearest fg pixel, by inverting the input image before calling this function. Then all fg pixels have a distance 0 and the bg pixel distances increase linearly from 1 at the boundary. (2) The algorithm, described in Leptonica on the page on seed filling and connected components, is due to Luc Vincent. In brief, we generate an 8 or 16 bpp image, initialized with the fg pixels of the input pix set to 1 and the 1-boundary pixels (i.e., the boundary pixels of width 1 on the four sides set as either:
Input: pixs (8 bpp)
cmapflag (1 to generate a colormap)
Return: pixd (dithered 2 bpp), or null on error
An analog of the Floyd-Steinberg error diffusion dithering algorithm is used to "dibitize" an 8 bpp grayscale image to 2 bpp, using equally spaced gray values of 0, 85, 170, and 255, which are served by thresholds of 43, 128 and 213. If cmapflag == 1, the colormap values are set to 0, 85, 170 and 255. If a pixel has a value between 0 and 42, it is dibitized to 0, and the excess (above 0) is added to the three neighboring pixels, in the fractions 3/8 to (i, j+1), 3/8 to (i+1, j) and 1/4 to (i+1, j+1), truncating to 255 if necessary. If a pixel has a value between 43 and 127, it is dibitized to 1, and the excess (above 85) is added to the three neighboring pixels as before. If the value is below 85, the excess is subtracted. With a value between 128 and 212, it is dibitized to 2, with the excess on either side of 170 distributed as before. Finally, with a value between 213 and 255, it is dibitized to 3, with the excess (below 255) subtracted from the neighbors. We always truncate to 0 or 255. The details can be seen in the lookup table generation.
This function differs from straight dithering in that it allows clipping of grayscale to 0 or 255 if the values are sufficiently close, without distribution of the excess. This uses default values (from pix.h) to specify the range of lower and upper values (near 0 and 255, rsp) that are clipped to black and white without propagating the excess. Not propagating the excess has the effect of reducing the snake patterns in parts of the image that are nearly black or white; however, it also prevents any attempt to reproduce gray for those values.
The implementation uses 3 lookup tables for simplicity, and a pair of line buffers to avoid modifying pixs.
| LEPT_DLL PIX* pixDitherTo2bppSpec | ( | PIX * | pixs, |
| l_int32 | lowerclip, | ||
| l_int32 | upperclip, | ||
| l_int32 | cmapflag | ||
| ) |
Input: pixs (8 bpp)
lowerclip (lower clip distance to black; use 0 for default)
upperclip (upper clip distance to white; use 0 for default)
cmapflag (1 to generate a colormap)
Return: pixd (dithered 2 bpp), or null on error
Notes: (1) See comments above in pixDitherTo2bpp() for details. (2) The input parameters lowerclip and upperclip specify the range of lower and upper values (near 0 and 255, rsp) that are clipped to black and white without propagating the excess. For that reason, lowerclip and upperclip should be small numbers.
Input: pixs Return: pixd (dithered binary), or null on error
The Floyd-Steinberg error diffusion dithering algorithm binarizes an 8 bpp grayscale image to a threshold of 128. If a pixel has a value above 127, it is binarized to white and the excess (below 255) is subtracted from three neighboring pixels in the fractions 3/8 to (i, j+1), 3/8 to (i+1, j) and 1/4 to (i+1,j+1), truncating to 0 if necessary. Likewise, if the pixel has a value below 128, it is binarized to black and the excess above 0 is added to the neighboring pixels, truncating to 255 if necessary.
This function differs from straight dithering in that it allows clipping of grayscale to 0 or 255 if the values are sufficiently close, without distribution of the excess. This uses default values to specify the range of lower and upper values (near 0 and 255, rsp) that are clipped to black and white without propagating the excess. Not propagating the excess has the effect of reducing the snake patterns in parts of the image that are nearly black or white; however, it also prevents the attempt to reproduce gray for those values.
The implementation is straightforward. It uses a pair of line buffers to avoid changing pixs. It is about 2x faster than the implementation using LUTs.
Input: pixs
lowerclip (lower clip distance to black; use -1 for default)
upperclip (upper clip distance to white; use -1 for default)
Return: pixd (dithered binary), or null on error
This implementation is deprecated. You should use pixDitherToBinary().
See comments in pixDitherToBinary()
This implementation additionally uses three lookup tables to generate the output pixel value and the excess or deficit carried over to the neighboring pixels.
Input: pixs
lowerclip (lower clip distance to black; use 0 for default)
upperclip (upper clip distance to white; use 0 for default)
Return: pixd (dithered binary), or null on error
Notes: (1) See comments above in pixDitherToBinary() for details. (2) The input parameters lowerclip and upperclip specify the range of lower and upper values (near 0 and 255, rsp) that are clipped to black and white without propagating the excess. For that reason, lowerclip and upperclip should be small numbers.
Input: pixs (any depth; can be cmapped)
boxa (of boxes, to draw)
width (of lines)
val (rgba color to draw)
Return: pixd (with outlines of boxes added), or null on error
Notes: (1) If pixs is 1 bpp or is colormapped, it is converted to 8 bpp and the boxa is drawn using a colormap; otherwise, it is converted to 32 bpp rgb.
Input: pixs (any depth, can be cmapped)
boxa (of boxes, to draw)
width (thickness of line)
Return: pixd (with box outlines drawn), or null on error
Notes: (1) If pixs is 1 bpp, we draw the boxa using a colormap; otherwise, we convert to 32 bpp. (2) We use up to 254 different colors for drawing the boxes. (3) If boxes overlap, the later ones draw over earlier ones.
| LEPT_DLL l_int32 pixelFractionalShift | ( | l_int32 | rval, |
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_float32 | fraction, | ||
| l_uint32 * | ppixel | ||
| ) |
Input: rval, gval, bval
fraction (negative toward black; positive toward white)
&ppixel (<return> rgb value)
Return: 0 if OK, 1 on error
Notes: (1) This transformation leaves the hue invariant, while changing the saturation and intensity. It can be used for that purpose in pixLinearMapToTargetColor(). (2) @fraction is in the range [-1 .... +1]. If @fraction < 0, saturation is increased and brightness is reduced. The opposite results if @fraction > 0. If @fraction == -1, the resulting pixel is black; @fraction == 1 results in white.
| LEPT_DLL l_int32 pixelLinearMapToTargetColor | ( | l_uint32 | scolor, |
| l_uint32 | srcmap, | ||
| l_uint32 | dstmap, | ||
| l_uint32 * | pdcolor | ||
| ) |
Input: scolor (rgb source color: 0xrrggbb00)
srcmap (source mapping color: 0xrrggbb00)
dstmap (target mapping color: 0xrrggbb00)
&pdcolor (<return> rgb dest color: 0xrrggbb00)
Return: 0 if OK, 1 on error
Notes: (1) This does a piecewise linear mapping of each component of @scolor to @dcolor, based on the relation between the components of @srcmap and @dstmap. It is the same transformation, performed on a single color, as mapped on every pixel in a pix by pixLinearMapToTargetColor(). (2) For each component, if the sval is larger than the smap, the dval will be pushed up from dmap towards white. Otherwise, dval will be pushed down from dmap towards black. This is because you can visualize the transformation as a linear stretching where smap moves to dmap, and everything else follows linearly with 0 and 255 fixed. (3) The mapping will in general change the hue of @scolor. However, if the @srcmap and @dstmap targets are related by a transformation given by pixelFractionalShift(), the hue will be invariant.
| LEPT_DLL l_int32 pixelShiftByComponent | ( | l_int32 | rval, |
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_uint32 | srcval, | ||
| l_uint32 | dstval, | ||
| l_uint32 * | ppixel | ||
| ) |
Input: rval, gval, bval
srcval (source color: 0xrrggbb00)
dstval (target color: 0xrrggbb00)
&ppixel (<return> rgb value)
Return: 0 if OK, 1 on error
Notes: (1) This is a linear transformation that gives the same result on a single pixel as pixShiftByComponent() gives on a pix. Each component is handled separately. If the dest component is larger than the src, then the component is pushed toward 255 by the same fraction as the src --> dest shift.
| LEPT_DLL PIX* pixEmbedForRotation | ( | PIX * | pixs, |
| l_float32 | angle, | ||
| l_int32 | incolor, | ||
| l_int32 | width, | ||
| l_int32 | height | ||
| ) |
Input: pixs (1, 2, 4, 8, 32 bpp rgb)
angle (radians; clockwise is positive)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
width (original width; use 0 to avoid embedding)
height (original height; use 0 to avoid embedding)
Return: pixd, or null on error
Notes: (1) For very small rotations, just return a clone. (2) Generate larger image to embed pixs if necessary, and place the center of the input image in the center. (3) Rotation brings either white or black pixels in from outside the image. For colormapped images where there is no white or black, a new color is added if possible for these pixels; otherwise, either the lightest or darkest color is used. In most cases, the colormap will be removed prior to rotation. (4) The dest is to be expanded so that no image pixels are lost after rotation. Input of the original width and height allows the expansion to stop at the maximum required size, which is a square with side equal to sqrt(w*w + h*h). (5) For an arbitrary angle, the expansion can be found by considering the UL and UR corners. As the image is rotated, these move in an arc centered at the center of the image. Normalize to a unit circle by dividing by half the image diagonal. After a rotation of T radians, the UL and UR corners are at points T radians along the unit circle. Compute the x and y coordinates of both these points and take the max of absolute values; these represent the half width and half height of the containing rectangle. The arithmetic is done using formulas for sin(a+b) and cos(a+b), where b = T. For the UR corner, sin(a) = h/d and cos(a) = w/d. For the UL corner, replace a by (pi - a), and you have sin(pi - a) = h/d, cos(pi - a) = -w/d. The equations given below follow directly.
Input: pixs Return: 0 if OK, 1 on error
Notes: (1) This is used on little-endian platforms to swap the bytes within a word; bytes 0 and 3 are swapped, and bytes 1 and 2 are swapped. (2) This is required for little-endians in situations where we convert from a serialized byte order that is in raster order, as one typically has in file formats, to one with MSB-to-the-left in each 32-bit word, or v.v. See pix.h for a description of the canonical format (MSB-to-the-left) that is used for both little-endian and big-endian platforms. For big-endians, the MSB-to-the-left word order has the bytes in raster order when serialized, so no byte flipping is required.
Input: pixs Return: pixd, or null on error
Notes: (1) This is used to convert the data in a pix to a serialized byte buffer in raster order, and, for RGB, in order RGBA. This requires flipping bytes within each 32-bit word for little-endian platforms, because the words have a MSB-to-the-left rule, whereas byte raster-order requires the left-most byte in each word to be byte 0. For big-endians, no swap is necessary, so this returns a clone. (2) Unlike pixEndianByteSwap(), which swaps the bytes in-place, this returns a new pix (or a clone). We provide this because often when serialization is done, the source pix needs to be restored to canonical little-endian order, and this requires a second byte swap. In such a situation, it is twice as fast to make a new pix in big-endian order, use it, and destroy it.
Input: pixs Return: 0 if OK, 1 on error
Notes: (1) This is used on little-endian platforms to swap the 2-byte entities within a 32-bit word. (2) This is equivalent to a full byte swap, as performed by pixEndianByteSwap(), followed by byte swaps in each of the 16-bit entities separately.
Input: pixs Return: 0 if OK, 1 on error
Notes: (1) This is used on little-endian platforms to swap the 2-byte entities within a 32-bit word. (2) This is equivalent to a full byte swap, as performed by pixEndianByteSwap(), followed by byte swaps in each of the 16-bit entities separately. (3) Unlike pixEndianTwoByteSwap(), which swaps the shorts in-place, this returns a new pix (or a clone). We provide this to avoid having to swap twice in situations where the input pix must be restored to canonical little-endian order.
Input: pix1
pix2
&same (<return> 1 if same; 0 if different)
Return: 0 if OK; 1 on error
Notes: (1) Equality is defined as having the same pixel values for each respective image pixel. (2) This works on two pix of any depth. If one or both pix have a colormap, the depths can be different and the two pix can still be equal. (3) This ignores the alpha component for 32 bpp images. (4) If both pix have colormaps and the depths are equal, use the pixEqualWithCmap() function, which does a fast comparison if the colormaps are identical and a relatively slow comparison otherwise. (5) In all other cases, any existing colormaps must first be removed before doing pixel comparison. After the colormaps are removed, the resulting two images must have the same depth. The "lowest common denominator" is RGB, but this is only chosen when necessary, or when both have colormaps but different depths. (6) For images without colormaps that are not 32 bpp, all bits in the image part of the data array must be identical.
Input: pixd (<optional> null or equal to pixs)
pixs (8 bpp gray, 32 bpp rgb, or colormapped)
fract (fraction of equalization movement of pixel values)
factor (subsampling factor; integer >= 1)
Return: pixd, or null on error
Notes: (1) pixd must either be null or equal to pixs. For in-place operation, set pixd == pixs: pixEqualizeTRC(pixs, pixs, ...); To get a new image, set pixd == null: pixd = pixEqualizeTRC(NULL, pixs, ...); (2) In histogram equalization, a tone reproduction curve mapping is used to make the number of pixels at each intensity equal. (3) If fract == 0.0, no equalization is performed; return a copy unless in-place, in which case this is a no-op. If fract == 1.0, equalization is complete. (4) Set the subsampling factor > 1 to reduce the amount of computation. (5) If pixs is colormapped, the colormap is removed and converted to rgb or grayscale. (6) If pixs has color, equalization is done in each channel separately. (7) Note that even if there is a colormap, we can get an in-place operation because the intermediate image pixt is copied back to pixs (which for in-place is the same as pixd).
Input: pix1
pix2
use_alpha (1 to compare alpha in RGBA; 0 to ignore)
&same (<return> 1 if same; 0 if different)
Return: 0 if OK; 1 on error
Notes: (1) See notes in pixEqual(). (2) This is more general than pixEqual(), in that for 32 bpp RGBA images, where spp = 4, you can optionally include the alpha component in the comparison.
Input: pix1
pix2
&same
Return: 0 if OK, 1 on error
Notes: (1) This returns same = TRUE if the images have identical content. (2) Both pix must have a colormap, and be of equal size and depth. If these conditions are not satisfied, it is not an error; the returned result is same = FALSE. (3) We then check whether the colormaps are the same; if so, the comparison proceeds 32 bits at a time. (4) If the colormaps are different, the comparison is done by slow brute force.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
sel
Return: pixd
Notes: (1) This erodes src using hits in Sel. (2) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (3) For clarity, if the case is known, use these patterns: (a) pixd = pixErode(NULL, pixs, ...); (b) pixErode(pixs, pixs, ...); (c) pixErode(pixd, pixs, ...); (4) The size of the result is determined by pixs.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd
Notes: (1) Sel is a brick with all elements being hits (2) The origin is at (x, y) = (hsize/2, vsize/2) (3) Do separably if both hsize and vsize are > 1. (4) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (5) For clarity, if the case is known, use these patterns: (a) pixd = pixErodeBrick(NULL, pixs, ...); (b) pixErodeBrick(pixs, pixs, ...); (c) pixErodeBrick(pixd, pixs, ...); (6) The size of the result is determined by pixs.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd
Notes: (1) These implement 2D brick Sels, using linear Sels generated with selaAddBasic(). (2) A brick Sel has hits for all elements. (3) The origin of the Sel is at (x, y) = (hsize/2, vsize/2) (4) Do separably if both hsize and vsize are > 1. (5) It is necessary that both horizontal and vertical Sels of the input size are defined in the basic sela. (6) Note that we must always set or clear the border pixels before each operation, depending on the b.c. (symmetric or asymmetric). (7) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (8) For clarity, if the case is known, use these patterns: (a) pixd = pixErodeBrickDwa(NULL, pixs, ...); (b) pixErodeBrickDwa(pixs, pixs, ...); (c) pixErodeBrickDwa(pixd, pixs, ...); (9) The size of the result is determined by pixs. (10) If either linear Sel is not found, this calls the appropriate decomposable function.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd, or null on error
Notes: (1) Sel is a brick with all elements being hits (2) The origin is at (x, y) = (hsize/2, vsize/2) (3) Do compositely for each dimension > 1. (4) Do separably if both hsize and vsize are > 1. (5) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (6) For clarity, if the case is known, use these patterns: (a) pixd = pixErodeCompBrick(NULL, pixs, ...); (b) pixErodeCompBrick(pixs, pixs, ...); (c) pixErodeCompBrick(pixd, pixs, ...); (7) The dimensions of the resulting image are determined by pixs. (8) CAUTION: both hsize and vsize are being decomposed. The decomposer chooses a product of sizes (call them 'terms') for each that is close to the input size, but not necessarily equal to it. It attempts to optimize: (a) for consistency with the input values: the product of terms is close to the input size (b) for efficiency of the operation: the sum of the terms is small; ideally about twice the square root of the input size. So, for example, if the input hsize = 37, which is a prime number, the decomposer will break this into two terms, 6 and 6, so that the net result is an erosion with hsize = 36.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd
Notes: (1) These implement a separable composite erosion with 2D brick Sels. (2) For efficiency, it may decompose each linear morphological operation into two (brick + comb). (3) A brick Sel has hits for all elements. (4) The origin of the Sel is at (x, y) = (hsize/2, vsize/2) (5) Do separably if both hsize and vsize are > 1. (6) It is necessary that both horizontal and vertical Sels of the input size are defined in the basic sela. (7) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (8) For clarity, if the case is known, use these patterns: (a) pixd = pixErodeCompBrickDwa(NULL, pixs, ...); (b) pixErodeCompBrickDwa(pixs, pixs, ...); (c) pixErodeCompBrickDwa(pixd, pixs, ...); (9) The size of pixd is determined by pixs. (10) CAUTION: both hsize and vsize are being decomposed. The decomposer chooses a product of sizes (call them 'terms') for each that is close to the input size, but not necessarily equal to it. It attempts to optimize: (a) for consistency with the input values: the product of terms is close to the input size (b) for efficiency of the operation: the sum of the terms is small; ideally about twice the square root of the input size. So, for example, if the input hsize = 37, which is a prime number, the decomposer will break this into two terms, 6 and 6, so that the net result is an erosion with hsize = 36.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd
Notes: (1) See pixDilateCompBrickExtendDwa() for usage. (2) There is no need to call this directly: pixErodeCompBrickDwa() calls this function if either brick dimension exceeds 63.
Input: pixs
hsize (of Sel; must be odd; origin implicitly in center)
vsize (ditto)
Return: pixd
Notes: (1) Sel is a brick with all elements being hits (2) If hsize = vsize = 1, just returns a copy.
Input: pixs (8 bpp, not cmapped)
hsize (1 or 3)
vsize (1 or 3)
Return: pixd, or null on error
Notes: (1) Special case for 1x3, 3x1 or 3x3 brick sel (all hits) (2) If hsize = vsize = 1, just returns a copy. (3) It would be nice not to add a border, but it is required if we want the same results as from the general case. We add 4 bytes on the left to speed up the copying, and 8 bytes at the right and bottom to allow unrolling of the computation of 8 pixels.
Input: pixs (1 bpp)
factor (expansion factor: 1, 2, 4, 8, 16)
Return: pixd (expanded 1 bpp by replication), or null on error
Input: pixs (1 bpp)
factor (integer scale factor for replicative expansion)
Return: pixd (scaled up), or null on error
Input: pixs (1, 2, 4, 8, 16, 32 bpp)
factor (integer scale factor for replicative expansion)
Return: pixd (scaled up), or null on error.
Input: pixs (8 bpp)
addw (number of extra pixels horizontally to add)
addh (number of extra pixels vertically to add)
Return: pixd (extended with replicated pixel values), or null on error
Notes: (1) The pixel values are extended to the left and down, as required.
Input: pixs (input image; 8 bpp)
thresh (estimated pixel threshold for crossing
white <--> black; typ. ~120)
debugflag (use 1 to generate debug output)
Return: numa (of crossings, in pixel units), or null on error
Input: pixs (8 bpp, no colormap)
debugflag (use 1 to generate debug output)
Return: pixa (deskewed and cropped barcodes), or null if
none found or on error
| LEPT_DLL NUMA* pixExtractBarcodeWidths1 | ( | PIX * | pixs, |
| l_float32 | thresh, | ||
| l_float32 | binfract, | ||
| NUMA ** | pnaehist, | ||
| NUMA ** | pnaohist, | ||
| l_int32 | debugflag | ||
| ) |
Input: pixs (input image; 8 bpp)
thresh (estimated pixel threshold for crossing
white <--> black; typ. ~120)
binfract (histo binsize as a fraction of minsize; e.g., 0.25)
&naehist (<optional return> histogram of black widths; NULL ok)
&naohist (<optional return> histogram of white widths; NULL ok)
debugflag (use 1 to generate debug output)
Return: nad (numa of barcode widths in encoded integer units),
or null on error
Notes: (1) The widths are alternating black/white, starting with black and ending with black. (2) This method uses the widths of the bars directly, in terms of the (float) number of pixels between transitions. The histograms of these widths for black and white bars are generated and interpreted.
| LEPT_DLL NUMA* pixExtractBarcodeWidths2 | ( | PIX * | pixs, |
| l_float32 | thresh, | ||
| l_float32 * | pwidth, | ||
| NUMA ** | pnac, | ||
| l_int32 | debugflag | ||
| ) |
Input: pixs (input image; 8 bpp)
thresh (estimated pixel threshold for crossing
white <--> black; typ. ~120)
&width (<optional return> best decoding window width, in pixels)
&nac (<optional return> number of transitions in each window)
debugflag (use 1 to generate debug output)
Return: nad (numa of barcode widths in encoded integer units),
or null on error
Notes: (1) The widths are alternating black/white, starting with black and ending with black. (2) The optional best decoding window width is the width of the window that is used to make a decision about whether a transition occurs. It is approximately the average width in pixels of the narrowest white and black bars (i.e., those corresponding to unit width). (3) The optional return signal @nac is a sequence of 0s, 1s, and perhaps a few 2s, giving the number of crossings in each window. On the occasion where there is a '2', it is interpreted as ending two runs: the previous one and another one that has length 1.
Input: pixs (1 bpp)
filling connectivity (4 or 8)
Return: pixd (all pixels in the src that are in connected
components touching the border), or null on error
Input: pixs (1 bpp)
type (0 for background pixels; 1 for foreground pixels)
Return: pixd, or null on error
Notes: (1) Extracts the fg or bg boundary pixels for each component. Components are assumed to end at the boundary of pixs.
Notes: (1) This extracts the pix image data for use in another context. The caller still needs to use pixDestroy() on the input pix. (2) If refcount == 1, the data is extracted and the pix->data ptr is set to NULL. (3) If refcount > 1, this simply returns a copy of the data, using the pix allocator, and leaving the input pix unchanged.
| LEPT_DLL NUMA* pixExtractOnLine | ( | PIX * | pixs, |
| l_int32 | x1, | ||
| l_int32 | y1, | ||
| l_int32 | x2, | ||
| l_int32 | y2, | ||
| l_int32 | factor | ||
| ) |
Input: pixs (1 bpp or 8 bpp; no colormap)
x1, y1 (one end point for line)
x2, y2 (another end pt for line)
factor (sampling; >= 1)
Return: na (of pixel values along line), or null on error.
Notes: (1) Input end points are clipped to the pix. (2) If the line is either horizontal, or closer to horizontal than to vertical, the points will be extracted from left to right in the pix. Likewise, if the line is vertical, or closer to vertical than to horizontal, the points will be extracted from top to bottom. (3) Can be used with numaCountReversals(), for example, to characterize the intensity smoothness along a line.
| LEPT_DLL PIXA* pixExtractTextlines | ( | PIX * | pixs, |
| l_int32 | maxw, | ||
| l_int32 | maxh, | ||
| l_int32 | minw, | ||
| l_int32 | minh | ||
| ) |
Input: pixs (any depth, assumed to have nearly horizontal text)
maxw, maxh (initial filtering: remove any components in pixs
with dimensions larger than maxw or maxh)
minw, minh (final filtering: remove extracted 'lines'
with sizes smaller than minw or minh)
Return: pixa (of textline images, including bounding boxes), or
null on error
Notes: (1) This first removes components from pixs that are either wide (> @maxw) or tall (> @maxh). (2) This function assumes that textlines have sufficient vertical separation and small enough skew so that a horizontal dilation sufficient to join words will not join textlines. Images with multiple columns of text may have the textlines join across the space between columns. (3) A final filtering operation removes small components, such that width < @minw or height < @minh. (4) For reasonable accuracy, the resolution of pixs should be at least 100 ppi. For reasonable efficiency, the resolution should not exceed 600 ppi. (5) This can be used to determine if some region of a scanned image is horizontal text. (6) As an example, for a pix with resolution 300 ppi, a reasonable set of parameters is: pixExtractTextlines(pix, 150, 150, 10, 5);
Input: pixs (colormapped or 8 bpp or 32 bpp)
pixb (8 bpp blender)
factor (multiplicative factor to apply to blender value)
type (L_BLEND_TO_WHITE, L_BLEND_TO_BLACK)
Return: pixd, or null on error
Notes: (1) This function combines two pix aligned to the UL corner; they need not be the same size. (2) Each pixel in pixb is multiplied by 'factor' divided by 255, and clipped to the range [0 ... 1]. This gives the fade fraction to be applied to pixs. Fade either to white (L_BLEND_TO_WHITE) or to black (L_BLEND_TO_BLACK).
Input: pixs
xsize (width of max/min op, smoothing; any integer >= 1)
ysize (height of max/min op, smoothing; any integer >= 1)
type (L_TOPHAT_WHITE: image - min
L_TOPHAT_BLACK: max - image)
Return: pixd, or null on error
Notes: (1) Don't be fooled. This is NOT a tophat. It is a tophat-like operation, where the result is similar to what you'd get if you used an erosion instead of an opening, or a dilation instead of a closing. (2) Instead of opening or closing at full resolution, it does a fast downscale/minmax operation, then a quick small smoothing at low res, a replicative expansion of the "background" to full res, and finally a removal of the background level from the input image. The smoothing step may not be important. (3) It does not remove noise as well as a tophat, but it is 5 to 10 times faster. If you need the preciseness of the tophat, don't use this. (4) The L_TOPHAT_WHITE flag emphasizes small bright regions, whereas the L_TOPHAT_BLACK flag emphasizes small dark regions.
| LEPT_DLL PIX* pixFewColorsMedianCutQuantMixed | ( | PIX * | pixs, |
| l_int32 | ncolor, | ||
| l_int32 | ngray, | ||
| l_int32 | maxncolors, | ||
| l_int32 | darkthresh, | ||
| l_int32 | lightthresh, | ||
| l_int32 | diffthresh | ||
| ) |
pixFewColorsMedianCutQuantMixed()
Input: pixs (32 bpp rgb)
ncolor (number of colors to be assigned to pixels with
significant color)
ngray (number of gray colors to be used; must be >= 2)
maxncolors (maximum number of colors to be returned
from pixColorsForQuantization(); use 0 for default)
darkthresh (threshold near black; if the lightest component
is below this, the pixel is not considered to
be gray or color; use 0 for default)
lightthresh (threshold near white; if the darkest component
is above this, the pixel is not considered to
be gray or color; use 0 for default)
diffthresh (thresh for the max difference between component
values; for differences below this, the pixel
is considered to be gray; use 0 for default)
Return: pixd (8 bpp, median cut quantized for pixels that are
not gray; gray pixels are quantized separately
over the full gray range); null if too many colors
or on error
Notes: (1) This is the "few colors" version of pixMedianCutQuantMixed(). It fails (returns NULL) if it finds more than maxncolors, but otherwise it gives the same result. (2) Recommended input parameters are: @maxncolors: 20 @darkthresh: 20 @lightthresh: 244 @diffthresh: 15 (any higher can miss colors differing slightly from gray) (3) Both ncolor and ngray should be at least equal to maxncolors. If they're not, they are automatically increased, and a warning is given. (4) If very little color content is found, the input is converted to gray and quantized in equal intervals. (5) This can be useful for quantizing orthographically generated images such as color maps, where there may be more than 256 colors because of aliasing or jpeg artifacts on text or lines, but there are a relatively small number of solid colors. (6) Example of usage: // Try to quantize, using default values for mixed med cut Pix *pixq = pixFewColorsMedianCutQuantMixed(pixs, 100, 20, 0, 0, 0, 0); if (!pixq) // too many colors; don't quantize pixq = pixClone(pixs);
Input: pixs (32 bpp rgb)
level (significant bits for each of RGB; valid in [1...6])
Return: pixd (quantized to octcube) or null on error
Notes: (1) Generates a colormapped image, where the colormap table values are the averages of all pixels that are found in the octcube. (2) This fails if there are more than 256 colors (i.e., more than 256 occupied octcubes). (3) Often level 3 (512 octcubes) will succeed because not more than half of them are occupied with 1 or more pixels. (4) The depth of the result, which is either 2, 4 or 8 bpp, is the minimum required to hold the number of colors that are found. (5) This can be useful for quantizing orthographically generated images such as color maps, where there may be more than 256 colors because of aliasing or jpeg artifacts on text or lines, but there are a relatively small number of solid colors. Then, use with level = 3 can often generate a compact and accurate representation of the original RGB image. For this purpose, it is better than pixFewColorsOctcubeQuant2(), because it uses the average value of pixels in the octcube rather than the first found pixel. It is also simpler to use, because it generates the histogram internally.
| LEPT_DLL PIX* pixFewColorsOctcubeQuant2 | ( | PIX * | pixs, |
| l_int32 | level, | ||
| NUMA * | na, | ||
| l_int32 | ncolors, | ||
| l_int32 * | pnerrors | ||
| ) |
Input: pixs (32 bpp rgb)
level (of octcube indexing, for histogram: 3, 4, 5, 6)
na (histogram of pixel occupation in octree leaves at
given level)
ncolors (number of occupied octree leaves at given level)
&nerrors (<optional return> num of pixels not exactly
represented in the colormap)
Return: pixd (2, 4 or 8 bpp with colormap), or null on error
Notes: (1) Generates a colormapped image, where the colormap table values are the averages of all pixels that are found in the octcube. (2) This fails if there are more than 256 colors (i.e., more than 256 occupied octcubes). (3) Often level 3 (512 octcubes) will succeed because not more than half of them are occupied with 1 or more pixels. (4) For an image with not more than 256 colors, it is unlikely that two pixels of different color will fall in the same octcube at level = 4. However it is possible, and this function optionally returns @nerrors, the number of pixels where, because more than one color is in the same octcube, the pixel color is not exactly reproduced in the colormap. The colormap for an occupied leaf of the octree contains the color of the first pixel encountered in that octcube. (5) This differs from pixFewColorsOctcubeQuant1(), which also requires not more than 256 occupied leaves, but represents the color of each leaf by an average over the pixels in that leaf. This also requires precomputing the histogram of occupied octree leaves, which is generated using pixOctcubeHistogram(). (6) This is used in pixConvertRGBToColormap() for images that are determined, by their histogram, to have relatively few colors. This typically happens with orthographically produced images (as opposed to natural images), where it is expected that most of the pixels within a leaf octcube have exactly the same color, and quantization to that color is lossless.
| LEPT_DLL PIX* pixFewColorsOctcubeQuantMixed | ( | PIX * | pixs, |
| l_int32 | level, | ||
| l_int32 | darkthresh, | ||
| l_int32 | lightthresh, | ||
| l_int32 | diffthresh, | ||
| l_float32 | minfract, | ||
| l_int32 | maxspan | ||
| ) |
pixFewColorsOctcubeQuantMixed()
Input: pixs (32 bpp rgb)
level (significant octcube bits for each of RGB;
valid in [1...6]; use 0 for default)
darkthresh (threshold near black; if the lightest component
is below this, the pixel is not considered to
be gray or color; use 0 for default)
lightthresh (threshold near white; if the darkest component
is above this, the pixel is not considered to
be gray or color; use 0 for default)
diffthresh (thresh for the max difference between component
values; for differences below this, the pixel
is considered to be gray; use 0 for default)
minfract (min fraction of pixels for gray histo bin;
use 0.0 for default)
maxspan (max size of gray histo bin; use 0 for default)
Return: pixd (8 bpp, quantized to octcube for pixels that are
not gray; gray pixels are quantized separately
over the full gray range), or null on error
Notes: (1) First runs pixFewColorsOctcubeQuant1(). If this succeeds, it separates the color from gray(ish) entries in the cmap, and re-quantizes the gray pixels. The result has some pixels in color and others in gray. (2) This fails if there are more than 256 colors (i.e., more than 256 occupied octcubes in the color quantization). (3) Level 3 (512 octcubes) will usually succeed because not more than half of them are occupied with 1 or more pixels. (4) This uses the criterion from pixColorFraction() for deciding if a colormap entry is color; namely, if the color components are not too close to either black or white, and the maximum difference between component values equals or exceeds a threshold. (5) For quantizing the gray pixels, it uses a histogram-based method where input parameters determining the buckets are the minimum population fraction and the maximum allowed size. (6) Recommended input parameters are: @level: 3 or 4 (3 is default) @darkthresh: 20 @lightthresh: 244 @diffthresh: 20 @minfract: 0.05 @maxspan: 15 These numbers are intended to be conservative (somewhat over-sensitive) in color detection. It's usually better to pay extra with octcube quantization of a grayscale image than to use grayscale quantization on an image that has some actual color. Input 0 on any of these to get the default. (7) This can be useful for quantizing orthographically generated images such as color maps, where there may be more than 256 colors because of aliasing or jpeg artifacts on text or lines, but there are a relatively small number of solid colors. It usually gives results that are better than pixOctcubeQuantMixedWithGray(), both in size and appearance. But it is a bit slower.
Input: pixd (usual 3 choices: null, == pixs, != pixs)
pixs (1 bpp)
sel name
Return: pixd
Notes: (1) This is a dwa implementation of the hit-miss transform on pixs by the sel. (2) The sel must be limited in size to not more than 31 pixels about the origin. It must have at least one hit, and it can have any number of misses. (3) This handles all required setting of the border pixels before erosion and dilation.
Input: pixs (1 bpp)
filling connectivity (4 or 8)
Return: pixd (with the background c.c. touching the border
filled to foreground), or null on error
Notes: (1) This fills all bg components touching the border to fg. It is the photometric inverse of pixRemoveBorderConnComps(). (2) Invert the result to get the "holes" left after this fill. This can be done multiple times, extracting holes within holes after each pair of fillings. Specifically, this code peels away n successive embeddings of components: pix1 = <initial image>; for (i = 0; i < 2 * n; i++) { pix2 = pixFillBgFromBorder(pix1, 8); pixInvert(pix2, pix2); pixDestroy(&pix1); pix1 = pix2; }
Input: pixs (1 bpp)
filling connectivity (4 or 8)
Return: pixd (all topologically outer closed borders are filled
as connected components), or null on error
Notes: (1) Start with 1-pixel black border on otherwise white pixd (2) Subtract input pixs to remove border pixels that were also on the closed border (3) Use the inverted pixs as the filling mask to fill in all the pixels from the outer border to the closed border on pixs (4) Invert the result to get the filled component, including the input border (5) If the borders are 4-c.c., use 8-c.c. filling, and v.v. (6) Closed borders within c.c. that represent holes, etc., are filled.
| LEPT_DLL PIX* pixFillHolesToBoundingRect | ( | PIX * | pixs, |
| l_int32 | minsize, | ||
| l_float32 | maxhfract, | ||
| l_float32 | minfgfract | ||
| ) |
Input: pixs (1 bpp)
minsize (min number of pixels in the hole)
maxhfract (max hole area as fraction of fg pixels in the cc)
minfgfract (min fg area as fraction of bounding rectangle)
Return: pixd (pixs, with some holes possibly filled and some c.c.
possibly expanded to their bounding rects),
or null on error
Notes: (1) This does not fill holes that are smaller in area than 'minsize'. (2) This does not fill holes with an area larger than 'maxhfract' times the fg area of the c.c. (3) This does not expand the fg of the c.c. to bounding rect if the fg area is less than 'minfgfract' times the area of the bounding rect. (4) The decisions are made as follows:
Input: pix (8 bpp; a map, with one pixel for each tile in
a larger image)
nx (number of horizontal pixel tiles that are entirely
covered with pixels in the original source image)
ny (ditto for the number of vertical pixel tiles)
filltype (L_FILL_WHITE or L_FILL_BLACK)
Return: 0 if OK, 1 on error
Notes: (1) This is an in-place operation on pix (the map). pix is typically a low-resolution version of some other image from which it was derived, where each pixel in pix corresponds to a rectangular tile (say, m x n) of pixels in the larger image. All we need to know about the larger image is whether or not the rightmost column and bottommost row of pixels in pix correspond to tiles that are only partially covered by pixels in the larger image. (2) Typically, some number of pixels in the input map are not known, and their values must be determined by near pixels that are known. These unknown pixels are the 'holes'. They can take on only two values, 0 and 255, and the instruction about which to fill is given by the filltype flag. (3) The "holes" can come from two sources. The first is when there are not enough foreground or background pixels in a tile; the second is when a tile is at least partially covered by an image mask. If we're filling holes in a fg mask, the holes are initialized to black (0) and use L_FILL_BLACK. For filling holes in a bg mask, initialize the holes to white (255) and use L_FILL_WHITE. (4) If w is the map width, nx = w or nx = w - 1; ditto for h and ny.
Input: pixs (1 bpp, with 4-connected polygon outline)
pta (vertices of the polygon)
xmin, ymin (min values of vertices of polygon)
Return: pixd (with outline filled), or null on error
Notes: (1) This fills the interior of the polygon, returning a new pix. It works for both convex and non-convex polygons. (2) To generate a filled polygon from a pta: PIX *pixt = pixRenderPolygon(pta, 1, &xmin, &ymin); PIX *pixd = pixFillPolygon(pixt, pta, xmin, ymin); pixDestroy(&pixt);
Input: pixs (32 bpp)
offset (same as used for initialization)
depth (8, 16 or 32 bpp, of destination)
Return: pixd (8, 16 or 32 bpp), or null on error
Notes: (1) The offset must be >= 0 and should not exceed 0x40000000. (2) The offset is subtracted from the src 32 bpp image (3) For 8 bpp dest, the result is clipped to [0, 0xff] (4) For 16 bpp dest, the result is clipped to [0, 0xffff]
Input: pixs (32 bpp)
offset (same as used for initialization)
threshold (values less than this are set in the destination)
Return: pixd (1 bpp), or null on error
Notes: (1) The offset must be >= 0 and should not exceed 0x40000000. (2) The offset is subtracted from the src 32 bpp image
Input: pixs (1 bpp)
tab (<optional> pixel sum table, can be NULL)
&fract (<return> fg area/size ratio)
Return: 0 if OK, 1 on error
Notes: (1) This finds the ratio of the number of fg pixels to the size of the pix (w * h). It is typically used for a single connected component.
| LEPT_DLL l_int32 pixFindAreaFractionMasked | ( | PIX * | pixs, |
| BOX * | box, | ||
| PIX * | pixm, | ||
| l_int32 * | tab, | ||
| l_float32 * | pfract | ||
| ) |
Input: pixs (1 bpp, typically a single component)
box (<optional> for pixs relative to pixm)
pixm (1 bpp mask, typically over the entire image from
which the component pixs was extracted)
tab (<optional> pixel sum table, can be NULL)
&fract (<return> fg area/size ratio)
Return: 0 if OK, 1 on error
Notes: (1) This finds the ratio of the number of masked fg pixels in pixs to the total number of fg pixels in pixs. It is typically used for a single connected component. If there are no fg pixels, this returns a ratio of 0.0. (2) The box gives the location of the pix relative to that of the UL corner of the mask. Therefore, the rasterop is performed with the pix translated to its location (x, y) in the mask before ANDing. If box == NULL, the UL corners of pixs and pixm are aligned.
Input: pixs (1 bpp)
tab (<optional> pixel sum table, can be NULL)
&fract (<return> area/perimeter ratio)
Return: 0 if OK, 1 on error
Notes: (1) The area is the number of fg pixels that are not on the boundary (i.e., are not 8-connected to a bg pixel), and the perimeter is the number of fg boundary pixels. Returns 0.0 if there are no fg pixels. (2) This function is retained because clients are using it.
Input: pixs (1 bpp)
&pta (<optional return> pairs of pts corresponding to
approx. ends of each text line)
debug (usually 0; set to 1 for debugging output)
Return: na (of baseline y values), or null on error
Notes: (1) Input binary image must have text lines already aligned horizontally. This can be done by either rotating the image with pixDeskew(), or, if a projective transform is required, by doing pixDeskewLocal() first. (2) Input null for &pta if you don't want this returned. The pta will come in pairs of points (left and right end of each baseline). (3) Caution: this will not work properly on text with multiple columns, where the lines are not aligned between columns. If there are multiple columns, they should be extracted separately before finding the baselines. (4) This function constructs different types of output for baselines; namely, a set of raster line values and a set of end points of each baseline. (5) This function was designed to handle short and long text lines without using dangerous thresholds on the peak heights. It does this by combining the differential signal with a morphological analysis of the locations of the text lines. One can also combine this data to normalize the peak heights, by weighting the differential signal in the region of each baseline by the inverse of the width of the text line found there. (6) There are various debug sections that can be turned on with the debug flag.
Input: pixs (1 bpp)
Return: pta, or null on error
Notes: (1) Finds the 4 corner-most pixels, as defined by a search inward from each corner, using a 45 degree line.
pixFindDifferentialSquareSum()
Input: pixs
&sum (<return> result)
Return: 0 if OK, 1 on error
Notes: (1) At the top and bottom, we skip:
Input: pixs1 (8 bpp)
pixs2 (8 bpp)
Return: pixd (1 bpp mask), or null on error
Notes: (1) The two images are aligned at the UL corner, and the returned image has ON pixels where the pixels in pixs1 and pixs2 have equal values.
| LEPT_DLL l_int32 pixFindHistoPeaksHSV | ( | PIX * | pixs, |
| l_int32 | type, | ||
| l_int32 | width, | ||
| l_int32 | height, | ||
| l_int32 | npeaks, | ||
| l_float32 | erasefactor, | ||
| PTA ** | ppta, | ||
| NUMA ** | pnatot, | ||
| PIXA ** | ppixa | ||
| ) |
Input: pixs (32 bpp; HS, HV or SV histogram; not changed)
type (L_HS_HISTO, L_HV_HISTO or L_SV_HISTO)
width (half width of sliding window)
height (half height of sliding window)
npeaks (number of peaks to look for)
erasefactor (ratio of erase window size to sliding window size)
&pta (<return> locations of max for each integrated peak area)
&natot (<return> integrated peak areas)
&pixa (<optional return> pixa for debugging; NULL to skip)
Return: 0 if OK, 1 on error
Notes: (1) pixs is a 32 bpp histogram over a pair of HSV colorspace components (HS, HV or SV). It should be thought of as a single sample with 32 bps (bits/sample). (2) After each peak is found, the peak is erased with a window that is centered on the peak and scaled from the sliding window by @erasefactor. Typically, @erasefactor is chosen to be > 1.0. (3) Data for a maximum of @npeaks is returned in @pta and @natot. (4) For debugging, after the pixa is returned, display with: pixd = pixaDisplayTiledInRows(pixa, 32, 1000, 1.0, 0, 30, 2);
| LEPT_DLL l_int32 pixFindHorizontalRuns | ( | PIX * | pix, |
| l_int32 | y, | ||
| l_int32 * | xstart, | ||
| l_int32 * | xend, | ||
| l_int32 * | pn | ||
| ) |
Input: pix (1 bpp)
y (line to traverse)
xstart (returns array of start positions for fg runs)
xend (returns array of end positions for fg runs)
&n (<return> the number of runs found)
Return: 0 if OK; 1 on error
Notes: (1) This finds foreground horizontal runs on a single scanline. (2) To find background runs, use pixInvert() before applying this function. (3) The xstart and xend arrays are input. They should be of size w/2 + 1 to ensure that they can hold the maximum number of runs in the raster line.
| LEPT_DLL l_int32 pixFindLargestRectangle | ( | PIX * | pixs, |
| l_int32 | polarity, | ||
| BOX ** | pbox, | ||
| const char * | debugfile | ||
| ) |
Input: pixs (1 bpp)
polarity (0 within background, 1 within foreground)
&box (<return> largest rectangle, either by area or
by perimeter)
debugfile (<optional> name of file to which the image with the rectangle drawn on it is written; use NULL to skip)
Return: 0 if OK, 1 on error
Notes: (1) Why is this here? This is a simple and elegant solution to a problem in computational geometry that at first appears quite difficult: what is the largest rectangle that can be placed in the image, covering only pixels of one polarity (bg or fg)? The solution is O(n), where n is the number of pixels in the image, and it requires nothing more than using a simple recursion relation in a single sweep of the image. (2) In a sweep from UL to LR with left-to-right being the fast direction, calculate the largest white rectangle at (x, y), using previously calculated values at pixels #1 and #2: #1: (x, y - 1) #2: (x - 1, y) We also need the most recent "black" pixels that were seen in the current row and column. Consider the largest area. There are only two possibilities: (a) Min(w(1), horizdist) * (h(1) + 1) (b) Min(h(2), vertdist) * (w(2) + 1) where horizdist: the distance from the rightmost "black" pixel seen in the current row across to the current pixel vertdist: the distance from the lowest "black" pixel seen in the current column down to the current pixel and we choose the Max of (a) and (b). (3) To convince yourself that these recursion relations are correct, it helps to draw the maximum rectangles at #1 and #2. Then for #1, you try to extend the rectangle down one line, so that the height is h(1) + 1. Do you get the full width of #1, w(1)? It depends on where the black pixels are in the current row. You know the final width is bounded by w(1) and w(2) + 1, but the actual value depends on the distribution of black pixels in the current row that are at a distance from the current pixel that is between these limits. We call that value "horizdist", and the area is then given by the expression (a) above. Using similar reasoning for #2, where you attempt to extend the rectangle to the right by 1 pixel, you arrive at (b). The largest rectangle is then found by taking the Max.
| LEPT_DLL l_int32 pixFindMaxHorizontalRunOnLine | ( | PIX * | pix, |
| l_int32 | y, | ||
| l_int32 * | pxstart, | ||
| l_int32 * | psize | ||
| ) |
pixFindMaxHorizontalRunOnLine()
Input: pix (1 bpp)
y (line to traverse)
&xstart (<optional return> start position)
&size (<return> the size of the run)
Return: 0 if OK; 1 on error
Notes: (1) This finds the longest foreground horizontal run on a scanline. (2) To find background runs, use pixInvert() before applying this function.
Input: pix (1 bpp)
direction (L_HORIZONTAL_RUNS or L_VERTICAL_RUNS)
&nastart (<optional return> start locations of longest runs)
Return: na (of lengths of runs), or null on error
Notes: (1) This finds the longest foreground runs by row or column. (2) To find background runs, use pixInvert() before applying this function.
| LEPT_DLL l_int32 pixFindMaxVerticalRunOnLine | ( | PIX * | pix, |
| l_int32 | x, | ||
| l_int32 * | pystart, | ||
| l_int32 * | psize | ||
| ) |
Input: pix (1 bpp)
x (column to traverse)
&ystart (<optional return> start position)
&size (<return> the size of the run)
Return: 0 if OK; 1 on error
Notes: (1) This finds the longest foreground vertical run in a column. (2) To find background runs, use pixInvert() before applying this function.
| LEPT_DLL l_int32 pixFindNormalizedSquareSum | ( | PIX * | pixs, |
| l_float32 * | phratio, | ||
| l_float32 * | pvratio, | ||
| l_float32 * | pfract | ||
| ) |
Input: pixs
&hratio (<optional return> ratio of normalized horiz square sum
to result if the pixel distribution were uniform)
&vratio (<optional return> ratio of normalized vert square sum
to result if the pixel distribution were uniform)
&fract (<optional return> ratio of fg pixels to total pixels)
Return: 0 if OK, 1 on error or if there are no fg pixels
Notes: (1) Let the image have h scanlines and N fg pixels. If the pixels were uniformly distributed on scanlines, the sum of squares of fg pixels on each scanline would be h * (N / h)^2. However, if the pixels are not uniformly distributed (e.g., for text), the sum of squares of fg pixels will be larger. We return in hratio and vratio the ratio of these two values. (2) If there are no fg pixels, hratio and vratio are returned as 0.0.
| LEPT_DLL l_int32 pixFindOverlapFraction | ( | PIX * | pixs1, |
| PIX * | pixs2, | ||
| l_int32 | x2, | ||
| l_int32 | y2, | ||
| l_int32 * | tab, | ||
| l_float32 * | pratio, | ||
| l_int32 * | pnoverlap | ||
| ) |
Input: pixs1, pixs2 (1 bpp)
x2, y2 (location in pixs1 of UL corner of pixs2)
tab (<optional> pixel sum table, can be null)
&ratio (<return> ratio fg intersection to fg union)
&noverlap (<optional return> number of overlapping pixels)
Return: 0 if OK, 1 on error
Notes: (1) The UL corner of pixs2 is placed at (x2, y2) in pixs1. (2) This measure is similar to the correlation.
| LEPT_DLL BOX* pixFindPageForeground | ( | PIX * | pixs, |
| l_int32 | threshold, | ||
| l_int32 | mindist, | ||
| l_int32 | erasedist, | ||
| l_int32 | pagenum, | ||
| l_int32 | showmorph, | ||
| l_int32 | display, | ||
| const char * | pdfdir | ||
| ) |
Input: pixs (full resolution; any type or depth)
threshold (for binarization; typically about 128)
mindist (min distance of text from border to allow
cleaning near border; at 2x reduction, this
should be larger than 50; typically about 70)
erasedist (when conditions are satisfied, erase anything
within this distance of the edge;
typically 30 at 2x reduction)
pagenum (use for debugging when called repeatedly; labels
debug images that are assembled into pdfdir)
showmorph (set to a negative integer to show steps in
generating masks; this is typically used
for debugging region extraction)
display (set to 1 to display mask and selected region
for debugging a single page)
pdfdir (subdirectory of /tmp where images showing the
result are placed when called repeatedly; use
null if no output requested)
Return: box (region including foreground, with some pixel noise
removed), or null if not found
Notes: (1) This doesn't simply crop to the fg. It attempts to remove pixel noise and junk at the edge of the image before cropping. The input @threshold is used if pixs is not 1 bpp. (2) There are several debugging options, determined by the last 4 arguments. (3) This is not intended to work on small thumbnails. The dimensions of pixs must be at least MinWidth x MinHeight. (4) If you want pdf output of results when called repeatedly, the pagenum arg labels the images written, which go into /tmp/lept/<pdfdir>/<pagenum>.png. In that case, you would clean out the /tmp directory before calling this function on each page: lept_rmdir("/lept/<pdfdir>"); lept_mkdir("/lept/<pdfdir>");
Input: pixs (1 bpp)
tab (<optional> pixel sum table, can be NULL)
&ratio (<return> perimeter/size ratio)
Return: 0 if OK, 1 on error
Notes: (1) We take the 'size' as twice the sum of the width and height of pixs, and the perimeter is the number of fg boundary pixels. We use the fg pixels of the boundary because the pix may be clipped to the boundary, so an erosion is required to count all boundary pixels. (2) This has a large value for dendritic, fractal-like components with highly irregular boundaries. (3) This is typically used for a single connected component. It has a value of about 1.0 for rectangular components with relatively smooth boundaries.
Input: pixs (1 bpp)
tab (<optional> pixel sum table, can be NULL)
&fract (<return> perimeter/area ratio)
Return: 0 if OK, 1 on error
Notes: (1) The perimeter is the number of fg boundary pixels, and the area is the number of fg pixels. This returns 0.0 if there are no fg pixels. (2) Unlike pixFindAreaPerimRatio(), this uses the full set of fg pixels for the area, and the ratio is taken in the opposite order. (3) This is typically used for a single connected component. This always has a value <= 1.0, and if the average distance of a fg pixel from the nearest bg pixel is d, this has a value ~1/d.
Input: pixs (1 bpp)
dist (max distance allowed between bounding box and nearest
foreground pixel within it)
minw, minh (minimum size in each direction as a requirement
for a conforming rectangle)
Return: boxa (of components that conform), or null on error
Notes: (1) This applies the function pixConformsToRectangle() to each 8-c.c. in pixs, and returns a boxa containing the regions of all components that are conforming. (2) Conforming components must satisfy both the size constraint given by @minw and @minh and the slop in conforming to a rectangle determined by @dist.
| LEPT_DLL l_int32 pixFindRepCloseTile | ( | PIX * | pixs, |
| BOX * | box, | ||
| l_int32 | searchdir, | ||
| l_int32 | mindist, | ||
| l_int32 | tsize, | ||
| l_int32 | ntiles, | ||
| BOX ** | pboxtile, | ||
| l_int32 | debug | ||
| ) |
Input: pixs (32 bpp rgb)
box (region of pixs to search around)
searchdir (L_HORIZ or L_VERT; direction to search)
mindist (min distance of selected tile edge from box; >= 0)
tsize (tile size; > 1; even; typically ~50)
ntiles (number of tiles tested in each row/column)
&boxtile (<return> region of best tile)
debug (1 for debug output)
Return: 0 if OK, 1 on error
Notes: (1) This looks for one or two square tiles with conforming median intensity and low variance, that is outside but near the input box. (2) @mindist specifies the gap between the box and the potential tiles. The tiles are given an overlap of 50%. @ntiles specifies the number of tiles that are tested beyond @mindist for each row or column. (3) For example, if @mindist = 20, @tsize = 50 and @ntiles = 3, a horizontal search to the right will have 3 tiles in each row, with left edges at 20, 45 and 70 from the right edge of the input @box. The number of rows of tiles is determined by the height of @box and @tsize, with the 50% overlap.
Input: pixs (1 bpp)
&angle (<return> angle required to deskew, in degrees)
&conf (<return> confidence value is ratio max/min scores)
Return: 0 if OK, 1 on error or if angle measurement not valid
Notes: (1) This is a simple high-level interface, that uses default values of the parameters for reasonable speed and accuracy. (2) The angle returned is the negative of the skew angle of the image. It is the angle required for deskew. Clockwise rotations are positive angles.
| LEPT_DLL PIX* pixFindSkewAndDeskew | ( | PIX * | pixs, |
| l_int32 | redsearch, | ||
| l_float32 * | pangle, | ||
| l_float32 * | pconf | ||
| ) |
Input: pixs (any depth)
redsearch (for binary search: reduction factor = 1, 2 or 4;
use 0 for default)
&angle (<optional return> angle required to deskew,
in degrees; use NULL to skip)
&conf (<optional return> conf value is ratio
of max/min scores; use NULL to skip)
Return: pixd (deskewed pix), or null on error
Notes: (1) This binarizes if necessary and finds the skew angle. If the angle is large enough and there is sufficient confidence, it returns a deskewed image; otherwise, it returns a clone.
| LEPT_DLL l_int32 pixFindSkewOrthogonalRange | ( | PIX * | pixs, |
| l_float32 * | pangle, | ||
| l_float32 * | pconf, | ||
| l_int32 | redsweep, | ||
| l_int32 | redsearch, | ||
| l_float32 | sweeprange, | ||
| l_float32 | sweepdelta, | ||
| l_float32 | minbsdelta, | ||
| l_float32 | confprior | ||
| ) |
| LEPT_DLL l_int32 pixFindSkewSweep | ( | PIX * | pixs, |
| l_float32 * | pangle, | ||
| l_int32 | reduction, | ||
| l_float32 | sweeprange, | ||
| l_float32 | sweepdelta | ||
| ) |
Input: pixs (1 bpp)
&angle (<return> angle required to deskew, in degrees)
reduction (factor = 1, 2, 4 or 8)
sweeprange (half the full range; assumed about 0; in degrees)
sweepdelta (angle increment of sweep; in degrees)
Return: 0 if OK, 1 on error or if angle measurement not valid
Notes: (1) This examines the 'score' for skew angles with equal intervals. (2) Caller must check the return value for validity of the result.
| LEPT_DLL l_int32 pixFindSkewSweepAndSearch | ( | PIX * | pixs, |
| l_float32 * | pangle, | ||
| l_float32 * | pconf, | ||
| l_int32 | redsweep, | ||
| l_int32 | redsearch, | ||
| l_float32 | sweeprange, | ||
| l_float32 | sweepdelta, | ||
| l_float32 | minbsdelta | ||
| ) |
Input: pixs (1 bpp)
&angle (<return> angle required to deskew; in degrees)
&conf (<return> confidence given by ratio of max/min score)
redsweep (sweep reduction factor = 1, 2, 4 or 8)
redsearch (binary search reduction factor = 1, 2, 4 or 8;
and must not exceed redsweep)
sweeprange (half the full range, assumed about 0; in degrees)
sweepdelta (angle increment of sweep; in degrees)
minbsdelta (min binary search increment angle; in degrees)
Return: 0 if OK, 1 on error or if angle measurement not valid
Notes: (1) This finds the skew angle, doing first a sweep through a set of equal angles, and then doing a binary search until convergence. (2) Caller must check the return value for validity of the result. (3) In computing the differential line sum variance score, we sum the result over scanlines, but we always skip:
| LEPT_DLL l_int32 pixFindSkewSweepAndSearchScore | ( | PIX * | pixs, |
| l_float32 * | pangle, | ||
| l_float32 * | pconf, | ||
| l_float32 * | pendscore, | ||
| l_int32 | redsweep, | ||
| l_int32 | redsearch, | ||
| l_float32 | sweepcenter, | ||
| l_float32 | sweeprange, | ||
| l_float32 | sweepdelta, | ||
| l_float32 | minbsdelta | ||
| ) |
pixFindSkewSweepAndSearchScore()
Input: pixs (1 bpp)
&angle (<return> angle required to deskew; in degrees)
&conf (<return> confidence given by ratio of max/min score)
&endscore (<optional return> max score; use NULL to ignore)
redsweep (sweep reduction factor = 1, 2, 4 or 8)
redsearch (binary search reduction factor = 1, 2, 4 or 8;
and must not exceed redsweep)
sweepcenter (angle about which sweep is performed; in degrees)
sweeprange (half the full range, taken about sweepcenter;
in degrees)
sweepdelta (angle increment of sweep; in degrees)
minbsdelta (min binary search increment angle; in degrees)
Return: 0 if OK, 1 on error or if angle measurement not valid
Notes: (1) This finds the skew angle, doing first a sweep through a set of equal angles, and then doing a binary search until convergence. (2) There are two built-in constants that determine if the returned confidence is nonzero:
| LEPT_DLL l_int32 pixFindSkewSweepAndSearchScorePivot | ( | PIX * | pixs, |
| l_float32 * | pangle, | ||
| l_float32 * | pconf, | ||
| l_float32 * | pendscore, | ||
| l_int32 | redsweep, | ||
| l_int32 | redsearch, | ||
| l_float32 | sweepcenter, | ||
| l_float32 | sweeprange, | ||
| l_float32 | sweepdelta, | ||
| l_float32 | minbsdelta, | ||
| l_int32 | pivot | ||
| ) |
pixFindSkewSweepAndSearchScorePivot()
Input: pixs (1 bpp)
&angle (<return> angle required to deskew; in degrees)
&conf (<return> confidence given by ratio of max/min score)
&endscore (<optional return> max score; use NULL to ignore)
redsweep (sweep reduction factor = 1, 2, 4 or 8)
redsearch (binary search reduction factor = 1, 2, 4 or 8;
and must not exceed redsweep)
sweepcenter (angle about which sweep is performed; in degrees)
sweeprange (half the full range, taken about sweepcenter;
in degrees)
sweepdelta (angle increment of sweep; in degrees)
minbsdelta (min binary search increment angle; in degrees)
pivot (L_SHEAR_ABOUT_CORNER, L_SHEAR_ABOUT_CENTER)
Return: 0 if OK, 1 on error or if angle measurement not valid
Notes: (1) See notes in pixFindSkewSweepAndSearchScore(). (2) This allows choice of shear pivoting from either the UL corner or the center. For small angles, the ability to discriminate angles is better with shearing from the UL corner. However, for large angles (say, greater than 20 degrees), it is better to shear about the center because a shear from the UL corner loses too much of the image.
| LEPT_DLL l_int32 pixFindThreshFgExtent | ( | PIX * | pixs, |
| l_int32 | thresh, | ||
| l_int32 * | ptop, | ||
| l_int32 * | pbot | ||
| ) |
Input: pixs (1 bpp)
thresh (threshold number of pixels in row)
&top (<optional return> location of top of region)
&bot (<optional return> location of bottom of region)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixFindVerticalRuns | ( | PIX * | pix, |
| l_int32 | x, | ||
| l_int32 * | ystart, | ||
| l_int32 * | yend, | ||
| l_int32 * | pn | ||
| ) |
Input: pix (1 bpp)
x (line to traverse)
ystart (returns array of start positions for fg runs)
yend (returns array of end positions for fg runs)
&n (<return> the number of runs found)
Return: 0 if OK; 1 on error
Notes: (1) This finds foreground vertical runs in a single column. (2) To find background runs, use pixInvert() before applying this function. (3) The ystart and yend arrays are input. They should be of size h/2 + 1 to ensure that they can hold the maximum number of runs in the column.
Input: pixs (32 bpp; 24-bit color)
ditherflag (1 for dithering; 0 for no dithering)
Return: pixd (8 bit with colormap), or null on error
This simple 1-pass color quantization works by breaking the color space into 256 pieces, with 3 bits quantized for each of red and green, and 2 bits quantized for blue. We shortchange blue because the eye is least sensitive to blue. This division of the color space is into two levels of octrees, followed by a further division by 4 (not 8), where both blue octrees have been combined in the third level.
The color map is generated from the 256 color centers by taking the representative color to be the center of the cell volume. This gives a maximum error in the red and green values of 16 levels, and a maximum error in the blue sample of 32 levels.
Each pixel in the 24-bit color image is placed in its containing cell, given by the relevant MSbits of the red, green and blue samples. An error-diffusion dithering is performed on each color sample to give the appearance of good average local color. Dithering is required; without it, the contouring and visible color errors are very bad.
I originally implemented this algorithm in two passes, where the first pass was used to compute the weighted average of each sample in each pre-allocated region of color space. The idea was to use these centroids in the dithering algorithm of the second pass, to reduce the average error that was being dithered. However, with dithering, there is virtually no difference, so there is no reason to make the first pass. Consequently, this 1-pass version just assigns the pixels to the centers of the pre-allocated cells. We use dithering to spread the difference between the sample value and the location of the center of the cell. For speed and simplicity, we use integer dithering and propagate only to the right, down, and diagonally down-right, with ratios 3/8, 3/8 and 1/4, respectively. The results should be nearly as good, and a bit faster, with propagation only to the right and down.
The algorithm is very fast, because there is no search, only fast generation of the cell index for each pixel. We use a simple mapping from the three 8 bit rgb samples to the 8 bit cell index; namely, (r7 r6 r5 g7 g6 g5 b7 b6). This is not in an octcube format, but it doesn't matter. There are no storage requirements. We could keep a running average of the center of each sample in each cluster, rather than using the center of the cell, but this is just extra work, esp. with dithering.
This method gives surprisingly good results with dithering. However, without dithering, the loss of color accuracy is evident in regions that are very light or that have subtle blending of colors.
Input: pixs (32 bpp rgb)
level (significant bits for each of r,g,b)
Return: pixd (rgb; quantized to octcube centers), or null on error
Notes: (1) Unlike the other color quantization functions, this one generates an rgb image. (2) The pixel values are quantized to the center of each octcube (at the specified level) containing the pixel. They are not quantized to the average of the pixels in that octcube.
Input: pixd (<optional>; can be null, equal to pixs,
or different from pixs)
pixs (all depths)
Return: pixd, or null on error
Notes: (1) This does a left-right flip of the image, which is equivalent to a rotation out of the plane about a vertical line through the image center. (2) There are 3 cases for input: (a) pixd == null (creates a new pixd) (b) pixd == pixs (in-place operation) (c) pixd != pixs (existing pixd) (3) For clarity, use these three patterns, respectively: (a) pixd = pixFlipLR(NULL, pixs); (b) pixFlipLR(pixs, pixs); (c) pixFlipLR(pixd, pixs); (4) If an existing pixd is not the same size as pixs, the image data will be reallocated. (5) The pixel access routines allow a trivial implementation. However, for d < 8, it is more efficient to right-justify each line to a 32-bit boundary and then extract bytes and do pixel reversing. In those cases, as in the 180 degree rotation, we right-shift the data (if necessary) to right-justify on the 32 bit boundary, and then read the bytes off each raster line in reverse order, reversing the pixels in each byte using a table. These functions for 1, 2 and 4 bpp were tested against the "trivial" version (shown here for 4 bpp): for (i = 0; i < h; i++) { line = data + i * wpl; memcpy(buffer, line, bpl); for (j = 0; j < w; j++) { val = GET_DATA_QBIT(buffer, w - 1 - j); SET_DATA_QBIT(line, j, val); } }
Input: pix
(x,y) pixel coords
Return: 0 if OK; 1 on error
Input: pixd (<optional>; can be null, equal to pixs,
or different from pixs)
pixs (all depths)
Return: pixd, or null on error
Notes: (1) This does a top-bottom flip of the image, which is equivalent to a rotation out of the plane about a horizontal line through the image center. (2) There are 3 cases for input: (a) pixd == null (creates a new pixd) (b) pixd == pixs (in-place operation) (c) pixd != pixs (existing pixd) (3) For clarity, use these three patterns, respectively: (a) pixd = pixFlipTB(NULL, pixs); (b) pixFlipTB(pixs, pixs); (c) pixFlipTB(pixd, pixs); (4) If an existing pixd is not the same size as pixs, the image data will be reallocated. (5) This is simple and fast. We use the memcpy function to do all the work on aligned data, regardless of pixel depth.
Input: pixd (usual 3 choices: null, == pixs, != pixs)
pixs (1 bpp)
operation (L_MORPH_DILATE, L_MORPH_ERODE,
L_MORPH_OPEN, L_MORPH_CLOSE)
sel name
Return: pixd
Notes: (1) This is a dwa operation, and the Sels must be limited in size to not more than 31 pixels about the origin. (2) A border of appropriate size (32 pixels, or 64 pixels for safe closing with asymmetric b.c.) must be added before this function is called. (3) This handles all required setting of the border pixels before erosion and dilation. (4) The closing operation is safe; no pixels can be removed near the boundary.
Input: pixd (usual 3 choices: null, == pixs, != pixs)
pixs (1 bpp)
operation (L_MORPH_DILATE, L_MORPH_ERODE,
L_MORPH_OPEN, L_MORPH_CLOSE)
sel name
Return: pixd
Notes: (1) This is a dwa operation, and the Sels must be limited in size to not more than 31 pixels about the origin. (2) A border of appropriate size (32 pixels, or 64 pixels for safe closing with asymmetric b.c.) must be added before this function is called. (3) This handles all required setting of the border pixels before erosion and dilation. (4) The closing operation is safe; no pixels can be removed near the boundary.
Input: pix (1 bpp)
&fract (<return> fraction of ON pixels)
Return: 0 if OK; 1 on error
Input: pix1 (1 bpp)
pix2 (1 bpp)
&fract (<return> fraction of fg pixels in 1 that are
aligned with the fg of 2)
Return: 0 if OK, 1 on error.
Notes: (1) This gives the fraction of fg pixels in pix1 that are in the intersection (i.e., under the fg) of pix2: |1 & 2|/|1|, where |...| means the number of fg pixels. Note that this is different from the situation where pix1 and pix2 are reversed. (2) Both pix1 and pix2 are registered to the UL corners. A warning is issued if pix1 and pix2 have different sizes. (3) This can also be used to find the fraction of fg pixels in pix1 that are NOT under the fg of pix2: 1.0 - |1 & 2|/|1| (4) If pix1 or pix2 are empty, this returns @fract = 0.0. (5) For example, pix2 could be a frame around the outside of the image, made from pixMakeFrameMask().
Notes: (1) This frees the data and sets the pix data ptr to null. It should be used before pixSetData() in the situation where you want to free any existing data before doing a subsequent assignment with pixSetData().
| LEPT_DLL PIX* pixGammaTRC | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| l_float32 | gamma, | ||
| l_int32 | minval, | ||
| l_int32 | maxval | ||
| ) |
Input: pixd (<optional> null or equal to pixs)
pixs (8 or 32 bpp; or 2, 4 or 8 bpp with colormap)
gamma (gamma correction; must be > 0.0)
minval (input value that gives 0 for output; can be < 0)
maxval (input value that gives 255 for output; can be > 255)
Return: pixd always
Notes: (1) pixd must either be null or equal to pixs. For in-place operation, set pixd == pixs: pixGammaTRC(pixs, pixs, ...); To get a new image, set pixd == null: pixd = pixGammaTRC(NULL, pixs, ...); (2) If pixs is colormapped, the colormap is transformed, either in-place or in a copy of pixs. (3) We use a gamma mapping between minval and maxval. (4) If gamma < 1.0, the image will appear darker; if gamma > 1.0, the image will appear lighter; (5) If gamma = 1.0 and minval = 0 and maxval = 255, no enhancement is performed; return a copy unless in-place, in which case this is a no-op. (6) For color images that are not colormapped, the mapping is applied to each component. (7) minval and maxval are not restricted to the interval [0, 255]. If minval < 0, an input value of 0 is mapped to a nonzero output. This will turn black to gray. If maxval > 255, an input value of 255 is mapped to an output value less than 255. This will turn white (e.g., in the background) to gray. (8) Increasing minval darkens the image. (9) Decreasing maxval bleaches the image. (10) Simultaneously increasing minval and decreasing maxval will darken the image and make the colors more intense; e.g., minval = 50, maxval = 200. (11) See numaGammaTRC() for further examples of use.
| LEPT_DLL PIX* pixGammaTRCMasked | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| PIX * | pixm, | ||
| l_float32 | gamma, | ||
| l_int32 | minval, | ||
| l_int32 | maxval | ||
| ) |
Input: pixd (<optional> null or equal to pixs)
pixs (8 or 32 bpp; not colormapped)
pixm (<optional> null or 1 bpp)
gamma (gamma correction; must be > 0.0)
minval (input value that gives 0 for output; can be < 0)
maxval (input value that gives 255 for output; can be > 255)
Return: pixd always
Notes: (1) Same as pixGammaTRC() except mapping is optionally over a subset of pixels described by pixm. (2) Masking does not work for colormapped images. (3) See pixGammaTRC() for details on how to use the parameters.
| LEPT_DLL PIX* pixGammaTRCWithAlpha | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| l_float32 | gamma, | ||
| l_int32 | minval, | ||
| l_int32 | maxval | ||
| ) |
Input: pixd (<optional> null or equal to pixs)
pixs (32 bpp)
gamma (gamma correction; must be > 0.0)
minval (input value that gives 0 for output; can be < 0)
maxval (input value that gives 255 for output; can be > 255)
Return: pixd always
Notes: (1) See usage notes in pixGammaTRC(). (2) This version saves the alpha channel. It is only valid for 32 bpp (no colormap), and is a bit slower.
| LEPT_DLL l_int32 pixGenerateCIData | ( | PIX * | pixs, |
| l_int32 | type, | ||
| l_int32 | quality, | ||
| l_int32 | ascii85, | ||
| L_COMP_DATA ** | pcid | ||
| ) |
Input: pixs (8 or 32 bpp, no colormap)
type (L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE)
quality (used for jpeg only; 0 for default (75))
ascii85 (0 for binary; 1 for ascii85-encoded)
&cid (<return> compressed data)
Return: 0 if OK, 1 on error
Notes: (1) Set ascii85:
Input: pta
w, h (of pix)
Return: pix (1 bpp), or null on error
Notes: (1) Points are rounded to nearest ints. (2) Any points outside (w,h) are silently discarded. (3) Output 1 bpp pix has values 1 for each point in the pta.
| LEPT_DLL PIX* pixGenerateMaskByBand | ( | PIX * | pixs, |
| l_int32 | lower, | ||
| l_int32 | upper, | ||
| l_int32 | inband, | ||
| l_int32 | usecmap | ||
| ) |
Input: pixs (2, 4 or 8 bpp, or colormapped)
lower, upper (two pixel values from which a range, either
between (inband) or outside of (!inband),
determines which pixels in pixs cause us to
set a 1 in the dest mask)
inband (1 for finding pixels in [lower, upper];
0 for finding pixels in [0, lower) union (upper, 255])
usecmap (1 to retain cmap values; 0 to convert to gray)
Return: pixd (1 bpp), or null on error
Notes: (1) Generates a 1 bpp mask pixd, the same size as pixs, where the fg pixels in the mask are those either within the specified band (for inband == 1) or outside the specified band (for inband == 0). (2) If pixs is colormapped, @usecmap determines if the colormap values are used, or if the colormap is removed to gray and the gray values are used. For the latter, it generates an approximate grayscale value for each pixel, and then looks for gray pixels whose values are inside (for inband == 1) or outside (for inband == 0) the band [@lower, @upper].
| LEPT_DLL PIX* pixGenerateMaskByBand32 | ( | PIX * | pixs, |
| l_uint32 | refval, | ||
| l_int32 | delm, | ||
| l_int32 | delp, | ||
| l_float32 | fractm, | ||
| l_float32 | fractp | ||
| ) |
Input: pixs (32 bpp)
refval (reference rgb value)
delm (max amount below the ref value for any component)
delp (max amount above the ref value for any component)
fractm (fractional amount below ref value for all components)
fractp (fractional amount above ref value for all components)
Return: pixd (1 bpp), or null on error
Notes: (1) Generates a 1 bpp mask pixd, the same size as pixs, where the fg pixels in the mask are those within a band of rgb values surrounding @refval. The band can be chosen in two ways for each component: (a) Use (@delm, @delp) to specify how many levels down and up; (b) Use (@fractm, @fractp) to specify the fractional distance toward 0 and 255, respectively. Note that @delm and @delp must be in [0 ... 255], whereas @fractm and @fractp must be in [0.0 - 1.0]. (2) Either (@delm, @delp) or (@fractm, @fractp) can be used. Set each value in the other pair to 0.
| LEPT_DLL PIX* pixGenerateMaskByDiscr32 | ( | PIX * | pixs, |
| l_uint32 | refval1, | ||
| l_uint32 | refval2, | ||
| l_int32 | distflag | ||
| ) |
Input: pixs (32 bpp)
refval1 (reference rgb value)
refval2 (reference rgb value)
distflag (L_MANHATTAN_DISTANCE, L_EUCLIDEAN_DISTANCE)
Return: pixd (1 bpp), or null on error
Notes: (1) Generates a 1 bpp mask pixd, the same size as pixs, where the fg pixels in the mask are those where the pixel in pixs is "closer" to refval1 than to refval2. (2) "Closer" can be defined in several ways, such as:
Input: pixs (2, 4 or 8 bpp, or colormapped)
val (of pixels for which we set 1 in dest)
usecmap (1 to retain cmap values; 0 to convert to gray)
Return: pixd (1 bpp), or null on error
Notes: (1) @val is the pixel value that we are selecting. It can be either a gray value or a colormap index. (2) If pixs is colormapped, @usecmap determines if the colormap index values are used, or if the colormap is removed to gray and the gray values are used. For the latter, it generates an approximate grayscale value for each pixel, and then looks for gray pixels with the value @val.
Input: pixs (1 bpp)
width (of boundary line)
Return: pta, or null on error
Notes: (1) Similar to ptaGetBoundaryPixels(), except here:
| LEPT_DLL SEL* pixGenerateSelBoundary | ( | PIX * | pixs, |
| l_int32 | hitdist, | ||
| l_int32 | missdist, | ||
| l_int32 | hitskip, | ||
| l_int32 | missskip, | ||
| l_int32 | topflag, | ||
| l_int32 | botflag, | ||
| l_int32 | leftflag, | ||
| l_int32 | rightflag, | ||
| PIX ** | ppixe | ||
| ) |
Input: pixs (1 bpp, typically small, to be used as a pattern)
hitdist (min distance from fg boundary pixel)
missdist (min distance from bg boundary pixel)
hitskip (number of boundary pixels skipped between hits)
missskip (number of boundary pixels skipped between misses)
topflag (flag for extra pixels of bg added above)
botflag (flag for extra pixels of bg added below)
leftflag (flag for extra pixels of bg added to left)
rightflag (flag for extra pixels of bg added to right)
&pixe (<optional return> input pix expanded by extra pixels)
Return: sel (hit-miss for input pattern), or null on error
Notes: (1) All fg elements selected are exactly hitdist pixels away from the nearest fg boundary pixel, and ditto for bg elements. Valid inputs of hitdist and missdist are 0, 1, 2, 3 and 4. For example, a hitdist of 0 puts the hits at the fg boundary. Usually, the distances should be > 0 to avoid the effect of noise at the boundary. (2) Set hitskip < 0 if no hits are to be used. Ditto for missskip. If both hitskip and missskip are < 0, the sel would be empty, and NULL is returned. (3) The 4 flags determine whether the sel is increased on that side to allow bg misses to be placed all along that boundary. The increase in sel size on that side is the minimum necessary to allow the misses to be placed at missdist. For text characters, the topflag and botflag are typically set to 1, and the leftflag and rightflag to 0. (4) The input pix, as extended by the extra pixels on selected sides, can optionally be returned. For debugging, call pixDisplayHitMissSel() to visualize the hit-miss sel superimposed on the generating bitmap. (5) This is probably the best of the three sel generators, in the sense that you have the most flexibility with the smallest number of hits and misses.
| LEPT_DLL SEL* pixGenerateSelRandom | ( | PIX * | pixs, |
| l_float32 | hitfract, | ||
| l_float32 | missfract, | ||
| l_int32 | distance, | ||
| l_int32 | toppix, | ||
| l_int32 | botpix, | ||
| l_int32 | leftpix, | ||
| l_int32 | rightpix, | ||
| PIX ** | ppixe | ||
| ) |
Input: pixs (1 bpp, typically small, to be used as a pattern)
hitfract (fraction of allowable fg pixels that are hits)
missfract (fraction of allowable bg pixels that are misses)
distance (min distance from boundary pixel; use 0 for default)
toppix (number of extra pixels of bg added above)
botpix (number of extra pixels of bg added below)
leftpix (number of extra pixels of bg added to left)
rightpix (number of extra pixels of bg added to right)
&pixe (<optional return> input pix expanded by extra pixels)
Return: sel (hit-miss for input pattern), or null on error
Notes: (1) Either of hitfract and missfract can be zero. If both are zero, the sel would be empty, and NULL is returned. (2) No elements are selected that are less than 'distance' pixels away from a boundary pixel of the same color. This makes the match much more robust to edge noise. Valid inputs of 'distance' are 0, 1, 2, 3 and 4. If distance is either 0 or greater than 4, we reset it to the default value. (3) The 4 numbers for adding rectangles of pixels outside the fg can be used if the pattern is expected to be surrounded by bg (white) pixels. On the other hand, if the pattern may be near other fg (black) components on some sides, use 0 for those sides. (4) The input pix, as extended by the extra pixels on selected sides, can optionally be returned. For debugging, call pixDisplayHitMissSel() to visualize the hit-miss sel superimposed on the generating bitmap.
| LEPT_DLL SEL* pixGenerateSelWithRuns | ( | PIX * | pixs, |
| l_int32 | nhlines, | ||
| l_int32 | nvlines, | ||
| l_int32 | distance, | ||
| l_int32 | minlength, | ||
| l_int32 | toppix, | ||
| l_int32 | botpix, | ||
| l_int32 | leftpix, | ||
| l_int32 | rightpix, | ||
| PIX ** | ppixe | ||
| ) |
Input: pixs (1 bpp, typically small, to be used as a pattern)
nhlines (number of hor lines along which elements are found)
nvlines (number of vert lines along which elements are found)
distance (min distance from boundary pixel; use 0 for default)
minlength (min runlength to set hit or miss; use 0 for default)
toppix (number of extra pixels of bg added above)
botpix (number of extra pixels of bg added below)
leftpix (number of extra pixels of bg added to left)
rightpix (number of extra pixels of bg added to right)
&pixe (<optional return> input pix expanded by extra pixels)
Return: sel (hit-miss for input pattern), or null on error
Notes: (1) The horizontal and vertical lines along which elements are selected are roughly equally spaced. The actual locations of the hits and misses are the centers of respective run-lengths. (2) No elements are selected that are less than 'distance' pixels away from a boundary pixel of the same color. This makes the match much more robust to edge noise. Valid inputs of 'distance' are 0, 1, 2, 3 and 4. If distance is either 0 or greater than 4, we reset it to the default value. (3) The 4 numbers for adding rectangles of pixels outside the fg can be used if the pattern is expected to be surrounded by bg (white) pixels. On the other hand, if the pattern may be near other fg (black) components on some sides, use 0 for those sides. (4) The pixels added to a side allow you to have miss elements there. There is a constraint between distance, minlength, and the added pixels for this to work. We illustrate using the default values. If you add 5 pixels to the top, and use a distance of 1, then you end up with a vertical run of at least 4 bg pixels along the top edge of the image. If you use a minimum runlength of 3, each vertical line will always find a miss near the center of its run. However, if you use a minimum runlength of 5, you will not get a miss on every vertical line. As another example, if you have 7 added pixels and a distance of 2, you can use a runlength up to 5 to guarantee that the miss element is recorded. We give a warning if the constraint does not guarantee a miss element outside the image proper. (5) The input pix, as extended by the extra pixels on selected sides, can optionally be returned. For debugging, call pixDisplayHitMissSel() to visualize the hit-miss sel superimposed on the generating bitmap.
Input: pixs (1 bpp, assumed to be 150 to 200 ppi)
&pixtext (<optional return> text part of pixs)
&htfound (<optional return> 1 if the mask is not empty)
debug (flag: 1 for debug output)
Return: pixd (halftone mask), or null on error
Notes: (1) This is not intended to work on small thumbnails. The dimensions of pixs must be at least MinWidth x MinHeight.
| LEPT_DLL l_int32 pixGenPhotoHistos | ( | PIX * | pixs, |
| BOX * | box, | ||
| l_int32 | factor, | ||
| l_float32 | thresh, | ||
| l_int32 | nx, | ||
| l_int32 | ny, | ||
| NUMAA ** | pnaa, | ||
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 | debugflag | ||
| ) |
Input: pixs (depth > 1 bpp; colormap OK)
box (<optional> region to be selected; can be null)
factor (subsampling; >= 1)
thresh (threshold for photo/text; use 0 for default)
nx, ny (number of subregions to use for histograms; e.g. 3x3)
&naa (<return> nx * ny 256-entry gray histograms)
&w (<return> width of image used to make histograms)
&h (<return> height of image used to make histograms)
debugflag (1 for debug output; 0 for no debugging)
Return: 0 if OK, 1 on error
Notes: (1) This crops and converts to 8 bpp if necessary. It adds a minimal white boundary such that the centroid of the photo-inverted image is in the center. This allows automatic alignment with histograms of other image regions. (2) The white value in the histogram is removed, because of the padding. (3) Use 0 for conservative default (1.3) for thresh. (4) For an efficient representation of the histogram, normalize using a multiplicative factor so that the number in the maximum bucket is 255. It then takes 256 bytes to store. (5) With debug on, you get a pdf that shows, for each tile, the images and histograms.
Input: pixs (1 bpp, textline mask, assumed to be 150 to 200 ppi)
pixvws (vertical white space mask)
debug (flag: 1 for debug output)
Return: pixd (textblock mask), or null on error
Notes: (1) Both the input masks (textline and vertical white space) and the returned textblock mask are at the same resolution. (2) This is not intended to work on small thumbnails. The dimensions of pixs must be at least MinWidth x MinHeight. (3) The result is somewhat noisy, in that small "blocks" of text may be included. These can be removed by post-processing, using, e.g., pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER, L_SELECT_IF_GTE, NULL);
Input: pixs (1 bpp, assumed to be 150 to 200 ppi)
&pixvws (<return> vertical whitespace mask)
&tlfound (<optional return> 1 if the mask is not empty)
debug (flag: 1 for debug output)
Return: pixd (textline mask), or null on error
Notes: (1) The input pixs should be deskewed. (2) pixs should have no halftone pixels. (3) This is not intended to work on small thumbnails. The dimensions of pixs must be at least MinWidth x MinHeight. (4) Both the input image and the returned textline mask are at the same resolution.
Input: pixs (1 bpp) Return: ccborda, or null on error
Input: pix
&format
Return: 0 if OK, 1 on error
Notes: (1) The output formats are restricted to tiff, jpeg and png because these are the most commonly used image formats and the ones that are typically installed with leptonica. (2) This decides what compression to use based on the pix. It chooses tiff-g4 if 1 bpp without a colormap, jpeg with quality 75 if grayscale, rgb or rgba (where it loses the alpha layer), and lossless png for all other situations.
| LEPT_DLL l_int32 pixGetAverageMasked | ( | PIX * | pixs, |
| PIX * | pixm, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | factor, | ||
| l_int32 | type, | ||
| l_float32 * | pval | ||
| ) |
Input: pixs (8 or 16 bpp, or colormapped)
pixm (<optional> 1 bpp mask over which average is to be taken;
use all pixels if null)
x, y (UL corner of pixm relative to the UL corner of pixs;
can be < 0)
factor (subsampling factor; >= 1)
type (L_MEAN_ABSVAL, L_ROOT_MEAN_SQUARE,
L_STANDARD_DEVIATION, L_VARIANCE)
&val (<return> measured value of given 'type')
Return: 0 if OK, 1 on error
Notes: (1) Use L_MEAN_ABSVAL to get the average value of pixels in pixs that are under the fg of the optional mask. If the mask is null, it finds the average of the pixels in pixs. (2) Likewise, use L_ROOT_MEAN_SQUARE to get the rms value of pixels in pixs, either masked or not; L_STANDARD_DEVIATION to get the standard deviation from the mean of the pixels; L_VARIANCE to get the average squared difference from the expected value. The variance is the square of the stdev. For the standard deviation, we use sqrt(<(<x> - x)^2>) = sqrt(<x^2> - <x>^2) (3) Set the subsampling @factor > 1 to reduce the amount of computation. (4) Clipping of pixm (if it exists) to pixs is done in the inner loop. (5) Input x,y are ignored unless pixm exists.
| LEPT_DLL l_int32 pixGetAverageMaskedRGB | ( | PIX * | pixs, |
| PIX * | pixm, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | factor, | ||
| l_int32 | type, | ||
| l_float32 * | prval, | ||
| l_float32 * | pgval, | ||
| l_float32 * | pbval | ||
| ) |
Input: pixs (32 bpp, or colormapped)
pixm (<optional> 1 bpp mask over which average is to be taken;
use all pixels if null)
x, y (UL corner of pixm relative to the UL corner of pixs;
can be < 0)
factor (subsampling factor; >= 1)
type (L_MEAN_ABSVAL, L_ROOT_MEAN_SQUARE,
L_STANDARD_DEVIATION, L_VARIANCE)
&rval (<optional return> measured red value of given 'type')
&gval (<optional return> measured green value of given 'type')
&bval (<optional return> measured blue value of given 'type')
Return: 0 if OK, 1 on error
Notes: (1) For usage, see pixGetAverageMasked(). (2) If there is a colormap, it is removed before the 8 bpp component images are extracted.
Input: pixs (8 bpp, or colormapped)
sx, sy (tile size; must be at least 2 x 2)
type (L_MEAN_ABSVAL, L_ROOT_MEAN_SQUARE, L_STANDARD_DEVIATION)
Return: pixd (average values in each tile), or null on error
Notes: (1) Only computes for tiles that are entirely contained in pixs. (2) Use L_MEAN_ABSVAL to get the average abs value within the tile; L_ROOT_MEAN_SQUARE to get the rms value within each tile; L_STANDARD_DEVIATION to get the standard dev. from the average within each tile. (3) If colormapped, converts to 8 bpp gray.
| LEPT_DLL l_int32 pixGetAverageTiledRGB | ( | PIX * | pixs, |
| l_int32 | sx, | ||
| l_int32 | sy, | ||
| l_int32 | type, | ||
| PIX ** | ppixr, | ||
| PIX ** | ppixg, | ||
| PIX ** | ppixb | ||
| ) |
Input: pixs (32 bpp, or colormapped)
sx, sy (tile size; must be at least 2 x 2)
type (L_MEAN_ABSVAL, L_ROOT_MEAN_SQUARE, L_STANDARD_DEVIATION)
&pixr (<optional return> tiled 'average' of red component)
&pixg (<optional return> tiled 'average' of green component)
&pixb (<optional return> tiled 'average' of blue component)
Return: 0 if OK, 1 on error
Notes: (1) For usage, see pixGetAverageTiled(). (2) If there is a colormap, it is removed before the 8 bpp component images are extracted.
Input: pixs (8 bpp, 32 bpp or colormapped)
factor (subsampling factor; integer >= 1)
type (L_MEAN_ABSVAL, L_ROOT_MEAN_SQUARE,
L_STANDARD_DEVIATION, L_VARIANCE)
&value (<return> pixel value corresponding to input type)
Return: 0 if OK, 1 on error
Notes: (1) Simple function to get average statistical values of an image.
| LEPT_DLL l_int32 pixGetBackgroundGrayMap | ( | PIX * | pixs, |
| PIX * | pixim, | ||
| l_int32 | sx, | ||
| l_int32 | sy, | ||
| l_int32 | thresh, | ||
| l_int32 | mincount, | ||
| PIX ** | ppixd | ||
| ) |
Input: pixs (8 bpp grayscale; not cmapped)
pixim (<optional> 1 bpp 'image' mask; can be null; it
should not have all foreground pixels)
sx, sy (tile size in pixels)
thresh (threshold for determining foreground)
mincount (min threshold on counts in a tile)
&pixd (<return> 8 bpp grayscale map)
Return: 0 if OK, 1 on error
Notes: (1) The background is measured in regions that don't have images. It is then propagated into the image regions, and finally smoothed in each image region.
| LEPT_DLL l_int32 pixGetBackgroundGrayMapMorph | ( | PIX * | pixs, |
| PIX * | pixim, | ||
| l_int32 | reduction, | ||
| l_int32 | size, | ||
| PIX ** | ppixm | ||
| ) |
pixGetBackgroundGrayMapMorph()
Input: pixs (8 bpp grayscale; not cmapped)
pixim (<optional> 1 bpp 'image' mask; can be null; it
should not have all foreground pixels)
reduction (factor at which closing is performed)
size (of square Sel for the closing; use an odd number)
&pixm (<return> grayscale map)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixGetBackgroundRGBMap | ( | PIX * | pixs, |
| PIX * | pixim, | ||
| PIX * | pixg, | ||
| l_int32 | sx, | ||
| l_int32 | sy, | ||
| l_int32 | thresh, | ||
| l_int32 | mincount, | ||
| PIX ** | ppixmr, | ||
| PIX ** | ppixmg, | ||
| PIX ** | ppixmb | ||
| ) |
Input: pixs (32 bpp rgb)
pixim (<optional> 1 bpp 'image' mask; can be null; it
should not have all foreground pixels)
pixg (<optional> 8 bpp grayscale version; can be null)
sx, sy (tile size in pixels)
thresh (threshold for determining foreground)
mincount (min threshold on counts in a tile)
&pixmr, &pixmg, &pixmb (<return> rgb maps)
Return: 0 if OK, 1 on error
Notes: (1) If pixg, which is a grayscale version of pixs, is provided, use this internally to generate the foreground mask. Otherwise, a grayscale version of pixs will be generated from the green component only, used, and destroyed.
| LEPT_DLL l_int32 pixGetBackgroundRGBMapMorph | ( | PIX * | pixs, |
| PIX * | pixim, | ||
| l_int32 | reduction, | ||
| l_int32 | size, | ||
| PIX ** | ppixmr, | ||
| PIX ** | ppixmg, | ||
| PIX ** | ppixmb | ||
| ) |
Input: pixs (32 bpp rgb)
pixim (<optional> 1 bpp 'image' mask; can be null; it
should not have all foreground pixels)
reduction (factor at which closing is performed)
size (of square Sel for the closing; use an odd number)
&pixmr (<return> red component map)
&pixmg (<return> green component map)
&pixmb (<return> blue component map)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixGetBinnedColor | ( | PIX * | pixs, |
| PIX * | pixg, | ||
| l_int32 | factor, | ||
| l_int32 | nbins, | ||
| NUMA * | nalut, | ||
| l_uint32 ** | pcarray, | ||
| l_int32 | debugflag | ||
| ) |
Input: pixs (32 bpp)
pixg (8 bpp grayscale version of pixs)
factor (sampling factor along pixel counting direction)
nbins (number of intensity bins)
nalut (LUT for mapping from intensity to bin number)
&carray (<return> array of average color values in each bin)
debugflag (1 to display output debug plots of color
components; 2 to write them as png to file)
Return: 0 if OK; 1 on error
Notes: (1) This takes a color image, a grayscale (intensity) version, a LUT from intensity to bin number, and the number of bins. It computes the average color for pixels whose intensity is in each bin. This is returned as an array of l_uint32 colors in our standard RGBA ordering. (2) This function generates equal width intensity bins and finds the average color in each bin. Compare this with pixGetRankColorArray(), which rank orders the pixels by the value of the selected component in each pixel, sets up bins with equal population (not intensity width!), and gets the average color in each bin.
| LEPT_DLL l_int32 pixGetBinnedComponentRange | ( | PIX * | pixs, |
| l_int32 | nbins, | ||
| l_int32 | factor, | ||
| l_int32 | color, | ||
| l_int32 * | pminval, | ||
| l_int32 * | pmaxval, | ||
| l_uint32 ** | pcarray, | ||
| l_int32 | fontsize | ||
| ) |
Input: pixs (32 bpp rgb)
nbins (number of equal population bins; must be > 1)
factor (subsampling factor; >= 1)
color (L_SELECT_RED, L_SELECT_GREEN or L_SELECT_BLUE)
&minval (<optional return> minimum value of component)
&maxval (<optional return> maximum value of component)
&carray (<optional return> color array of bins)
fontsize (<optional> 0 for no debug; for debug, valid set
is {4,6,8,10,12,14,16,18,20}.)
Return: 0 if OK, 1 on error
Notes: (1) This returns the min and max average values of the selected color component in the set of rank bins, where the ranking is done using the specified component.
Input: pixs (all depths; cmap ok)
op (L_GET_BLACK_VAL, L_GET_WHITE_VAL)
&val (<return> pixel value)
Return: 0 if OK; 1 on error
Notes: (1) Side effect. For a colormapped image, if the requested color is not present and there is room to add it in the cmap, it is added and the new index is returned. If there is no room, the index of the closest color in intensity is returned.
Input: pixs (1 bpp, one 8-connected component)
box (xul, yul, width, height) in global coords
Return: ccbord, or null on error
Notes: (1) We are finding the exterior and interior borders of an 8-connected component. This should be used on a pix that has exactly one 8-connected component. (2) Typically, pixs is a c.c. in some larger pix. The input box gives its location in global coordinates. This box is saved, as well as the boxes for the borders of any holes within the c.c., but the latter are given in relative coords within the c.c. (3) The calculations for the exterior border are done on a pix with a 1-pixel added border, but the saved pixel coordinates are the correct (relative) ones for the input pix (without a 1-pixel border) (4) For the definition of the three tables – xpostab[], ypostab[] and qpostab[] – see above where they are defined.
Input: pixs (colormapped: d = 2, 4 or 8)
factor (subsampling factor; integer >= 1)
Return: na (histogram of cmap indices), or null on error
Notes: (1) This generates a histogram of colormap pixel indices, and is of size 2^d. (2) Set the subsampling @factor > 1 to reduce the amount of computation.
Input: pixs (colormapped: d = 2, 4 or 8)
box (<optional> region over which histogram is to be computed;
use full image if null)
factor (subsampling factor; integer >= 1)
Return: na (histogram), or null on error
Notes: (1) This generates a histogram of colormap pixel indices, and is of size 2^d. (2) Set the subsampling @factor > 1 to reduce the amount of computation. (3) Clipping to the box is done in the inner loop.
| LEPT_DLL NUMA* pixGetCmapHistogramMasked | ( | PIX * | pixs, |
| PIX * | pixm, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | factor | ||
| ) |
Input: pixs (colormapped: d = 2, 4 or 8)
pixm (<optional> 1 bpp mask over which histogram is
to be computed; use all pixels if null)
x, y (UL corner of pixm relative to the UL corner of pixs;
can be < 0; these values are ignored if pixm is null)
factor (subsampling factor; integer >= 1)
Return: na (histogram), or null on error
Notes: (1) This generates a histogram of colormap pixel indices, and is of size 2^d. (2) Set the subsampling @factor > 1 to reduce the amount of computation. (3) Clipping of pixm to pixs is done in the inner loop.
| LEPT_DLL l_int32 pixGetColorHistogram | ( | PIX * | pixs, |
| l_int32 | factor, | ||
| NUMA ** | pnar, | ||
| NUMA ** | pnag, | ||
| NUMA ** | pnab | ||
| ) |
Input: pixs (rgb or colormapped)
factor (subsampling factor; integer >= 1)
&nar (<return> red histogram)
&nag (<return> green histogram)
&nab (<return> blue histogram)
Return: 0 if OK, 1 on error
Notes: (1) This generates a set of three 256 entry histograms, one for each color component (r,g,b). (2) Set the subsampling @factor > 1 to reduce the amount of computation.
| LEPT_DLL l_int32 pixGetColorHistogramMasked | ( | PIX * | pixs, |
| PIX * | pixm, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | factor, | ||
| NUMA ** | pnar, | ||
| NUMA ** | pnag, | ||
| NUMA ** | pnab | ||
| ) |
Input: pixs (32 bpp rgb, or colormapped)
pixm (<optional> 1 bpp mask over which histogram is
to be computed; use all pixels if null)
x, y (UL corner of pixm relative to the UL corner of pixs;
can be < 0; these values are ignored if pixm is null)
factor (subsampling factor; integer >= 1)
&nar (<return> red histogram)
&nag (<return> green histogram)
&nab (<return> blue histogram)
Return: 0 if OK, 1 on error
Notes: (1) This generates a set of three 256 entry histograms, one for each color component (r,g,b). (2) Set the subsampling @factor > 1 to reduce the amount of computation. (3) Clipping of pixm (if it exists) to pixs is done in the inner loop. (4) Input x,y are ignored unless pixm exists.
| LEPT_DLL l_int32 pixGetColorNearMaskBoundary | ( | PIX * | pixs, |
| PIX * | pixm, | ||
| BOX * | box, | ||
| l_int32 | dist, | ||
| l_uint32 * | pval, | ||
| l_int32 | debug | ||
| ) |
Input: pixs (32 bpp rgb)
pixm (1 bpp mask, full image)
box (region of mask; typically b.b. of a component)
dist (distance into BG from mask boundary to use)
&pval (<return> average pixel value)
debug (1 to output mask images)
Return: 0 if OK, 1 on error.
Notes: (1) This finds the average color in a set of pixels that are roughly a distance @dist from the c.c. boundary and in the background of the mask image.
| LEPT_DLL l_int32 pixGetColumnStats | ( | PIX * | pixs, |
| l_int32 | type, | ||
| l_int32 | nbins, | ||
| l_int32 | thresh, | ||
| l_float32 * | rowvect | ||
| ) |
Input: pixs (8 bpp; not cmapped)
type (L_MEAN_ABSVAL, L_MEDIAN_VAL, L_MODE_VAL, L_MODE_COUNT)
nbins (of histogram for median and mode; ignored for mean)
thresh (on histogram for mode val; ignored for all other types)
rowvect (vector of results gathered down the columns of pixs)
Return: 0 if OK, 1 on error
Notes: (1) This computes a row vector of statistics using each column of a Pix. The result is put in @rowvect. (2) The @thresh parameter works with L_MODE_VAL only, and sets a minimum occupancy of the mode bin. If the occupancy of the mode bin is less than @thresh, the mode value is returned as 0. To always return the actual mode value, set @thresh = 0. (3) What is the meaning of this @thresh parameter? For each column, the total count in the histogram is h, the image height. So @thresh, relative to h, gives a measure of the ratio of the bin width to the width of the distribution. The larger @thresh, the narrower the distribution must be for the mode value to be returned (instead of returning 0).
| LEPT_DLL l_int32 pixGetComponentRange | ( | PIX * | pixs, |
| l_int32 | factor, | ||
| l_int32 | color, | ||
| l_int32 * | pminval, | ||
| l_int32 * | pmaxval | ||
| ) |
Input: pixs (8 bpp grayscale, 32 bpp rgb, or colormapped)
factor (subsampling factor; >= 1; ignored if colormapped)
color (L_SELECT_RED, L_SELECT_GREEN or L_SELECT_BLUE)
&minval (<optional return> minimum value of component)
&maxval (<optional return> maximum value of component)
Return: 0 if OK, 1 on error
Notes: (1) If pixs is 8 bpp grayscale, the color selection type is ignored.
Notes: (1) This gives a new handle for the data. The data is still owned by the pix, so do not call LEPT_FREE() on it.
Input: pix1 (8 bpp gray or 32 bpp rgb, or colormapped)
pix2 (8 bpp gray or 32 bpp rgb, or colormapped)
factor (subsampling factor; use 0 or 1 for no subsampling)
Return: na (Numa of histogram of differences), or null on error
Notes: (1) The two images are aligned at the UL corner, and do not need to be the same size. If they are not the same size, the comparison will be made over overlapping pixels. (2) If there is a colormap, it is removed and the result is either gray or RGB depending on the colormap. (3) If RGB, the maximum difference between pixel components is saved in the histogram.
| LEPT_DLL l_int32 pixGetDifferenceStats | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| l_int32 | factor, | ||
| l_int32 | mindiff, | ||
| l_float32 * | pfractdiff, | ||
| l_float32 * | pavediff, | ||
| l_int32 | printstats | ||
| ) |
Input: pix1 (8 bpp gray or 32 bpp rgb, or colormapped)
pix2 (8 bpp gray or 32 bpp rgb, or colormapped)
factor (subsampling factor; use 0 or 1 for no subsampling)
mindiff (minimum pixel difference to be counted; > 0)
&fractdiff (<return> fraction of pixels with diff greater
than or equal to mindiff)
&avediff (<return> average difference of pixels with diff
greater than or equal to mindiff, less mindiff)
printstats (use 1 to print normalized histogram to stderr)
Return: 0 if OK, 1 on error
Notes: (1) This takes a threshold @mindiff and describes the difference between two images in terms of two numbers: (a) the fraction of pixels, @fractdiff, whose difference equals or exceeds the threshold @mindiff, and (b) the average value @avediff of the difference in pixel value for the pixels in the set given by (a), after you subtract @mindiff. The reason for subtracting @mindiff is that you then get a useful measure for the rate of falloff of the distribution for larger differences. For example, if @mindiff = 10 and you find that @avediff = 2.5, it says that of the pixels with diff >= 10, the average of their diffs is just mindiff + 2.5 = 12.5. This is a fast falloff in the histogram with increasing difference. (2) The two images are aligned at the UL corner, and do not need to be the same size. If they are not the same size, the comparison will be made over overlapping pixels. (3) If there is a colormap, it is removed and the result is either gray or RGB depending on the colormap. (4) If RGB, the maximum difference between pixel components is saved in the histogram.
Input: pix
&w, &h, &d (<optional return>; each can be null)
Return: 0 if OK, 1 on error
Input: pixs (1 bpp)
side (L_FROM_LEFT, L_FROM_RIGHT, L_FROM_TOP, L_FROM_BOT)
debugfile (<optional> displays constructed edge; use NULL
for no output)
Return: na (of fg edge pixel locations), or null on error
| LEPT_DLL l_int32 pixGetExtremeValue | ( | PIX * | pixs, |
| l_int32 | factor, | ||
| l_int32 | type, | ||
| l_int32 * | prval, | ||
| l_int32 * | pgval, | ||
| l_int32 * | pbval, | ||
| l_int32 * | pgrayval | ||
| ) |
Input: pixs (8 bpp grayscale, 32 bpp rgb, or colormapped)
factor (subsampling factor; >= 1; ignored if colormapped)
type (L_SELECT_MIN or L_SELECT_MAX)
&rval (<optional return> red component)
&gval (<optional return> green component)
&bval (<optional return> blue component)
&grayval (<optional return> min or max gray value)
Return: 0 if OK, 1 on error
Notes: (1) If pixs is grayscale, the result is returned in &grayval. Otherwise, if there is a colormap or d == 32, each requested color component is returned. At least one color component (address) must be input.
Input: pixs (1, 2, 4, 8, 16 bpp; can be colormapped)
factor (subsampling factor; integer >= 1)
Return: na (histogram), or null on error
Notes: (1) If pixs has a colormap, it is converted to 8 bpp gray. If you want a histogram of the colormap indices, use pixGetCmapHistogram(). (2) If pixs does not have a colormap, the output histogram is of size 2^d, where d is the depth of pixs. (3) For 8 bpp or colormapped input, this returns a 256-value histogram of pixel values. (4) Set the subsampling factor > 1 to reduce the amount of computation.
Input: pixs (8 bpp, or colormapped)
box (<optional> region over which histogram is to be computed;
use full image if null)
factor (subsampling factor; integer >= 1)
Return: na (histogram), or null on error
Notes: (1) If pixs is cmapped, it is converted to 8 bpp gray. If you want a histogram of the colormap indices, use pixGetCmapHistogramInRect(). (2) This always returns a 256-value histogram of pixel values. (3) Set the subsampling @factor > 1 to reduce the amount of computation.
| LEPT_DLL NUMA* pixGetGrayHistogramMasked | ( | PIX * | pixs, |
| PIX * | pixm, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | factor | ||
| ) |
Input: pixs (8 bpp, or colormapped)
pixm (<optional> 1 bpp mask over which histogram is
to be computed; use all pixels if null)
x, y (UL corner of pixm relative to the UL corner of pixs;
can be < 0; these values are ignored if pixm is null)
factor (subsampling factor; integer >= 1)
Return: na (histogram), or null on error
Notes: (1) If pixs is cmapped, it is converted to 8 bpp gray. If you want a histogram of the colormap indices, use pixGetCmapHistogramMasked(). (2) This always returns a 256-value histogram of pixel values. (3) Set the subsampling factor > 1 to reduce the amount of computation. (4) Clipping of pixm (if it exists) to pixs is done in the inner loop. (5) Input x,y are ignored unless pixm exists.
Input: pixs (any depth, colormap OK)
factor (subsampling factor; integer >= 1)
nx, ny (tiling; >= 1; typically small)
Return: naa (set of histograms), or null on error
Notes: (1) If pixs is cmapped, it is converted to 8 bpp gray. (2) This returns a set of 256-value histograms of pixel values. (3) Set the subsampling factor > 1 to reduce the amount of computation.
Input: ccb (the exterior border is already made)
pixs (for the connected component at hand)
box (for the specific hole border, in relative
coordinates to the c.c.)
xs, ys (first pixel on hole border, relative to c.c.)
Return: 0 if OK, 1 on error
Notes: (1) We trace out the hole border on pixs without adding a single-pixel border to pixs. (2) Therefore, all coordinates are relative within the c.c. (pixs). (3) The same position tables and stopping condition are used as for exterior borders.
| LEPT_DLL PIX* pixGetInvBackgroundMap | ( | PIX * | pixs, |
| l_int32 | bgval, | ||
| l_int32 | smoothx, | ||
| l_int32 | smoothy | ||
| ) |
Input: pixs (8 bpp grayscale; no colormap)
bgval (target bg val; typ. > 128)
smoothx (half-width of block convolution kernel width)
smoothy (half-width of block convolution kernel height)
Return: pixd (16 bpp), or null on error
Note:
| LEPT_DLL l_int32 pixGetLastOffPixelInRun | ( | PIX * | pixs, |
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | direction, | ||
| l_int32 * | ploc | ||
| ) |
| LEPT_DLL l_int32 pixGetLastOnPixelInRun | ( | PIX * | pixs, |
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | direction, | ||
| l_int32 * | ploc | ||
| ) |
Input: pix
&size (<optional return> array size, which is the pix height)
Return: array of line ptrs, or null on error
Notes: (1) This is intended to be used for fast random pixel access. For example, for an 8 bpp image, val = GET_DATA_BYTE(lines8[i], j); is equivalent to, but much faster than, pixGetPixel(pix, j, i, &val); (2) How much faster? For 1 bpp, it's from 6 to 10x faster. For 8 bpp, it's an amazing 30x faster. So if you are doing random access over a substantial part of the image, use this line ptr array. (3) When random access is used in conjunction with a stack, queue or heap, the overall computation time depends on the operations performed on each struct that is popped or pushed, and whether we are using a priority queue (O(logn)) or a queue or stack (O(1)). For example, for maze search, the overall ratio of time for line ptrs vs. pixGet/Set* is Maze type Type Time ratio binary queue 0.4 gray heap (priority queue) 0.6 (4) Because this returns a void** and the accessors take void*, the compiler cannot check the pointer types. It is strongly recommended that you adopt a naming scheme for the returned ptr arrays that indicates the pixel depth. (This follows the original intent of Simonyi's "Hungarian" application notation, where naming is used proactively to make errors visibly obvious.) By doing this, you can tell by inspection if the correct accessor is used. For example, for an 8 bpp pixg: void **lineg8 = pixGetLinePtrs(pixg, NULL); val = GET_DATA_BYTE(lineg8[i], j); // fast access; BYTE, 8 ... LEPT_FREE(lineg8); // don't forget this (5) These are convenient for accessing bytes sequentially in an 8 bpp grayscale image. People who write image processing code on 8 bpp images are accustomed to grabbing pixels directly out of the raster array. Note that for little endians, you first need to reverse the byte order in each 32-bit word. 
Here's a typical usage pattern: pixEndianByteSwap(pix); // always safe; no-op on big-endians l_uint8 **lineptrs = (l_uint8 **)pixGetLinePtrs(pix, NULL); pixGetDimensions(pix, &w, &h, NULL); for (i = 0; i < h; i++) { l_uint8 *line = lineptrs[i]; for (j = 0; j < w; j++) { val = line[j]; ... } } pixEndianByteSwap(pix); // restore big-endian order LEPT_FREE(lineptrs); This can be done even more simply as follows: l_uint8 **lineptrs = pixSetupByteProcessing(pix, &w, &h); for (i = 0; i < h; i++) { l_uint8 *line = lineptrs[i]; for (j = 0; j < w; j++) { val = line[j]; ... } } pixCleanupByteProcessing(pix, lineptrs);
| LEPT_DLL NUMA* pixGetLocalSkewAngles | ( | PIX * | pixs, |
| l_int32 | nslices, | ||
| l_int32 | redsweep, | ||
| l_int32 | redsearch, | ||
| l_float32 | sweeprange, | ||
| l_float32 | sweepdelta, | ||
| l_float32 | minbsdelta, | ||
| l_float32 * | pa, | ||
| l_float32 * | pb | ||
| ) |
Input: pixs
nslices (the number of horizontal overlapping slices; must
be larger than 1 and not exceed 20; use 0 for default)
redsweep (sweep reduction factor: 1, 2, 4 or 8;
use 0 for default value)
redsearch (search reduction factor: 1, 2, 4 or 8, and
not larger than redsweep; use 0 for default value)
sweeprange (half the full range, assumed about 0; in degrees;
use 0.0 for default value)
sweepdelta (angle increment of sweep; in degrees;
use 0.0 for default value)
minbsdelta (min binary search increment angle; in degrees;
use 0.0 for default value)
&a (<optional return> slope of skew as fctn of y)
&b (<optional return> intercept at y=0 of skew as fctn of y)
Return: naskew, or null on error
Notes: (1) The local skew is measured in a set of overlapping strips. We then do a least squares linear fit to get the slope and intercept parameters a and b in skew-angle = a * y + b (degrees) for the local skew as a function of raster line y. This is then used to make naskew, which can be interpreted as the computed skew angle (in degrees) at the left edge of each raster line. (2) naskew can then be used to find the baselines of text, because each text line has a baseline that should intersect the left edge of the image with the angle given by this array, evaluated at the raster line of intersection.
| LEPT_DLL l_int32 pixGetLocalSkewTransform | ( | PIX * | pixs, |
| l_int32 | nslices, | ||
| l_int32 | redsweep, | ||
| l_int32 | redsearch, | ||
| l_float32 | sweeprange, | ||
| l_float32 | sweepdelta, | ||
| l_float32 | minbsdelta, | ||
| PTA ** | pptas, | ||
| PTA ** | pptad | ||
| ) |
Input: pixs
nslices (the number of horizontal overlapping slices; must
be larger than 1 and not exceed 20; use 0 for default)
redsweep (sweep reduction factor: 1, 2, 4 or 8;
use 0 for default value)
redsearch (search reduction factor: 1, 2, 4 or 8, and
not larger than redsweep; use 0 for default value)
sweeprange (half the full range, assumed about 0; in degrees;
use 0.0 for default value)
sweepdelta (angle increment of sweep; in degrees;
use 0.0 for default value)
minbsdelta (min binary search increment angle; in degrees;
use 0.0 for default value)
&ptas (<return> 4 points in the source)
&ptad (<return> the corresponding 4 pts in the dest)
Return: 0 if OK, 1 on error
Notes: (1) This generates two pairs of points in the src, each pair corresponding to a pair of points that would lie along the same raster line in a transformed (dewarped) image. (2) The sets of 4 src and 4 dest points returned by this function can then be used, in a projective or bilinear transform, to remove keystoning in the src.
| LEPT_DLL l_int32 pixGetMaxValueInRect | ( | PIX * | pixs, |
| BOX * | box, | ||
| l_uint32 * | pmaxval, | ||
| l_int32 * | pxmax, | ||
| l_int32 * | pymax | ||
| ) |
Input: pixs (8, 16 or 32 bpp grayscale; no color space components)
box (<optional> region; set box = NULL to use entire pixs)
&maxval (<optional return> max value in region)
&xmax (<optional return> x location of max value)
&ymax (<optional return> y location of max value)
Return: 0 if OK, 1 on error
Notes: (1) This can be used to find the maximum and its location in a 2-dimensional histogram, where the x and y directions represent two color components (e.g., saturation and hue). (2) Note that here a 32 bpp pixs has pixel values that are simply numbers. They are not 8 bpp components in a colorspace.
Input: pix (1 bpp)
order (of moment, either 1 or 2)
Return: na of the moment (of the given order) of fg pixels, by column, or null on error
| LEPT_DLL l_int32 pixGetMostPopulatedColors | ( | PIX * | pixs, |
| l_int32 | sigbits, | ||
| l_int32 | factor, | ||
| l_int32 | ncolors, | ||
| l_uint32 ** | parray, | ||
| PIXCMAP ** | pcmap | ||
| ) |
pixGetMostPopulatedColors() Input: pixs (32 bpp rgb) sigbits (2-6, significant bits retained in the quantizer for each component of the input image) factor (subsampling factor; use 1 for no subsampling) ncolors (the number of most populated colors to select) &array (<optional return> array of colors, each as 0xrrggbb00) &cmap (<optional return> colormap of the colors) Return: 0 if OK, 1 on error
Notes: (1) This finds the @ncolors most populated cubes in rgb colorspace, where the cube size depends on @sigbits as cube side = (256 >> sigbits) (2) The rgb color components are found at the center of the cube. (3) The output array of colors can be displayed using pixDisplayColorArray(array, ncolors, ...);
Input: ccb (unfilled)
pixs (for the component at hand)
box (for the component, in global coords)
Return: 0 if OK, 1 on error
Notes: (1) the border is saved in relative coordinates within the c.c. (pixs). Because the calculation is done in pixb with added 1 pixel border, we must subtract 1 from each pixel value before storing it. (2) the stopping condition is that after the first pixel is returned to, the next pixel is the second pixel. Having these 2 pixels recur in sequence proves the path is closed, and we do not store the second pixel again.
Input: pixs (1 bpp, one 8-connected component)
box (<optional> of pixs, in global coordinates)
Return: pta (of outer border, in global coords), or null on error
Notes: (1) We are finding the exterior border of a single 8-connected component. (2) If box is NULL, the outline returned is in the local coords of the input pix. Otherwise, box is assumed to give the location of the pix in global coordinates, and the returned pta will be in those global coordinates.
Input: pixs (1 bpp) Return: ptaa (of outer borders, in global coords), or null on error
| LEPT_DLL l_int32 pixGetPerceptualDiff | ( | PIX * | pixs1, |
| PIX * | pixs2, | ||
| l_int32 | sampling, | ||
| l_int32 | dilation, | ||
| l_int32 | mindiff, | ||
| l_float32 * | pfract, | ||
| PIX ** | ppixdiff1, | ||
| PIX ** | ppixdiff2 | ||
| ) |
Input: pix1 (8 bpp gray or 32 bpp rgb, or colormapped)
pix2 (8 bpp gray or 32 bpp rgb, or colormapped)
sampling (subsampling factor; use 0 or 1 for no subsampling)
dilation (size of grayscale or color Sel; odd)
mindiff (minimum pixel difference to be counted; > 0)
&fract (<return> fraction of pixels with diff greater than
mindiff)
&pixdiff1 (<optional return> showing difference (gray or color))
&pixdiff2 (<optional return> showing pixels of sufficient diff)
Return: 0 if OK, 1 on error
Notes: (1) This takes 2 pix and determines, using 2 input parameters:
Input: pix
(x,y) pixel coords
&val (<return> pixel value)
Return: 0 if OK; 1 on error
Notes: (1) This returns the value in the data array. If the pix is colormapped, it returns the colormap index, not the rgb value. (2) Because of the function overhead and the parameter checking, this is much slower than using the GET_DATA_*() macros directly. Speed on a 1 Mpixel RGB image, using a 3 GHz machine:
Input: pix1, pix2 (8 or 32 bpp; no colormap)
factor (sampling factor; >= 1)
&psnr (<return> power signal/noise ratio difference)
Return: 0 if OK, 1 on error
Notes: (1) This computes the power S/N ratio, in dB, for the difference between two images. By convention, the power S/N for a grayscale image is ('log' == log base 10, and 'ln' == log base e): PSNR = 10 * log((255/MSE)^2) = 4.3429 * ln((255/MSE)^2) = -4.3429 * ln((MSE/255)^2) where MSE is the mean squared error. Here are some examples: MSE PSNR
10 28.1 3 38.6 1 48.1 0.1 68.1 (2) If pix1 and pix2 have the same pixel values, the MSE = 0.0 and the PSNR is infinity. For that case, this returns PSNR = 1000, which corresponds to the very small MSE of about 10^(-48).
Input: pix (any depth; can be colormapped)
&val (<optional return> pixel value)
&x (<optional return> x coordinate chosen; can be null)
&y (<optional return> y coordinate chosen; can be null)
Return: 0 if OK; 1 on error
Notes: (1) If the pix is colormapped, it returns the rgb value.
| LEPT_DLL l_int32 pixGetRankColorArray | ( | PIX * | pixs, |
| l_int32 | nbins, | ||
| l_int32 | type, | ||
| l_int32 | factor, | ||
| l_uint32 ** | pcarray, | ||
| l_int32 | debugflag, | ||
| l_int32 | fontsize | ||
| ) |
Input: pixs (32 bpp or cmapped)
nbins (number of equal population bins; must be > 1)
type (color selection flag)
factor (subsampling factor; integer >= 1)
&carray (<return> array of colors, ranked by intensity)
debugflag (1 to display color squares and plots of color
components; 2 to write them as png to file)
fontsize (<optional> 0 for no debug; for debug, valid set
is {4,6,8,10,12,14,16,18,20}. Ignored if
debugflag == 0. fontsize == 6 is typical.)
Return: 0 if OK, 1 on error
Notes: (1) The color selection flag is one of: L_SELECT_RED, L_SELECT_GREEN, L_SELECT_BLUE, L_SELECT_MIN, L_SELECT_MAX, L_SELECT_AVERAGE. (2) Then it finds the histogram of the selected component in each RGB pixel. For each of the @nbins sets of pixels, ordered by this component value, find the average color, and return this as a "rank color" array. The output array has @nbins colors. (3) Set the subsampling factor > 1 to reduce the amount of computation. Typically you want at least 10,000 pixels for reasonable statistics. (4) The rank color as a function of rank can then be found from rankint = (l_int32)(rank * (nbins - 1) + 0.5); extractRGBValues(array[rankint], &rval, &gval, &bval); where the rank is in [0.0 ... 1.0]. This function is meant to be simple and approximate. (5) Compare this with pixGetBinnedColor(), which generates equal width intensity bins and finds the average color in each bin.
Input: pixs (8 bpp, 32 bpp or colormapped)
factor (subsampling factor; integer >= 1)
rank (between 0.0 and 1.0; 1.0 is brightest, 0.0 is darkest)
&value (<return> pixel value corresponding to input rank)
Return: 0 if OK, 1 on error
Notes: (1) Simple function to get rank values of an image. For a color image, the median value (rank = 0.5) can be used to linearly remap the colors based on the median of a target image, using pixLinearMapToTargetColor().
| LEPT_DLL l_int32 pixGetRankValueMasked | ( | PIX * | pixs, |
| PIX * | pixm, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | factor, | ||
| l_float32 | rank, | ||
| l_float32 * | pval, | ||
| NUMA ** | pna | ||
| ) |
Input: pixs (8 bpp, or colormapped)
pixm (<optional> 1 bpp mask over which rank val is to be taken;
use all pixels if null)
x, y (UL corner of pixm relative to the UL corner of pixs;
can be < 0; these values are ignored if pixm is null)
factor (subsampling factor; integer >= 1)
rank (between 0.0 and 1.0; 1.0 is brightest, 0.0 is darkest)
&val (<return> pixel value corresponding to input rank)
&na (<optional return> of histogram)
Return: 0 if OK, 1 on error
Notes: (1) Computes the rank value of pixels in pixs that are under the fg of the optional mask. If the mask is null, it computes the rank value over all the pixels in pixs. (2) Set the subsampling @factor > 1 to reduce the amount of computation. (3) Clipping of pixm (if it exists) to pixs is done in the inner loop. (4) Input x,y are ignored unless pixm exists. (5) The rank must be in [0.0 ... 1.0], where the brightest pixel has rank 1.0. For the median pixel value, use 0.5. (6) The histogram can optionally be returned, so that other rank values can be extracted without recomputing the histogram. In that case, just use numaHistogramGetValFromRank(na, rank, &val); on the returned Numa for additional rank values.
| LEPT_DLL l_int32 pixGetRankValueMaskedRGB | ( | PIX * | pixs, |
| PIX * | pixm, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | factor, | ||
| l_float32 | rank, | ||
| l_float32 * | prval, | ||
| l_float32 * | pgval, | ||
| l_float32 * | pbval | ||
| ) |
Input: pixs (32 bpp)
pixm (<optional> 1 bpp mask over which rank val is to be taken;
use all pixels if null)
x, y (UL corner of pixm relative to the UL corner of pixs;
can be < 0; these values are ignored if pixm is null)
factor (subsampling factor; integer >= 1)
rank (between 0.0 and 1.0; 1.0 is brightest, 0.0 is darkest)
&rval (<optional return> red component val for input rank)
&gval (<optional return> green component val for input rank)
&bval (<optional return> blue component val for input rank)
Return: 0 if OK, 1 on error
Notes: (1) Computes the rank component values of pixels in pixs that are under the fg of the optional mask. If the mask is null, it computes the rank component values over all the pixels in pixs. (2) Set the subsampling @factor > 1 to reduce the amount of computation. (3) Input x,y are ignored unless pixm exists. (4) The rank must be in [0.0 ... 1.0], where the brightest pixel has rank 1.0. For the median pixel value, use 0.5.
Input: pixs (1, 8, 32 bpp)
&data (<return> raster data in memory)
&nbytes (<return> number of bytes in data string)
Return: 0 if OK, 1 on error
Notes: (1) This returns the raster data as a byte string, padded to the byte. For 1 bpp, the first pixel is the MSbit in the first byte. For rgb, the bytes are in (rgb) order. This is the format required for flate encoding of pixels in a PostScript file.
| LEPT_DLL l_int32 pixGetRegionsBinary | ( | PIX * | pixs, |
| PIX ** | ppixhm, | ||
| PIX ** | ppixtm, | ||
| PIX ** | ppixtb, | ||
| l_int32 | debug | ||
| ) |
Input: pixs (1 bpp, assumed to be 300 to 400 ppi)
&pixhm (<optional return> halftone mask)
&pixtm (<optional return> textline mask)
&pixtb (<optional return> textblock mask)
debug (flag: set to 1 for debug output)
Return: 0 if OK, 1 on error
Notes: (1) It is best to deskew the image before segmenting. (2) The debug flag enables a number of outputs. These are included to show how to generate and save/display these results.
Input: pix
&xres, &yres (<optional return>; each can be null)
Return: 0 if OK, 1 on error
Input: pixs (32 bpp, or colormapped)
comp (one of {COLOR_RED, COLOR_GREEN, COLOR_BLUE,
L_ALPHA_CHANNEL})
Return: pixd (the selected 8 bpp component image of the
input 32 bpp image) or null on error
Notes: (1) Three calls to this function generate the r, g and b 8 bpp component images. This is much faster than generating the three images in parallel, by extracting a src pixel and setting the pixels of each component image from it. The reason is there are many more cache misses when writing to three output images simultaneously.
Input: pixs (colormapped)
comp (one of the set: {COLOR_RED, COLOR_GREEN, COLOR_BLUE})
Return: pixd (the selected 8 bpp component image of the
input cmapped image), or null on error
Notes: (1) In leptonica, we do not support alpha in colormaps.
pixGetRGBHistogram() Input: pixs (32 bpp rgb) sigbits (2-6, significant bits retained in the quantizer for each component of the input image) factor (subsampling factor; use 1 for no subsampling) Return: numa (histogram of colors, indexed by RGB components), or null on error
Notes: (1) This uses a simple, fast method of indexing into an rgb image. (2) The output is a 1D histogram of count vs. rgb-index, which uses red sigbits as the most significant and blue as the least. (3) This function produces the same result as pixMedianCutHisto().
| LEPT_DLL l_int32 pixGetRGBLine | ( | PIX * | pixs, |
| l_int32 | row, | ||
| l_uint8 * | bufr, | ||
| l_uint8 * | bufg, | ||
| l_uint8 * | bufb | ||
| ) |
Input: pixs (32 bpp)
row
bufr (array of red samples; size w bytes)
bufg (array of green samples; size w bytes)
bufb (array of blue samples; size w bytes)
Return: 0 if OK; 1 on error
Notes: (1) This puts rgb components from the input line in pixs into the given buffers.
| LEPT_DLL l_int32 pixGetRGBPixel | ( | PIX * | pix, |
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 * | prval, | ||
| l_int32 * | pgval, | ||
| l_int32 * | pbval | ||
| ) |
Input: pix (32 bpp rgb, not colormapped)
(x,y) pixel coords
&rval (<optional return> red component)
&gval (<optional return> green component)
&bval (<optional return> blue component)
Return: 0 if OK; 1 on error
| LEPT_DLL l_int32 pixGetRowStats | ( | PIX * | pixs, |
| l_int32 | type, | ||
| l_int32 | nbins, | ||
| l_int32 | thresh, | ||
| l_float32 * | colvect | ||
| ) |
Input: pixs (8 bpp; not cmapped)
type (L_MEAN_ABSVAL, L_MEDIAN_VAL, L_MODE_VAL, L_MODE_COUNT)
nbins (of histogram for median and mode; ignored for mean)
thresh (on histogram for mode; ignored for mean and median)
colvect (vector of results gathered across the rows of pixs)
Return: 0 if OK, 1 on error
Notes: (1) This computes a column vector of statistics using each row of a Pix. The result is put in @colvect. (2) The @thresh parameter works with L_MODE_VAL only, and sets a minimum occupancy of the mode bin. If the occupancy of the mode bin is less than @thresh, the mode value is returned as 0. To always return the actual mode value, set @thresh = 0. (3) What is the meaning of this @thresh parameter? For each row, the total count in the histogram is w, the image width. So @thresh, relative to w, gives a measure of the ratio of the bin width to the width of the distribution. The larger @thresh, the narrower the distribution must be for the mode value to be returned (instead of returning 0). (4) If the Pix consists of a set of corresponding columns, one for each Pix in a Pixa, the width of the Pix is the number of Pix in the Pixa and the column vector can be stored as a column in a Pix of the same size as each Pix in the Pixa.
Input: pixs (1 bpp)
x, y (set one of these to -1; see notes)
minlength (minimum length of acceptable run)
Return: numa of fg runs, or null on error
Notes: (1) Action: this function computes the fg (black) and bg (white) pixel runlengths along the specified horizontal or vertical line, and returns a Numa of the "center" pixels of each fg run whose length equals or exceeds the minimum length. (2) This only works on horizontal and vertical lines. (3) For horizontal runs, set x = -1 and y to the value for all points along the raster line. For vertical runs, set y = -1 and x to the value for all points along the pixel column. (4) For horizontal runs, the points in the Numa are the x values in the center of fg runs that are of length at least 'minlength'. For vertical runs, the points in the Numa are the y values in the center of fg runs, again of length 'minlength' or greater. (5) If there are no fg runs along the line that satisfy the minlength constraint, the returned Numa is empty. This is not an error.
Input: pixs (1 bpp)
x1, y1, x2, y2
Return: numa, or null on error
Notes: (1) Action: this function uses the bresenham algorithm to compute the pixels along the specified line. It returns a Numa of the runlengths of the fg (black) and bg (white) runs, always starting with a white run. (2) If the first pixel on the line is black, the length of the first returned run (which is white) is 0.
| LEPT_DLL l_int32 pixGetSortedNeighborValues | ( | PIX * | pixs, |
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | conn, | ||
| l_int32 ** | pneigh, | ||
| l_int32 * | pnvals | ||
| ) |
Input: pixs (8, 16 or 32 bpp, with pixels labelled by c.c.)
x, y (location of pixel)
conn (4 or 8 connected neighbors)
&neigh (<return> array of integers, to be filled with
the values of the neighbors, if any)
&nvals (<return> the number of unique neighbor values found)
Return: 0 if OK, 1 on error
Notes: (1) The returned @neigh array is the unique set of neighboring pixel values, of size nvals, sorted from smallest to largest. The value 0, which represents background pixels that do not belong to any set of connected components, is discarded. (2) If there are no neighbors, this returns @neigh = NULL; otherwise, the caller must free the array. (3) For either 4 or 8 connectivity, the maximum number of unique neighbor values is 4.
Input: pix Return: ptr to existing text string
Notes: (1) The text string belongs to the pix. The caller must NOT free it!
| LEPT_DLL l_int32 pixGetWordBoxesInTextlines | ( | PIX * | pixs, |
| l_int32 | reduction, | ||
| l_int32 | minwidth, | ||
| l_int32 | minheight, | ||
| l_int32 | maxwidth, | ||
| l_int32 | maxheight, | ||
| BOXA ** | pboxad, | ||
| NUMA ** | pnai | ||
| ) |
Input: pixs (1 bpp, typ. 300 ppi)
reduction (1 for input res; 2 for 2x reduction of input res)
minwidth, minheight (of saved components; smaller are discarded)
maxwidth, maxheight (of saved components; larger are discarded)
&boxad (<return> word boxes sorted in textline line order)
&naindex (<optional return> index of textline for each word)
Return: 0 if OK, 1 on error
Notes: (1) The input should be at a resolution of about 300 ppi. The word masks can be computed at either 150 ppi or 300 ppi. For the former, set reduction = 2. (2) This is a special version of pixGetWordsInTextlines(), that just finds the word boxes in line order, with a numa giving the textline index for each word. See pixGetWordsInTextlines() for more details.
| LEPT_DLL l_int32 pixGetWordsInTextlines | ( | PIX * | pixs, |
| l_int32 | reduction, | ||
| l_int32 | minwidth, | ||
| l_int32 | minheight, | ||
| l_int32 | maxwidth, | ||
| l_int32 | maxheight, | ||
| BOXA ** | pboxad, | ||
| PIXA ** | ppixad, | ||
| NUMA ** | pnai | ||
| ) |
Input: pixs (1 bpp, typ. 300 ppi)
reduction (1 for input res; 2 for 2x reduction of input res)
minwidth, minheight (of saved components; smaller are discarded)
maxwidth, maxheight (of saved components; larger are discarded)
&boxad (<return> word boxes sorted in textline line order)
&pixad (<return> word images sorted in textline line order)
&naindex (<return> index of textline for each word)
Return: 0 if OK, 1 on error
Notes: (1) The input should be at a resolution of about 300 ppi. The word masks and word images can be computed at either 150 ppi or 300 ppi. For the former, set reduction = 2. (2) The four size constraints on saved components are all scaled by @reduction. (3) The results are word images (and their b.b.), extracted in textline order, at either full res or 2x reduction, and with a numa giving the textline index for each word. (4) The pixa and boxa interfaces should make this type of application simple to put together. The steps are:
| LEPT_DLL PIX* pixGlobalNormNoSatRGB | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_int32 | factor, | ||
| l_float32 | rank | ||
| ) |
Input: pixd (<optional> null, existing or equal to pixs)
pixs (32 bpp rgb)
rval, gval, bval (pixel values in pixs that are
linearly mapped to mapval; but see below)
factor (subsampling factor; integer >= 1)
rank (between 0.0 and 1.0; typ. use a value near 1.0)
Return: pixd (32 bpp rgb), or null on error
Notes: (1) This is a version of pixGlobalNormRGB(), where the output intensity is scaled back so that a controlled fraction of pixel components is allowed to saturate. See comments in pixGlobalNormRGB(). (2) The value of pixd determines if the results are written to a new pix (use NULL), in-place to pixs (use pixs), or to some other existing pix. (3) This does a global normalization of an image where the r,g,b color components are not balanced. Thus, white in pixs is represented by a set of r,g,b values that are not all 255. (4) The input values (rval, gval, bval) can be chosen to be the color that, after normalization, becomes white background. For images that are mostly background, the closer these values are to the median component values, the closer the resulting background will be to gray, becoming white at the brightest places. (5) The mapval used in pixGlobalNormRGB() is computed here to avoid saturation of any component in the image (save for a fraction of the pixels given by the input rank value).
| LEPT_DLL PIX* pixGlobalNormRGB | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_int32 | mapval | ||
| ) |
Input: pixd (<optional> null, existing or equal to pixs)
pixs (32 bpp rgb, or colormapped)
rval, gval, bval (pixel values in pixs that are
linearly mapped to mapval)
mapval (use 255 for mapping to white)
Return: pixd (32 bpp rgb or colormapped), or null on error
Notes: (1) The value of pixd determines if the results are written to a new pix (use NULL), in-place to pixs (use pixs), or to some other existing pix. (2) This does a global normalization of an image where the r,g,b color components are not balanced. Thus, white in pixs is represented by a set of r,g,b values that are not all 255. (3) The input values (rval, gval, bval) should be chosen to represent the gray color (mapval, mapval, mapval) in src. Thus, this function will map (rval, gval, bval) to that gray color. (4) Typically, mapval = 255, so that (rval, gval, bval) corresponds to the white point of src. In that case, these parameters should be chosen so that few pixels have higher values. (5) In all cases, we do a linear TRC separately on each of the components, saturating at 255. (6) If the input pix is 8 bpp without a colormap, you can get this functionality with mapval = 255 by calling: pixGammaTRC(pixd, pixs, 1.0, 0, bgval); where bgval is the value you want to be mapped to 255. Or more generally, if you want bgval to be mapped to mapval: pixGammaTRC(pixd, pixs, 1.0, 0, 255 * bgval / mapval);
| LEPT_DLL PIX* pixGrayMorphSequence | ( | PIX * | pixs, |
| const char * | sequence, | ||
| l_int32 | dispsep, | ||
| l_int32 | dispy | ||
| ) |
Input: pixs
sequence (string specifying sequence)
dispsep (controls debug display of each result in the sequence:
0: no output
> 0: gives horizontal separation in pixels between
successive displays
< 0: pdf output; abs(dispsep) is used for naming)
dispy (if dispsep > 0, this gives the y-value of the
UL corner for display; otherwise it is ignored)
Return: pixd, or null on error
Notes: (1) This works on 8 bpp grayscale images. (2) This runs a pipeline of operations; no branching is allowed. (3) This only uses brick SELs. (4) A new image is always produced; the input image is not changed. (5) This contains an interpreter, allowing sequences to be generated and run. (6) The format of the sequence string is defined below. (7) In addition to morphological operations, the composite morph/subtract tophat can be performed. (8) Sel sizes (width, height) must each be odd numbers. (9) Intermediate results can optionally be displayed. (10) The sequence string is formatted as follows:
Input: pixs (8 bpp grayscale without cmap)
cmap (to quantize to; of dest pix)
mindepth (minimum depth of pixd: can be 2, 4 or 8 bpp)
Return: pixd (2, 4 or 8 bpp, colormapped), or null on error
Notes: (1) In use, pixs is an 8 bpp grayscale image without a colormap. If there is an existing colormap, a warning is issued and a copy of the input pixs is returned.
| LEPT_DLL PIX* pixGrayQuantFromHisto | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| PIX * | pixm, | ||
| l_float32 | minfract, | ||
| l_int32 | maxsize | ||
| ) |
Input: pixd (<optional> quantized pix with cmap; can be null)
pixs (8 bpp gray input pix; not cmapped)
pixm (<optional> mask over pixels in pixs to quantize)
minfract (minimum fraction of pixels in a set of adjacent
histo bins that causes the set to be automatically
set aside as a color in the colormap; must be
at least 0.01)
maxsize (maximum number of adjacent bins allowed to represent
a color, regardless of the population of pixels
in the bins; must be at least 2)
Return: pixd (8 bpp, cmapped), or null on error
Notes: (1) This is useful for quantizing images with relatively few colors, but which may have both color and gray pixels. If there are color pixels, it is assumed that an input rgb image has been color quantized first so that:
| LEPT_DLL PIX* pixHalfEdgeByBandpass | ( | PIX * | pixs, |
| l_int32 | sm1h, | ||
| l_int32 | sm1v, | ||
| l_int32 | sm2h, | ||
| l_int32 | sm2v | ||
| ) |
Input: pixs (8 bpp gray or 32 bpp rgb)
sm1h, sm1v ("half-widths" of smoothing filter sm1)
sm2h, sm2v ("half-widths" of smoothing filter sm2)
(require sm2 != sm1)
Return: pixd, or null on error
Notes: (1) We use symmetric smoothing filters of odd dimension, typically use 3, 5, 7, etc. The smoothing parameters for these are 1, 2, 3, etc. The filter size is related to the smoothing parameter by size = 2 * smoothing + 1 (2) Because we take the difference of two lowpass filters, this is actually a bandpass filter. (3) We allow both filters to be anisotropic. (4) Consider either the h or v component of the 2 filters. Depending on whether sm1 > sm2 or sm2 > sm1, we get different halves of the smoothed gradients (or "edges"). This difference of smoothed signals looks more like a second derivative of a transition, which we rectify by not allowing the signal to go below zero. If sm1 < sm2, the sm2 transition is broader, so the difference between sm1 and sm2 signals is positive on the upper half of the transition. Likewise, if sm1 > sm2, the sm1 - sm2 signal difference is positive on the lower half of the transition.
| LEPT_DLL l_int32 pixHasHighlightRed | ( | PIX * | pixs, |
| l_int32 | factor, | ||
| l_float32 | fract, | ||
| l_float32 | fthresh, | ||
| l_int32 * | phasred, | ||
| l_float32 * | pratio, | ||
| PIX ** | ppixdb | ||
| ) |
Input: pixs (32 bpp rgb)
factor (subsampling; an integer >= 1; use 1 for all pixels)
fract (threshold fraction of all image pixels)
fthresh (threshold on a function of the components; typ. ~2.5)
&hasred (<return> 1 if red pixels are above threshold)
&ratio (<optional return> normalized fraction of threshold
red pixels that is actually observed)
&pixdb (<optional return> seed pixel mask)
Return: 0 if OK, 1 on error
Notes: (1) Pixels are identified as red if they satisfy two conditions: (a) The components satisfy (R-B)/B > @fthresh (red or dark fg) (b) The red component satisfies R > 128 (red or light bg) Masks are generated for (a) and (b), and the intersection gives the pixels that are red but not either light bg or dark fg. (2) A typical value for fract = 0.0001, which gives sensitivity to an image where a small fraction of the pixels are printed in red. (3) A typical value for fthresh = 2.5. Higher values give less sensitivity to red, and fewer false positives.
| LEPT_DLL l_int32 pixHaustest | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| PIX * | pix3, | ||
| PIX * | pix4, | ||
| l_float32 | delx, | ||
| l_float32 | dely, | ||
| l_int32 | maxdiffw, | ||
| l_int32 | maxdiffh | ||
| ) |
Input: pix1 (new pix, not dilated)
pix2 (new pix, dilated)
pix3 (exemplar pix, not dilated)
pix4 (exemplar pix, dilated)
delx (x comp of centroid difference)
dely (y comp of centroid difference)
maxdiffw (max width difference of pix1 and pix2)
maxdiffh (max height difference of pix1 and pix2)
Return: 0 (FALSE) if no match, 1 (TRUE) if the new
pix is in the same class as the exemplar.
Note: we check first that the two pix are roughly the same size. Only if they meet that criterion do we compare the bitmaps. The Hausdorff is a 2-way check. The centroid difference is used to align the two images to the nearest integer for each of the checks. These check that the dilated image of one contains ALL the pixels of the undilated image of the other. Checks are done in both directions. A single pixel not contained in either direction results in failure of the test.
Input: pixs (8 bpp, filling mask)
height (of seed below the filling mask; must be >= 0)
connectivity (4 or 8)
Return: pixd (8 bpp), or null on error
Notes: (1) It is more efficient to use a connectivity of 4 for the fill. (2) This fills bumps to some level, and extracts the unfilled part of the bump. To extract the troughs of basins, first invert pixs and then apply pixHDome(). (3) It is useful to compare the HDome operation with the TopHat. The latter extracts peaks or valleys that have a width not exceeding the size of the structuring element used in the opening or closing, rsp. The height of the peak is irrelevant. By contrast, for the HDome, the gray seedfill is used to extract all peaks that have a height not exceeding a given value, regardless of their width! (4) Slightly more precisely, suppose you set 'height' = 40. Then all bumps in pixs with a height greater than or equal to 40 become, in pixd, bumps with a max value of exactly 40. All shorter bumps have a max value in pixd equal to the height of the bump. (5) The method: the filling mask, pixs, is the image whose peaks are to be extracted. The height of a peak is the distance between the top of the peak and the highest "leak" to the outside – think of a sombrero, where the leak occurs at the highest point on the rim. (a) Generate a seed, pixd, by subtracting some value, p, from each pixel in the filling mask, pixs. The value p is the 'height' input to this function. (b) Fill in pixd starting with this seed, clipping by pixs, in the way described in seedfillGrayLow(). The filling stops before the peaks in pixs are filled. For peaks that have a height > p, pixd is filled to the level equal to the (top-of-the-peak - p). For peaks of height < p, the peak is left unfilled from its highest saddle point (the leak to the outside). (c) Subtract the filled seed (pixd) from the filling mask (pixs). Note that in this procedure, everything is done starting with the filling mask, pixs. (6) For segmentation, the resulting image, pixd, can be thresholded and used as a seed for another filling operation.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
sel
Return: pixd
Notes: (1) The hit-miss transform erodes the src, using both hits and misses in the Sel. It ANDs the shifted src for hits and ANDs the inverted shifted src for misses. (2) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (3) For clarity, if the case is known, use these patterns: (a) pixd = pixHMT(NULL, pixs, ...); (b) pixHMT(pixs, pixs, ...); (c) pixHMT(pixd, pixs, ...); (4) The size of the result is determined by pixs.
Input: pixd (usual 3 choices: null, == pixs, != pixs)
pixs (1 bpp)
sel name
Return: pixd
Notes: (1) This simply adds a 32 pixel border, calls the appropriate pixFHMTGen_*(), and removes the border. See notes below for that function.
Top-level fast hit-miss transform with auto-generated sels
PIX *pixHMTDwa_1() PIX *pixFHMTGen_1()
Input: pixd (usual 3 choices: null, == pixs, != pixs)
pixs (1 bpp)
sel name
Return: pixd
Notes: (1) This simply adds a 32 pixel border, calls the appropriate pixFHMTGen_*(), and removes the border. See notes below for that function.
Input: pixs (1 bpp)
connectivity (4 or 8)
Return: pixd (inverted image of all holes), or null on error
Action: (1) Start with 1-pixel black border on otherwise white pixd (2) Use the inverted pixs as the filling mask to fill in all the pixels from the border to the pixs foreground (3) OR the result with pixs to have an image with all ON pixels except for the holes. (4) Invert the result to get the holes as foreground
Notes: (1) To get 4-c.c. holes of the 8-c.c. as foreground, use 4-connected filling; to get 8-c.c. holes of the 4-c.c. as foreground, use 8-connected filling.
Input: pixd (<optional>, this can be null, equal to pixs,
or different from pixs)
pixs (no restrictions on depth)
yloc (location of horizontal line, measured from origin)
angle (in radians)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK);
Return: pixd, always
Notes: (1) There are 3 cases: (a) pixd == null (make a new pixd) (b) pixd == pixs (in-place) (c) pixd != pixs (2) For these three cases, use these patterns, respectively: pixd = pixHShear(NULL, pixs, ...); pixHShear(pixs, pixs, ...); pixHShear(pixd, pixs, ...); (3) This shear leaves the horizontal line of pixels at y = yloc invariant. For a positive shear angle, pixels above this line are shoved to the right, and pixels below this line move to the left. (4) With positive shear angle, this can be used, along with pixVShear(), to perform a cw rotation, either with 2 shears (for small angles) or in the general case with 3 shears. (5) Changing the value of yloc is equivalent to translating the result horizontally. (6) This brings in 'incolor' pixels from outside the image. (7) For in-place operation, pixs cannot be colormapped, because the in-place operation only blits in 0 or 1 bits, not an arbitrary colormap index. (8) The angle is brought into the range [-pi, pi]. It is not permitted to be within MIN_DIFF_FROM_HALF_PI radians from either -pi/2 or pi/2.
Input: pixd (<optional>, if not null, must be equal to pixs)
pixs
angle (in radians)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK);
Return: pixd, or null on error.
Notes: (1) See pixHShear() for usage. (2) This does a horizontal shear about the center, with (+) shear pushing increasingly leftward (-x) with increasing y.
Input: pixd (<optional>, if not null, must be equal to pixs)
pixs
angle (in radians)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK);
Return: pixd, or null on error.
Notes: (1) See pixHShear() for usage. (2) This does a horizontal shear about the UL corner, with (+) shear pushing increasingly leftward (-x) with increasing y.
Input: pixs
yloc (location of horizontal line, measured from origin)
angle (in radians)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK);
Return: 0 if OK; 1 on error
Notes: (1) This is an in-place version of pixHShear(); see comments there. (2) This brings in 'incolor' pixels from outside the image. (3) pixs cannot be colormapped, because the in-place operation only blits in 0 or 1 bits, not an arbitrary colormap index. (4) Does a horizontal full-band shear about the line with (+) shear pushing increasingly leftward (-x) with increasing y.
Input: pixs (8 bpp or 32 bpp, or colormapped)
yloc (location of horizontal line, measured from origin)
angle (in radians, in range (-pi/2 ... pi/2))
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK);
Return: pixd (sheared), or null on error
Notes: (1) This does horizontal shear with linear interpolation for accurate results on 8 bpp gray, 32 bpp rgb, or cmapped images. It is relatively slow compared to the sampled version implemented by rasterop, but the result is much smoother. (2) This shear leaves the horizontal line of pixels at y = yloc invariant. For a positive shear angle, pixels above this line are shoved to the right, and pixels below this line move to the left. (3) Any colormap is removed. (4) The angle is brought into the range [-pi/2 + del, pi/2 - del], where del == MIN_DIFF_FROM_HALF_PI.
| LEPT_DLL l_int32 pixHtmlViewer | ( | const char * | dirin, |
| const char * | dirout, | ||
| const char * | rootname, | ||
| l_int32 | thumbwidth, | ||
| l_int32 | viewwidth, | ||
| l_int32 | copyorig | ||
| ) |
Input: dirin: directory of input image files
dirout: directory for output files
rootname: root name for output files
thumbwidth: width of thumb images
(in pixels; use 0 for default)
viewwidth: maximum width of view images (no up-scaling)
(in pixels; use 0 for default)
copyorig: 1 to copy originals to dirout; 0 otherwise
Return: 0 if OK; 1 on error
Notes: (1) The thumb and view reduced images are generated, along with two html files: <rootname>.html and <rootname>-links.html (2) The thumb and view files are named <rootname>_thumb_xxx.jpg <rootname>_view_xxx.jpg With this naming scheme, any number of input directories of images can be processed into views and thumbs and placed in the same output directory.
Input: w, h (of accumulate array)
offset (initialize the 32 bpp to have this
value; not more than 0x40000000)
Return: pixd (32 bpp), or null on error
Notes: (1) The offset must be >= 0. (2) The offset is used so that we can do arithmetic with negative number results on l_uint32 data; it prevents the l_uint32 data from going negative. (3) Because we use l_int32 intermediate data results, these should never exceed the max of l_int32 (0x7fffffff). We do not permit the offset to be above 0x40000000, which is half way between 0 and the max of l_int32. (4) The same offset should be used for initialization, multiplication by a constant, and final extraction! (5) If you're only adding positive values, offset can be 0.
Input: pixs (binary)
sela
type (L_MORPH_DILATE, etc.)
Return: pixd (intersection of the specified morphological operation
on pixs for each Sel in the Sela), or null on error
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs
Return: pixd, or null on error
Notes: (1) This inverts pixs, for all pixel depths. (2) There are 3 cases: (a) pixd == null, ~src --> new pixd (b) pixd == pixs, ~src --> src (in-place) (c) pixd != pixs, ~src --> input pixd (3) For clarity, if the case is known, use these patterns: (a) pixd = pixInvert(NULL, pixs); (b) pixInvert(pixs, pixs); (c) pixInvert(pixd, pixs);
| LEPT_DLL l_int32 pixItalicWords | ( | PIX * | pixs, |
| BOXA * | boxaw, | ||
| PIX * | pixw, | ||
| BOXA ** | pboxa, | ||
| l_int32 | debugflag | ||
| ) |
Input: pixs (1 bpp)
boxaw (<optional> word bounding boxes; can be NULL)
pixw (<optional> word box mask; can be NULL)
&boxa (<return> boxa of italic words)
debugflag (1 for debug output; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) You can input the bounding boxes for the words in one of two forms: as bounding boxes (@boxaw) or as a word mask with the word bounding boxes filled (@pixw). For example, to compute @pixw, you can use pixWordMaskByDilation(). (2) Alternatively, you can set both of these inputs to NULL, in which case the word mask is generated here. This is done by dilating and closing the input image to connect letters within a word, while leaving the words separated. The parameters are chosen under the assumption that the input is 10 to 12 pt text, scanned at about 300 ppi. (3) sel_ital1 and sel_ital2 detect the right edges that are nearly vertical, at approximately the angle of italic strokes. We use the right edge to avoid getting seeds from lower-case 'y'. The typical italic slant has a smaller angle with the vertical than the 'W', so in most cases we will not trigger on the slanted lines in the 'W'. (4) Note that sel_ital2 is shorter than sel_ital1. It is more appropriate for a typical font scanned at 200 ppi.
| LEPT_DLL PIX* pixLinearMapToTargetColor | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| l_uint32 | srcval, | ||
| l_uint32 | dstval | ||
| ) |
Input: pixd (<optional>; either NULL or equal to pixs for in-place)
pixs (32 bpp rgb)
srcval (source color: 0xrrggbb00)
dstval (target color: 0xrrggbb00)
Return: pixd (with all pixels mapped based on the srcval/dstval
mapping), or pixd on error
Notes: (1) For each component (r, b, g) separately, this does a piecewise linear mapping of the colors in pixs to colors in pixd. If rs and rd are the red src and dest components in @srcval and @dstval, then the range [0 ... rs] in pixs is mapped to [0 ... rd] in pixd. Likewise, the range [rs ... 255] in pixs is mapped to [rd ... 255] in pixd. And similarly for green and blue. (2) The mapping will in general change the hue of the pixels. However, if the src and dst targets are related by a transformation given by pixelFractionalShift(), the hue is invariant. (3) For inplace operation, call it this way: pixLinearMapToTargetColor(pixs, pixs, ... ) (4) For generating a new pixd: pixd = pixLinearMapToTargetColor(NULL, pixs, ...)
| LEPT_DLL PIX* pixLinearTRCTiled | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| l_int32 | sx, | ||
| l_int32 | sy, | ||
| PIX * | pixmin, | ||
| PIX * | pixmax | ||
| ) |
Input: pixd (<optional> 8 bpp)
pixs (8 bpp, not colormapped)
sx, sy (tile dimensions)
pixmin (pix of min values in tiles)
pixmax (pix of max values in tiles)
Return: pixd always
Notes: (1) pixd can be equal to pixs (in-place operation) or null (makes a new pixd). (2) sx and sy give the tile size; they are typically at least 20. (3) pixmin and pixmax are generated by pixMinMaxTiles() (4) For each tile, this does a linear expansion of the dynamic range so that the min value in the tile becomes 0 and the max value in the tile becomes 255. (5) The LUTs that do the mapping are generated as needed and stored for reuse in an integer array within the ptr array iaa[].
| LEPT_DLL l_int32 pixLocalExtrema | ( | PIX * | pixs, |
| l_int32 | maxmin, | ||
| l_int32 | minmax, | ||
| PIX ** | ppixmin, | ||
| PIX ** | ppixmax | ||
| ) |
Input: pixs (8 bpp)
maxmin (max allowed for the min in a 3x3 neighborhood;
use 0 for default which is to have no upper bound)
minmax (min allowed for the max in a 3x3 neighborhood;
use 0 for default which is to have no lower bound)
&ppixmin (<optional return> mask of local minima)
&ppixmax (<optional return> mask of local maxima)
Return: 0 if OK, 1 on error
Notes: (1) This gives the actual local minima and maxima. A local minimum is a pixel whose surrounding pixels all have values at least as large, and likewise for a local maximum. For the local minima, @maxmin is the upper bound for the value of pixs. Likewise, for the local maxima, @minmax is the lower bound for the value of pixs. (2) The minima are found by starting with the erosion-and-equality approach of pixSelectedLocalExtrema(). This is followed by a qualification step, where each c.c. in the resulting minimum mask is extracted, the pixels bordering it are located, and they are queried. If all of those pixels are larger than the value of that minimum, it is a true minimum and its c.c. is saved; otherwise the c.c. is rejected. Note that if a bordering pixel has the same value as the minimum, it must then have a neighbor that is smaller, so the component is not a true minimum. (3) The maxima are found by inverting the image and looking for the minima there. (4) The generated masks can be used as markers for further operations.
Input: pixs (any depth)
thresh (for binarization of edge filter output; typ. 20)
&pixb (<optional return> binarized edge filtered input image)
&pixm (<optional return> mask over barcodes)
Return: boxa (location of barcodes), or null if none found or on error
Input: pixs (1 bpp) Return: pixd (32 bpp rgb), or null on error
Notes: (1) This generates an RGB image where each component value is coded depending on the (x,y) location and the size of the fg connected component that the pixel in pixs belongs to. It is independent of the 4-fold orthogonal orientation, and only weakly depends on translations and small angle rotations. Background pixels are black. (2) Such encodings can be compared between two 1 bpp images by performing this transform and calculating the "earth-mover" distance on the resulting R,G,B histograms.
Input: pixs (1 bpp)
dist (blending distance; typically 10 - 30)
&box (<optional return>, use null to get the full size)
Return: pixd (8 bpp gray), or null on error
Notes: (1) This generates an 8 bpp alpha layer that is opaque (255) over the FG of pixs, and goes transparent linearly away from the FG pixels, decaying to 0 (transparent) at an 8-connected distance given by @dist. If @dist == 0, this does a simple conversion from 1 to 8 bpp. (2) If &box == NULL, this returns an alpha mask that is the full size of pixs. Otherwise, the returned mask pixd covers just the FG pixels of pixs, expanded by @dist in each direction (if possible), and the returned box gives the location of the returned mask relative to pixs. (3) This is useful for painting through a mask and allowing blending of the painted image with an underlying image in the mask background for pixels near foreground mask pixels. For example, with an underlying rgb image pix1, an overlaying image rgb pix2, binary mask pixm, and dist > 0, this blending is achieved with: pix3 = pixMakeAlphaFromMask(pixm, dist, &box); boxGetGeometry(box, &x, &y, NULL, NULL); pix4 = pixBlendWithGrayMask(pix1, pix2, pix3, x, y);
| LEPT_DLL PIX* pixMakeFrameMask | ( | l_int32 | w, |
| l_int32 | h, | ||
| l_float32 | hf1, | ||
| l_float32 | hf2, | ||
| l_float32 | vf1, | ||
| l_float32 | vf2 | ||
| ) |
Input: w, h (dimensions of output 1 bpp pix)
hf1 (horizontal fraction of half-width at outer frame bdry)
hf2 (horizontal fraction of half-width at inner frame bdry)
vf1 (vertical fraction of half-width at outer frame bdry)
vf2 (vertical fraction of half-width at inner frame bdry)
Return: pixd (1 bpp), or null on error.
Notes: (1) This makes an arbitrary 1-component mask with a centered frame. Input fractions are in [0.0 ... 1.0]; hf1 <= hf2 and vf1 <= vf2. Horizontal and vertical frame widths are independently specified. (2) Special case: to get a full fg mask, set all input values to 0.0. An empty fg mask has hf1 = vf1 = 1.0. A fg rectangle with no hole has hf2 == 1.0 or vf2 == 1.0. (3) The vertical thickness of the horizontal mask parts is 0.5 * (vf2 - vf1) * h. The horizontal thickness of the vertical mask parts is 0.5 * (hf2 - hf1) * w.
Input: pixs (HSV colorspace)
factor (subsampling factor; integer)
&nahue (<optional return> hue histogram)
&nasat (<optional return> saturation histogram)
Return: pixd (32 bpp histogram in hue and saturation), or null on error
Notes: (1) pixs is a 32 bpp image in HSV colorspace; hue is in the "red" byte, saturation is in the "green" byte. (2) In pixd, hue is displayed vertically; saturation horizontally. The dimensions of pixd are w = 256, h = 240, and the depth is 32 bpp. The value at each point is simply the number of pixels found at that value of hue and saturation.
Input: pixs (HSV colorspace)
factor (subsampling factor; integer)
&nahue (<optional return> hue histogram)
&naval (<optional return> max intensity (value) histogram)
Return: pixd (32 bpp histogram in hue and value), or null on error
Notes: (1) pixs is a 32 bpp image in HSV colorspace; hue is in the "red" byte, max intensity ("value") is in the "blue" byte. (2) In pixd, hue is displayed vertically; intensity horizontally. The dimensions of pixd are w = 256, h = 240, and the depth is 32 bpp. The value at each point is simply the number of pixels found at that value of hue and intensity.
Input: pixs (HSV colorspace)
factor (subsampling factor; integer)
&nasat (<optional return> sat histogram)
&naval (<optional return> max intensity (value) histogram)
Return: pixd (32 bpp histogram in sat and value), or null on error
Notes: (1) pixs is a 32 bpp image in HSV colorspace; sat is in the "green" byte, max intensity ("value") is in the "blue" byte. (2) In pixd, sat is displayed vertically; intensity horizontally. The dimensions of pixd are w = 256, h = 256, and the depth is 32 bpp. The value at each point is simply the number of pixels found at that value of saturation and intensity.
Input: pixs (2, 4 or 8 bpp; can be colormapped)
tab (256-entry LUT; 1 means to write to mask)
Return: pixd (1 bpp mask), or null on error
Notes: (1) This generates a 1 bpp mask image, where a 1 is written in the mask for each pixel in pixs that has a value corresponding to a 1 in the LUT. (2) The LUT should be of size 256.
| LEPT_DLL PIX* pixMakeRangeMaskHS | ( | PIX * | pixs, |
| l_int32 | huecenter, | ||
| l_int32 | huehw, | ||
| l_int32 | satcenter, | ||
| l_int32 | sathw, | ||
| l_int32 | regionflag | ||
| ) |
Input: pixs (32 bpp rgb)
huecenter (center value of hue range)
huehw (half-width of hue range)
satcenter (center value of saturation range)
sathw (half-width of saturation range)
regionflag (L_INCLUDE_REGION, L_EXCLUDE_REGION)
Return: pixd (1 bpp mask over selected pixels), or null on error
Notes: (1) The pixels are selected based on the specified ranges of hue and saturation. For selection or exclusion, the pixel HS component values must be within both ranges. Care must be taken in finding the hue range because of wrap-around. (2) Use @regionflag == L_INCLUDE_REGION to take only those pixels within the rectangular region specified in HS space. Use @regionflag == L_EXCLUDE_REGION to take all pixels except those within the rectangular region specified in HS space.
| LEPT_DLL PIX* pixMakeRangeMaskHV | ( | PIX * | pixs, |
| l_int32 | huecenter, | ||
| l_int32 | huehw, | ||
| l_int32 | valcenter, | ||
| l_int32 | valhw, | ||
| l_int32 | regionflag | ||
| ) |
Input: pixs (32 bpp rgb)
huecenter (center value of hue range)
huehw (half-width of hue range)
valcenter (center value of max intensity range)
valhw (half-width of max intensity range)
regionflag (L_INCLUDE_REGION, L_EXCLUDE_REGION)
Return: pixd (1 bpp mask over selected pixels), or null on error
Notes: (1) The pixels are selected based on the specified ranges of hue and max intensity values. For selection or exclusion, the pixel HV component values must be within both ranges. Care must be taken in finding the hue range because of wrap-around. (2) Use @regionflag == L_INCLUDE_REGION to take only those pixels within the rectangular region specified in HV space. Use @regionflag == L_EXCLUDE_REGION to take all pixels except those within the rectangular region specified in HV space.
| LEPT_DLL PIX* pixMakeRangeMaskSV | ( | PIX * | pixs, |
| l_int32 | satcenter, | ||
| l_int32 | sathw, | ||
| l_int32 | valcenter, | ||
| l_int32 | valhw, | ||
| l_int32 | regionflag | ||
| ) |
Input: pixs (32 bpp rgb)
satcenter (center value of saturation range)
sathw (half-width of saturation range)
valcenter (center value of max intensity range)
valhw (half-width of max intensity range)
regionflag (L_INCLUDE_REGION, L_EXCLUDE_REGION)
Return: pixd (1 bpp mask over selected pixels), or null on error
Notes: (1) The pixels are selected based on the specified ranges of saturation and max intensity (val). For selection or exclusion, the pixel SV component values must be within both ranges. (2) Use @regionflag == L_INCLUDE_REGION to take only those pixels within the rectangular region specified in SV space. Use @regionflag == L_EXCLUDE_REGION to take all pixels except those within the rectangular region specified in SV space.
Input: pixd (<optional> may be null)
pixs (any depth; not cmapped)
boxa (of boxes, to paint)
op (L_SET_PIXELS, L_CLEAR_PIXELS, L_FLIP_PIXELS)
Return: pixd (with masking op over the boxes), or null on error
Notes: (1) This can be used with: pixd = NULL (makes a new pixd) pixd = pixs (in-place) (2) If pixd == NULL, this first makes a copy of pixs, and then bit-twiddles over the boxes. Otherwise, it operates directly on pixs. (3) This simple function is typically used with 1 bpp images. It uses the 1-image rasterop function, rasteropUniLow(), to set, clear or flip the pixels in pixd. (4) If you want to generate a 1 bpp mask of ON pixels from the boxes in a Boxa, in a pix of size (w,h): pix = pixCreate(w, h, 1); pixMaskBoxa(pix, pix, boxa, L_SET_PIXELS);
Input: pixs (1 bpp)
connectivity (4 or 8)
&boxa (<optional return> bounding boxes of c.c.)
Return: pixd (1 bpp mask over the c.c.), or null on error
Notes: (1) This generates a mask image with ON pixels over the b.b. of the c.c. in pixs. If there are no ON pixels in pixs, pixd will also have no ON pixels.
| LEPT_DLL PIX* pixMaskedThreshOnBackgroundNorm | ( | PIX * | pixs, |
| PIX * | pixim, | ||
| l_int32 | sx, | ||
| l_int32 | sy, | ||
| l_int32 | thresh, | ||
| l_int32 | mincount, | ||
| l_int32 | smoothx, | ||
| l_int32 | smoothy, | ||
| l_float32 | scorefract, | ||
| l_int32 * | pthresh | ||
| ) |
pixMaskedThreshOnBackgroundNorm()
Input: pixs (8 bpp grayscale; not colormapped)
pixim (<optional> 1 bpp 'image' mask; can be null)
sx, sy (tile size in pixels)
thresh (threshold for determining foreground)
mincount (min threshold on counts in a tile)
smoothx (half-width of block convolution kernel width)
smoothy (half-width of block convolution kernel height)
scorefract (fraction of the max Otsu score; typ. ~ 0.1)
&thresh (<optional return> threshold value that was
used on the normalized image)
Return: pixd (1 bpp thresholded image), or null on error
Notes: (1) This begins with a standard background normalization. Additionally, there is a flexible background norm, that will adapt to a rapidly varying background, and this puts white pixels in the background near regions with significant foreground. The white pixels are turned into a 1 bpp selection mask by binarization followed by dilation. Otsu thresholding is performed on the input image to get an estimate of the threshold in the non-mask regions. The background normalized image is thresholded with two different values, and the result is combined using the selection mask. (2) Note that the numbers 255 (for bgval target) and 190 (for thresholding on pixn) are tied together, and explicitly defined in this function. (3) See pixBackgroundNorm() for meaning and typical values of input parameters. For a start, you can try: sx, sy = 10, 15 thresh = 100 mincount = 50 smoothx, smoothy = 2
Input: pixs (32 bpp rgb or 8 bpp colormapped)
threshdiff (threshold for minimum of the max difference
between components)
mindist (minimum allowed distance from nearest non-color pixel)
Return: pixd (1 bpp, mask over color pixels), or null on error
Notes: (1) The generated mask identifies each pixel as either color or non-color. For a pixel to be color, it must satisfy two constraints: (a) The max difference between the r,g and b components must equal or exceed a threshold @threshdiff. (b) It must be at least @mindist (in an 8-connected way) from the nearest non-color pixel. (2) The distance constraint (b) is only applied if @mindist > 1. For example, if @mindist == 2, the color pixels identified by (a) are eroded by a 3x3 Sel. In general, the Sel size for erosion is 2 * (@mindist - 1) + 1. Why have this constraint? In scanned images that are essentially gray, color artifacts are typically introduced in transition regions near sharp edges that go from dark to light, so this allows these transition regions to be removed.
| LEPT_DLL PIX* pixMaskOverColorRange | ( | PIX * | pixs, |
| l_int32 | rmin, | ||
| l_int32 | rmax, | ||
| l_int32 | gmin, | ||
| l_int32 | gmax, | ||
| l_int32 | bmin, | ||
| l_int32 | bmax | ||
| ) |
Input: pixs (32 bpp rgb or 8 bpp colormapped)
rmin, rmax (min and max allowed values for red component)
gmin, gmax
bmin, bmax
Return: pixd (1 bpp, mask over color pixels), or null on error
Input: pixs (4, 8, 16 or 32 bpp source)
type (L_LINEAR_SCALE or L_LOG_SCALE)
Return: pixd (8 bpp), or null on error
Notes: (1) Scales pixel values to fit maximally within the dest 8 bpp pixd. (2) Uses a LUT for log scaling.
Input: pix (8 bpp)
box (region to compute mean value)
pixma (mean accumulator)
&val (<return> mean value)
Return: 0 if OK, 1 on error
Notes: (1) This function is intended to be used for many rectangles on the same image. It can find the mean within a rectangle in O(1), independent of the size of the rectangle.
Input: pixs (8 bpp grayscale) Return: dpix (64 bit array), or null on error
Notes: (1) Similar to pixBlockconvAccum(), this computes the sum of the squares of the pixel values in such a way that the value at (i,j) is the sum of all squares in the rectangle from the origin to (i,j). (2) The general recursion relation (v are squared pixel values) is a(i,j) = v(i,j) + a(i-1, j) + a(i, j-1) - a(i-1, j-1) For the first line, this reduces to the special case a(i,j) = v(i,j) + a(i, j-1) For the first column, the special case is a(i,j) = v(i,j) + a(i-1, j)
| LEPT_DLL l_int32 pixMeasureEdgeSmoothness | ( | PIX * | pixs, |
| l_int32 | side, | ||
| l_int32 | minjump, | ||
| l_int32 | minreversal, | ||
| l_float32 * | pjpl, | ||
| l_float32 * | pjspl, | ||
| l_float32 * | prpl, | ||
| const char * | debugfile | ||
| ) |
Input: pixs (1 bpp)
side (L_FROM_LEFT, L_FROM_RIGHT, L_FROM_TOP, L_FROM_BOT)
minjump (minimum jump to be counted; >= 1)
minreversal (minimum reversal size for new peak or valley)
&jpl (<optional return> jumps/length: number of jumps,
normalized to length of component side)
&jspl (<optional return> jumpsum/length: sum of all
sufficiently large jumps, normalized to length
of component side)
&rpl (<optional return> reversals/length: number of
peak-to-valley or valley-to-peak reversals,
normalized to length of component side)
debugfile (<optional> displays constructed edge; use NULL
for no output)
Return: 0 if OK, 1 on error
Notes: (1) This computes three measures of smoothness of the edge of a connected component:
Input: pixs (32 bpp rgb)
factor (subsampling factor; integer >= 1)
&sat (<return> average saturation)
Return: 0 if OK, 1 on error
Input: pixs (32 bpp; rgb color)
sigbits (valid: 5 or 6)
subsample (integer > 0)
Return: histo (1-d array, giving the number of pixels in
each quantized region of color space), or null on error
Notes: (1) Array is indexed by (3 * sigbits) bits. The array size is 2^(3 * sigbits). (2) Indexing into the array from rgb uses red sigbits as most significant and blue as least.
Input: pixs (32 bpp; rgb color)
ditherflag (1 for dither; 0 for no dither)
Return: pixd (8 bit with colormap), or null on error
Notes: (1) Simple interface. See pixMedianCutQuantGeneral() for use of defaulted parameters.
| LEPT_DLL PIX* pixMedianCutQuantGeneral | ( | PIX * | pixs, |
| l_int32 | ditherflag, | ||
| l_int32 | outdepth, | ||
| l_int32 | maxcolors, | ||
| l_int32 | sigbits, | ||
| l_int32 | maxsub, | ||
| l_int32 | checkbw | ||
| ) |
Input: pixs (32 bpp; rgb color)
ditherflag (1 for dither; 0 for no dither)
outdepth (output depth; valid: 0, 1, 2, 4, 8)
maxcolors (between 2 and 256)
sigbits (valid: 5 or 6; use 0 for default)
maxsub (max subsampling, integer; use 0 for default;
1 for no subsampling)
checkbw (1 to check if color content is very small,
0 to assume there is sufficient color)
Return: pixd (8 bit with colormap), or null on error
Notes: (1) @maxcolors must be in the range [2 ... 256]. (2) Use @outdepth = 0 to have the output depth computed as the minimum required to hold the actual colors found, given the @maxcolors constraint. (3) Use @outdepth = 1, 2, 4 or 8 to specify the output depth. In that case, @maxcolors must not exceed 2^(outdepth). (4) If there are fewer quantized colors in the image than @maxcolors, the colormap is simply generated from those colors. (5) @maxsub is the maximum allowed subsampling to be used in the computation of the color histogram and region of occupied color space. The subsampling is chosen internally for efficiency, based on the image size, but this parameter limits it. Use @maxsub = 0 for the internal default, which is the maximum allowed subsampling. Use @maxsub = 1 to prevent subsampling. In general use @maxsub >= 1 to specify the maximum subsampling to be allowed, where the actual subsampling will be the minimum of this value and the internally determined default value. (6) If the image appears gray because either most of the pixels are gray or most of the pixels are essentially black or white, the image is trivially quantized with a grayscale colormap. The reason is that median cut divides the color space into rectangular regions, and it does a very poor job if all the pixels are near the diagonal of the color space cube.
| LEPT_DLL PIX* pixMedianCutQuantMixed | ( | PIX * | pixs, |
| l_int32 | ncolor, | ||
| l_int32 | ngray, | ||
| l_int32 | darkthresh, | ||
| l_int32 | lightthresh, | ||
| l_int32 | diffthresh | ||
| ) |
Input: pixs (32 bpp; rgb color)
ncolor (maximum number of colors assigned to pixels with
significant color)
ngray (number of gray colors to be used; must be >= 2)
darkthresh (threshold near black; if the lightest component
is below this, the pixel is not considered to
be gray or color; use 0 for default)
lightthresh (threshold near white; if the darkest component
is above this, the pixel is not considered to
be gray or color; use 0 for default)
diffthresh (thresh for the max difference between component
values; for differences below this, the pixel
is considered to be gray; use 0 for default)
Return: pixd (8 bpp cmapped), or null on error
Notes: (1) ncolor + ngray must not exceed 255. (2) The method makes use of pixMedianCutQuantGeneral() with minimal addition. (a) Preprocess the image, setting all pixels with little color to black, and populating an auxiliary 8 bpp image with the expected colormap values corresponding to the set of quantized gray values. (b) Color quantize the altered input image to n + 1 colors. (c) Augment the colormap with the gray indices, and substitute the gray quantized values from the auxiliary image for those in the color quantized output that had been quantized as black. (3) Median cut color quantization is relatively poor for grayscale images with many colors, when compared to octcube quantization. Thus, for images with both gray and color, it is important to quantize the gray pixels by another method. Here, we are conservative in detecting color, preferring to use a few extra bits to encode colorful pixels that push them to gray. This is particularly reasonable with this function, because it handles the gray and color pixels separately, using median cut color quantization for the color pixels and equal-bin grayscale quantization for the non-color pixels.
Input: pixs (8 or 32 bpp; no colormap)
wf, hf (width and height of filter; each is >= 1)
Return: pixd (of median values), or null on error
| LEPT_DLL l_int32 pixMinMaxNearLine | ( | PIX * | pixs, |
| l_int32 | x1, | ||
| l_int32 | y1, | ||
| l_int32 | x2, | ||
| l_int32 | y2, | ||
| l_int32 | dist, | ||
| l_int32 | direction, | ||
| NUMA ** | pnamin, | ||
| NUMA ** | pnamax, | ||
| l_float32 * | pminave, | ||
| l_float32 * | pmaxave | ||
| ) |
Input: pixs (8 bpp; no colormap)
x1, y1 (starting pt for line)
x2, y2 (end pt for line)
dist (distance to search from line in each direction)
direction (L_SCAN_NEGATIVE, L_SCAN_POSITIVE, L_SCAN_BOTH)
&namin (<optional return> minimum values)
&namax (<optional return> maximum values)
&minave (<optional return> average of minimum values)
&maxave (<optional return> average of maximum values)
Return: 0 if OK; 1 on error or if there are no sampled points
within the image.
Notes: (1) If the line is more horizontal than vertical, the values are computed for [x1, x2], and the pixels are taken below and/or above the local y-value. Otherwise, the values are computed for [y1, y2] and the pixels are taken to the left and/or right of the local x value. (2) @direction specifies which side (or both sides) of the line are scanned for min and max values. (3) There are two ways to tell if the returned values of min and max averages are valid: the returned values cannot be negative and the function must return 0. (4) All accessed pixels are clipped to the pix.
| LEPT_DLL l_int32 pixMinMaxTiles | ( | PIX * | pixs, |
| l_int32 | sx, | ||
| l_int32 | sy, | ||
| l_int32 | mindiff, | ||
| l_int32 | smoothx, | ||
| l_int32 | smoothy, | ||
| PIX ** | ppixmin, | ||
| PIX ** | ppixmax | ||
| ) |
Input: pixs (8 bpp grayscale; not colormapped)
sx, sy (tile dimensions)
mindiff (minimum difference to accept as valid)
smoothx, smoothy (half-width of convolution kernel applied to
min and max arrays: use 0 for no smoothing)
&pixmin (<return> tiled minima)
&pixmax (<return> tiled maxima)
Return: 0 if OK, 1 on error
Notes: (1) This computes filtered and smoothed values for the min and max pixel values in each tile of the image. (2) See pixContrastNorm() for usage.
Input: pixd (<optional> destination: this can be null,
equal to pixs1, or different from pixs1)
pixs1 (can be == to pixd)
pixs2
type (L_CHOOSE_MIN, L_CHOOSE_MAX)
Return: pixd always
Notes: (1) This gives the min or max of two images, component-wise. (2) The depth can be 8 or 16 bpp for 1 component, and 32 bpp for a 3 component image. For 32 bpp, ignore the LSB of each word (the alpha channel) (3) There are 3 cases:
Input: pixs (1 bpp, deskewed, English text)
&conf (<return> confidence that text is not LR mirror reversed)
mincount (min number of left + right; use 0 for default)
debug (1 for debug output; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) For this test, it is necessary that the text is horizontally oriented, with ascenders going up. (2) conf is the normalized difference between the number of right and left facing characters with ascenders. Left-facing are {d}; right-facing are {b, h, k}. At least that was the expectation. In practice, we can really just say that it is the normalized difference in hits using two specific hit-miss filters, textsel1 and textsel2, after the image has been suitably pre-filtered so that these filters are effective. See (4) for what's really happening. (3) A large positive conf value indicates normal text, whereas a large negative conf value means the page is mirror reversed. (4) The implementation is a bit tricky. The general idea is to fill the x-height part of characters, but not the space between them, before doing the HMT. This is done by finding pixels added using two different operations – a horizontal close and a vertical dilation – and adding the intersection of these sets to the original. It turns out that the original intuition about the signal was largely in error: much of the signal for right-facing characters comes from the lower part of common x-height characters, like the e and c, that remain open after these operations. So it's important that the operations to close the x-height parts of the characters are purposely weakened sufficiently to allow these characters to remain open. The wonders of morphology!
| LEPT_DLL l_int32 pixMirrorDetectDwa | ( | PIX * | pixs, |
| l_float32 * | pconf, | ||
| l_int32 | mincount, | ||
| l_int32 | debug | ||
| ) |
Input: pixs (1 bpp, deskewed, English text)
&conf (<return> confidence that text is not LR mirror reversed)
mincount (min number of left + right; use 0 for default)
debug (1 for debug output; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) We assume the text is horizontally oriented, with ascenders going up. (2) See notes in pixMirrorDetect().
Input: pixs (8 or 32 bpp, small tile; to be replicated)
w, h (dimensions of output pix)
Return: pixd (usually larger pix, mirror-tiled with pixs),
or null on error
Notes: (1) This uses mirrored tiling, where each row alternates with LR flips and every column alternates with TB flips, such that the result is a tiling with identical
Input: pixd (<optional> can be null, existing or equal to pixs)
pixs (32 bpp rgb)
fract (between -1.0 and 1.0)
Return: pixd, or null on error
Notes: (1) If fract > 0.0, it gives the fraction that the v-parameter, which is max(r,g,b), is moved from its initial value toward 255. If fract < 0.0, it gives the fraction that the v-parameter is moved from its initial value toward 0. The limiting values for fract = -1.0 (1.0) thus set the v-parameter to 0 (255). (2) If fract = 0, no modification is requested; return a copy unless in-place, in which case this is a no-op. (3) See discussion of color-modification methods, in coloring.c.
Input: pixd (<optional> can be null or equal to pixs)
pixs (32 bpp rgb)
fract (between -1.0 and 1.0)
Return: pixd, or null on error
Notes: (1) pixd must either be null or equal to pixs. For in-place operation, set pixd == pixs: pixEqualizeTRC(pixs, pixs, ...); To get a new image, set pixd == null: pixd = pixEqualizeTRC(NULL, pixs, ...); (2) Use fract > 0.0 to increase hue value; < 0.0 to decrease it. 1.0 (or -1.0) represents a 360 degree rotation; i.e., no change. (3) If no modification is requested (fract = -1.0 or 0 or 1.0), return a copy unless in-place, in which case this is a no-op. (4) See discussion of color-modification methods, in coloring.c.
Input: pixd (<optional> can be null, existing or equal to pixs)
pixs (32 bpp rgb)
fract (between -1.0 and 1.0)
Return: pixd, or null on error
Notes: (1) If fract > 0.0, it gives the fraction that the pixel saturation is moved from its initial value toward 255. If fract < 0.0, it gives the fraction that the pixel saturation is moved from its initial value toward 0. The limiting values for fract = -1.0 (1.0) thus set the saturation to 0 (255). (2) If fract = 0, no modification is requested; return a copy unless in-place, in which case this is a no-op. (3) See discussion of color-modification methods, in coloring.c.
Input: pixs
sequence (string specifying sequence)
dispsep (controls debug display of each result in the sequence:
0: no output
> 0: gives horizontal separation in pixels between
successive displays
< 0: pdf output; abs(dispsep) is used for naming)
Return: pixd, or null on error
Notes: (1) This does rasterop morphology on binary images, using composite operations for extra speed on large Sels. (2) Safe closing is used atomically. However, if you implement a closing as a sequence with a dilation followed by an erosion, it will not be safe, and to ensure that you have no boundary effects you must add a border in advance and remove it at the end. (3) For other usage details, see the notes for pixMorphSequence(). (4) The sequence string is formatted as follows:
Input: pixs
sequence (string specifying sequence)
dispsep (controls debug display of each result in the sequence:
0: no output
> 0: gives horizontal separation in pixels between
successive displays
< 0: pdf output; abs(dispsep) is used for naming)
Return: pixd, or null on error
Notes: (1) This does dwa morphology on binary images, using brick Sels. (2) This runs a pipeline of operations; no branching is allowed. (3) It implements all brick Sels that have dimensions up to 63 on each side, using a composite (linear + comb) when useful. (4) A new image is always produced; the input image is not changed. (5) This contains an interpreter, allowing sequences to be generated and run. (6) See pixMorphSequence() for further information about usage.
Input: pixd (usual 3 choices: null, == pixs, != pixs)
pixs (1 bpp)
operation (L_MORPH_DILATE, L_MORPH_ERODE,
L_MORPH_OPEN, L_MORPH_CLOSE)
sel name
Return: pixd
Notes: (1) This simply adds a border, calls the appropriate pixFMorphopGen_*(), and removes the border. See the notes for that function. (2) The size of the border depends on the operation and the boundary conditions.
Top-level fast binary morphology with auto-generated sels
PIX *pixMorphDwa_1() PIX *pixFMorphopGen_1()
Input: pixd (usual 3 choices: null, == pixs, != pixs)
pixs (1 bpp)
operation (L_MORPH_DILATE, L_MORPH_ERODE,
L_MORPH_OPEN, L_MORPH_CLOSE)
sel name
Return: pixd
Notes: (1) This simply adds a border, calls the appropriate pixFMorphopGen_*(), and removes the border. See the notes for that function. (2) The size of the border depends on the operation and the boundary conditions.
Input: pixd (usual 3 choices: null, == pixs, != pixs)
pixs (1 bpp)
operation (L_MORPH_DILATE, L_MORPH_ERODE,
L_MORPH_OPEN, L_MORPH_CLOSE)
sel name
Return: pixd
Notes: (1) This simply adds a border, calls the appropriate pixFMorphopGen_*(), and removes the border. See the notes for that function. (2) The size of the border depends on the operation and the boundary conditions.
Top-level fast binary morphology with auto-generated sels
PIX *pixMorphDwa_2() PIX *pixFMorphopGen_2()
Input: pixd (usual 3 choices: null, == pixs, != pixs)
pixs (1 bpp)
operation (L_MORPH_DILATE, L_MORPH_ERODE,
L_MORPH_OPEN, L_MORPH_CLOSE)
sel name
Return: pixd
Notes: (1) This simply adds a border, calls the appropriate pixFMorphopGen_*(), and removes the border. See the notes for that function. (2) The size of the border depends on the operation and the boundary conditions.
Input: pixs
hsize (of Sel; must be odd; origin implicitly in center)
vsize (ditto)
smoothing (half-width of convolution smoothing filter.
The width is (2 * smoothing + 1), so 0 is no-op.)
Return: pixd, or null on error
Input: pixs
sequence (string specifying sequence)
dispsep (controls debug display of each result in the sequence:
0: no output
> 0: gives horizontal separation in pixels between
successive displays
< 0: pdf output; abs(dispsep) is used for naming)
Return: pixd, or null on error
Notes: (1) This does rasterop morphology on binary images. (2) This runs a pipeline of operations; no branching is allowed. (3) This only uses brick Sels, which are created on the fly. In the future this will be generalized to extract Sels from a Sela by name. (4) A new image is always produced; the input image is not changed. (5) This contains an interpreter, allowing sequences to be generated and run. (6) The format of the sequence string is defined below. (7) In addition to morphological operations, rank order reduction and replicated expansion allow operations to take place downscaled by a power of 2. (8) Intermediate results can optionally be displayed. (9) Thanks to Dar-Shyang Lee, who had the idea for this and built the first implementation. (10) The sequence string is formatted as follows:
| LEPT_DLL PIX* pixMorphSequenceByComponent | ( | PIX * | pixs, |
| const char * | sequence, | ||
| l_int32 | connectivity, | ||
| l_int32 | minw, | ||
| l_int32 | minh, | ||
| BOXA ** | pboxa | ||
| ) |
Input: pixs (1 bpp)
sequence (string specifying sequence)
connectivity (4 or 8)
minw (minimum width to consider; use 0 or 1 for any width)
minh (minimum height to consider; use 0 or 1 for any height)
&boxa (<optional> return boxa of c.c. in pixs)
Return: pixd, or null on error
Notes: (1) See pixMorphSequence() for composing operation sequences. (2) This operates separately on each c.c. in the input pix. (3) The dilation does NOT increase the c.c. size; it is clipped to the size of the original c.c. This is necessary to keep the c.c. independent after the operation. (4) You can specify that the width and/or height must equal or exceed a minimum size for the operation to take place. (5) Use NULL for boxa to avoid returning the boxa.
| LEPT_DLL PIX* pixMorphSequenceByRegion | ( | PIX * | pixs, |
| PIX * | pixm, | ||
| const char * | sequence, | ||
| l_int32 | connectivity, | ||
| l_int32 | minw, | ||
| l_int32 | minh, | ||
| BOXA ** | pboxa | ||
| ) |
Input: pixs (1 bpp)
pixm (mask specifying regions)
sequence (string specifying sequence)
connectivity (4 or 8, used on mask)
minw (minimum width to consider; use 0 or 1 for any width)
minh (minimum height to consider; use 0 or 1 for any height)
&boxa (<optional> return boxa of c.c. in pixm)
Return: pixd, or null on error
Notes: (1) See pixMorphCompSequence() for composing operation sequences. (2) This operates separately on the region in pixs corresponding to each c.c. in the mask pixm. It differs from pixMorphSequenceByComponent() in that the latter does not have a pixm (mask), but instead operates independently on each component in pixs. (3) Dilation will NOT increase the region size; the result is clipped to the size of the mask region. This is necessary to make regions independent after the operation. (4) You can specify that the width and/or height of a region must equal or exceed a minimum size for the operation to take place. (5) Use NULL for @pboxa to avoid returning the boxa.
Input: pixs
sequence (string specifying sequence)
dispsep (controls debug display of each result in the sequence:
0: no output
> 0: gives horizontal separation in pixels between
successive displays
< 0: pdf output; abs(dispsep) is used for naming)
Return: pixd, or null on error
Notes: (1) This does dwa morphology on binary images. (2) This runs a pipeline of operations; no branching is allowed. (3) This only uses brick Sels that have been pre-compiled with dwa code. (4) A new image is always produced; the input image is not changed. (5) This contains an interpreter, allowing sequences to be generated and run. (6) See pixMorphSequence() for further information about usage.
| LEPT_DLL PIX* pixMorphSequenceMasked | ( | PIX * | pixs, |
| PIX * | pixm, | ||
| const char * | sequence, | ||
| l_int32 | dispsep | ||
| ) |
Input: pixs (1 bpp)
pixm (<optional> 1 bpp mask)
sequence (string specifying sequence of operations)
dispsep (horizontal separation in pixels between
successive displays; use zero to suppress display)
Return: pixd, or null on error
Notes: (1) This applies the morph sequence to the image, but only allows changes in pixs for pixels under the background of pixm. (2) If pixm is NULL, this is just pixMorphSequence().
Input: pixs (32 bpp)
factor
offset (same as used for initialization)
Return: 0 if OK; 1 on error
Notes: (1) The offset must be >= 0 and should not exceed 0x40000000. (2) This multiplies each pixel, relative to offset, by the input factor. (3) The result is returned with the offset back in place.
| LEPT_DLL PIX* pixMultConstantColor | ( | PIX * | pixs, |
| l_float32 | rfact, | ||
| l_float32 | gfact, | ||
| l_float32 | bfact | ||
| ) |
Input: pixs (8, 16 or 32 bpp)
val (>= 0.0; amount to multiply by each pixel)
Return: 0 if OK, 1 on error
Notes: (1) In-place operation; val must be >= 0. (2) No clipping for 32 bpp. (3) For 8 and 16 bpp, the result is clipped to 0xff and 0xffff, rsp.
Input: pixd (can be NULL or pixs)
pixs (32 bpp rgb)
box (region for filtering; can be NULL)
color (32 bit color in 0xrrggbb00 format)
Return: pixd always
Notes: (1) This filters all pixels in the specified region by multiplying each component by the input color. This leaves black invariant and transforms white to the input color. (2) If pixd == pixs, this is done in-place. (3) If box == NULL, this is performed on all of pixs.
| LEPT_DLL l_int32 pixNumberOccupiedOctcubes | ( | PIX * | pix, |
| l_int32 | level, | ||
| l_int32 | mincount, | ||
| l_float32 | minfract, | ||
| l_int32 * | pncolors | ||
| ) |
Input: pix (32 bpp)
level (of octcube)
mincount (minimum num pixels in an octcube to be counted;
-1 to not use)
minfract (minimum fract of pixels in an octcube to be
counted; -1 to not use)
&ncolors (<return> number of occupied octcubes)
Return: 0 if OK, 1 on error
Notes: (1) Exactly one of (@mincount, @minfract) must be -1, so, e.g., if @mincount == -1, then we use @minfract. (2) If all occupied octcubes are to count, set @mincount == 1. Setting @minfract == 0.0 is taken to mean the same thing.
pixNumColors() Input: pixs (2, 4, 8, 32 bpp) factor (subsampling factor; integer) &ncolors (<return> the number of colors found, or 0 if there are more than 256) Return: 0 if OK, 1 on error.
Notes: (1) This returns the actual number of colors found in the image, even if there is a colormap. If @factor == 1 and the number of colors differs from the number of entries in the colormap, a warning is issued. (2) Use @factor == 1 to find the actual number of colors. Use @factor > 1 to quickly find the approximate number of colors. (3) For d = 2, 4 or 8 bpp grayscale, this returns the number of colors found in the image in 'ncolors'. (4) For d = 32 bpp (rgb), if the number of colors is greater than 256, this returns 0 in 'ncolors'.
| LEPT_DLL l_int32 pixNumSignificantGrayColors | ( | PIX * | pixs, |
| l_int32 | darkthresh, | ||
| l_int32 | lightthresh, | ||
| l_float32 | minfract, | ||
| l_int32 | factor, | ||
| l_int32 * | pncolors | ||
| ) |
Input: pixs (8 bpp gray)
darkthresh (dark threshold for minimum intensity to be
considered; typ. 20)
lightthresh (threshold near white, for maximum intensity
to be considered; typ. 236)
minfract (minimum fraction of all pixels to include a level
as significant; typ. 0.0001; should be < 0.001)
factor (subsample factor; integer >= 1)
&ncolors (<return> number of significant colors; 0 on error)
Return: 0 if OK, 1 on error
Notes: (1) This function is asking the question: how many perceptually significant gray color levels are in this pix? A color level must meet 3 criteria to be significant:
Input: pixs (32 bpp rgb)
level (significant bits for each of RGB; valid in [1...6])
&ncolors (<optional return> number of occupied cubes)
Return: numa (histogram of color pixels, or null on error)
Notes: (1) Input NULL for &ncolors to prevent computation and return value.
| LEPT_DLL PIX* pixOctcubeQuantFromCmap | ( | PIX * | pixs, |
| PIXCMAP * | cmap, | ||
| l_int32 | mindepth, | ||
| l_int32 | level, | ||
| l_int32 | metric | ||
| ) |
Input: pixs (32 bpp rgb)
cmap (to quantize to; insert copy into dest pix)
mindepth (minimum depth of pixd: can be 2, 4 or 8 bpp)
level (of octcube used for finding nearest color in cmap)
metric (L_MANHATTAN_DISTANCE, L_EUCLIDEAN_DISTANCE)
Return: pixd (2, 4 or 8 bpp, colormapped), or null on error
Notes: (1) In typical use, we are doing an operation, such as interpolative scaling, on a colormapped pix, where it is necessary to remove the colormap before the operation. We then want to re-quantize the RGB result using the same colormap. (2) The level is used to divide the color space into octcubes. Each input pixel is, in effect, placed at the center of an octcube at the given level, and it is mapped into the exact color (given in the colormap) that is the closest to that location. We need to know that distance, for each color in the colormap. The higher the level of the octtree, the smaller the octcubes in the color space, and hence the more accurately we can determine the closest color in the colormap; however, the size of the LUT, which is the total number of octcubes, increases by a factor of 8 for each increase of 1 level. The time required to acquire a level 4 mapping table, which has about 4K entries, is less than 1 msec, so that is the recommended minimum size to be used. At that size, the octcubes have their centers 16 units apart in each (r,g,b) direction. If two colors are in the same octcube, the one closest to the center will always be chosen. The maximum error for any component occurs when the correct color is at a cube corner and there is an incorrect color just inside the cube next to the opposite corner, giving an error of 14 units (out of 256) for each component. Using a level 5 mapping table reduces the maximum error to 6 units. (3) Typically you should use the Euclidean metric, because the resulting voronoi cells (which are generated using the actual colormap values as seeds) are convex for Euclidean distance but not for Manhattan distance. In terms of the octcubes, convexity of the voronoi cells means that if the 8 corners of any cube (of which the octcubes are special cases) are all within a cell, then every point in the cube will lie within the cell. 
(4) The depth of the output pixd is equal to the maximum of (a) @mindepth and (b) the minimum (2, 4 or 8 bpp) necessary to hold the indices in the colormap. (5) We build a mapping table from octcube to colormap index so that this function can run in a time (otherwise) independent of the number of colors in the colormap. This avoids a brute-force search for the closest colormap color to each pixel in the image. (6) This is similar to the function pixAssignToNearestColor() used for color segmentation. (7) Except for very small images or when using level > 4, it takes very little time to generate the tables, compared to the generation of the colormapped dest pix, so one would not typically use the low-level version.
| LEPT_DLL PIX* pixOctcubeQuantMixedWithGray | ( | PIX * | pixs, |
| l_int32 | depth, | ||
| l_int32 | graylevels, | ||
| l_int32 | delta | ||
| ) |
pixOctcubeQuantMixedWithGray()
Input: pixs (32 bpp rgb)
depth (of output pix)
graylevels (grayscale)
delta (threshold for deciding if a pix is color or grayscale)
Return: pixd (quantized to octcube and gray levels) or null on error
Notes: (1) Generates a colormapped image, where the colormap table values have two components: octcube values representing pixels with color content, and grayscale values for the rest. (2) The threshold (delta) is the maximum allowable difference of the max abs value of | r - g |, | r - b | and | g - b |. (3) The octcube values are the averages of all pixels that are found in the octcube, and that are far enough from gray to be considered color. This can roughly be visualized as all the points in the rgb color cube that are not within a "cylinder" of diameter approximately 'delta' along the main diagonal. (4) We want to guarantee full coverage of the rgb color space; thus, if the output depth is 4, the octlevel is 1 (2 x 2 x 2 = 8 cubes) and if the output depth is 8, the octlevel is 2 (4 x 4 x 4 = 64 cubes). (5) Consequently, we have the following constraint on the number of allowed gray levels: for 4 bpp, 8; for 8 bpp, 192.
Input: pixs (32 bpp; 24-bit color)
colors (in colormap; some number in range [128 ... 256];
the actual number of colors used will be smaller)
ditherflag (1 to dither, 0 otherwise)
Return: pixd (8 bpp with colormap), or null on error
I found one description in the literature of octree color quantization, using progressive truncation of the octree, by M. Gervautz and W. Purgathofer in Graphics Gems, pp. 287-293, ed. A. Glassner, Academic Press, 1990. Rather than setting up a fixed partitioning of the color space ab initio, as we do here, they allow the octree to be progressively truncated as new pixels are added. They need to set up some data structures that are traversed with the addition of each 24 bit pixel, in order to decide either (1) in which cluster (sub-branch of the octree) to put the pixel, or (2) whether to truncate the octree further to place the pixel in an existing cluster, or (3) which two existing clusters should be merged so that the pixel can be left to start a truncated leaf of the octree. Such dynamic truncation is considerably more complicated, and Gervautz et al. did not explain how they did it in anywhere near the detail required to check their implementation.
The simple method in pixFixedOctcubeQuant256() is very fast, and with dithering the results are good, but you can do better if the color clusters are selected adaptively from the image. We want a method that makes much better use of color samples in regions of color space with high pixel density, while also fairly representing small numbers of color pixels in low density regions. Such adaptation requires two passes through the image: the first for generating the pruned tree of color cubes and the second for computing the index into the color table for each pixel.
A relatively simple adaptive method is pixOctreeQuantByPopulation(). That function first determines if the image has very few colors, and, if so, quantizes to those colors. If there are more than 256 colors, it generates a histogram of octcube leaf occupancy at level 4, chooses the 192 most populated such leaves as the first 192 colors, and sets the remaining 64 colors to the residual average pixel values in each of the 64 level 2 octcubes. This is a bit faster than pixOctreeColorQuant(), and does very well without dithering, but for most images with dithering it is clearly inferior.
We now describe pixOctreeColorQuant(). The first pass is done on a subsampled image, because we do not need to use all the pixels in the image to generate the tree. Subsampling down to 0.25 (1/16 of the pixels) makes the program run about 1.3 times faster.
Instead of dividing the color space into 256 equal-sized regions, we initially divide it into 2^12 or 2^15 or 2^18 equal-sized octcubes. Suppose we choose to use 2^18 octcubes. This gives us 6 octree levels. We then prune back, starting from level 6. For every cube at level 6, there are 8 cubes at level 5. Call the operation of putting a cube aside as a color table entry (CTE) a "saving." We use a (in general) level-dependent threshold, and save those level 6 cubes that are above threshold. The rest are combined into the containing level 5 cube. If between 1 and 7 level 6 cubes within a level 5 cube have been saved by thresholding, then the remaining level 6 cubes in that level 5 cube are automatically saved as well, without applying a threshold. This greatly simplifies both the description of the CTEs and the later classification of each pixel as belonging to a CTE. This procedure is iterated through every cube, starting at level 5, and then 4, 3, and 2, successively. The result is that each CTE contains the entirety of a set of from 1 to 7 cubes from a given level that all belong to a single cube at the level above. We classify the CTEs in terms of the condition in which they are made as either being "threshold" or "residual." They are "threshold" CTEs if no subcubes are CTEs (that is, they contain every pixel within the cube) and the number of pixels exceeds the threshold for making a CTE. They are "residual" CTEs if at least one but not more than 7 of the subcubes have already been determined to be CTEs; this happens automatically – no threshold is applied. If all 8 subcubes are determined to be CTEs, the cube is marked as having all pixels accounted for ('bleaf' = 1) but is not saved as a CTE.
We stop the pruning at level 2, at which there are 64 sub-cubes. Any pixels not already claimed in a CTE are put in these cubes.
As the cubes are saved as color samples in the color table, the number of remaining pixels P and the number of remaining colors in the color table N are recomputed, along with the average number of pixels P/N (ppc) to go in each of the remaining colors. This running average number is used to set the threshold at the current level.
Because we are going to very small cubes at levels 6 or 5, and will dither the colors for errors, it is not necessary to compute the color center of each cluster; we can simply use the center of the cube. This gives us a minimax error condition: the maximum error is half the width of the level 2 cubes – 32 color values out of 256 – for each color sample. In practice, most of the pixels will be very much closer to the center of their cells. And with dithering, the average pixel color in a small region will be closer still. Thus with the octree quantizer, we are able to capture regions of high color pdf (probability density function) in small but accurate CTEs, and to have only a small number of pixels that end up a significant distance (with a guaranteed maximum) from their true color.
How should the threshold factor vary? Threshold factors are required for levels 2, 3, 4 and 5 in the pruning stage. The threshold for level 5 is actually applied to cubes at level 6, etc. From various experiments, it appears that the results do not vary appreciably for threshold values near 1.0. If you want more colors in smaller cubes, the threshold factors can be set lower than 1.0 for cubes at levels 4 and 5. However, if the factor is set much lower than 1.0 for levels 2 and 3, we can easily run out of colors. We put aside 64 colors in the calculation of the threshold values, because we must have 64 color centers at level 2, that will have very few pixels in most of them. If we reduce the factor for level 5 to 0.4, this will generate many level 6 CTEs, and consequently many residual cells will be formed up from those leaves, resulting in the possibility of running out of colors. Remember, the residual CTEs are mandatory, and are formed without using the threshold, regardless of the number of pixels that are absorbed.
The implementation logically has four parts:
(1) accumulation into small, fixed cells
(2) pruning back into selected CTE cubes
(3) organizing the CTEs for fast search to find
the CTE to which any image pixel belongs
(4) doing a second scan to code the image pixels by CTE
Step (1) is straightforward; we use 2^15 cells.
We've already discussed how the pruning step (2) will be performed.
Steps (3) and (4) are related, in that the organization used by step (3) determines how the search actually takes place for each pixel in step (4).
There are many ways to do step (3). Let's explore a few.
(a) The simplest is to order the cubes from highest occupancy to lowest, and traverse the list looking for the deepest match. To make this more efficient, so that we know when to stop looking, any cube that has separate CTE subcubes would be marked as such, so that we know when we hit a true leaf.
(b) Alternatively, we can order the cubes by highest occupancy separately at each level, and work upward, starting at level 5, so that when we find a match we know that it will be correct.
(c) Another approach would be to order the cubes by "address" and use a hash table to find the cube corresponding to a pixel color. I don't know how to do this with a variable length address, as each CTE will have 3*n bits, where n is the level.
(d) Another approach entirely is to put the CTE cubes into a tree, in such a way that starting from the root, and using 3 bits of address at a time, the correct branch of each octree can be taken until a leaf is found. Because a given cube can be both a leaf and also have branches going to sub-cubes, the search stops only when no marked subcubes have addresses that match the given pixel.
In the tree method, we can start with a dense infrastructure, and place the leaves corresponding to the N colors in the tree, or we can grow from the root only those branches that end directly on leaves.
What we do here is to take approach (d), and implement the tree "virtually", as a set of arrays, one array for each level of the tree. Initially we start at level 5, an array with 2^15 cubes, each with 8 subcubes. We then build nodes at levels closer to the root; at level 4 there are 2^12 nodes each with 8 subcubes; etc. Using these arrays has several advantages:
Canonical addressing guarantees that the i-th node at level k has 8 subnodes given by the 8*i ... 8*i+7 nodes at level k+1.
The pruning step works as follows. We go from the lowest level up. At each level, the threshold is found from the product of a factor near 1.0 and the ratio of unmarked pixels to remaining colors (minus the 64). We march through the space, sequentially considering a cube and its 8 subcubes. We first check those subcubes that are not already marked as CTE to see if any are above threshold, and if so, generate a CTE and mark them as such. We then determine if any of the subcubes have been marked. If so, and there are subcubes that are not marked, we generate a CTE for the cube from the remaining unmarked subcubes; this is mandatory and does not depend on how many pixels are in the set of subcubes. If none of the subcubes are marked, we aggregate their pixels into the cube containing them, but do not mark it as a CTE; that will be determined when iterating through the next level up.
When all the pixels in a cube are accounted for in one or more colors, we set the boolean 'bleaf' to true. This is the flag used to mark the cubes in the pruning step. If a cube is marked, and all 8 subcubes are marked, then it is not itself given a CTE because all pixels have already been accounted for.
Note that the pruning of the tree and labelling of the CTEs (step 2) accomplishes step 3 implicitly, because the marked and pruned tree is ready for use in labelling each pixel in step 4. We now, for every pixel in the image, traverse the tree from the root, looking for the lowest cube that is a leaf. At each level we have a cube and subcube. If we reach a subcube leaf that is marked 0, we know that the color is stored in the cube above, and we've found the CTE. Otherwise, the subcube leaf is marked 1. If we're at the last level, we've reached the final leaf and must use it. Otherwise, continue the process at the next level down.
For robustness, efficiency and high quality output, we do the following:
(1) Measure the color content of the image. If there is very little color, quantize in grayscale. (2) For efficiency, build the octree with a subsampled image if the image is larger than some threshold size. (3) Reserve an extra set of colors to prevent running out of colors when pruning the octree; specifically, during the assignment of those level 2 cells (out of the 64) that have unassigned pixels. The problem of running out is more likely to happen with small images, because the estimation we use for the number of pixels available is not accurate. (4) In the unlikely event that we run out of colors, the dithered image can be very poor. As this would only happen with very small images, and dithering is not particularly noticeable with such images, turn it off.
| LEPT_DLL PIX* pixOctreeColorQuantGeneral | ( | PIX * | pixs, |
| l_int32 | colors, | ||
| l_int32 | ditherflag, | ||
| l_float32 | validthresh, | ||
| l_float32 | colorthresh | ||
| ) |
Input: pixs (32 bpp; 24-bit color)
colors (in colormap; some number in range [128 ... 240];
the actual number of colors used will be smaller)
ditherflag (1 to dither, 0 otherwise)
validthresh (minimum fraction of pixels neither near white
nor black, required for color quantization;
typically ~0.01, but smaller for images that have
color but are nearly all white)
colorthresh (minimum fraction of pixels with color that are
not near white or black, that are required
for color quantization; typ. ~0.01, but smaller
for images that have color along with a
significant fraction of gray)
Return: pixd (8 bit with colormap), or null on error
Notes: (1) The parameters @validthresh and @colorthresh are used to determine if color quantization should be used on an image, or whether, instead, it should be quantized in grayscale. If the image has very few non-white and non-black pixels, or if those pixels that are non-white and non-black are all very close to either white or black, it is usually better to treat the color as accidental and to quantize the image to gray only. These parameters are useful if you know something a priori about the image. Perhaps you know that there is only a very small fraction of color pixels, but they're important to preserve; then you want to use a smaller value for these parameters. To disable conversion to gray and force color quantization, use @validthresh = 0.0 and @colorthresh = 0.0. (2) See pixOctreeColorQuant() for algorithmic and implementation details. This function has a more general interface. (3) See pixColorFraction() for computing the fraction of pixels that are neither white nor black, and the fraction of those pixels that have little color. From the documentation there: If pixfract is very small, there are few pixels that are neither black nor white. If colorfract is very small, the pixels that are neither black nor white have very little color content. The product 'pixfract * colorfract' gives the fraction of pixels with significant color content. We test against the product @validthresh * @colorthresh to find color in images that have either very few intermediate gray pixels or that have many such gray pixels.
Input: pixs (32 bpp rgb)
level (significant bits for each of RGB; valid for {3,4};
use 0 for default (level 4; recommended))
ditherflag (1 to dither, 0 otherwise)
Return: pixd (quantized to octcubes) or null on error
Notes: (1) This color quantization method works very well without dithering, using octcubes at two different levels: (a) the input @level, which is either 3 or 4 (b) level 2 (64 octcubes to cover the entire color space) (2) For best results, using @level = 4 is recommended. Why do we provide an option for using level 3? Because there are 512 octcubes at level 3, and for many images not more than 256 are filled. As a result, on some images a very accurate quantized representation is possible using @level = 3. (3) This first breaks up the color space into octcubes at the input @level, and computes, for each octcube, the average value of the pixels that are in it. (4) Then there are two possible situations: (a) If there are not more than 256 populated octcubes, it returns a cmapped pix with those values assigned. (b) Otherwise, it selects 192 octcubes containing the largest number of pixels and quantizes pixels within those octcubes to their average. Then, to handle the residual pixels that are not in those 192 octcubes, it generates a level 2 octree consisting of 64 octcubes, and within each octcube it quantizes the residual pixels to their average within each of those level 2 octcubes. (5) Unpopulated level 2 octcubes are represented in the colormap by their centers. This, of course, has no effect unless dithering is used for the output image. (6) The depth of pixd is the minimum required to support the number of colors found at @level; namely, 2, 4 or 8. (7) This function works particularly well on images such as maps, where there are a relatively small number of well-populated colors, but due to antialiasing and compression artifacts there may be a large number of different colors. This will pull out and represent accurately the highly populated colors, while still making a reasonable approximation for the others. (8) The highest level of octcubes allowed is 4. 
Use of higher levels typically results in having a small fraction of pixels in the most populated 192 octcubes. As a result, most of the pixels are represented at level 2, which is not sufficiently accurate. (9) Dithering shows artifacts on some images. If you plan to dither, pixOctreeColorQuant() and pixFixedOctcubeQuant256() usually give better results.
Input: pixs (32 bpp rgb)
maxcolors (8 to 256; the actual number of colors used
may be less than this)
subsample (factor for computing color distribution;
use 0 for default)
Return: pixd (4 or 8 bpp, colormapped), or null on error
pixOctreeColorQuant() is very flexible in terms of the relative depth of different cubes of the octree. By contrast, this function, pixOctreeQuantNumColors(), is also adaptive, but it supports octcube leaves at only two depths: a smaller depth that guarantees full coverage of the color space and octcubes at one level deeper for more accurate colors. Its main virtues are simplicity and speed, which are both derived from the natural indexing of the octcubes from the RGB values.
Before describing pixOctreeQuantNumColors(), consider an even simpler approach for 4 bpp with either 8 or 16 colors. With 8 colors, you simply go to level 1 octcubes and use the average color found in each cube. For 16 colors, you find which of the three colors has the largest variance at the second level, and use two indices for that color. The result is quite poor, because (1) some of the cubes are nearly empty and (2) you don't get much color differentiation for the extra 8 colors. Trust me, this method may be simple, but it isn't worth anything.
In pixOctreeQuantNumColors(), we generate colormapped images at either 4 bpp or 8 bpp. For 4 bpp, we have a minimum of 8 colors for the level 1 octcubes, plus up to 8 additional colors that are determined from the level 2 popularity. If the number of colors is between 8 and 16, the output is a 4 bpp image. If the number of colors is greater than 16, the output is a 8 bpp image.
We use a priority queue, implemented with a heap, to select the requisite number of most populated octcubes at the deepest level (level 2 for 64 or fewer colors; level 3 for more than 64 colors). These are combined with one color for each octcube one level above, which is used to span the color space of octcubes that were not included at the deeper level.
If the deepest level is 2, we combine the popular level 2 octcubes (out of a total of 64) with the 8 level 1 octcubes. If the deepest level is 3, we combine the popular level 3 octcubes (out of a total 512) with the 64 level 2 octcubes that span the color space. In the latter case, we require a minimum of 64 colors for the level 2 octcubes, plus up to 192 additional colors determined from level 3 popularity.
The parameter 'maxlevel' is the deepest octcube level that is used. The implementation also uses two LUTs, which are employed in two successive traversals of the dest image. The first maps from the src octindex at 'maxlevel' to the color table index, which is the value that is stored in the 4 or 8 bpp dest pixel. The second LUT maps from that colormap value in the dest to a new colormap value for a minimum sized colormap, stored back in the dest. It is used to remove any color map entries that correspond to color space regions that have no pixels in the source image. These regions can be either from the higher level (e.g., level 1 for 4 bpp), or from octcubes at 'maxlevel' that are unoccupied. This remapping results in the minimum number of colors used according to the constraints induced by the input 'maxcolors'. We also compute the average R, G and B color values in each region of the color space represented by a colormap entry, and store them in the colormap.
The maximum number of colors is input, which determines the following properties of the dest image and octcube regions used:
Number of colors    dest image depth    maxlevel
8 to 16             4 bpp               2
17 to 64            8 bpp               2
65 to 256           8 bpp               3
It may turn out that the number of extra colors, beyond the minimum (8 and 64 for maxlevel 2 and 3, respectively), is larger than the actual number of occupied cubes at these levels. In that case, all the pixels are contained in this subset of cubes at maxlevel, and no colormap colors are needed to represent the remainder pixels one level above. Thus, for example, in use one often finds that the pixels in an image occupy less than 192 octcubes at level 3, so they can be represented by a colormap for octcubes at level 3 only.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
sel
Return: pixd
Notes: (1) Generic morphological opening, using hits in the Sel. (2) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (3) For clarity, if the case is known, use these patterns: (a) pixd = pixOpen(NULL, pixs, ...); (b) pixOpen(pixs, pixs, ...); (c) pixOpen(pixd, pixs, ...); (4) The size of the result is determined by pixs.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd, or null on error
Notes: (1) Sel is a brick with all elements being hits (2) The origin is at (x, y) = (hsize/2, vsize/2) (3) Do separably if both hsize and vsize are > 1. (4) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (5) For clarity, if the case is known, use these patterns: (a) pixd = pixOpenBrick(NULL, pixs, ...); (b) pixOpenBrick(pixs, pixs, ...); (c) pixOpenBrick(pixd, pixs, ...); (6) The size of the result is determined by pixs.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd
Notes: (1) These implement 2D brick Sels, using linear Sels generated with selaAddBasic(). (2) A brick Sel has hits for all elements. (3) The origin of the Sel is at (x, y) = (hsize/2, vsize/2) (4) Do separably if both hsize and vsize are > 1. (5) It is necessary that both horizontal and vertical Sels of the input size are defined in the basic sela. (6) Note that we must always set or clear the border pixels before each operation, depending on the b.c. (symmetric or asymmetric). (7) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (8) For clarity, if the case is known, use these patterns: (a) pixd = pixOpenBrickDwa(NULL, pixs, ...); (b) pixOpenBrickDwa(pixs, pixs, ...); (c) pixOpenBrickDwa(pixd, pixs, ...); (9) The size of the result is determined by pixs. (10) If either linear Sel is not found, this calls the appropriate decomposable function.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd, or null on error
Notes: (1) Sel is a brick with all elements being hits (2) The origin is at (x, y) = (hsize/2, vsize/2) (3) Do compositely for each dimension > 1. (4) Do separably if both hsize and vsize are > 1. (5) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (6) For clarity, if the case is known, use these patterns: (a) pixd = pixOpenCompBrick(NULL, pixs, ...); (b) pixOpenCompBrick(pixs, pixs, ...); (c) pixOpenCompBrick(pixd, pixs, ...); (7) The dimensions of the resulting image are determined by pixs. (8) CAUTION: both hsize and vsize are being decomposed. The decomposer chooses a product of sizes (call them 'terms') for each that is close to the input size, but not necessarily equal to it. It attempts to optimize: (a) for consistency with the input values: the product of terms is close to the input size (b) for efficiency of the operation: the sum of the terms is small; ideally about twice the square root of the input size. So, for example, if the input hsize = 37, which is a prime number, the decomposer will break this into two terms, 6 and 6, so that the net result is an opening with hsize = 36.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd
Notes: (1) These implement a separable composite opening with 2D brick Sels. (2) For efficiency, it may decompose each linear morphological operation into two (brick + comb). (3) A brick Sel has hits for all elements. (4) The origin of the Sel is at (x, y) = (hsize/2, vsize/2) (5) Do separably if both hsize and vsize are > 1. (6) It is necessary that both horizontal and vertical Sels of the input size are defined in the basic sela. (7) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (8) For clarity, if the case is known, use these patterns: (a) pixd = pixOpenCompBrickDwa(NULL, pixs, ...); (b) pixOpenCompBrickDwa(pixs, pixs, ...); (c) pixOpenCompBrickDwa(pixd, pixs, ...); (9) The size of pixd is determined by pixs. (10) CAUTION: both hsize and vsize are being decomposed. The decomposer chooses a product of sizes (call them 'terms') for each that is close to the input size, but not necessarily equal to it. It attempts to optimize: (a) for consistency with the input values: the product of terms is close to the input size (b) for efficiency of the operation: the sum of the terms is small; ideally about twice the square root of the input size. So, for example, if the input hsize = 37, which is a prime number, the decomposer will break this into two terms, 6 and 6, so that the net result is an opening with hsize = 36.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
hsize (width of brick Sel)
vsize (height of brick Sel)
Return: pixd
(1) There are three cases:
(a) pixd == null (result into new pixd)
(b) pixd == pixs (in-place; writes result back to pixs)
(c) pixd != pixs (puts result into existing pixd)
(2) There is no need to call this directly: pixOpenCompBrickDwa()
calls this function if either brick dimension exceeds 63.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs)
pixs (1 bpp)
sel
Return: pixd
Notes: (1) Generalized morphological opening, using both hits and misses in the Sel. (2) This does a hit-miss transform, followed by a dilation using the hits. (3) There are three cases: (a) pixd == null (result into new pixd) (b) pixd == pixs (in-place; writes result back to pixs) (c) pixd != pixs (puts result into existing pixd) (4) For clarity, if the case is known, use these patterns: (a) pixd = pixOpenGeneralized(NULL, pixs, ...); (b) pixOpenGeneralized(pixs, pixs, ...); (c) pixOpenGeneralized(pixd, pixs, ...); (5) The size of the result is determined by pixs.
Input: pixs
hsize (of Sel; must be odd; origin implicitly in center)
vsize (ditto)
Return: pixd
Notes: (1) Sel is a brick with all elements being hits (2) If hsize = vsize = 1, just returns a copy.
Input: pixs (8 bpp, not cmapped)
hsize (1 or 3)
vsize (1 or 3)
Return: pixd, or null on error
Notes: (1) Special case for 1x3, 3x1 or 3x3 brick sel (all hits) (2) If hsize = vsize = 1, just returns a copy. (3) It would be nice not to add a border, but it is required to get the same results as for the general case.
Input: pixd (<optional>; this can be null, equal to pixs1,
or different from pixs1)
pixs1 (can be == pixd)
pixs2 (must be != pixd)
Return: pixd always
Notes: (1) This gives the union of two images with equal depth, aligning them to the UL corner. pixs1 and pixs2 need not have the same width and height. (2) There are 3 cases: (a) pixd == null, (src1 | src2) --> new pixd (b) pixd == pixs1, (src1 | src2) --> src1 (in-place) (c) pixd != pixs1, (src1 | src2) --> input pixd (3) For clarity, if the case is known, use these patterns: (a) pixd = pixOr(NULL, pixs1, pixs2); (b) pixOr(pixs1, pixs1, pixs2); (c) pixOr(pixd, pixs1, pixs2); (4) The size of the result is determined by pixs1. (5) The depths of pixs1 and pixs2 must be equal. (6) Note carefully that the order of pixs1 and pixs2 only matters for the in-place case. For in-place, you must have pixd == pixs1. Setting pixd == pixs2 gives an incorrect result: the copy puts pixs1 image data in pixs2, and the rasterop is then between pixs2 and pixs2 (a no-op).
| LEPT_DLL l_int32 pixOrientDetect | ( | PIX * | pixs, |
| l_float32 * | pupconf, | ||
| l_float32 * | pleftconf, | ||
| l_int32 | mincount, | ||
| l_int32 | debug | ||
| ) |
Input: pixs (1 bpp, deskewed, English text, 150 - 300 ppi)
&upconf (<optional return> ; may be null)
&leftconf (<optional return> ; may be null)
mincount (min number of up + down; use 0 for default)
debug (1 for debug output; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) See "Measuring document image skew and orientation" Dan S. Bloomberg, Gary E. Kopec and Lakshmi Dasari IS&T/SPIE EI'95, Conference 2422: Document Recognition II pp 302-316, Feb 6-7, 1995, San Jose, CA (2) upconf is the normalized difference between up ascenders and down ascenders. The image is analyzed without rotation for being rightside-up or upside-down. Set &upconf to null to skip this operation. (3) leftconf is the normalized difference between up ascenders and down ascenders in the image after it has been rotated 90 degrees clockwise. With that rotation, ascenders projecting to the left in the source image will project up in the rotated image. We compute this by rotating 90 degrees clockwise and testing for up and down ascenders. Set &leftconf to null to skip this operation. (4) Note that upconf and leftconf are not linear measures of confidence, e.g., in a range between 0 and 100. They measure how far you are out on the tail of a (presumably) normal distribution. For example, a confidence of 10 means that it is nearly certain that the difference did not happen at random. However, these values must be interpreted cautiously, taking into consideration the estimated prior for a particular orientation or mirror flip. The up-down signal is very strong if applied to text with ascenders up and down, and relatively weak for text at 90 degrees, but even at 90 degrees, the difference can look significant. For example, suppose the ascenders are oriented horizontally, but the test is done vertically. Then upconf can be < -MIN_CONF_FOR_UP_DOWN, suggesting the text may be upside-down. However, if instead the test were done horizontally, leftconf will be very much larger (in absolute value), giving the correct orientation. 
(5) If you compute both upconf and leftconf, and there is sufficient signal, the following table determines the cw angle necessary to rotate pixs so that the text is rightside-up: 0 deg : upconf >> 1, abs(upconf) >> abs(leftconf) 90 deg : leftconf >> 1, abs(leftconf) >> abs(upconf) 180 deg : upconf << -1, abs(upconf) >> abs(leftconf) 270 deg : leftconf << -1, abs(leftconf) >> abs(upconf) (6) One should probably not interpret the direction unless there are a sufficient number of counts for both orientations, in which case neither upconf nor leftconf will be 0.0. (7) Uses rasterop implementation of HMT.
| LEPT_DLL l_int32 pixOrientDetectDwa | ( | PIX * | pixs, |
| l_float32 * | pupconf, | ||
| l_float32 * | pleftconf, | ||
| l_int32 | mincount, | ||
| l_int32 | debug | ||
| ) |
Input: pixs (1 bpp, deskewed, English text)
&upconf (<optional return> ; may be null)
&leftconf (<optional return> ; may be null)
mincount (min number of up + down; use 0 for default)
debug (1 for debug output; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) Same interface as for pixOrientDetect(). See notes there for usage. (2) Uses auto-gen'd code for the Sels defined at the top of this file, with some renaming of functions. The auto-gen'd code is in fliphmtgen.c, and can be generated by a simple executable; see prog/flipselgen.c. (3) This runs about 2.5 times faster than the pixOrientDetect().
| LEPT_DLL l_int32 pixOtsuAdaptiveThreshold | ( | PIX * | pixs, |
| l_int32 | sx, | ||
| l_int32 | sy, | ||
| l_int32 | smoothx, | ||
| l_int32 | smoothy, | ||
| l_float32 | scorefract, | ||
| PIX ** | ppixth, | ||
| PIX ** | ppixd | ||
| ) |
Input: pixs (8 bpp)
sx, sy (desired tile dimensions; actual size may vary)
smoothx, smoothy (half-width of convolution kernel applied to
threshold array: use 0 for no smoothing)
scorefract (fraction of the max Otsu score; typ. 0.1;
use 0.0 for standard Otsu)
&pixth (<optional return> array of threshold values
found for each tile)
&pixd (<optional return> thresholded input pixs, based on
the threshold array)
Return: 0 if OK, 1 on error
Notes: (1) The Otsu method finds a single global threshold for an image. This function allows a locally adapted threshold to be found for each tile into which the image is broken up. (2) The array of threshold values, one for each tile, constitutes a highly downscaled image. This array is optionally smoothed using a convolution. The full width and height of the convolution kernel are (2 * @smoothx + 1) and (2 * @smoothy + 1). (3) The minimum tile dimension allowed is 16. If such small tiles are used, it is recommended to use smoothing, because without smoothing, each small tile determines the splitting threshold independently. A tile that is entirely in the image bg will then hallucinate fg, resulting in a very noisy binarization. The smoothing should be large enough that no tile is only influenced by one type (fg or bg) of pixels, because it will force a split of its pixels. (4) To get a single global threshold for the entire image, use input values of @sx and @sy that are larger than the image. For this situation, the smoothing parameters are ignored. (5) The threshold values partition the image pixels into two classes: one whose values are less than the threshold and another whose values are greater than or equal to the threshold. This is the same use of 'threshold' as in pixThresholdToBinary(). (6) The scorefract is the fraction of the maximum Otsu score, which is used to determine the range over which the histogram minimum is searched. See numaSplitDistribution() for details on the underlying method of choosing a threshold. (7) This enables a modified version of the Otsu criterion for splitting the distribution of pixels in each tile into a fg and bg part. The modification consists of searching for a minimum in the histogram over a range of pixel values where the Otsu score is within a defined fraction, @scorefract, of the max score. To get the original Otsu algorithm, set @scorefract == 0. (8) N.B. 
This method is NOT recommended for images with weak text and significant background noise, such as bleedthrough, because of the problem noted in (3) above for tiling. Use Sauvola.
| LEPT_DLL PIX* pixOtsuThreshOnBackgroundNorm | ( | PIX * | pixs, |
| PIX * | pixim, | ||
| l_int32 | sx, | ||
| l_int32 | sy, | ||
| l_int32 | thresh, | ||
| l_int32 | mincount, | ||
| l_int32 | bgval, | ||
| l_int32 | smoothx, | ||
| l_int32 | smoothy, | ||
| l_float32 | scorefract, | ||
| l_int32 * | pthresh | ||
| ) |
pixOtsuThreshOnBackgroundNorm()
Input: pixs (8 bpp grayscale; not colormapped)
pixim (<optional> 1 bpp 'image' mask; can be null)
sx, sy (tile size in pixels)
thresh (threshold for determining foreground)
mincount (min threshold on counts in a tile)
bgval (target bg val; typ. > 128)
smoothx (half-width of block convolution kernel width)
smoothy (half-width of block convolution kernel height)
scorefract (fraction of the max Otsu score; typ. 0.1)
&thresh (<optional return> threshold value that was
used on the normalized image)
Return: pixd (1 bpp thresholded image), or null on error
Notes: (1) This does background normalization followed by Otsu thresholding. Otsu binarization attempts to split the image into two roughly equal sets of pixels, and it does a very poor job when there are large amounts of dark background. By doing a background normalization first, to get the background near 255, we remove this problem. Then we use a modified Otsu to estimate the best global threshold on the normalized image. (2) See pixBackgroundNorm() for meaning and typical values of input parameters. For a start, you can try: sx, sy = 10, 15 thresh = 100 mincount = 50 bgval = 255 smoothx, smoothy = 2
Input: pixs (any depth, colormap OK)
factor (subsampling for centroid; >= 1)
Return: pixd (padded with white pixels), or NULL on error.
Notes: (1) This adds minimum white padding to an 8 bpp pix, such that the centroid of the photometric inverse is in the center of the resulting image. Thus in computing the centroid, black pixels have weight 255, and white pixels have weight 0.
Input: pixs (any depth, can be cmapped)
boxa (of boxes, to paint)
val (rgba color to paint)
Return: pixd (with painted boxes), or null on error
Notes: (1) If pixs is 1 bpp or is colormapped, it is converted to 8 bpp and the boxa is painted using a colormap; otherwise, it is converted to 32 bpp rgb. (2) There are several ways to display a box on an image:
Input: pixs (any depth, can be cmapped)
boxa (of boxes, to paint)
Return: pixd (with painted boxes), or null on error
Notes: (1) If pixs is 1 bpp, we paint the boxa using a colormap; otherwise, we convert to 32 bpp. (2) We use up to 254 different colors for painting the regions. (3) If boxes overlap, the later ones paint over earlier ones.
| LEPT_DLL l_int32 pixPaintSelfThroughMask | ( | PIX * | pixd, |
| PIX * | pixm, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | searchdir, | ||
| l_int32 | mindist, | ||
| l_int32 | tilesize, | ||
| l_int32 | ntiles, | ||
| l_int32 | distblend | ||
| ) |
Input: pixd (8 bpp gray or 32 bpp rgb; not colormapped)
pixm (1 bpp mask)
x, y (origin of pixm relative to pixd; must not be negative)
searchdir (L_HORIZ, L_VERT or L_BOTH_DIRECTIONS)
mindist (min distance of nearest tile edge to box; >= 0)
tilesize (requested size for tiling; may be reduced)
ntiles (number of tiles tested in each row/column)
distblend (distance outside the fg used for blending with pixs)
Return: 0 if OK; 1 on error
Notes: (1) In-place operation; pixd is changed. (2) If pixm == NULL, it's a no-op. (3) The mask origin is placed at (x,y) on pixd, and the operation is clipped to the intersection of pixd and the fg of the mask. (4) @tilesize is the requested size for tiling. The actual size for each c.c. will be bounded by the minimum dimension of the c.c. (5) For @mindist, @searchdir and @ntiles, see pixFindRepCloseTile(). They determine the set of possible tiles that can be used to build a larger mirrored tile to paint onto pixd through the c.c. of pixm. (6) @distblend is used for alpha blending. It is only applied if there is exactly one c.c. in the mask. Use distblend == 0 to skip blending and just paint through the 1 bpp mask. (7) To apply blending to more than 1 component, call this function repeatedly with @pixm, @x and @y representing one component of the mask each time. This would be done as follows, for an underlying image pixs and mask pixm of components to fill: Boxa *boxa = pixConnComp(pixm, &pixa, 8); n = boxaGetCount(boxa); for (i = 0; i < n; i++) { Pix *pix = pixaGetPix(pixa, i, L_CLONE); Box *box = pixaGetBox(pixa, i, L_CLONE); boxGetGeometry(box, &bx, &by, &bw, &bh); pixPaintSelfThroughMask(pixs, pix, bx, by, searchdir, mindist, tilesize, ntiles, distblend); pixDestroy(&pix); boxDestroy(&box); } pixaDestroy(&pixa); boxaDestroy(&boxa); (8) If no tiles can be found, this falls back to estimating the color near the boundary of the region to be textured. (9) This can be used to replace the pixels in some regions of an image by selected neighboring pixels. The mask represents the pixels to be replaced. For each connected component in the mask, this function selects up to two tiles of neighboring pixels to be used for replacement of pixels represented by the component (i.e., under the FG of that component in the mask). After selection, mirror replication is used to generate an image that is large enough to cover the component. 
Alpha blending can also be used outside of the component, but near the edge, to blur the transition between painted and original pixels.
Input: pixd (1, 2, 4, 8, 16 or 32 bpp; or colormapped)
pixm (<optional> 1 bpp mask)
x, y (origin of pixm relative to pixd; can be negative)
val (pixel value to set at each masked pixel)
Return: 0 if OK; 1 on error
Notes: (1) In-place operation. Calls pixSetMaskedCmap() for colormapped images. (2) For 1, 2, 4, 8 and 16 bpp gray, we take the appropriate number of least significant bits of val. (3) If pixm == NULL, it's a no-op. (4) The mask origin is placed at (x,y) on pixd, and the operation is clipped to the intersection of rectangles. (5) For rgb, the components in val are in the canonical locations, with red in location COLOR_RED, etc. (6) Implementation detail 1: For painting with val == 0 or val == maxval, you can use rasterop. If val == 0, invert the mask so that it's 0 over the region into which you want to write, and use PIX_SRC & PIX_DST to clear those pixels. To write with val = maxval (all 1's), use PIX_SRC | PIX_DST to set all bits under the mask. (7) Implementation detail 2: The rasterop trick can be used for depth > 1 as well. For val == 0, generate the mask for depth d from the binary mask using pixmd = pixUnpackBinary(pixm, d, 1); and use pixRasterop() with PIX_MASK. For val == maxval, pixmd = pixUnpackBinary(pixm, d, 0); and use pixRasterop() with PIX_PAINT. But note that if d == 32 bpp, it is about 3x faster to use the general implementation (not pixRasterop()). (8) Implementation detail 3: It might be expected that the switch in the inner loop will cause large branching delays and should be avoided. This is not the case, because the entrance is always the same and the compiler can correctly predict the jump.
Input: pixs (any depth)
pta (set of points on which to plot)
outformat (GPLOT_PNG, GPLOT_PS, GPLOT_EPS, GPLOT_X11,
GPLOT_LATEX)
title (<optional> for plot; can be null)
Return: 0 if OK, 1 on error
Notes: (1) We remove any existing colormap and clip the pta to the input pixs. (2) This is a debugging function, and does not remove temporary plotting files that it generates. (3) If the image is RGB, three separate plots are generated.
Input: fp (file stream)
pix
text (<optional> identifying string; can be null)
Return: 0 if OK, 1 on error
| LEPT_DLL SARRAY* pixProcessBarcodes | ( | PIX * | pixs, |
| l_int32 | format, | ||
| l_int32 | method, | ||
| SARRAY ** | psaw, | ||
| l_int32 | debugflag | ||
| ) |
Input: pixs (any depth)
format (L_BF_ANY, L_BF_CODEI2OF5, L_BF_CODE93, ...)
method (L_USE_WIDTHS, L_USE_WINDOWS)
&saw (<optional return> sarray of bar widths)
debugflag (use 1 to generate debug output)
Return: sarray (text of barcodes), or null if none found or on error
Input: pixs (all depths; colormap ok)
vc (vector of 8 coefficients for projective transformation)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error
Notes: (1) Brings in either black or white pixels from the boundary (2) Removes any existing colormap, if necessary, before transforming
Input: pixs (32 bpp)
vc (vector of 8 coefficients for projective transformation)
colorval (e.g., 0 to bring in BLACK, 0xffffff00 for WHITE)
Return: pixd, or null on error
Input: pixs (8 bpp)
vc (vector of 8 coefficients for projective transformation)
grayval (0 to bring in BLACK, 255 for WHITE)
Return: pixd, or null on error
Input: pixs (all depths; colormap ok)
ptad (4 pts of final coordinate space)
ptas (4 pts of initial coordinate space)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error
Notes: (1) Brings in either black or white pixels from the boundary (2) Removes any existing colormap, if necessary, before transforming
Input: pixs (32 bpp)
ptad (4 pts of final coordinate space)
ptas (4 pts of initial coordinate space)
colorval (e.g., 0 to bring in BLACK, 0xffffff00 for WHITE)
Return: pixd, or null on error
Input: pixs (8 bpp)
ptad (4 pts of final coordinate space)
ptas (4 pts of initial coordinate space)
grayval (0 to bring in BLACK, 255 for WHITE)
Return: pixd, or null on error
| LEPT_DLL PIX* pixProjectivePtaWithAlpha | ( | PIX * | pixs, |
| PTA * | ptad, | ||
| PTA * | ptas, | ||
| PIX * | pixg, | ||
| l_float32 | fract, | ||
| l_int32 | border | ||
| ) |
Input: pixs (32 bpp rgb)
ptad (4 pts of final coordinate space)
ptas (4 pts of initial coordinate space)
pixg (<optional> 8 bpp, for alpha channel, can be null)
fract (between 0.0 and 1.0, with 0.0 fully transparent
and 1.0 fully opaque)
border (of pixels added to capture transformed source pixels)
Return: pixd, or null on error
Notes: (1) The alpha channel is transformed separately from pixs, and aligns with it, being fully transparent outside the boundary of the transformed pixs. For pixels that are fully transparent, a blending function like pixBlendWithGrayMask() will give zero weight to corresponding pixels in pixs. (2) If pixg is NULL, it is generated as an alpha layer that is partially opaque, using @fract. Otherwise, it is cropped to pixs if required and @fract is ignored. The alpha channel in pixs is never used. (3) Colormaps are removed. (4) When pixs is transformed, it doesn't matter what color is brought in because the alpha channel will be transparent (0) there. (5) To avoid losing source pixels in the destination, it may be necessary to add a border to the source pix before doing the projective transformation. This can be any non-negative number. (6) The input @ptad and @ptas are in a coordinate space before the border is added. Internally, we compensate for this before doing the projective transform on the image after the border is added. (7) The default setting for the border values in the alpha channel is 0 (transparent) for the outermost ring of pixels and (0.5 * fract * 255) for the second ring. When blended over a second image, this (a) shrinks the visible image to make a clean overlap edge with an image below, and (b) softens the edges by weakening the aliasing there. Use l_setAlphaMaskBorder() to change these values.
Input: pixs (all depths)
vc (vector of 8 coefficients for projective transformation)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error
Notes: (1) Brings in either black or white pixels from the boundary. (2) Retains colormap, which you can do for a sampled transform. (3) For 8 or 32 bpp, much better quality is obtained by the somewhat slower pixProjective(). See that function for relative timings between sampled and interpolated.
Input: pixs (all depths)
ptad (4 pts of final coordinate space)
ptas (4 pts of initial coordinate space)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error
Notes: (1) Brings in either black or white pixels from the boundary. (2) Retains colormap, which you can do for a sampled transform. (3) No 3 of the 4 points may be collinear. (4) For 8 and 32 bpp pix, better quality is obtained by the somewhat slower pixProjectivePta(). See that function for relative timings between sampled and interpolated.
| LEPT_DLL PIX* pixQuadraticVShear | ( | PIX * | pixs, |
| l_int32 | dir, | ||
| l_int32 | vmaxt, | ||
| l_int32 | vmaxb, | ||
| l_int32 | operation, | ||
| l_int32 | incolor | ||
| ) |
| LEPT_DLL PIX* pixQuadraticVShearLI | ( | PIX * | pixs, |
| l_int32 | dir, | ||
| l_int32 | vmaxt, | ||
| l_int32 | vmaxb, | ||
| l_int32 | incolor | ||
| ) |
Input: pixs (8 or 32 bpp, or colormapped)
dir (L_WARP_TO_LEFT or L_WARP_TO_RIGHT)
vmaxt (max vertical displacement at edge and at top)
vmaxb (max vertical displacement at edge and at bottom)
incolor (L_BRING_IN_WHITE or L_BRING_IN_BLACK)
Return: pixd (stretched), or null on error
Notes: (1) See pixQuadraticVShear() for details.
| LEPT_DLL PIX* pixQuadraticVShearSampled | ( | PIX * | pixs, |
| l_int32 | dir, | ||
| l_int32 | vmaxt, | ||
| l_int32 | vmaxb, | ||
| l_int32 | incolor | ||
| ) |
Input: pixs (1, 8 or 32 bpp)
dir (L_WARP_TO_LEFT or L_WARP_TO_RIGHT)
vmaxt (max vertical displacement at edge and at top)
vmaxb (max vertical displacement at edge and at bottom)
incolor (L_BRING_IN_WHITE or L_BRING_IN_BLACK)
Return: pixd (stretched), or null on error
Notes: (1) See pixQuadraticVShear() for details.
Input: pixs (8 bpp, no colormap)
nlevels (in quadtree; max allowed depends on image size)
*pix_ma (input mean accumulator; can be null)
*pfpixa (<return> mean values in quadtree)
Return: 0 if OK, 1 on error
Notes: (1) The returned fpixa has @nlevels of fpix, each containing the mean values at its level. Level 0 has a single value; level 1 has 4 values; level 2 has 16; etc.
| LEPT_DLL l_int32 pixQuadtreeVariance | ( | PIX * | pixs, |
| l_int32 | nlevels, | ||
| PIX * | pix_ma, | ||
| DPIX * | dpix_msa, | ||
| FPIXA ** | pfpixa_v, | ||
| FPIXA ** | pfpixa_rv | ||
| ) |
Input: pixs (8 bpp, no colormap)
nlevels (in quadtree)
*pix_ma (input mean accumulator; can be null)
*dpix_msa (input mean square accumulator; can be null)
*pfpixa_v (<optional return> variance values in quadtree)
*pfpixa_rv (<optional return> root variance values in quadtree)
Return: 0 if OK, 1 on error
Notes: (1) The returned fpixav and fpixarv have @nlevels of fpix, each containing at the respective levels the variance and root variance values.
| LEPT_DLL PIX* pixQuantFromCmap | ( | PIX * | pixs, |
| PIXCMAP * | cmap, | ||
| l_int32 | mindepth, | ||
| l_int32 | level, | ||
| l_int32 | metric | ||
| ) |
Input: pixs (8 bpp grayscale without cmap, or 32 bpp rgb)
cmap (to quantize to; insert copy into dest pix)
mindepth (minimum depth of pixd: can be 2, 4 or 8 bpp)
level (of octcube used for finding nearest color in cmap)
metric (L_MANHATTAN_DISTANCE, L_EUCLIDEAN_DISTANCE)
Return: pixd (2, 4 or 8 bpp, colormapped), or null on error
Notes: (1) This is a top-level wrapper for quantizing either grayscale or rgb images to a specified colormap. (2) The actual output depth is constrained by @mindepth and by the number of colors in @cmap. (3) For grayscale, @level and @metric are ignored. (4) If the cmap has color and pixs is grayscale, the color is removed from the cmap before quantizing pixs.
| LEPT_DLL l_int32 pixQuantizeIfFewColors | ( | PIX * | pixs, |
| l_int32 | maxcolors, | ||
| l_int32 | mingraycolors, | ||
| l_int32 | octlevel, | ||
| PIX ** | ppixd | ||
| ) |
Input: pixs (8 bpp gray or 32 bpp rgb)
maxcolors (max number of colors allowed to be returned
from pixColorsForQuantization(); use 0 for default)
mingraycolors (min number of gray levels that a grayscale
image is quantized to; use 0 for default)
octlevel (for octcube quantization: 3 or 4)
&pixd (<return> 2,4 or 8 bpp quantized; null if too many colors)
Return: 0 if OK, 1 on error or if pixs can't be quantized into
a small number of colors.
Notes: (1) This is a wrapper that tests if the pix can be quantized with good quality using a small number of colors. If so, it does the quantization, defining a colormap and using pixels whose value is an index into the colormap. (2) If the image has color, it is quantized with 8 bpp pixels. If the image is essentially grayscale, the pixels are either 4 or 8 bpp, depending on the size of the required colormap. (3) @octlevel = 4 generates a larger colormap and larger compressed image than @octlevel = 3. If image quality is important, you should use @octlevel = 4. (4) If the image already has a colormap, it returns a clone.
| LEPT_DLL PIX* pixRandomHarmonicWarp | ( | PIX * | pixs, |
| l_float32 | xmag, | ||
| l_float32 | ymag, | ||
| l_float32 | xfreq, | ||
| l_float32 | yfreq, | ||
| l_int32 | nx, | ||
| l_int32 | ny, | ||
| l_uint32 | seed, | ||
| l_int32 | grayval | ||
| ) |
Input: pixs (8 bpp; no colormap)
xmag, ymag (maximum magnitude of x and y distortion)
xfreq, yfreq (maximum magnitude of x and y frequency)
nx, ny (number of x and y harmonic terms)
seed (of random number generator)
grayval (color brought in from the outside;
0 for black, 255 for white)
Return: pixd (8 bpp; no colormap), or null on error
Notes: (1) To generate the warped image p(x',y'), set up the transforms that are in getWarpTransform(). For each (x',y') in the dest, the warp function computes the originating location (x, y) in the src. The differences (x - x') and (y - y') are given as a sum of products of sinusoidal terms. Each term is multiplied by a maximum amplitude (in pixels), and the angle is determined by a frequency and phase, and depends on the (x', y') value of the dest. Random numbers with a variable input seed are used to allow the warping to be unpredictable. A linear interpolation is used to find the value for the source at (x, y); this value is written into the dest. (2) This can be used to generate 'captcha's, which are somewhat randomly distorted images of text. A typical set of parameters for a captcha are: xmag = 4.0 ymag = 6.0 xfreq = 0.10 yfreq = 0.13 nx = 3 ny = 3 Other examples can be found in prog/warptest.c.
| LEPT_DLL PIX* pixRankBinByStrip | ( | PIX * | pixs, |
| l_int32 | direction, | ||
| l_int32 | size, | ||
| l_int32 | nbins, | ||
| l_int32 | type | ||
| ) |
Input: pixs (32 bpp or cmapped)
direction (L_SCAN_HORIZONTAL or L_SCAN_VERTICAL)
size (of strips in scan direction)
nbins (number of equal population bins; must be > 1)
type (color selection flag)
Return: pixd (result), or null on error
Notes: (1) This generates a pix where each column represents a strip of the input image. If @direction == L_SCAN_HORIZONTAL, the input image is tiled into vertical strips of width @size, where @size is a compromise between getting better spatial columnwise resolution (small @size) and getting better columnwise statistical information (larger @size). Likewise with rows of the image if @direction == L_SCAN_VERTICAL. (2) For L_SCAN_HORIZONTAL, the output pix contains rank binned median colors in each column that correspond to a vertical strip of width @size in the input image. (3) The color selection flag is one of: L_SELECT_RED, L_SELECT_GREEN, L_SELECT_BLUE, L_SELECT_MIN, L_SELECT_MAX, L_SELECT_AVERAGE. It determines how the rank ordering is done. (4) Typical input values might be @size = 5, @nbins = 10.
Input: pixs (8 bpp; no colormap)
Return: pixd (with pixels sorted in each column, from
min to max value)
Notes: (1) The time is O(n) in the number of pixels and runs about 50 Mpixels/sec on a 3 GHz machine.
Input: pixs (8 or 32 bpp; no colormap)
wf, hf (width and height of filter; each is >= 1)
rank (in [0.0 ... 1.0])
Return: pixd (of rank values), or null on error
Notes: (1) This defines, for each pixel in pixs, a neighborhood of pixels given by a rectangle "centered" on the pixel. This set of wf*hf pixels has a distribution of values. For each component, if the values are sorted in increasing order, we choose the component such that rank*(wf*hf-1) pixels have a lower or equal value and (1-rank)*(wf*hf-1) pixels have an equal or greater value. (2) See notes in pixRankFilterGray() for further details.
Input: pixs (8 bpp; no colormap)
wf, hf (width and height of filter; each is >= 1)
rank (in [0.0 ... 1.0])
Return: pixd (of rank values), or null on error
Notes: (1) This defines, for each pixel in pixs, a neighborhood of pixels given by a rectangle "centered" on the pixel. This set of wf*hf pixels has a distribution of values, and if they are sorted in increasing order, we choose the pixel such that rank*(wf*hf-1) pixels have a lower or equal value and (1-rank)*(wf*hf-1) pixels have an equal or greater value. (2) By this definition, the rank = 0.0 pixel has the lowest value, and the rank = 1.0 pixel has the highest value. (3) We add mirrored boundary pixels to avoid boundary effects, and put the filter center at (0, 0). (4) This dispatches to grayscale erosion or dilation if the filter dimensions are odd and the rank is 0.0 or 1.0, rsp. (5) Returns a copy if both wf and hf are 1. (6) Uses row-major or column-major incremental updates to the histograms depending on whether hf > wf or hf <= wf, rsp.
Input: pixs (32 bpp)
wf, hf (width and height of filter; each is >= 1)
rank (in [0.0 ... 1.0])
Return: pixd (of rank values), or null on error
Notes: (1) This defines, for each pixel in pixs, a neighborhood of pixels given by a rectangle "centered" on the pixel. This set of wf*hf pixels has a distribution of values. For each component, if the values are sorted in increasing order, we choose the component such that rank*(wf*hf-1) pixels have a lower or equal value and (1-rank)*(wf*hf-1) pixels have an equal or greater value. (2) Apply gray rank filtering to each component independently. (3) See notes in pixRankFilterGray() for further details.
| LEPT_DLL PIX* pixRankFilterWithScaling | ( | PIX * | pixs, |
| l_int32 | wf, | ||
| l_int32 | hf, | ||
| l_float32 | rank, | ||
| l_float32 | scalefactor | ||
| ) |
Input: pixs (8 or 32 bpp; no colormap)
wf, hf (width and height of filter; each is >= 1)
rank (in [0.0 ... 1.0])
scalefactor (scale factor; must be >= 0.2 and <= 0.7)
Return: pixd (of rank values), or null on error
Notes: (1) This is a convenience function that downscales, does the rank filtering, and upscales. Because the down- and up-scaling functions are very fast compared to rank filtering, the time it takes is reduced from that for the simple rank filtering operation by approximately the square of the scaling factor.
| LEPT_DLL l_int32 pixRankHaustest | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| PIX * | pix3, | ||
| PIX * | pix4, | ||
| l_float32 | delx, | ||
| l_float32 | dely, | ||
| l_int32 | maxdiffw, | ||
| l_int32 | maxdiffh, | ||
| l_int32 | area1, | ||
| l_int32 | area3, | ||
| l_float32 | rank, | ||
| l_int32 * | tab8 | ||
| ) |
Input: pix1 (new pix, not dilated)
pix2 (new pix, dilated)
pix3 (exemplar pix, not dilated)
pix4 (exemplar pix, dilated)
delx (x comp of centroid difference)
dely (y comp of centroid difference)
maxdiffw (max width difference of pix1 and pix2)
maxdiffh (max height difference of pix1 and pix2)
area1 (fg pixels in pix1)
area3 (fg pixels in pix3)
rank (rank value of test, each way)
tab8 (table of pixel sums for byte)
Return: 0 (FALSE) if no match, 1 (TRUE) if the new
pix is in the same class as the exemplar.
Note: we check first that the two pix are roughly the same size. Only if they meet that criterion do we compare the bitmaps. We convert the rank value to a number of pixels by multiplying the rank fraction by the number of pixels in the undilated image. The Hausdorff is a 2-way check. The centroid difference is used to align the two images to the nearest integer for each of the checks. The rank hausdorff checks that the dilated image of one contains the rank fraction of the pixels of the undilated image of the other. Checks are done in both directions. Failure of the test in either direction results in failure of the test.
Input: pixs (8 bpp; no colormap)
Return: pixd (with pixels sorted in each row, from
min to max value)
Notes: (1) The time is O(n) in the number of pixels and runs about 100 Mpixels/sec on a 3 GHz machine.
| LEPT_DLL l_int32 pixRasterop | ( | PIX * | pixd, |
| l_int32 | dx, | ||
| l_int32 | dy, | ||
| l_int32 | dw, | ||
| l_int32 | dh, | ||
| l_int32 | op, | ||
| PIX * | pixs, | ||
| l_int32 | sx, | ||
| l_int32 | sy | ||
| ) |
Input: pixd (dest pix)
dx (x val of UL corner of dest rectangle)
dy (y val of UL corner of dest rectangle)
dw (width of dest rectangle)
dh (height of dest rectangle)
op (op code)
pixs (src pix)
sx (x val of UL corner of src rectangle)
sy (y val of UL corner of src rectangle)
Return: 0 if OK; 1 on error.
Notes: (1) This has the standard set of 9 args for rasterop. This function is your friend; it is worth memorizing! (2) If the operation involves only dest, this calls rasteropUniLow(). Otherwise, checks depth of the src and dest, and if they match, calls rasteropLow(). (3) For the two-image operation, where both pixs and pixd are defined, they are typically different images. However there are cases, such as pixSetMirroredBorder(), where in-place operations can be done, blitting pixels from one part of pixd to another. Consequently, we permit such operations. If you use them, be sure that there is no overlap between the source and destination rectangles in pixd (!)
There are 18 operations, described by the op codes in pix.h.
One, PIX_DST, is a no-op.
Three, PIX_CLR, PIX_SET, and PIX_NOT(PIX_DST) operate only on the dest. These are handled by the low-level rasteropUniLow().
The other 14 involve both the src and the dest, and depend on the bit values of either just the src or the bit values of both src and dest. They are handled by rasteropLow():
PIX_SRC s
PIX_NOT(PIX_SRC) ~s
PIX_SRC | PIX_DST s | d
PIX_SRC & PIX_DST s & d
PIX_SRC ^ PIX_DST s ^ d
PIX_NOT(PIX_SRC) | PIX_DST ~s | d
PIX_NOT(PIX_SRC) & PIX_DST ~s & d
PIX_NOT(PIX_SRC) ^ PIX_DST ~s ^ d
PIX_SRC | PIX_NOT(PIX_DST) s | ~d
PIX_SRC & PIX_NOT(PIX_DST) s & ~d
PIX_SRC ^ PIX_NOT(PIX_DST) s ^ ~d
PIX_NOT(PIX_SRC | PIX_DST) ~(s | d)
PIX_NOT(PIX_SRC & PIX_DST) ~(s & d)
PIX_NOT(PIX_SRC ^ PIX_DST) ~(s ^ d)
Each of these is implemented with one of three low-level functions, depending on the alignment of the left edge of the src and dest rectangles:
Of the 14 binary rasterops above, only 12 are unique logical combinations (out of a possible 16) of src and dst bits:
(sd) (11) (10) (01) (00)
s 1 1 0 0 ~s 0 0 1 1 s | d 1 1 1 0 s & d 1 0 0 0 s ^ d 0 1 1 0 ~s | d 1 0 1 1 ~s & d 0 0 1 0 ~s ^ d 1 0 0 1 s | ~d 1 1 0 1 s & ~d 0 1 0 0 s ^ ~d 1 0 0 1 ~(s | d) 0 0 0 1 ~(s & d) 0 1 1 1 ~(s ^ d) 1 0 0 1
Note that the following three operations are equivalent: ~(s ^ d) ~s ^ d s ^ ~d and in the implementation, we call them out with the first form; namely, ~(s ^ d).
Of the 16 possible binary combinations of src and dest bits, the remaining 4 unique ones are independent of the src bit. They depend on either just the dest bit or on neither the src nor dest bits:
d 1 0 1 0 (indep. of s) ~d 0 1 0 1 (indep. of s) CLR 0 0 0 0 (indep. of both s & d) SET 1 1 1 1 (indep. of both s & d)
As mentioned above, three of these are implemented by rasteropUniLow(), and one is a no-op.
How can these operation codes be represented by bits in such a way that when the basic operations are performed on the bits the results are unique for unique operations, and mimic the logic table given above?
The answer is to choose a particular order of the pairings: (sd) (11) (10) (01) (00) (which happens to be the same as in the above table) and to translate the result into 4-bit representations of s and d. For example, the Sun rasterop choice (omitting the extra bit for clipping) is
PIX_SRC 0xc PIX_DST 0xa
This corresponds to our pairing order given above: (sd) (11) (10) (01) (00) where for s = 1 we get the bit pattern PIX_SRC: 1 1 0 0 (0xc) and for d = 1 we get the pattern PIX_DST: 1 0 1 0 (0xa)
OK, that's the pairing order that Sun chose. How many different ways can we assign bit patterns to PIX_SRC and PIX_DST to get the boolean ops to work out? Any of the 4 pairs can be put in the first position, any of the remaining 3 pairs can go in the second; and one of the remaining 2 pairs can go in the third. There is a total of 4*3*2 = 24 ways these pairs can be permuted.
Input: pixd
pixs
op (any of the op-codes)
Return: 0 if OK; 1 on error
Notes:
| LEPT_DLL l_int32 pixRasteropHip | ( | PIX * | pixd, |
| l_int32 | by, | ||
| l_int32 | bh, | ||
| l_int32 | hshift, | ||
| l_int32 | incolor | ||
| ) |
Input: pixd (in-place operation)
by (top of horizontal band)
bh (height of horizontal band)
hshift (horizontal shift of band; hshift > 0 is to right)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: 0 if OK; 1 on error
Notes: (1) This rasterop translates a horizontal band of the image either left or right, bringing in either white or black pixels from outside the image. (2) The horizontal band extends the full width of pixd. (3) If a colormap exists, the nearest color to white or black is brought in.
Input: pixd (in-place translation)
hshift (horizontal shift; hshift > 0 is to right)
vshift (vertical shift; vshift > 0 is down)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: 0 if OK; 1 on error
| LEPT_DLL l_int32 pixRasteropVip | ( | PIX * | pixd, |
| l_int32 | bx, | ||
| l_int32 | bw, | ||
| l_int32 | vshift, | ||
| l_int32 | incolor | ||
| ) |
Input: pixd (in-place)
bx (left edge of vertical band)
bw (width of vertical band)
vshift (vertical shift of band; vshift > 0 is down)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: 0 if OK; 1 on error
Notes: (1) This rasterop translates a vertical band of the image either up or down, bringing in either white or black pixels from outside the image. (2) The vertical band extends the full height of pixd. (3) If a colormap exists, the nearest color to white or black is brought in.
Input: filename (with full pathname or in local directory) Return: pix if OK; null on error
Notes: (1) See at top of file for supported formats.
| LEPT_DLL SARRAY* pixReadBarcodes | ( | PIXA * | pixa, |
| l_int32 | format, | ||
| l_int32 | method, | ||
| SARRAY ** | psaw, | ||
| l_int32 | debugflag | ||
| ) |
Input: pixa (of 8 bpp deskewed and cropped barcodes)
format (L_BF_ANY, L_BF_CODEI2OF5, L_BF_CODE93, ...)
method (L_USE_WIDTHS, L_USE_WINDOWS);
&saw (<optional return> sarray of bar widths)
debugflag (use 1 to generate debug output)
Return: sa (sarray of widths, one string for each barcode found),
or null on error
Input: pixs (of 8 bpp deskewed and cropped barcode)
method (L_USE_WIDTHS, L_USE_WINDOWS);
debugflag (use 1 to generate debug output)
Return: na (numa of widths, each in set {1,2,3,4}), or null on error
| LEPT_DLL l_int32 pixReadHeader | ( | const char * | filename, |
| l_int32 * | pformat, | ||
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pbps, | ||
| l_int32 * | pspp, | ||
| l_int32 * | piscmap | ||
| ) |
Input: filename (with full pathname or in local directory)
&format (<optional return> file format)
&w, &h (<optional returns> width and height)
&bps <optional return> bits/sample
&spp <optional return> samples/pixel (1, 3 or 4)
&iscmap (<optional return> 1 if cmap exists; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) This reads the actual headers for jpeg, png, tiff and pnm. For bmp and gif, we cheat and read the entire file into a pix, from which we extract the "header" information.
| LEPT_DLL l_int32 pixReadHeaderMem | ( | const l_uint8 * | data, |
| size_t | size, | ||
| l_int32 * | pformat, | ||
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pbps, | ||
| l_int32 * | pspp, | ||
| l_int32 * | piscmap | ||
| ) |
Input: data (const; encoded)
datasize (size of data)
&format (<optional returns> image format)
&w, &h (<optional returns> width and height)
&bps <optional return> bits/sample
&spp <optional return> samples/pixel (1, 3 or 4)
&iscmap (<optional return> 1 if cmap exists; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) This reads the actual headers for jpeg, png, tiff, jp2k and pnm. For bmp and gif, we cheat and read all the data into a pix, from which we extract the "header" information. (2) The amount of data required depends on the format. For png, it requires less than 30 bytes, but for jpeg it can require most of the compressed file. In practice, the data is typically the entire compressed file in memory. (3) findFileFormatBuffer() requires up to 8 bytes to decide on the format, which we require.
Input: sarray (of full pathnames)
index (into pathname array)
Return: pix if OK; null if not found
Notes: (1) This function is useful for selecting image files from a directory, where the integer @index is embedded into the file name. (2) This is typically done by generating the sarray using getNumberedPathnamesInDirectory(), so that the @index pathname would have the number @index in it. The size of the sarray should be the largest number (plus 1) appearing in the file names, respecting the constraints in the call to getNumberedPathnamesInDirectory(). (3) Consequently, for some indices into the sarray, there may be no pathnames in the directory containing that number. By convention, we place empty C strings ("") in those locations in the sarray, and it is not an error if such a string is encountered and no pix is returned. Therefore, the caller must verify that a pix is returned. (4) See convertSegmentedPagesToPS() in src/psio1.c for an example of usage.
| LEPT_DLL PIX* pixReadJp2k | ( | const char * | filename, |
| l_uint32 | reduction, | ||
| BOX * | box, | ||
| l_int32 | hint, | ||
| l_int32 | debug | ||
| ) |
| LEPT_DLL PIX* pixReadJpeg | ( | const char * | filename, |
| l_int32 | cmapflag, | ||
| l_int32 | reduction, | ||
| l_int32 * | pnwarn, | ||
| l_int32 | hint | ||
| ) |
Input: filename
cmapflag (0 for no colormap in returned pix;
1 to return an 8 bpp cmapped pix if spp = 3 or 4)
reduction (scaling factor: 1, 2, 4 or 8)
&nwarn (<optional return> number of warnings about
corrupted data)
hint (a bitwise OR of L_JPEG_* values; 0 for default)
Return: pix, or null on error
Notes: (1) This is a special function for reading jpeg files. (2) Use this if you want the jpeg library to create an 8 bpp colormapped image. (3) Images reduced by factors of 2, 4 or 8 can be returned significantly faster than full resolution images. (4) If the jpeg data is bad, the jpeg library will continue silently, or return warnings, or attempt to exit. Depending on the severity of the data corruption, there are two possible outcomes: (a) a possibly damaged pix can be generated, along with zero or more warnings, or (b) the library will attempt to exit (caught by our error handler) and no pix will be returned. If a pix is generated with at least one warning of data corruption, and if L_JPEG_FAIL_ON_BAD_DATA is included in @hint, no pix will be returned. (5) The possible hint values are given in the enum in imageio.h:
Input: data (const; encoded)
datasize (size of data)
Return: pix, or null on error
Notes: (1) This is a variation of pixReadStream(), where the data is read from a memory buffer rather than a file. (2) On Windows, this only reads tiff formatted files directly from memory. For other formats, it writes to a temp file and decompresses from that file. (3) findFileFormatBuffer() requires up to 12 bytes to decide on the format. That determines the constraint here. But in fact the data must contain the entire compressed string for the image.
Input: cdata (const; bmp-encoded)
size (of data)
Return: pix, or null on error
Notes: (1) The @size byte of @data must be a null character.
| LEPT_DLL PIX* pixReadMemJp2k | ( | const l_uint8 * | data, |
| size_t | size, | ||
| l_uint32 | reduction, | ||
| BOX * | box, | ||
| l_int32 | hint, | ||
| l_int32 | debug | ||
| ) |
| LEPT_DLL PIX* pixReadMemJpeg | ( | const l_uint8 * | data, |
| size_t | size, | ||
| l_int32 | cmflag, | ||
| l_int32 | reduction, | ||
| l_int32 * | pnwarn, | ||
| l_int32 | hint | ||
| ) |
Input: data (const; jpeg-encoded)
size (of data)
colormap flag (0 means return RGB image if color;
1 means create a colormap and return
an 8 bpp colormapped image if color)
reduction (scaling factor: 1, 2, 4 or 8)
&nwarn (<optional return> number of warnings)
hint (a bitwise OR of L_JPEG_* values; 0 for default)
Return: pix, or null on error
Notes: (1) The @size byte of @data must be a null character. (2) The only hint flag so far is L_JPEG_READ_LUMINANCE, given in the enum in imageio.h. (3) See pixReadJpeg() for usage.
Input: cdata (const; png-encoded)
size (of data)
Return: pix, or null on error
Notes: (1) The @size byte of @data must be a null character.
Input: cdata (const; pnm-encoded)
size (of data)
Return: pix, or null on error
Notes: (1) The @size byte of @data must be a null character.
Input: data (const; uncompressed)
size (of data)
Return: pix, or null on error
Input: data (const; tiff-encoded)
datasize (size of data)
n (page image number: 0-based)
Return: pix, or null on error
Notes: (1) This is a version of pixReadTiff(), where the data is read from a memory buffer and uncompressed. (2) Use TIFFClose(); TIFFCleanup() doesn't free internal memstream. (3) No warning messages on failure, because of how multi-page TIFF reading works. You are supposed to keep trying until it stops working.
Input: fp (file stream)
hint (bitwise OR of L_HINT_* values for jpeg; use 0 for no hint)
Return: pix if OK; null on error
Notes: (1) The hint only applies to jpeg.
Input: stream opened for read Return: pix, or null on error
Notes: (1) Here are references on the bmp file format: http://en.wikipedia.org/wiki/BMP_file_format http://www.fortunecity.com/skyscraper/windows/364/bmpffrmt.html
| LEPT_DLL PIX* pixReadStreamJp2k | ( | FILE * | fp, |
| l_uint32 | reduction, | ||
| BOX * | box, | ||
| l_int32 | hint, | ||
| l_int32 | debug | ||
| ) |
| LEPT_DLL PIX* pixReadStreamJpeg | ( | FILE * | fp, |
| l_int32 | cmapflag, | ||
| l_int32 | reduction, | ||
| l_int32 * | pnwarn, | ||
| l_int32 | hint | ||
| ) |
Input: stream
cmapflag (0 for no colormap in returned pix;
1 to return an 8 bpp cmapped pix if spp = 3 or 4)
reduction (scaling factor: 1, 2, 4 or 8)
&nwarn (<optional return> number of warnings)
hint (a bitwise OR of L_JPEG_* values; 0 for default)
Return: pix, or null on error
Usage: see pixReadJpeg() Notes: (1) The jpeg comment, if it exists, is not stored in the pix.
Input: stream Return: pix, or null on error
Notes: (1) If called from pixReadStream(), the stream is positioned at the beginning of the file. (2) To do sequential reads of png format images from a stream, use pixReadStreamPng() (3) Any image with alpha is converted to RGBA (spp = 4, with equal red, green and blue channels) on reading. There are three important cases with alpha: (a) grayscale-with-alpha (spp = 2), where bpp = 8, and each pixel has an associated alpha (transparency) value in the second component of the image data. (b) spp = 1, d = 1 with colormap and alpha in the trans array. Transparency is usually associated with the white background. (c) spp = 1, d = 8 with colormap and alpha in the trans array. Each color in the colormap has a separate transparency value. (4) We use the high level png interface, where the transforms are set up in advance and the header and image are read with a single call. The more complicated interface, where the header is read first and the buffers for the raster image are user-allocated before reading the image, works for single images, but I could not get it to work properly for the successive png reads that are required by pixaReadStream().
Input: stream opened for read Return: pix, or null on error
Input: stream Return: pix, or null on error.
Notes: (1) If called from pixReadStream(), the stream is positioned at the beginning of the file.
Input: stream
n (page number: 0 based)
Return: pix, or null on error (e.g., if the page number is invalid)
Notes: (1) No warning messages on failure, because of how multi-page TIFF reading works. You are supposed to keep trying until it stops working.
Input: filename
page number (0 based)
Return: pix, or null on error
Notes: (1) This is a version of pixRead(), specialized for tiff files, that allows specification of the page to be returned (2) No warning messages on failure, because of how multi-page TIFF reading works. You are supposed to keep trying until it stops working.
Input: filename (with full pathname or in local directory)
hint (bitwise OR of L_HINT_* values for jpeg; use 0 for no hint)
Return: pix if OK; null on error
Notes: (1) The hint is not binding, but may be used to optimize jpeg decoding. Use 0 for no hinting.
Input: pixs
tab (<optional>; if null, a table is made here
and destroyed before exit)
Return: pixd (2x subsampled), or null on error
Notes: (1) After folding, the data is in bytes 0 and 2 of the word, and the bits in each byte are in the following order (with 0 being the leftmost originating pair and 7 being the rightmost originating pair): 0 4 1 5 2 6 3 7 These need to be permuted to 0 1 2 3 4 5 6 7 which is done with an 8-bit table generated by makeSubsampleTab2x().
Input: pixs (1 bpp)
level (rank threshold: 1, 2, 3, 4)
intab (<optional>; if null, a table is made here
and destroyed before exit)
Return: pixd (1 bpp, 2x rank threshold reduced), or null on error
Notes: (1) pixd is downscaled by 2x from pixs. (2) The rank threshold specifies the minimum number of ON pixels in each 2x2 region of pixs that are required to set the corresponding pixel ON in pixd. (3) Rank filtering is done to the UL corner of each 2x2 pixel block, using only logical operations. Then these pixels are chosen in the 2x subsampling process, subsampled, as described above in pixReduceBinary2().
| LEPT_DLL PIX* pixReduceRankBinaryCascade | ( | PIX * | pixs, |
| l_int32 | level1, | ||
| l_int32 | level2, | ||
| l_int32 | level3, | ||
| l_int32 | level4 | ||
| ) |
Input: pixs (1 bpp)
level1, ... level4 (thresholds, in the set {0, 1, 2, 3, 4})
Return: pixd, or null on error
Notes: (1) This performs up to four cascaded 2x rank reductions. (2) Use level = 0 to truncate the cascade.
Input: pixs (any depth)
Return: pixd (if 32 bpp rgba, pixs blended over a white background;
a clone of pixs otherwise), or null on error
Notes: (1) This is a wrapper on pixAlphaBlendUniform()
Input: pixs (all depths; colormap ok)
npix (number to be removed from each of the 4 sides)
Return: pixd (with pixels removed around border), or null on error
Input: pixs (1 bpp)
filling connectivity (4 or 8)
Return: pixd (all pixels in the src that are not touching the
border) or null on error
Notes: (1) This removes all fg components touching the border.
| LEPT_DLL PIX* pixRemoveBorderGeneral | ( | PIX * | pixs, |
| l_int32 | left, | ||
| l_int32 | right, | ||
| l_int32 | top, | ||
| l_int32 | bot | ||
| ) |
Input: pixs (all depths; colormap ok)
left, right, top, bot (number of pixels removed)
Return: pixd (with pixels removed around border), or null on error
Input: pixs (all depths; colormap ok)
wd (target width; use 0 if only removing from height)
hd (target height; use 0 if only removing from width)
Return: pixd (with pixels removed around border), or null on error
Notes: (1) Removes pixels as evenly as possible from the sides of the image, leaving the central part. (2) Returns clone if no pixels requested removed, or the target sizes are larger than the image.
Input: pixs (see restrictions below)
type (REMOVE_CMAP_TO_BINARY,
REMOVE_CMAP_TO_GRAYSCALE,
REMOVE_CMAP_TO_FULL_COLOR,
REMOVE_CMAP_WITH_ALPHA,
REMOVE_CMAP_BASED_ON_SRC)
Return: pixd (without colormap), or null on error
Notes: (1) If pixs does not have a colormap, a clone is returned. (2) Otherwise, the input pixs is restricted to 1, 2, 4 or 8 bpp. (3) Use REMOVE_CMAP_TO_BINARY only on 1 bpp pix. (4) For grayscale conversion from RGB, use a weighted average of RGB values, and always return an 8 bpp pix, regardless of whether the input pixs depth is 2, 4 or 8 bpp. (5) REMOVE_CMAP_TO_FULL_COLOR ignores the alpha component and returns a 32 bpp pix with spp == 3 and the alpha bytes are 0. (6) For REMOVE_CMAP_BASED_ON_SRC, if there is no color, this returns either a 1 bpp or 8 bpp grayscale pix. If there is color, this returns a 32 bpp pix, with either:
Input: pixs (any depth, with or without colormap)
type (REMOVE_CMAP_TO_BINARY,
REMOVE_CMAP_TO_GRAYSCALE,
REMOVE_CMAP_TO_FULL_COLOR,
REMOVE_CMAP_WITH_ALPHA,
REMOVE_CMAP_BASED_ON_SRC)
ifnocmap (L_CLONE, L_COPY)
Return: pixd (always a new pix; without colormap), or null on error
Notes: (1) Convenience function that allows choice between returning a clone or a copy if pixs does not have a colormap. (2) See pixRemoveColormap().
| LEPT_DLL l_int32 pixRemoveMatchedPattern | ( | PIX * | pixs, |
| PIX * | pixp, | ||
| PIX * | pixe, | ||
| l_int32 | x0, | ||
| l_int32 | y0, | ||
| l_int32 | dsize | ||
| ) |
Input: pixs (input image, 1 bpp)
pixp (pattern to be removed from image, 1 bpp)
pixe (image after erosion by Sel that approximates pixp, 1 bpp)
x0, y0 (center of Sel)
dsize (number of pixels on each side by which pixp is
dilated before being subtracted from pixs;
valid values are {0, 1, 2, 3, 4})
Return: 0 if OK, 1 on error
Notes: (1) This is in-place. (2) You can use various functions in selgen to create a Sel that is used to generate pixe from pixs. (3) This function is applied after pixe has been computed. It finds the centroid of each c.c., and subtracts (the appropriately dilated version of) pixp, with the center of the Sel used to align pixp with pixs.
| LEPT_DLL PIX* pixRemoveSeededComponents | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| PIX * | pixm, | ||
| l_int32 | connectivity, | ||
| l_int32 | bordersize | ||
| ) |
Input: pixd (<optional>; this can be null or equal to pixm; 1 bpp)
pixs (1 bpp seed)
pixm (1 bpp filling mask)
connectivity (4 or 8)
bordersize (amount of border clearing)
Return: pixd, or null on error
Notes: (1) This removes each component in pixm for which there is at least one seed in pixs. If pixd == NULL, this returns the result in a new pixd. Otherwise, it is an in-place operation on pixm. In no situation is pixs altered, because we do the filling with a copy of pixs. (2) If bordersize > 0, it also clears all pixels within a distance @bordersize of the edge of pixd. This is here because pixLocalExtrema() typically finds local minima at the border. Use @bordersize >= 2 to remove these.
Input: pixs (colormapped) Return: 0 if OK, 1 on error
Notes: (1) This is an in-place operation. (2) If the image doesn't have a colormap, returns without error. (3) Unused colors are removed from the colormap, and the image pixels are re-numbered.
Input: pixs (1 bpp pix from which components are removed; in-place)
pixa (of connected components in pixs)
na (numa indicator: remove components corresponding to 1s)
Return: 0 if OK, 1 on error
Notes: (1) This complements pixAddWithIndicator(). Here, the selected components are set subtracted from pixs.
Input: pix
box
width (thickness of box lines)
op (one of L_SET_PIXELS, L_CLEAR_PIXELS, L_FLIP_PIXELS)
Return: 0 if OK, 1 on error
Input: pix
boxa
width (thickness of line)
op (one of L_SET_PIXELS, L_CLEAR_PIXELS, L_FLIP_PIXELS)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixRenderBoxaArb | ( | PIX * | pix, |
| BOXA * | boxa, | ||
| l_int32 | width, | ||
| l_uint8 | rval, | ||
| l_uint8 | gval, | ||
| l_uint8 | bval | ||
| ) |
Input: pix
boxa
width (thickness of line)
rval, gval, bval
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixRenderBoxaBlend | ( | PIX * | pix, |
| BOXA * | boxa, | ||
| l_int32 | width, | ||
| l_uint8 | rval, | ||
| l_uint8 | gval, | ||
| l_uint8 | bval, | ||
| l_float32 | fract, | ||
| l_int32 | removedups | ||
| ) |
Input: pix
boxa
width (thickness of line)
rval, gval, bval
fract (in [0.0 - 1.0]; complete transparency (no effect)
if 0.0; no transparency if 1.0)
removedups (1 to remove; 0 otherwise)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixRenderBoxArb | ( | PIX * | pix, |
| BOX * | box, | ||
| l_int32 | width, | ||
| l_uint8 | rval, | ||
| l_uint8 | gval, | ||
| l_uint8 | bval | ||
| ) |
Input: pix (any depth, cmapped ok)
box
width (thickness of box lines)
rval, gval, bval
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixRenderBoxBlend | ( | PIX * | pix, |
| BOX * | box, | ||
| l_int32 | width, | ||
| l_uint8 | rval, | ||
| l_uint8 | gval, | ||
| l_uint8 | bval, | ||
| l_float32 | fract | ||
| ) |
Input: pix
box
width (thickness of box lines)
rval, gval, bval
fract (in [0.0 - 1.0]; complete transparency (no effect)
if 0.0; no transparency if 1.0)
Return: 0 if OK, 1 on error
Input: pixs (8 or 16 bpp; no colormap)
startval (value of lowest contour; must be in [0 ... maxval])
incr (increment to next contour; must be > 0)
outdepth (either 1 or depth of pixs)
Return: pixd, or null on error
Notes: (1) The output can be either 1 bpp, showing just the contour lines, or a copy of the input pixs with the contour lines superposed.
| LEPT_DLL l_int32 pixRenderGridArb | ( | PIX * | pix, |
| l_int32 | nx, | ||
| l_int32 | ny, | ||
| l_int32 | width, | ||
| l_uint8 | rval, | ||
| l_uint8 | gval, | ||
| l_uint8 | bval | ||
| ) |
Input: pix (any depth, cmapped ok)
nx, ny (number of rectangles in each direction)
width (thickness of grid lines)
rval, gval, bval
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixRenderHashBox | ( | PIX * | pix, |
| BOX * | box, | ||
| l_int32 | spacing, | ||
| l_int32 | width, | ||
| l_int32 | orient, | ||
| l_int32 | outline, | ||
| l_int32 | op | ||
| ) |
Input: pix
box
spacing (spacing between lines; must be > 1)
width (thickness of box and hash lines)
orient (orientation of lines: L_HORIZONTAL_LINE, ...)
outline (0 to skip drawing box outline)
op (one of L_SET_PIXELS, L_CLEAR_PIXELS, L_FLIP_PIXELS)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixRenderHashBoxa | ( | PIX * | pix, |
| BOXA * | boxa, | ||
| l_int32 | spacing, | ||
| l_int32 | width, | ||
| l_int32 | orient, | ||
| l_int32 | outline, | ||
| l_int32 | op | ||
| ) |
Input: pix
boxa
spacing (spacing between lines; must be > 1)
width (thickness of box and hash lines)
orient (orientation of lines: L_HORIZONTAL_LINE, ...)
outline (0 to skip drawing box outline)
op (one of L_SET_PIXELS, L_CLEAR_PIXELS, L_FLIP_PIXELS)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixRenderHashBoxaArb | ( | PIX * | pix, |
| BOXA * | boxa, | ||
| l_int32 | spacing, | ||
| l_int32 | width, | ||
| l_int32 | orient, | ||
| l_int32 | outline, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval | ||
| ) |
Input: pix
boxa
spacing (spacing between lines; must be > 1)
width (thickness of box and hash lines)
orient (orientation of lines: L_HORIZONTAL_LINE, ...)
outline (0 to skip drawing box outline)
rval, gval, bval
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixRenderHashBoxaBlend | ( | PIX * | pix, |
| BOXA * | boxa, | ||
| l_int32 | spacing, | ||
| l_int32 | width, | ||
| l_int32 | orient, | ||
| l_int32 | outline, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_float32 | fract | ||
| ) |
Input: pix
boxa
spacing (spacing between lines; must be > 1)
width (thickness of box and hash lines)
orient (orientation of lines: L_HORIZONTAL_LINE, ...)
outline (0 to skip drawing box outline)
rval, gval, bval
fract (in [0.0 - 1.0]; complete transparency (no effect)
if 0.0; no transparency if 1.0)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixRenderHashBoxArb | ( | PIX * | pix, |
| BOX * | box, | ||
| l_int32 | spacing, | ||
| l_int32 | width, | ||
| l_int32 | orient, | ||
| l_int32 | outline, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval | ||
| ) |
Input: pix
box
spacing (spacing between lines; must be > 1)
width (thickness of box and hash lines)
orient (orientation of lines: L_HORIZONTAL_LINE, ...)
outline (0 to skip drawing box outline)
rval, gval, bval
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixRenderHashBoxBlend | ( | PIX * | pix, |
| BOX * | box, | ||
| l_int32 | spacing, | ||
| l_int32 | width, | ||
| l_int32 | orient, | ||
| l_int32 | outline, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval, | ||
| l_float32 | fract | ||
| ) |
Input: pix
box
spacing (spacing between lines; must be > 1)
width (thickness of box and hash lines)
orient (orientation of lines: L_HORIZONTAL_LINE, ...)
outline (0 to skip drawing box outline)
rval, gval, bval
fract (in [0.0 - 1.0]; complete transparency (no effect)
if 0.0; no transparency if 1.0)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixRenderLine | ( | PIX * | pix, |
| l_int32 | x1, | ||
| l_int32 | y1, | ||
| l_int32 | x2, | ||
| l_int32 | y2, | ||
| l_int32 | width, | ||
| l_int32 | op | ||
| ) |
Input: pix
x1, y1
x2, y2
width (thickness of line)
op (one of L_SET_PIXELS, L_CLEAR_PIXELS, L_FLIP_PIXELS)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixRenderLineArb | ( | PIX * | pix, |
| l_int32 | x1, | ||
| l_int32 | y1, | ||
| l_int32 | x2, | ||
| l_int32 | y2, | ||
| l_int32 | width, | ||
| l_uint8 | rval, | ||
| l_uint8 | gval, | ||
| l_uint8 | bval | ||
| ) |
Input: pix
x1, y1
x2, y2
width (thickness of line)
rval, gval, bval
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixRenderLineBlend | ( | PIX * | pix, |
| l_int32 | x1, | ||
| l_int32 | y1, | ||
| l_int32 | x2, | ||
| l_int32 | y2, | ||
| l_int32 | width, | ||
| l_uint8 | rval, | ||
| l_uint8 | gval, | ||
| l_uint8 | bval, | ||
| l_float32 | fract | ||
| ) |
Input: pix
x1, y1
x2, y2
width (thickness of line)
rval, gval, bval
fract
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixRenderPlotFromNuma | ( | PIX ** | ppix, |
| NUMA * | na, | ||
| l_int32 | plotloc, | ||
| l_int32 | linewidth, | ||
| l_int32 | max, | ||
| l_uint32 | color | ||
| ) |
Input: &pix (any type; replaced if not 32 bpp rgb)
numa (to be plotted)
plotloc (location of plot: L_PLOT_AT_TOP, etc)
linewidth (width of "line" that is drawn; between 1 and 7)
max (maximum excursion in pixels from baseline)
color (plot color: 0xrrggbb00)
Return: 0 if OK, 1 on error
Notes: (1) Simplified interface for plotting row or column aligned data on a pix. (2) This replaces @pix with a 32 bpp rgb version if it is not already 32 bpp. It then draws the plot on the pix. (3) See makePlotPtaFromNumaGen() for more details.
| LEPT_DLL l_int32 pixRenderPlotFromNumaGen | ( | PIX ** | ppix, |
| NUMA * | na, | ||
| l_int32 | orient, | ||
| l_int32 | linewidth, | ||
| l_int32 | refpos, | ||
| l_int32 | max, | ||
| l_int32 | drawref, | ||
| l_uint32 | color | ||
| ) |
Input: &pix (any type; replaced if not 32 bpp rgb)
numa (to be plotted)
orient (L_HORIZONTAL_LINE, L_VERTICAL_LINE)
linewidth (width of "line" that is drawn; between 1 and 7)
refpos (reference position: y for horizontal and x for vertical)
max (maximum excursion in pixels from baseline)
drawref (1 to draw the reference line and the normal to it)
color (plot color: 0xrrggbb00)
Return: 0 if OK, 1 on error
Notes: (1) General interface for plotting row or column aligned data on a pix. (2) This replaces @pix with a 32 bpp rgb version if it is not already 32 bpp. It then draws the plot on the pix. (3) See makePlotPtaFromNumaGen() for other input parameters.
Input: ptas (of vertices, none repeated)
width (of polygon outline)
&xmin (<optional return> min x value of input pts)
&ymin (<optional return> min y value of input pts)
Return: pix (1 bpp, with outline generated), or null on error
Notes: (1) The pix is the minimum size required to contain the origin and the polygon. For example, the max x value of the input points is w - 1, where w is the pix width. (2) The rendered line is 4-connected, so that an interior or exterior 8-c.c. flood fill operation works properly.
| LEPT_DLL l_int32 pixRenderPolyline | ( | PIX * | pix, |
| PTA * | ptas, | ||
| l_int32 | width, | ||
| l_int32 | op, | ||
| l_int32 | closeflag | ||
| ) |
Input: pix
ptas
width (thickness of line)
op (one of L_SET_PIXELS, L_CLEAR_PIXELS, L_FLIP_PIXELS)
closeflag (1 to close the contour; 0 otherwise)
Return: 0 if OK, 1 on error
Note: this renders a closed contour if closeflag == 1; otherwise the contour is left open.
| LEPT_DLL l_int32 pixRenderPolylineArb | ( | PIX * | pix, |
| PTA * | ptas, | ||
| l_int32 | width, | ||
| l_uint8 | rval, | ||
| l_uint8 | gval, | ||
| l_uint8 | bval, | ||
| l_int32 | closeflag | ||
| ) |
Input: pix
ptas
width (thickness of line)
rval, gval, bval
closeflag (1 to close the contour; 0 otherwise)
Return: 0 if OK, 1 on error
Note: this renders a closed contour if closeflag == 1; otherwise the contour is left open.
| LEPT_DLL l_int32 pixRenderPolylineBlend | ( | PIX * | pix, |
| PTA * | ptas, | ||
| l_int32 | width, | ||
| l_uint8 | rval, | ||
| l_uint8 | gval, | ||
| l_uint8 | bval, | ||
| l_float32 | fract, | ||
| l_int32 | closeflag, | ||
| l_int32 | removedups | ||
| ) |
Input: pix
ptas
width (thickness of line)
rval, gval, bval
fract (in [0.0 - 1.0]; complete transparency (no effect)
if 0.0; no transparency if 1.0)
closeflag (1 to close the contour; 0 otherwise)
removedups (1 to remove; 0 otherwise)
Return: 0 if OK, 1 on error
Input: pix
pta (arbitrary set of points)
op (one of L_SET_PIXELS, L_CLEAR_PIXELS, L_FLIP_PIXELS)
Return: 0 if OK, 1 on error
Notes: (1) L_SET_PIXELS puts all image bits in each pixel to 1 (black for 1 bpp; white for depth > 1) (2) L_CLEAR_PIXELS puts all image bits in each pixel to 0 (white for 1 bpp; black for depth > 1) (3) L_FLIP_PIXELS reverses all image bits in each pixel (4) This function clips the rendering to the pix. It performs clipping for functions such as pixRenderLine(), pixRenderBox() and pixRenderBoxa(), that call pixRenderPta().
Input: pix (any depth, cmapped ok)
pta (arbitrary set of points)
rval, gval, bval
Return: 0 if OK, 1 on error
Notes: (1) If pix is colormapped, render this color (or the nearest color if the cmap is full) on each pixel. (2) If pix is not colormapped, do the best job you can using the input colors:
| LEPT_DLL l_int32 pixRenderPtaBlend | ( | PIX * | pix, |
| PTA * | pta, | ||
| l_uint8 | rval, | ||
| l_uint8 | gval, | ||
| l_uint8 | bval, | ||
| l_float32 | fract | ||
| ) |
Input: pix (32 bpp rgb)
pta (arbitrary set of points)
rval, gval, bval
Return: 0 if OK, 1 on error
Notes: (1) This function clips the rendering to the pix.
| LEPT_DLL PIX* pixRenderRandomCmapPtaa | ( | PIX * | pix, |
| PTAA * | ptaa, | ||
| l_int32 | polyflag, | ||
| l_int32 | width, | ||
| l_int32 | closeflag | ||
| ) |
Input: pix (1, 2, 4, 8, 16, 32 bpp)
ptaa
polyflag (1 to interpret each Pta as a polyline; 0 to simply
render the Pta as a set of pixels)
width (thickness of line; use only for polyline)
closeflag (1 to close the contour; 0 otherwise;
use only for polyline mode)
Return: pixd (cmapped, 8 bpp) or null on error
Notes: (1) This is a debugging routine, that displays a set of pixels, selected by the set of Ptas in a Ptaa, in a random color in a pix. (2) If @polyflag == 1, each Pta is considered to be a polyline, and is rendered using @width and @closeflag. Each polyline is rendered in a random color. (3) If @polyflag == 0, all points in each Pta are rendered in a random color. The @width and @closeflag parameters are ignored. (4) The output pix is 8 bpp and colormapped. Up to 254 different, randomly selected colors, can be used. (5) The rendered pixels replace the input pixels. They will be clipped silently to the input pix.
Input: pixd (gets new uninitialized buffer for image data)
pixs (determines the size of the buffer; not changed)
Return: 0 if OK, 1 on error
Notes: (1) This removes any existing image data from pixd and allocates an uninitialized buffer that will hold the amount of image data that is in pixs.
Input: pixs (1, 2, 4, 8, 16, 32 bpp; colormap ok)
pixt (can be null; we use only the size)
w, h (ignored if pixt is defined)
Return: pixd (resized to match) or null on error
Notes: (1) This resizes pixs to make pixd, without scaling, by either cropping or extending separately in both width and height. Extension is done by replicating the last row or column. This is useful in a situation where, due to scaling operations, two images that are expected to be the same size can differ slightly in each dimension. (2) You can use either an existing pixt or specify both @w and @h. If pixt is defined, the values in @w and @h are ignored. (3) If pixt is larger than pixs (or if w and/or h is larger than the dimension of pixs), replicate the outer row and column of pixels in pixs into pixd.
| LEPT_DLL NUMA* pixReversalProfile | ( | PIX * | pixs, |
| l_float32 | fract, | ||
| l_int32 | dir, | ||
| l_int32 | first, | ||
| l_int32 | last, | ||
| l_int32 | minreversal, | ||
| l_int32 | factor1, | ||
| l_int32 | factor2 | ||
| ) |
| LEPT_DLL PIX* pixRotate | ( | PIX * | pixs, |
| l_float32 | angle, | ||
| l_int32 | type, | ||
| l_int32 | incolor, | ||
| l_int32 | width, | ||
| l_int32 | height | ||
| ) |
Input: pixs (1, 2, 4, 8, 32 bpp rgb)
angle (radians; clockwise is positive)
type (L_ROTATE_AREA_MAP, L_ROTATE_SHEAR, L_ROTATE_SAMPLING)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
width (original width; use 0 to avoid embedding)
height (original height; use 0 to avoid embedding)
Return: pixd, or null on error
Notes: (1) This is a high-level, simple interface for rotating images about their center. (2) For very small rotations, just return a clone. (3) Rotation brings either white or black pixels in from outside the image. (4) The rotation type is adjusted if necessary for the image depth and size of rotation angle. For 1 bpp images, we rotate either by shear or sampling. (5) Colormaps are removed for rotation by area mapping. (6) The dest can be expanded so that no image pixels are lost. To invoke expansion, input the original width and height. For repeated rotation, use of the original width and height allows the expansion to stop at the maximum required size, which is a square with side = sqrt(w*w + h*h).
*** Warning: implicit assumption about RGB component ordering ***
Input: pixd (<optional>; can be null, equal to pixs,
or different from pixs)
pixs (all depths)
Return: pixd, or null on error
Notes: (1) This does a 180 degree rotation of the image about the center, which is equivalent to a left-right flip about a vertical line through the image center, followed by a top-bottom flip about a horizontal line through the image center. (2) There are 3 cases for input: (a) pixd == null (creates a new pixd) (b) pixd == pixs (in-place operation) (c) pixd != pixs (existing pixd) (3) For clarity, use these three patterns, respectively: (a) pixd = pixRotate180(NULL, pixs); (b) pixRotate180(pixs, pixs); (c) pixRotate180(pixd, pixs);
| LEPT_DLL PIX* pixRotate2Shear | ( | PIX * | pixs, |
| l_int32 | xcen, | ||
| l_int32 | ycen, | ||
| l_float32 | angle, | ||
| l_int32 | incolor | ||
| ) |
Input: pixs
xcen, ycen (center of rotation)
angle (radians)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK);
Return: pixd, or null on error.
Notes: (1) This rotates the image about the given point, using the 2-shear method. It should only be used for angles smaller than MAX_2_SHEAR_ANGLE. For larger angles, a warning is issued. (2) A positive angle gives a clockwise rotation. (3) 2-shear rotation by a specified angle is equivalent to the sequential transformations x' = x + tan(angle) * (y - ycen) for x-shear y' = y + tan(angle) * (x - xcen) for y-shear (4) Computation of tan(angle) is performed within the shear operation. (5) This brings in 'incolor' pixels from outside the image. (6) If the image has an alpha layer, it is rotated separately by two shears.
| LEPT_DLL PIX* pixRotate3Shear | ( | PIX * | pixs, |
| l_int32 | xcen, | ||
| l_int32 | ycen, | ||
| l_float32 | angle, | ||
| l_int32 | incolor | ||
| ) |
Input: pixs
xcen, ycen (center of rotation)
angle (radians)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK);
Return: pixd, or null on error.
Notes: (1) This rotates the image about the given point, using the 3-shear method. It should only be used for angles smaller than LIMIT_SHEAR_ANGLE. For larger angles, a warning is issued. (2) A positive angle gives a clockwise rotation. (3) 3-shear rotation by a specified angle is equivalent to the sequential transformations y' = y + tan(angle/2) * (x - xcen) for first y-shear x' = x + sin(angle) * (y - ycen) for x-shear y' = y + tan(angle/2) * (x - xcen) for second y-shear (4) Computation of tan(angle) is performed in the shear operations. (5) This brings in 'incolor' pixels from outside the image. (6) If the image has an alpha layer, it is rotated separately by two shears. (7) The algorithm was published by Alan Paeth: "A Fast Algorithm for General Raster Rotation," Graphics Interface '86, pp. 77-81, May 1986. A description of the method, along with an implementation, can be found in Graphics Gems, p. 179, edited by Andrew Glassner, published by Academic Press, 1990.
Input: pixs (all depths)
direction (1 = clockwise, -1 = counter-clockwise)
Return: pixd, or null on error
Notes: (1) This does a 90 degree rotation of the image about the center, either cw or ccw, returning a new pix. (2) The direction must be either 1 (cw) or -1 (ccw).
Input: pixs (2, 4, 8 bpp gray or colormapped, or 32 bpp RGB)
angle (radians; clockwise is positive)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error
Notes: (1) Rotates about image center. (2) A positive angle gives a clockwise rotation. (3) Brings in either black or white pixels from the boundary.
Input: pixs (32 bpp)
angle (radians; clockwise is positive)
colorval (e.g., 0 to bring in BLACK, 0xffffff00 for WHITE)
Return: pixd, or null on error
Notes: (1) Rotates about image center. (2) A positive angle gives a clockwise rotation. (3) Specify the color to be brought in from outside the image.
Input: pixs
angle (radians; clockwise is positive)
colorval (e.g., 0 to bring in BLACK, 0xffffff00 for WHITE)
Return: pixd, or null on error
Notes: (1) Rotates the image about the UL corner. (2) A positive angle gives a clockwise rotation. (3) Specify the color to be brought in from outside the image.
Input: pixs
angle (radians; clockwise is positive)
colorval (e.g., 0 to bring in BLACK, 0xffffff00 for WHITE)
Return: pixd, or null on error
Notes: (1) This rotates a color image about the image center. (2) A positive angle gives a clockwise rotation. (3) It uses area mapping, dividing each pixel into 16 subpixels. (4) It is about 10% to 20% faster than the more accurate linear interpolation function pixRotateAMColor(), which uses 256 subpixels. (5) For some reason it shifts the image center. No attempt is made to rotate the alpha component.
*** Warning: implicit assumption about RGB component ordering ***
Input: pixs (1, 2, 4, 8 bpp gray or colormapped, or 32 bpp RGB)
angle (radians; clockwise is positive)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error
Notes: (1) Rotates about the UL corner of the image. (2) A positive angle gives a clockwise rotation. (3) Brings in either black or white pixels from the boundary.
Input: pixs (8 bpp)
angle (radians; clockwise is positive)
grayval (0 to bring in BLACK, 255 for WHITE)
Return: pixd, or null on error
Notes: (1) Rotates about image center. (2) A positive angle gives a clockwise rotation. (3) Specify the grayvalue to be brought in from outside the image.
Input: pixs
angle (radians; clockwise is positive)
grayval (0 to bring in BLACK, 255 for WHITE)
Return: pixd, or null on error
Notes: (1) Rotates the image about the UL corner. (2) A positive angle gives a clockwise rotation. (3) Specify the grayvalue to be brought in from outside the image.
Input: pixs (1 bpp)
angle (radians; clockwise is positive; about the center)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error
Notes: (1) For very small rotations, just return a clone. (2) This does a computationally expensive rotation of 1 bpp images. The fastest rotators (using shears or subsampling) leave visible horizontal and vertical shear lines across which the image shear changes by one pixel. To ameliorate the visual effect one can introduce random dithering. One way to do this in a not-too-random fashion is given here. We convert to 8 bpp, do a very small blur, rotate using linear interpolation (same as area mapping), do a small amount of sharpening to compensate for the initial blur, and threshold back to binary. The shear lines are magically removed. (3) This operation is about 5x slower than rotation by sampling.
| LEPT_DLL PIX* pixRotateBySampling | ( | PIX * | pixs, |
| l_int32 | xcen, | ||
| l_int32 | ycen, | ||
| l_float32 | angle, | ||
| l_int32 | incolor | ||
| ) |
Input: pixs (1, 2, 4, 8, 16, 32 bpp rgb; can be cmapped)
xcen (x value of center of rotation)
ycen (y value of center of rotation)
angle (radians; clockwise is positive)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error
Notes: (1) For very small rotations, just return a clone. (2) Rotation brings either white or black pixels in from outside the image. (3) Colormaps are retained.
Input: pixs (all depths)
quads (0-3; number of 90 degree cw rotations)
Return: pixd, or null on error
| LEPT_DLL PIX* pixRotateShear | ( | PIX * | pixs, |
| l_int32 | xcen, | ||
| l_int32 | ycen, | ||
| l_float32 | angle, | ||
| l_int32 | incolor | ||
| ) |
Input: pixs
xcen (x value for which there is no horizontal shear)
ycen (y value for which there is no vertical shear)
angle (radians)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK);
Return: pixd, or null on error.
Notes: (1) This rotates an image about the given point, using either 2 or 3 shears. (2) A positive angle gives a clockwise rotation. (3) This brings in 'incolor' pixels from outside the image. (4) For rotation angles larger than about 0.35 radians, we issue a warning because you should probably be using another method (either sampling or area mapping)
Input: pixs
angle (radians)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error
Input: pixs
angle (radians)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 pixRotateShearIP | ( | PIX * | pixs, |
| l_int32 | xcen, | ||
| l_int32 | ycen, | ||
| l_float32 | angle, | ||
| l_int32 | incolor | ||
| ) |
Input: pixs (any depth; not colormapped)
xcen, ycen (center of rotation)
angle (radians)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: 0 if OK; 1 on error
Notes: (1) This does an in-place rotation of the image about the specified point, using the 3-shear method. It should only be used for angles smaller than LIMIT_SHEAR_ANGLE. For larger angles, a warning is issued. (2) A positive angle gives a clockwise rotation. (3) 3-shear rotation by a specified angle is equivalent to the sequential transformations y' = y + tan(angle/2) * (x - xcen) for first y-shear x' = x + sin(angle) * (y - ycen) for x-shear y' = y + tan(angle/2) * (x - xcen) for second y-shear (4) Computation of tan(angle) is performed in the shear operations. (5) This brings in 'incolor' pixels from outside the image. (6) The pix cannot be colormapped, because the in-place operation only blits in 0 or 1 bits, not an arbitrary colormap index.
Input: pixs (32 bpp rgb or cmapped)
angle (radians; clockwise is positive)
pixg (<optional> 8 bpp, can be null)
fract (between 0.0 and 1.0, with 0.0 fully transparent
and 1.0 fully opaque)
Return: pixd (32 bpp rgba), or null on error
Notes: (1) The alpha channel is transformed separately from pixs, and aligns with it, being fully transparent outside the boundary of the transformed pixs. For pixels that are fully transparent, a blending function like pixBlendWithGrayMask() will give zero weight to corresponding pixels in pixs. (2) Rotation is about the center of the image; for very small rotations, just return a clone. The dest is automatically expanded so that no image pixels are lost. (3) Rotation is by area mapping. It doesn't matter what color is brought in because the alpha channel will be transparent (black) there. (4) If pixg is NULL, it is generated as an alpha layer that is partially opaque, using @fract. Otherwise, it is cropped to pixs if required and @fract is ignored. The alpha channel in pixs is never used. (5) Colormaps are removed to 32 bpp. (6) The default setting for the border values in the alpha channel is 0 (transparent) for the outermost ring of pixels and (0.5 * fract * 255) for the second ring. When blended over a second image, this (a) shrinks the visible image to make a clean overlap edge with an image below, and (b) softens the edges by weakening the aliasing there. Use l_setAlphaMaskBorder() to change these values. (7) A subtle use of gamma correction is to remove gamma correction before rotation and restore it afterwards. This is done by sandwiching this function between a gamma/inverse-gamma photometric transform: pixt = pixGammaTRCWithAlpha(NULL, pixs, 1.0 / gamma, 0, 255); pixd = pixRotateWithAlpha(pixt, angle, NULL, fract); pixGammaTRCWithAlpha(pixd, pixd, gamma, 0, 255); pixDestroy(&pixt); This has the side-effect of producing artifacts in the very dark regions.
*** Warning: implicit assumption about RGB component ordering ***
| LEPT_DLL l_int32 pixRowStats | ( | PIX * | pixs, |
| BOX * | box, | ||
| NUMA ** | pnamean, | ||
| NUMA ** | pnamedian, | ||
| NUMA ** | pnamode, | ||
| NUMA ** | pnamodecount, | ||
| NUMA ** | pnavar, | ||
| NUMA ** | pnarootvar | ||
| ) |
Input: pixs (8 bpp; not cmapped)
box (<optional> clipping box; can be null)
&namean (<optional return> numa of mean values)
&namedian (<optional return> numa of median values)
&namode (<optional return> numa of mode intensity values)
&namodecount (<optional return> numa of mode counts)
&navar (<optional return> numa of variance)
&narootvar (<optional return> numa of square root of variance)
Return: 0 if OK, 1 on error (the requested statistics are returned through the optional numa arguments)
Notes: (1) This computes numas that represent column vectors of statistics, with each of its values derived from the corresponding row of a Pix. (2) Use NULL on input to prevent computation of any of the 6 numas. (3) Other functions that compute pixel row statistics are: pixCountPixelsByRow() pixAverageByRow() pixVarianceByRow() pixGetRowStats()
| LEPT_DLL NUMA* pixRunHistogramMorph | ( | PIX * | pixs, |
| l_int32 | runtype, | ||
| l_int32 | direction, | ||
| l_int32 | maxsize | ||
| ) |
Input: pixs
runtype (L_RUN_OFF, L_RUN_ON)
direction (L_HORIZ, L_VERT)
maxsize (size of largest runlength counted)
Return: numa of run-lengths
Input: pixs (1 bpp)
color (0 for white runs, 1 for black runs)
direction (L_HORIZONTAL_RUNS, L_VERTICAL_RUNS)
depth (8 or 16 bpp)
Return: pixd (8 or 16 bpp), or null on error
Notes: (1) The dest Pix is 8 or 16 bpp, with the pixel values equal to the runlength in which it is a member. The length is clipped to the max pixel value if necessary. (2) The color determines if we're labelling white or black runs. (3) A pixel that is not a member of the chosen color gets value 0; it belongs to a run of length 0 of the chosen color. (4) To convert for maximum dynamic range, either linear or log, use pixMaxDynamicRange().
| LEPT_DLL l_int32 pixSauvolaBinarize | ( | PIX * | pixs, |
| l_int32 | whsize, | ||
| l_float32 | factor, | ||
| l_int32 | addborder, | ||
| PIX ** | ppixm, | ||
| PIX ** | ppixsd, | ||
| PIX ** | ppixth, | ||
| PIX ** | ppixd | ||
| ) |
Input: pixs (8 bpp grayscale; not colormapped)
whsize (window half-width for measuring local statistics)
factor (factor for reducing threshold due to variance; >= 0)
addborder (1 to add border of width (@whsize + 1) on all sides)
&pixm (<optional return> local mean values)
&pixsd (<optional return> local standard deviation values)
&pixth (<optional return> threshold values)
&pixd (<optional return> thresholded image)
Return: 0 if OK, 1 on error
Notes: (1) The window width and height are 2 * @whsize + 1. The minimum value for @whsize is 2; typically it is >= 7. (2) The local statistics, measured over the window, are the average and standard deviation. (3) The measurements of the mean and standard deviation are performed inside a border of (@whsize + 1) pixels. If pixs does not have these added border pixels, use @addborder = 1 to add it here; otherwise use @addborder = 0. (4) The Sauvola threshold is determined from the formula: t = m * (1 - k * (1 - s / 128)) where: t = local threshold m = local mean k = @factor (>= 0) [ typ. 0.35 ] s = local standard deviation, which is maximized at 127.5 when half the samples are 0 and half are 255. (5) The basic idea of Niblack and Sauvola binarization is that the local threshold should be less than the median value, and the larger the variance, the closer to the median it should be chosen. Typical values for k are between 0.2 and 0.5.
| LEPT_DLL l_int32 pixSauvolaBinarizeTiled | ( | PIX * | pixs, |
| l_int32 | whsize, | ||
| l_float32 | factor, | ||
| l_int32 | nx, | ||
| l_int32 | ny, | ||
| PIX ** | ppixth, | ||
| PIX ** | ppixd | ||
| ) |
Input: pixs (8 bpp grayscale, not colormapped)
whsize (window half-width for measuring local statistics)
factor (factor for reducing threshold due to variance; >= 0)
nx, ny (subdivision into tiles; >= 1)
&pixth (<optional return> Sauvola threshold values)
&pixd (<optional return> thresholded image)
Return: 0 if OK, 1 on error
Notes: (1) The window width and height are 2 * @whsize + 1. The minimum value for @whsize is 2; typically it is >= 7. (2) For nx == ny == 1, this defaults to pixSauvolaBinarize(). (3) Why a tiled version? (a) Because the mean value accumulator is a uint32, overflow can occur for an image with more than 16M pixels. (b) The mean value accumulator array for 16M pixels is 64 MB. The mean square accumulator array for 16M pixels is 128 MB. Using tiles reduces the size of these arrays. (c) Each tile can be processed independently, in parallel, on a multicore processor. (4) The Sauvola threshold is determined from the formula: t = m * (1 - k * (1 - s / 128)) See pixSauvolaBinarize() for details.
Input: pixm (8 bpp grayscale; not colormapped)
pixms (32 bpp)
factor (factor for reducing threshold due to variance; >= 0)
&pixsd (<optional return> local standard deviation)
Return: pixd (8 bpp, sauvola threshold values), or null on error
Notes: (1) The Sauvola threshold is determined from the formula: t = m * (1 - k * (1 - s / 128)) where: t = local threshold m = local mean k = @factor (>= 0) [ typ. 0.35 ] s = local standard deviation, which is maximized at 127.5 when half the samples are 0 and half are 255. (2) See pixSauvolaBinarize() for other details. (3) Important definitions and relations for computing averages: v == pixel value E(p) == expected value of p == average of p over some pixel set S(v) == square of v == v * v mv == E(v) == expected pixel value == mean value ms == E(S(v)) == expected square of pixel values == mean square value var == variance == expected square of deviation from mean == E(S(v - mv)) = E(S(v) - 2 * v * mv + S(mv)) = E(S(v)) - S(mv) = ms - mv * mv s == standard deviation = sqrt(var) So for evaluating the standard deviation in the Sauvola threshold, we take s = sqrt(ms - mv * mv)
| LEPT_DLL l_int32 pixSaveTiled | ( | PIX * | pixs, |
| PIXA * | pixa, | ||
| l_float32 | scalefactor, | ||
| l_int32 | newrow, | ||
| l_int32 | space, | ||
| l_int32 | dp | ||
| ) |
Input: pixs (1, 2, 4, 8, 32 bpp)
pixa (the pix are accumulated here)
scalefactor (0.0 to disable; otherwise this is a scale factor)
newrow (0 if placed on the same row as previous; 1 otherwise)
space (horizontal and vertical spacing, in pixels)
dp (depth of pixa; 8 or 32 bpp; only used on first call)
Return: 0 if OK, 1 on error.
| LEPT_DLL l_int32 pixSaveTiledOutline | ( | PIX * | pixs, |
| PIXA * | pixa, | ||
| l_float32 | scalefactor, | ||
| l_int32 | newrow, | ||
| l_int32 | space, | ||
| l_int32 | linewidth, | ||
| l_int32 | dp | ||
| ) |
Input: pixs (1, 2, 4, 8, 32 bpp)
pixa (the pix are accumulated here)
scalefactor (0.0 to disable; otherwise this is a scale factor)
newrow (0 if placed on the same row as previous; 1 otherwise)
space (horizontal and vertical spacing, in pixels)
linewidth (width of added outline for image; 0 for no outline)
dp (depth of pixa; 8 or 32 bpp; only used on first call)
Return: 0 if OK, 1 on error.
Notes: (1) Before calling this function for the first time, use pixaCreate() to make the @pixa that will accumulate the pix. This is passed in each time pixSaveTiled() is called. (2) @scalefactor scales the input image. After scaling and possible depth conversion, the image is saved in the input pixa, along with a box that specifies the location to place it when tiled later. Disable saving the pix by setting @scalefactor == 0.0. (3) @newrow and @space specify the location of the new pix with respect to the last one(s) that were entered. (4) @dp specifies the depth at which all pix are saved. It can be only 8 or 32 bpp. Any colormap is removed. This is only used at the first invocation. (5) This function uses two variables from call to call. If they were static, the function would not be .so or thread safe, and furthermore, there would be interference with two or more pixa accumulating images at a time. Consequently, we use the first pix in the pixa to store and obtain both the depth and the current position of the bottom (one pixel below the lowest image raster line when laid out using the boxa). The bottom variable is stored in the input format field, which is the only field available for storing an int.
| LEPT_DLL l_int32 pixSaveTiledWithText | ( | PIX * | pixs, |
| PIXA * | pixa, | ||
| l_int32 | outwidth, | ||
| l_int32 | newrow, | ||
| l_int32 | space, | ||
| l_int32 | linewidth, | ||
| L_BMF * | bmf, | ||
| const char * | textstr, | ||
| l_uint32 | val, | ||
| l_int32 | location | ||
| ) |
Input: pixs (1, 2, 4, 8, 32 bpp)
pixa (the pix are accumulated here; as 32 bpp)
outwidth (in pixels; use 0 to disable entirely)
newrow (1 to start a new row; 0 to go on same row as previous)
space (horizontal and vertical spacing, in pixels)
linewidth (width of added outline for image; 0 for no outline)
bmf (<optional> font struct)
textstr (<optional> text string to be added)
val (color to set the text)
location (L_ADD_ABOVE, L_ADD_AT_TOP, L_ADD_AT_BOT, L_ADD_BELOW)
Return: 0 if OK, 1 on error.
Notes: (1) Before calling this function for the first time, use pixaCreate() to make the @pixa that will accumulate the pix. This is passed in each time pixSaveTiled() is called. (2) @outwidth is the scaled width. After scaling, the image is saved in the input pixa, along with a box that specifies the location to place it when tiled later. Disable saving the pix by setting @outwidth == 0. (3) @newrow and @space specify the location of the new pix with respect to the last one(s) that were entered. (4) All pix are saved as 32 bpp RGB. (5) If both @bmf and @textstr are defined, this generates a pix with the additional text; otherwise, no text is written. (6) The text is written before scaling, so it is properly antialiased in the scaled pix. However, if the pix on different calls have different widths, the size of the text will vary. (7) See pixSaveTiledOutline() for other implementation details.
Input: pixs (1, 2, 4, 8, 16 and 32 bpp)
scalex, scaley
Return: pixd, or null on error
This function scales 32 bpp RGB; 2, 4 or 8 bpp palette color; 2, 4, 8 or 16 bpp gray; and binary images.
When the input has palette color, the colormap is removed and the result is either 8 bpp gray or 32 bpp RGB, depending on whether the colormap has color entries. Images with 2, 4 or 16 bpp are converted to 8 bpp.
Because pixScale() is meant to be a very simple interface to a number of scaling functions, including the use of unsharp masking, the type of scaling and the sharpening parameters are chosen by default. Grayscale and color images are scaled using one of four methods, depending on the scale factors: (1) antialiased subsampling (lowpass filtering followed by subsampling, implemented here by area mapping), for scale factors less than 0.2 (2) antialiased subsampling with sharpening, for scale factors between 0.2 and 0.7 (3) linear interpolation with sharpening, for scale factors between 0.7 and 1.4 (4) linear interpolation without sharpening, for scale factors >= 1.4.
One could use subsampling for scale factors very close to 1.0, because it preserves sharp edges. Linear interpolation blurs edges because the dest pixels will typically straddle two src edge pixels. Subsampling removes entire columns and rows, so the edge is not blurred. However, there are two reasons for not doing this. First, it moves edges, so that a straight line at a large angle to both horizontal and vertical will have noticeable kinks where horizontal and vertical rasters are removed. Second, although it is very fast, you get good results on sharp edges by applying a sharpening filter.
For images with sharp edges, sharpening substantially improves the image quality for scale factors between about 0.2 and about 2.0. pixScale() uses a small amount of sharpening by default because it strengthens edge pixels that are weak due to anti-aliasing. The default sharpening factors are:
However, sharpening is computationally expensive, and one needs to consider the speed-quality tradeoff:
In many situations you will get a satisfactory result by scaling without sharpening: call pixScaleGeneral() with @sharpfract = 0.0. Alternatively, if you wish to sharpen but not use the default value, first call pixScaleGeneral() with @sharpfract = 0.0, and then sharpen explicitly using pixUnsharpMasking().
Binary images are scaled to binary by sampling the closest pixel, without any low-pass filtering (averaging of neighboring pixels). This will introduce aliasing for reductions. Aliasing can be prevented by using pixScaleToGray() instead.
*** Warning: implicit assumption about RGB component order for LI color scaling
| LEPT_DLL l_int32 pixScaleAndTransferAlpha | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| l_float32 | scalex, | ||
| l_float32 | scaley | ||
| ) |
Input: pixd (32 bpp, scaled image)
pixs (32 bpp, original unscaled image)
scalex, scaley (both > 0.0)
Return: 0 if OK; 1 on error
Notes: (1) This scales the alpha component of pixs and inserts into pixd.
Input: pixs (2, 4, 8 or 32 bpp; and 2, 4, 8 bpp with colormap)
scalex, scaley (must both be <= 0.7)
Return: pixd, or null on error
Notes: (1) This function should only be used when the scale factors are less than or equal to 0.7 (i.e., more than about 1.42x reduction). If either scale factor is larger than 0.7, we issue a warning and invoke pixScale(). (2) This works only on 2, 4, 8 and 32 bpp images. If there is a colormap, it is removed by converting to RGB. In other cases, we issue a warning and invoke pixScale(). (3) It does a relatively expensive area mapping computation, to avoid aliasing. It is about 2x slower than pixScaleSmooth(), but the results are much better on fine text. (4) This is typically about 20% faster for the special cases of 2x, 4x, 8x and 16x reduction. (5) Surprisingly, there is no speedup (and a slight quality impairment) if you do as many successive 2x reductions as possible, ending with a reduction with a scale factor larger than 0.5.
*** Warning: implicit assumption about RGB component ordering ***
Input: pixs (2, 4, 8 or 32 bpp; and 2, 4, 8 bpp with colormap) Return: pixd, or null on error
Notes: (1) This function does an area mapping (average) for 2x reduction. (2) This works only on 2, 4, 8 and 32 bpp images. If there is a colormap, it is removed by converting to RGB. (3) Speed on 3 GHz processor: Color: 160 Mpix/sec Gray: 700 Mpix/sec This contrasts with the speed of the general pixScaleAreaMap(): Color: 35 Mpix/sec Gray: 50 Mpix/sec (4) From (3), we see that this special function is about 4.5x faster for color and 14x faster for grayscale (5) Consequently, pixScaleAreaMap2() is incorporated into the general area map scaling function, for the special cases of 2x, 4x, 8x and 16x reduction.
Input: pixs (1 bpp)
scalex, scaley (both > 0.0)
Return: pixd, or null on error
Notes: (1) This function samples from the source without filtering. As a result, aliasing will occur for subsampling (scalex and scaley < 1.0).
Input: pixs (1, 2, 4, 8, 16, 32 bpp)
factor (integer subsampling)
Return: pixd, or null on error
Notes: (1) Simple interface to pixScaleBySampling(), for isotropic integer reduction. (2) If @factor == 1, returns a copy.
Input: pixs (1, 2, 4, 8, 16, 32 bpp)
scalex, scaley (both > 0.0)
Return: pixd, or null on error
Notes: (1) This function samples from the source without filtering. As a result, aliasing will occur for subsampling (@scalex and/or @scaley < 1.0). (2) If @scalex == 1.0 and @scaley == 1.0, returns a copy.
Input: pixs (1, 2, 4, 8, 16 and 32 bpp)
wd (target width; use 0 if using height as target)
hd (target height; use 0 if using width as target)
Return: pixd, or null on error
Notes: (1) This guarantees that the output scaled image has the dimension(s) you specify.
Input: pixs (32 bpp, representing rgb) Return: pixd, or null on error
Notes: (1) This is a special case of linear interpolated scaling, for 2x upscaling. It is about 8x faster than using the generic pixScaleColorLI(), and about 4x faster than using the special 2x scale function pixScaleGray2xLI() on each of the three components separately. (2) The speed on intel hardware is about 80 * 10^6 dest-pixels/sec/GHz.
*** Warning: implicit assumption about RGB component ordering ***
Input: pixs (32 bpp, representing rgb) Return: pixd, or null on error
Notes: (1) This is a special case of color linear interpolated scaling, for 4x upscaling. It is about 3x faster than using the generic pixScaleColorLI(). (2) The speed on intel hardware is about 30 * 10^6 dest-pixels/sec/GHz (3) This scales each component separately, using pixScaleGray4xLI(). It would be about 4x faster to inline the color code properly, in analogy to scaleColor4xLILow(), and I leave this as an exercise for someone who really needs it.
Input: pixs (32 bpp, representing rgb)
scalex, scaley (must both be >= 0.7)
Return: pixd, or null on error
Notes: (1) If this is used for scale factors less than 0.7, it will suffer from aliasing. A warning is issued. Particularly for document images with sharp edges, use pixScaleSmooth() or pixScaleAreaMap() instead. (2) For the general case, it's about 4x faster to manipulate the color pixels directly, rather than to make images out of each of the 3 components, scale each component using the pixScaleGrayLI(), and combine the results back into an rgb image. (3) The speed on intel hardware for the general case (not 2x) is about 10 * 10^6 dest-pixels/sec/GHz. (The special 2x case runs at about 80 * 10^6 dest-pixels/sec/GHz.)
| LEPT_DLL PIX* pixScaleGeneral | ( | PIX * | pixs, |
| l_float32 | scalex, | ||
| l_float32 | scaley, | ||
| l_float32 | sharpfract, | ||
| l_int32 | sharpwidth | ||
| ) |
Input: pixs (1, 2, 4, 8, 16 and 32 bpp)
scalex, scaley (both > 0.0)
sharpfract (use 0.0 to skip sharpening)
sharpwidth (halfwidth of low-pass filter; typ. 1 or 2)
Return: pixd, or null on error
Notes: (1) See pixScale() for usage. (2) This interface may change in the future, as other special cases are added. (3) The actual sharpening factors used depend on the maximum of the two scale factors (maxscale): maxscale <= 0.2: no sharpening 0.2 < maxscale < 1.4: uses the input parameters maxscale >= 1.4: no sharpening (4) To avoid sharpening for grayscale and color images with scaling factors between 0.2 and 1.4, call this function with @sharpfract == 0.0. (5) To use arbitrary sharpening in conjunction with scaling, call this function with @sharpfract = 0.0, and follow this with a call to pixUnsharpMasking() with your chosen parameters.
Input: pixs (8 bpp grayscale, not cmapped) Return: pixd, or null on error
Notes: (1) This is a special case of gray linear interpolated scaling, for 2x upscaling. It is about 6x faster than using the generic pixScaleGrayLI(). (2) The speed on intel hardware is about 100 * 10^6 dest-pixels/sec/GHz
Input: pixs (8 bpp, not cmapped) Return: pixd (1 bpp), or null on error
Notes: (1) This does 2x upscale on pixs, using linear interpolation, followed by Floyd-Steinberg dithering to binary. (2) Buffers are used to avoid making a large grayscale image.
Input: pixs (8 bpp, not cmapped)
thresh (between 0 and 256)
Return: pixd (1 bpp), or null on error
Notes: (1) This does 2x upscale on pixs, using linear interpolation, followed by thresholding to binary. (2) Buffers are used to avoid making a large grayscale image.
Input: pixs (8 bpp grayscale, not cmapped) Return: pixd, or null on error
Notes: (1) This is a special case of gray linear interpolated scaling, for 4x upscaling. It is about 12x faster than using the generic pixScaleGrayLI(). (2) The speed on intel hardware is about 160 * 10^6 dest-pixels/sec/GHz.
Input: pixs (8 bpp, not cmapped) Return: pixd (1 bpp), or null on error
Notes: (1) This does 4x upscale on pixs, using linear interpolation, followed by Floyd-Steinberg dithering to binary. (2) Buffers are used to avoid making a large grayscale image.
Input: pixs (8 bpp)
thresh (between 0 and 256)
Return: pixd (1 bpp), or null on error
Notes: (1) This does 4x upscale on pixs, using linear interpolation, followed by thresholding to binary. (2) Buffers are used to avoid making a large grayscale image. (3) If a full 4x expanded grayscale image can be kept in memory, this function is only about 10% faster than separately doing a linear interpolation to a large grayscale image, followed by thresholding to binary.
Input: pixs (8 bpp grayscale, no cmap)
scalex, scaley (must both be >= 0.7)
Return: pixd, or null on error
This function is appropriate for upscaling (magnification: scale factors > 1), and for a small amount of downscaling (reduction: scale factors > 0.5). For scale factors less than 0.5, the best result is obtained by area mapping, but this is very expensive. So for such large reductions, it is more appropriate to do low pass filtering followed by subsampling, a combination which is effectively a cheap form of area mapping.
Some details follow.
For each pixel in the dest, this does a linear interpolation of 4 neighboring pixels in the src. Specifically, consider the UL corner of src and dest pixels. The UL corner of the dest falls within a src pixel, whose four corners are the UL corners of 4 adjacent src pixels. The value of the dest is taken by linear interpolation using the values of the four src pixels and the distance of the UL corner of the dest from each corner.
If the image is expanded so that the dest pixel is smaller than the src pixel, such interpolation is a reasonable approach. This interpolation is also good for a small image reduction factor that is not more than a 2x reduction.
Note that the linear interpolation algorithm for scaling is identical in form to the area-mapping algorithm for grayscale rotation. The latter corresponds to a translation of each pixel without scaling.
This function is NOT optimal if the scaling involves a large reduction. If the image is significantly reduced, so that the dest pixel is much larger than the src pixels, this interpolation, which is over src pixels only near the UL corner of the dest pixel, is not going to give a good area-mapping average. Because area mapping for image scaling is considerably more computationally intensive than linear interpolation, we choose not to use it. For large image reduction, linear interpolation over adjacent src pixels degenerates asymptotically to subsampling. But subsampling without a low-pass pre-filter causes aliasing by the nyquist theorem. To avoid aliasing, a low-pass filter (e.g., an averaging filter) of size roughly equal to the dest pixel (i.e., the reduction factor) should be applied to the src before subsampling.
As an alternative to low-pass filtering and subsampling for large reduction factors, linear interpolation can also be done between the (widely separated) src pixels in which the corners of the dest pixel lie. This also is not optimal, as it samples src pixels only near the corners of the dest pixel, and it is not implemented.
Summary: (1) If this is used for scale factors less than 0.7, it will suffer from aliasing. A warning is issued. Particularly for document images with sharp edges, use pixScaleSmooth() or pixScaleAreaMap() instead. (2) The speed on intel hardware for the general case (not 2x) is about 13 * 10^6 dest-pixels/sec/GHz. (The special 2x case runs at about 100 * 10^6 dest-pixels/sec/GHz.)
Input: pixs (8 bpp, not cmapped)
xfact (x downscaling factor; integer)
yfact (y downscaling factor; integer)
type (L_CHOOSE_MIN, L_CHOOSE_MAX, L_CHOOSE_MAX_MIN_DIFF)
Return: pixd (8 bpp)
Notes: (1) The downscaled pixels in pixd are the min, max or (max - min) of the corresponding set of xfact * yfact pixels in pixs. (2) Using L_CHOOSE_MIN is equivalent to a grayscale erosion, using a brick Sel of size (xfact * yfact), followed by subsampling within each (xfact * yfact) cell. Using L_CHOOSE_MAX is equivalent to the corresponding dilation. (3) Using L_CHOOSE_MAX_MIN_DIFF finds the difference between max and min values in each cell. (4) For the special case of downscaling by 2x in both directions, pixScaleGrayMinMax2() is about 2x more efficient.
Input: pixs (8 bpp, not cmapped)
type (L_CHOOSE_MIN, L_CHOOSE_MAX, L_CHOOSE_MAX_MIN_DIFF)
Return: pixd (8 bpp downscaled by 2x)
Notes: (1) Special version for 2x reduction. The downscaled pixels in pixd are the min, max or (max - min) of the corresponding set of 4 pixels in pixs. (2) The max and min operations are a special case (for levels 1 and 4) of grayscale analog to the binary rank scaling operation pixReduceRankBinary2(). Note, however, that because of the photometric definition that higher gray values are lighter, the erosion-like L_CHOOSE_MIN will darken the resulting image, corresponding to a threshold level 1 in the binary case. Likewise, L_CHOOSE_MAX will lighten the pixd, corresponding to a threshold level of 4. (3) To choose any of the four rank levels in a 2x grayscale reduction, use pixScaleGrayRank2(). (4) This runs at about 70 MPix/sec/GHz of source data for erosion and dilation.
Input: pixs (8 bpp, no cmap)
rank (1 (darkest), 2, 3, 4 (lightest))
Return: pixd (8 bpp, downscaled by 2x)
Notes: (1) Rank 2x reduction. If rank == 1(4), the downscaled pixels in pixd are the min(max) of the corresponding set of 4 pixels in pixs. Values 2 and 3 are intermediate. (2) This is the grayscale analog to the binary rank scaling operation pixReduceRankBinary2(). Here, because of the photometric definition that higher gray values are lighter, rank 1 gives the darkest pixel, whereas rank 4 gives the lightest pixel. This is opposite to the binary rank operation. (3) For rank = 1 and 4, this calls pixScaleGrayMinMax2(), which runs at about 70 MPix/sec/GHz of source data. For rank 2 and 3, this runs 3x slower, at about 25 MPix/sec/GHz.
| LEPT_DLL PIX* pixScaleGrayRankCascade | ( | PIX * | pixs, |
| l_int32 | level1, | ||
| l_int32 | level2, | ||
| l_int32 | level3, | ||
| l_int32 | level4 | ||
| ) |
Input: pixs (8 bpp, not cmapped)
level1, ... level4 (rank thresholds, in set {0, 1, 2, 3, 4})
Return: pixd (8 bpp, downscaled by up to 16x)
Notes: (1) This performs up to four cascaded 2x rank reductions. (2) Use level = 0 to truncate the cascade.
Input: pixs (8 bpp grayscale)
factor (integer reduction factor >= 1)
thresh (binarization threshold)
Return: pixd (1 bpp), or null on error
Notes: (1) This does simultaneous subsampling by an integer factor and thresholding from gray to binary. (2) It is designed for maximum speed, and is used for quickly generating a downsized binary image from a higher resolution gray image. This would typically be used for image analysis.
Input: pixs (2, 4, 8 or 32 bpp; with or without colormap)
scalex, scaley (must both be >= 0.7)
Return: pixd, or null on error
Notes: (1) This function should only be used when the scale factors are greater than or equal to 0.7, and typically greater than 1. If either scale factor is smaller than 0.7, we issue a warning and invoke pixScale(). (2) This works on 2, 4, 8, 16 and 32 bpp images, as well as on 2, 4 and 8 bpp images that have a colormap. If there is a colormap, it is removed to either gray or RGB, depending on the colormap. (3) This does a linear interpolation on the src image. (4) It dispatches to much faster implementations for the special cases of 2x and 4x expansion.
*** Warning: implicit assumption about RGB component ordering ***
Input: pixs1 (high res 8 bpp, no cmap)
pixs2 (low res -- 2x reduced -- 8 bpp, no cmap)
scale (reduction with respect to high res image, > 0.5)
Return: 8 bpp pix, scaled down by reduction in each direction,
or NULL on error.
Notes: (1) See notes in pixScaleToGrayMipmap(). (2) This function suffers from aliasing effects that are easily seen in document images.
Input: pixs (32 bpp RGB)
factor (integer reduction factor >= 1)
thresh (binarization threshold)
Return: pixd (1 bpp), or null on error
Notes: (1) This does simultaneous subsampling by an integer factor and conversion from RGB to gray to binary. (2) It is designed for maximum speed, and is used for quickly generating a downsized binary image from a higher resolution RGB image. This would typically be used for image analysis. (3) It uses the green channel to represent the RGB pixel intensity.
Input: pixs (32 bpp rgb)
rwt, gwt, bwt (must sum to 1.0)
Return: pixd, (8 bpp, 2x reduced), or null on error
Input: pixs (32 bpp rgb)
factor (integer reduction factor >= 1)
color (one of COLOR_RED, COLOR_GREEN, COLOR_BLUE)
Return: pixd (8 bpp), or null on error
Notes: (1) This does simultaneous subsampling by an integer factor and extraction of the color from the RGB pix. (2) It is designed for maximum speed, and is used for quickly generating a downsized grayscale image from a higher resolution RGB image. This would typically be used for image analysis. (3) The standard color byte order (RGBA) is assumed.
Input: pixs (2, 4, 8 or 32 bpp; and 2, 4, 8 bpp with colormap)
scalex, scaley (must both be < 0.7)
Return: pixd, or null on error
Notes: (1) This function should only be used when the scale factors are less than or equal to 0.7 (i.e., more than about 1.42x reduction). If either scale factor is larger than 0.7, we issue a warning and invoke pixScale(). (2) This works only on 2, 4, 8 and 32 bpp images, and if there is a colormap, it is removed by converting to RGB. In other cases, we issue a warning and invoke pixScale(). (3) It does simple (flat filter) convolution, with a filter size commensurate with the amount of reduction, to avoid aliasing. (4) It does simple subsampling after smoothing, which is appropriate for this range of scaling. Linear interpolation gives essentially the same result with more computation for these scale factors, so we don't use it. (5) The result is the same as doing a full block convolution followed by subsampling, but this is faster because the results of the block convolution are only computed at the subsampling locations. In fact, the computation time is approximately independent of the scale factor, because the convolution kernel is adjusted so that each source pixel is summed approximately once.
*** Warning: implicit assumption about RGB component ordering ***
Input: pixs (1 bpp)
scalefactor (reduction: must be > 0.0 and < 1.0)
Return: pixd (8 bpp), scaled down by scalefactor in each direction,
or NULL on error.
Notes:
For faster scaling in the range of scalefactors from 0.0625 to 0.5, with very little difference in quality, use pixScaleToGrayFast().
Binary images have sharp edges, so they intrinsically have very high frequency content. To avoid aliasing, they must be low-pass filtered, which tends to blur the edges. How can we keep relatively crisp edges without aliasing? The trick is to do binary upscaling followed by a power-of-2 scaleToGray. For large reductions, where you don't end up with much detail, some corners can be cut.
The intent here is to get high quality reduced grayscale images with relatively little computation. We do binary pre-scaling followed by scaleToGrayN() for best results, esp. to avoid excess blur when the scale factor is near an inverse power of 2. Where a low-pass filter is required, we use simple convolution kernels: either the hat filter for linear interpolation or a flat filter for larger downscaling. Other choices, such as a perfect bandpass filter with infinite extent (the sinc) or various approximations to it (e.g., lanczos), are unnecessarily expensive.
The choices made are as follows: (1) Do binary upscaling before scaleToGrayN() for scalefactors > 1/8 (2) Do binary downscaling before scaleToGray8() for scalefactors between 1/16 and 1/8. (3) Use scaleToGray16() before grayscale downscaling for scalefactors less than 1/16. Another reasonable choice would be to start binary downscaling for scalefactors below 1/4, rather than below 1/8 as we do here.
The general scaling rules, not all of which are used here, go as follows: (1) For grayscale upscaling, use pixScaleGrayLI(). However, note that edges will be visibly blurred for scalefactors near (but above) 1.0. Replication will avoid edge blur, and should be considered for factors very near 1.0. (2) For grayscale downscaling with a scale factor larger than about 0.7, use pixScaleGrayLI(). For scalefactors near (but below) 1.0, you tread between Scylla and Charybdis. pixScaleGrayLI() again gives edge blurring, but pixScaleBySampling() gives visible aliasing. (3) For grayscale downscaling with a scale factor smaller than about 0.7, use pixScaleSmooth() (4) For binary input images, do as much scale to gray as possible using the special integer functions (2, 3, 4, 8 and 16). (5) It is better to upscale in binary, followed by scaleToGrayN() than to do scaleToGrayN() followed by an upscale using either LI or oversampling. (6) It may be better to downscale in binary, followed by scaleToGrayN() than to first use scaleToGrayN() followed by downscaling. For downscaling between 8x and 16x, this is a reasonable option. (7) For reductions greater than 16x, it's reasonable to use scaleToGray16() followed by further grayscale downscaling.
Input: pixs (1 bpp)
Return: pixd (8 bpp), scaled down by 16x in each direction,
or null on error.
Input: pixs (1 bpp)
Return: pixd (8 bpp), scaled down by 2x in each direction,
or null on error.
Input: pixs (1 bpp)
Return: pixd (8 bpp), scaled down by 3x in each direction,
or null on error.
Notes: (1) Speed is about 100 x 10^6 src-pixels/sec/GHz. Another way to express this is it processes 1 src pixel in about 10 cycles. (2) The width of pixd is truncated to a factor of 8.
Input: pixs (1 bpp)
Return: pixd (8 bpp), scaled down by 4x in each direction,
or null on error.
Notes: (1) The width of pixd is truncated to a factor of 2.
Input: pixs (1 bpp)
Return: pixd (8 bpp), scaled down by 6x in each direction,
or null on error.
Notes: (1) The width of pixd is truncated to a factor of 8.
Input: pixs (1 bpp)
Return: pixd (8 bpp), scaled down by 8x in each direction,
or null on error
Input: pixs (1 bpp)
scalefactor (reduction: must be > 0.0 and < 1.0)
Return: pixd (8 bpp), scaled down by scalefactor in each direction,
or NULL on error.
Notes: (1) See notes in pixScaleToGray() for the basic approach. (2) This function is considerably less expensive than pixScaleToGray() for scalefactor in the range (0.0625 ... 0.5), and the quality is nearly as good. (3) Unlike pixScaleToGray(), which does binary upscaling before downscaling for scale factors >= 0.0625, pixScaleToGrayFast() first downscales in binary for all scale factors < 0.5, and then does a 2x scale-to-gray as the final step. For scale factors < 0.0625, both do a 16x scale-to-gray, followed by further grayscale reduction.
Input: pixs (1 bpp)
scalefactor (reduction: must be > 0.0 and < 1.0)
Return: pixd (8 bpp), scaled down by scalefactor in each direction,
or NULL on error.
Notes:
This function is here mainly for pedagogical reasons. Mip-mapping is widely used in graphics for texture mapping, because the texture changes smoothly with scale. This is accomplished by constructing a multiresolution pyramid and, for each pixel, doing a linear interpolation between corresponding pixels in the two planes of the pyramid that bracket the desired resolution. The computation is very efficient, and is implemented in hardware in high-end graphics cards.
We can use mip-mapping for scale-to-gray by using two scale-to-gray reduced images (we don't need the entire pyramid) selected from the set {2x, 4x, ... 16x}, and interpolating. However, we get severe aliasing, probably because we are subsampling from the higher resolution image. The method is very fast, but the result is very poor. In fact, the results don't look any better than either subsampling off the higher-res grayscale image or oversampling on the lower-res image. Consequently, this method should NOT be used for generating reduced images, scale-to-gray or otherwise.
Input: pixs (1, 2, 4, 8, 16 and 32 bpp)
wd (target width; use 0 if using height as target)
hd (target height; use 0 if using width as target)
Return: pixd, or null on error
Notes: (1) This guarantees that the output scaled image has the dimension(s) you specify.
| LEPT_DLL PIX* pixScaleWithAlpha | ( | PIX * | pixs, |
| l_float32 | scalex, | ||
| l_float32 | scaley, | ||
| PIX * | pixg, | ||
| l_float32 | fract | ||
| ) |
Input: pixs (32 bpp rgb or cmapped)
scalex, scaley (must be > 0.0)
pixg (<optional> 8 bpp, can be null)
fract (between 0.0 and 1.0, with 0.0 fully transparent
and 1.0 fully opaque)
Return: pixd (32 bpp rgba), or null on error
Notes: (1) The alpha channel is transformed separately from pixs, and aligns with it, being fully transparent outside the boundary of the transformed pixs. For pixels that are fully transparent, a blending function like pixBlendWithGrayMask() will give zero weight to corresponding pixels in pixs. (2) Scaling is done with area mapping or linear interpolation, depending on the scale factors. Default sharpening is done. (3) If pixg is NULL, it is generated as an alpha layer that is partially opaque, using @fract. Otherwise, it is cropped to pixs if required, and @fract is ignored. The alpha channel in pixs is never used. (4) Colormaps are removed to 32 bpp. (5) The default setting for the border values in the alpha channel is 0 (transparent) for the outermost ring of pixels and (0.5 * fract * 255) for the second ring. When blended over a second image, this (a) shrinks the visible image to make a clean overlap edge with an image below, and (b) softens the edges by weakening the aliasing there. Use l_setAlphaMaskBorder() to change these values. (6) A subtle use of gamma correction is to remove gamma correction before scaling and restore it afterwards. This is done by sandwiching this function between a gamma/inverse-gamma photometric transform: pixt = pixGammaTRCWithAlpha(NULL, pixs, 1.0 / gamma, 0, 255); pixd = pixScaleWithAlpha(pixt, scalex, scaley, NULL, fract); pixGammaTRCWithAlpha(pixd, pixd, gamma, 0, 255); pixDestroy(&pixt); This has the side-effect of producing artifacts in the very dark regions.
*** Warning: implicit assumption about RGB component ordering ***
| LEPT_DLL l_int32 pixScanForEdge | ( | PIX * | pixs, |
| BOX * | box, | ||
| l_int32 | lowthresh, | ||
| l_int32 | highthresh, | ||
| l_int32 | maxwidth, | ||
| l_int32 | factor, | ||
| l_int32 | scanflag, | ||
| l_int32 * | ploc | ||
| ) |
Input: pixs (1 bpp)
box (<optional> within which the search is conducted)
lowthresh (threshold to choose clipping location)
highthresh (threshold required to find an edge)
maxwidth (max allowed width between low and high thresh locs)
factor (sampling factor along pixel counting direction)
scanflag (direction of scan; e.g., L_FROM_LEFT)
&loc (<return> location in scan direction of first black pixel)
Return: 0 if OK; 1 on error or if the edge is not found
Notes: (1) If there are no fg pixels, the position is set to 0. Caller must check the return value! (2) Use @box == NULL to scan from edge of pixs (3) As the scan progresses, the location where the sum of pixels equals or exceeds @lowthresh is noted (loc). The scan is stopped when the sum of pixels equals or exceeds @highthresh. If the scan distance between loc and that point does not exceed @maxwidth, an edge is found and its position is taken to be loc. @maxwidth implicitly sets a minimum on the required gradient of the edge. (4) The thresholds must be at least 1, and the low threshold cannot be larger than the high threshold.
Input: pixs (1 bpp)
box (<optional> within which the search is conducted)
scanflag (direction of scan; e.g., L_FROM_LEFT)
&loc (location in scan direction of first black pixel)
Return: 0 if OK; 1 on error or if no fg pixels are found
Notes: (1) If there are no fg pixels, the position is set to 0. Caller must check the return value! (2) Use @box == NULL to scan from edge of pixs
| LEPT_DLL PTA* pixSearchBinaryMaze | ( | PIX * | pixs, |
| l_int32 | xi, | ||
| l_int32 | yi, | ||
| l_int32 | xf, | ||
| l_int32 | yf, | ||
| PIX ** | ppixd | ||
| ) |
Input: pixs (1 bpp, maze)
xi, yi (beginning point; use same initial point
that was used to generate the maze)
xf, yf (end point, or close to it)
&ppixd (<optional return> maze with path illustrated, or
if no path possible, the part of the maze
that was searched)
Return: pta (shortest path), or null if either no path
exists or on error
Notes: (1) Because of the overhead in calling pixGetPixel() and pixSetPixel(), we have used raster line pointers and the GET_DATA* and SET_DATA* macros for many of the pix accesses. (2) Commentary: The goal is to find the shortest path between beginning and end points, without going through walls, and there are many ways to solve this problem. We use a queue to implement a breadth-first search. Two auxiliary "image" data structures can be used: one to mark the visited pixels and one to give the direction to the parent for each visited pixel. The first structure is used to avoid putting pixels on the queue more than once, and the second is used for retracing back to the origin, like the breadcrumbs in Hansel and Gretel. Each pixel taken off the queue is destroyed after it is used to locate the allowed neighbors. In fact, only one distance image is required, if you initialize it to some value that signifies "not yet visited." (We use a binary image for marking visited pixels because it is clearer.) This method for a simple search of a binary maze is implemented in pixSearchBinaryMaze(). An alternative method would store the (manhattan) distance from the start point with each pixel on the queue. The children of each pixel get a distance one larger than the parent. These values can be stored in an auxiliary distance map image that is constructed simultaneously with the search. Once the end point is reached, the distance map is used to backtrack along a minimum path. There may be several equal length minimum paths, any one of which can be chosen this way.
| LEPT_DLL PTA* pixSearchGrayMaze | ( | PIX * | pixs, |
| l_int32 | xi, | ||
| l_int32 | yi, | ||
| l_int32 | xf, | ||
| l_int32 | yf, | ||
| PIX ** | ppixd | ||
| ) |
Input: pixs (8 bpp, maze)
xi, yi (beginning point; use same initial point
that was used to generate the maze)
xf, yf (end point, or close to it)
&ppixd (<optional return> maze with path illustrated, or
if no path possible, the part of the maze
that was searched)
Return: pta (shortest path), or null if either no path
exists or on error
Commentary: Consider first a slight generalization of the binary maze search problem. Suppose that you can go through walls, but the cost is higher (say, an increment of 3 to go into a wall pixel rather than 1)? You're still trying to find the shortest path. One way to do this is with an ordered queue, and a simple way to visualize an ordered queue is as a set of stacks, each stack being marked with the distance of each pixel in the stack from the start. We place the start pixel in stack 0, pop it, and process its 4 children. Each pixel is given a distance that is incremented from that of its parent (0 in this case), depending on if it is a wall pixel or not. That value may be recorded on a distance map, according to the algorithm below. For children of the first pixel, those not on a wall go in stack 1, and wall children go in stack 3. Stack 0 being emptied, the process then continues with pixels being popped from stack 1. Here is the algorithm for each child pixel. The pixel's distance value, were it to be placed on a stack, is compared with the value for it that is on the distance map. There are three possible cases: (1) If the pixel has not yet been registered, it is pushed on its stack and the distance is written to the map. (2) If it has previously been registered with a higher distance, the distance on the map is relaxed to that of the current pixel, which is then placed on its stack. (3) If it has previously been registered with an equal or lower value, the pixel is discarded. The pixels are popped and processed successively from stack 1, and when stack 1 is empty, popping starts on stack 2. This continues until the destination pixel is popped off a stack. The minimum path is then derived from the distance map, going back from the end point as before. 
This is just Dijkstra's algorithm for a directed graph; here, the underlying graph (consisting of the pixels and four edges connecting each pixel to its 4-neighbor) is a special case of a directed graph, where each edge is bi-directional. The implementation of this generalized maze search is left as an exercise to the reader.
Let's generalize a bit further. Suppose the "maze" is just a grayscale image – think of it as an elevation map. The cost of moving on this surface depends on the height, or the gradient, or whatever you want. All that is required is that the cost is specified and non-negative on each link between adjacent pixels. Now the problem becomes: find the least cost path moving on this surface between two specified end points. For example, if the cost across an edge between two pixels depends on the "gradient", you can use: cost = 1 + L_ABS(deltaV) where deltaV is the difference in value between two adjacent pixels. If the costs are all integers, we can still use an array of stacks to avoid ordering the queue (e.g., by using a heap sort.) This is a neat problem, because you don't even have to build a maze – you can use it on any grayscale image!
Rather than using an array of stacks, a more practical approach is to implement with a priority queue, which is a queue that is sorted so that the elements with the largest (or smallest) key values always come off first. The priority queue is efficiently implemented as a heap, and this is how we do it. Suppose you run the algorithm using a priority queue, doing the bookkeeping with an auxiliary image data structure that saves the distance of each pixel put on the queue as before, according to the method described above. We implement it as a 2-way choice by initializing the distance array to a large value and putting a pixel on the queue if its distance is less than the value found on the array. When you finally pop the end pixel from the queue, you're done, and you can trace the path backward, either always going downhill or using an auxiliary image to give you the direction to go at each step. This is implemented here in searchGrayMaze().
Do we really have to use a sorted queue? Can we solve this generalized maze with an unsorted queue of pixels? (Or even an unsorted stack, doing a depth-first search (DFS)?) Consider a different algorithm for this generalized maze, where we travel again breadth first, but this time use a single, unsorted queue. An auxiliary image is used as before to store the distances and to determine if pixels get pushed on the stack or dropped. As before, we must allow pixels to be revisited, with relaxation of the distance if a shorter path arrives later. As a result, we will in general have multiple instances of the same pixel on the stack with different distances. However, because the queue is not ordered, some of these pixels will be popped when another instance with a lower distance is still on the stack. Here, we're just popping them in the order they go on, rather than setting up a priority based on minimum distance. Thus, unlike the priority queue, when a pixel is popped we have to check the distance map to see if a pixel with a lower distance has been put on the queue, and, if so, we discard the pixel we just popped. So the "while" loop looks like this:
How does this loop terminate? Before, with an ordered queue, it terminates when you pop the end pixel. But with an unordered queue (or stack), the first time you hit the end pixel, the distance is not guaranteed to be correct, because the pixels along the shortest path may not have yet been visited and relaxed. Because the shortest path can theoretically go anywhere, we must keep going. How do we know when to stop? Dijkstra uses an ordered queue to systematically remove nodes from further consideration. (Each time a pixel is popped, we're done with it; it's "finalized" in the Dijkstra sense because we know the shortest path to it.) However, with an unordered queue, the brute force answer is: stop when the queue (or stack) is empty, because then every pixel in the image has been assigned its minimum "distance" from the start pixel.
This is similar to the situation when you use a stack for the simpler uniform-step problem: with breadth-first search (BFS) the pixels on the queue are automatically ordered, so you are done when you locate the end pixel as a neighbor of a popped pixel; whereas depth-first search (DFS), using a stack, requires, in general, a search of every accessible pixel. Further, if a pixel is revisited with a smaller distance, that distance is recorded and the pixel is put on the stack again.
But surely, you ask, can't we stop sooner? What if the start and end pixels are very close to each other? OK, suppose they are, and you have very high walls and a long snaking level path that is actually the minimum cost. That long path can wind back and forth across the entire maze many times before ending up at the end point, which could be just over a wall from the start. With the unordered queue, you very quickly get a high distance for the end pixel, which will be relaxed to the minimum distance only after all the pixels of the path have been visited and placed on the queue, multiple times for many of them. So that's the price for not ordering the queue!
| LEPT_DLL l_int32 pixSeedfill | ( | PIX * | pixs, |
| L_STACK * | stack, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | connectivity | ||
| ) |
Input: pixs (1 bpp)
stack (for holding fillsegs)
x,y (location of seed pixel)
connectivity (4 or 8)
Return: 0 if OK, 1 on error
Notes: (1) This removes the component from pixs with a fg pixel at (x,y). (2) See pixSeedfill4() and pixSeedfill8() for details.
Input: pixs (1 bpp)
stack (for holding fillsegs)
x,y (location of seed pixel)
Return: 0 if OK, 1 on error
Notes: (1) This is Paul Heckbert's stack-based 4-cc seedfill algorithm. (2) This operates on the input 1 bpp pix to remove the fg seed pixel, at (x,y), and all pixels that are 4-connected to it. The seed pixel at (x,y) must initially be ON. (3) Reference: see pixSeedfill4BB()
Input: pixs (1 bpp)
stack (for holding fillsegs)
x,y (location of seed pixel)
Return: box or null on error.
Notes: (1) This is Paul Heckbert's stack-based 4-cc seedfill algorithm. (2) This operates on the input 1 bpp pix to remove the fg seed pixel, at (x,y), and all pixels that are 4-connected to it. The seed pixel at (x,y) must initially be ON. (3) Returns the bounding box of the erased 4-cc component. (4) Reference: see Paul Heckbert's stack-based seed fill algorithm in "Graphics Gems", ed. Andrew Glassner, Academic Press, 1990. The algorithm description is given on pp. 275-277; working C code is on pp. 721-722. The code here follows Heckbert's exactly, except we use function calls instead of macros for pushing data on and popping data off the stack. This makes sense to do because Heckbert's fixed-size stack with macros is dangerous: images exist that will overrun the stack and crash. The stack utility here grows dynamically as needed, and the fillseg structures that are not in use are stored in another stack for reuse. It should be noted that the overhead in the function calls (vs. macros) is negligible.
Input: pixs (1 bpp)
stack (for holding fillsegs)
x,y (location of seed pixel)
Return: 0 if OK, 1 on error
Notes: (1) This is Paul Heckbert's stack-based 8-cc seedfill algorithm. (2) This operates on the input 1 bpp pix to remove the fg seed pixel, at (x,y), and all pixels that are 8-connected to it. The seed pixel at (x,y) must initially be ON. (3) Reference: see pixSeedfill8BB()
Input: pixs (1 bpp)
stack (for holding fillsegs)
x,y (location of seed pixel)
Return: box or null on error.
Notes: (1) This is Paul Heckbert's stack-based 8-cc seedfill algorithm. (2) This operates on the input 1 bpp pix to remove the fg seed pixel, at (x,y), and all pixels that are 8-connected to it. The seed pixel at (x,y) must initially be ON. (3) Returns the bounding box of the erased 8-cc component. (4) Reference: see Paul Heckbert's stack-based seed fill algorithm in "Graphics Gems", ed. Andrew Glassner, Academic Press, 1990. The algorithm description is given on pp. 275-277; working C code is on pp. 721-722. The code here follows Heckbert's closely, except the leak checks are changed for 8 connectivity. See comments on pixSeedfill4BB() for more details.
| LEPT_DLL BOX* pixSeedfillBB | ( | PIX * | pixs, |
| L_STACK * | stack, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | connectivity | ||
| ) |
Input: pixs (1 bpp)
stack (for holding fillsegs)
x,y (location of seed pixel)
connectivity (4 or 8)
Return: box or null on error
Notes: (1) This is the high-level interface to Paul Heckbert's stack-based seedfill algorithm.
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs; 1 bpp)
pixs (1 bpp seed)
pixm (1 bpp filling mask)
connectivity (4 or 8)
Return: pixd always
Notes: (1) This is for binary seedfill (aka "binary reconstruction"). (2) There are 3 cases: (a) pixd == null (make a new pixd) (b) pixd == pixs (in-place) (c) pixd != pixs (3) If you know the case, use these patterns for clarity: (a) pixd = pixSeedfillBinary(NULL, pixs, ...); (b) pixSeedfillBinary(pixs, pixs, ...); (c) pixSeedfillBinary(pixd, pixs, ...); (4) The resulting pixd contains the filled seed. For some applications you want to OR it with the inverse of the filling mask. (5) The input seed and mask images can be different sizes, but in typical use the difference, if any, would be only a few pixels in each direction. If the sizes differ, the clipping is handled by the low-level function seedfillBinaryLow().
| LEPT_DLL PIX* pixSeedfillBinaryRestricted | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| PIX * | pixm, | ||
| l_int32 | connectivity, | ||
| l_int32 | xmax, | ||
| l_int32 | ymax | ||
| ) |
Input: pixd (<optional>; this can be null, equal to pixs,
or different from pixs; 1 bpp)
pixs (1 bpp seed)
pixm (1 bpp filling mask)
connectivity (4 or 8)
xmax (max distance in x direction of fill into the mask)
ymax (max distance in y direction of fill into the mask)
Return: pixd always
Notes: (1) See usage for pixSeedfillBinary(), which has unrestricted fill. In pixSeedfillBinary(), the filling distance is unrestricted and can be larger than pixs, depending on the topology of the mask. (2) There are occasions where it is useful not to permit the fill to go more than a certain distance into the mask. @xmax specifies the maximum horizontal distance allowed in the fill; @ymax does likewise in the vertical direction. (3) Operationally, the max "distance" allowed for the fill is a linear distance from the original seed, independent of the actual mask topology. (4) Another formulation of this problem, not implemented, would use the manhattan distance from the seed, as determined by a breadth-first search starting at the seed boundaries and working outward where the mask fg allows. How this might use the constraints of separate xmax and ymax is not clear.
Input: pixs (8 bpp seed; filled in place)
pixm (8 bpp filling mask)
connectivity (4 or 8)
Return: 0 if OK, 1 on error
Notes: (1) This is an in-place filling operation on the seed, pixs, where the clipping mask is always above or at the level of the seed as it is filled. (2) For details of the operation, see the description in seedfillGrayLow() and the code there. (3) As an example of use, see the description in pixHDome(). There, the seed is an image where each pixel is a fixed amount smaller than the corresponding mask pixel. (4) Reference paper : L. Vincent, Morphological grayscale reconstruction in image analysis: applications and efficient algorithms, IEEE Transactions on Image Processing, vol. 2, no. 2, pp. 176-201, 1993.
Input: pixb (binary mask giving seed locations)
pixm (8 bpp basin-type filling mask)
delta (amount of seed value above mask)
connectivity (4 or 8)
Return: pixd (filled seed) if OK, null on error
Notes: (1) This fills from a seed within basins defined by a filling mask. The seed value(s) are greater than the corresponding filling mask value, and the result has the bottoms of the basins raised by the initial seed value. (2) The seed has value 255 except where pixb has fg (1), which are the seed 'locations'. At the seed locations, the seed value is the corresponding value of the mask pixel in pixm plus @delta. If @delta == 0, we return a copy of pixm. (3) The actual filling is done using the standard grayscale filling operation on the inverse of the mask and using the inverse of the seed image. After filling, we return the inverse of the filled seed. (4) As an example of use: pixm can describe a grayscale image of text, where the (dark) text pixels are basins of low values; pixb can identify the local minima in pixm (say, at the bottom of the basins); and delta is the amount that we wish to raise (lighten) the basins. We construct the seed (a.k.a marker) image from pixb, pixm and @delta.
Input: pixs (8 bpp seed; filled in place)
pixm (8 bpp filling mask)
connectivity (4 or 8)
Return: 0 if OK, 1 on error
Notes: (1) This is an in-place filling operation on the seed, pixs, where the clipping mask is always below or at the level of the seed as it is filled. Think of filling up a basin to a particular level, given by the maximum seed value in the basin. Outside the filled region, the mask is above the filling level. (2) Contrast this with pixSeedfillGray(), where the clipping mask is always above or at the level of the fill. An example of its use is the hdome fill, where the seed is an image where each pixel is a fixed amount smaller than the corresponding mask pixel. (3) The basin fill, pixSeedfillGrayBasin(), is a special case where the seed pixel values are generated from the mask, and where the implementation uses pixSeedfillGray() by inverting both the seed and mask.
Input: pixs (8 bpp seed; filled in place)
pixm (8 bpp filling mask)
connectivity (4 or 8)
Return: 0 if OK, 1 on error
Notes: (1) This is an in-place filling operation on the seed, pixs, where the clipping mask is always below or at the level of the seed as it is filled. Think of filling up a basin to a particular level, given by the maximum seed value in the basin. Outside the filled region, the mask is above the filling level. (2) Contrast this with pixSeedfillGraySimple(), where the clipping mask is always above or at the level of the fill. An example of its use is the hdome fill, where the seed is an image where each pixel is a fixed amount smaller than the corresponding mask pixel.
Input: pixs (8 bpp seed; filled in place)
pixm (8 bpp filling mask)
connectivity (4 or 8)
Return: 0 if OK, 1 on error
Notes: (1) This is an in-place filling operation on the seed, pixs, where the clipping mask is always above or at the level of the seed as it is filled. (2) For details of the operation, see the description in seedfillGrayLowSimple() and the code there. (3) As an example of use, see the description in pixHDome(). There, the seed is an image where each pixel is a fixed amount smaller than the corresponding mask pixel. (4) Reference paper : L. Vincent, Morphological grayscale reconstruction in image analysis: applications and efficient algorithms, IEEE Transactions on Image Processing, vol. 2, no. 2, pp. 176-201, 1993.
Input: pixs (seed)
pixm (mask)
maxiters (use 0 to go to completion)
connectivity (4 or 8)
Return: pixd (after filling into the mask) or null on error
Notes: (1) This is in general a very inefficient method for filling from a seed into a mask. Use it for a small number of iterations, but if you expect more than a few iterations, use pixSeedfillBinary(). (2) We use a 3x3 brick SEL for 8-cc filling and a 3x3 plus SEL for 4-cc.
Input: pixs (8 bpp source)
connectivity (4 or 8)
Return: pixd, or null on error
Notes: (1) The raster/anti-raster method for implementing this filling operation was suggested by Ray Smith. (2) This takes an arbitrary set of nonzero pixels in pixs, which can be sparse, and spreads (extrapolates) the values to fill all the pixels in pixd with the nonzero value it is closest to in pixs. This is similar (though not completely equivalent) to doing a Voronoi tiling of the image, with a tile surrounding each pixel that has a nonzero value. All pixels within a tile are then closer to its "central" pixel than to any others. Then assign the value of the "central" pixel to each pixel in the tile. (3) This is implemented by computing a distance function in parallel with the fill. The distance function uses free boundary conditions (assumed maxval outside), and it controls the propagation of the pixels in pixd away from the nonzero (seed) values. This is done in 2 traversals (raster/antiraster). In the raster direction, whenever the distance function is nonzero, the spread pixel takes on the value of its predecessor that has the minimum distance value. In the antiraster direction, whenever the distance function is nonzero and its value is replaced by a smaller value, the spread pixel takes the value of the predecessor with the minimum distance value. (4) At boundaries where a pixel is equidistant from two nearest nonzero (seed) pixels, the decision of which value to use is arbitrary (greedy in search for minimum distance). This can give rise to strange-looking results, particularly for 4-connectivity where the L1 distance is computed from steps in N,S,E and W directions (no diagonals).
| LEPT_DLL PIX* pixSelectByAreaFraction | ( | PIX * | pixs, |
| l_float32 | thresh, | ||
| l_int32 | connectivity, | ||
| l_int32 | type, | ||
| l_int32 * | pchanged | ||
| ) |
Input: pixs (1 bpp)
thresh (threshold ratio of fg pixels to (w * h))
connectivity (4 or 8)
type (L_SELECT_IF_LT, L_SELECT_IF_GT,
L_SELECT_IF_LTE, L_SELECT_IF_GTE)
&changed (<optional return> 1 if changed; 0 if clone returned)
Return: pixd, or null on error
Notes: (1) The args specify constraints on the amount of foreground coverage of the components that are kept. (2) If unchanged, returns a copy of pixs. Otherwise, returns a new pix with the filtered components. (3) This filters components based on the fraction of fg pixels of the component in its bounding box. (4) Use L_SELECT_IF_LT or L_SELECT_IF_LTE to save components with less than the threshold fraction of foreground, and L_SELECT_IF_GT or L_SELECT_IF_GTE to remove them.
| LEPT_DLL PIX* pixSelectByPerimSizeRatio | ( | PIX * | pixs, |
| l_float32 | thresh, | ||
| l_int32 | connectivity, | ||
| l_int32 | type, | ||
| l_int32 * | pchanged | ||
| ) |
Input: pixs (1 bpp)
thresh (threshold ratio of fg boundary pixels to the circumference of the bounding box)
connectivity (4 or 8)
type (L_SELECT_IF_LT, L_SELECT_IF_GT,
L_SELECT_IF_LTE, L_SELECT_IF_GTE)
&changed (<optional return> 1 if changed; 0 if clone returned)
Return: pixd, or null on error
Notes: (1) The args specify constraints on the size of the components that are kept. (2) If unchanged, returns a copy of pixs. Otherwise, returns a new pix with the filtered components. (3) This filters components with smooth vs. dendritic shape, using the ratio of the fg boundary pixels to the circumference of the bounding box, and comparing it to a threshold value. (4) Use L_SELECT_IF_LT or L_SELECT_IF_LTE to save the smooth boundary components, and L_SELECT_IF_GT or L_SELECT_IF_GTE to remove them.
| LEPT_DLL PIX* pixSelectByPerimToAreaRatio | ( | PIX * | pixs, |
| l_float32 | thresh, | ||
| l_int32 | connectivity, | ||
| l_int32 | type, | ||
| l_int32 * | pchanged | ||
| ) |
Input: pixs (1 bpp)
thresh (threshold ratio of fg boundary to fg pixels)
connectivity (4 or 8)
type (L_SELECT_IF_LT, L_SELECT_IF_GT,
L_SELECT_IF_LTE, L_SELECT_IF_GTE)
&changed (<optional return> 1 if changed; 0 if clone returned)
Return: pixd, or null on error
Notes: (1) The args specify constraints on the size of the components that are kept. (2) If unchanged, returns a copy of pixs. Otherwise, returns a new pix with the filtered components. (3) This filters "thick" components, where a thick component is defined to have a ratio of boundary to interior pixels that is smaller than a given threshold value. (4) Use L_SELECT_IF_LT or L_SELECT_IF_LTE to save the thicker components, and L_SELECT_IF_GT or L_SELECT_IF_GTE to remove them.
| LEPT_DLL PIX* pixSelectBySize | ( | PIX * | pixs, |
| l_int32 | width, | ||
| l_int32 | height, | ||
| l_int32 | connectivity, | ||
| l_int32 | type, | ||
| l_int32 | relation, | ||
| l_int32 * | pchanged | ||
| ) |
Input: pixs (1 bpp)
width, height (threshold dimensions)
connectivity (4 or 8)
type (L_SELECT_WIDTH, L_SELECT_HEIGHT,
L_SELECT_IF_EITHER, L_SELECT_IF_BOTH)
relation (L_SELECT_IF_LT, L_SELECT_IF_GT,
L_SELECT_IF_LTE, L_SELECT_IF_GTE)
&changed (<optional return> 1 if changed; 0 otherwise)
Return: filtered pixd, or null on error
Notes: (1) The args specify constraints on the size of the components that are kept. (2) If unchanged, returns a copy of pixs. Otherwise, returns a new pix with the filtered components. (3) If the selection type is L_SELECT_WIDTH, the input height is ignored, and v.v. (4) To keep small components, use relation = L_SELECT_IF_LT or L_SELECT_IF_LTE. To keep large components, use relation = L_SELECT_IF_GT or L_SELECT_IF_GTE.
| LEPT_DLL PIX* pixSelectByWidthHeightRatio | ( | PIX * | pixs, |
| l_float32 | thresh, | ||
| l_int32 | connectivity, | ||
| l_int32 | type, | ||
| l_int32 * | pchanged | ||
| ) |
Input: pixs (1 bpp)
thresh (threshold ratio of width/height)
connectivity (4 or 8)
type (L_SELECT_IF_LT, L_SELECT_IF_GT,
L_SELECT_IF_LTE, L_SELECT_IF_GTE)
&changed (<optional return> 1 if changed; 0 if clone returned)
Return: pixd, or null on error
Notes: (1) The args specify constraints on the width-to-height ratio for components that are kept. (2) If unchanged, returns a copy of pixs. Otherwise, returns a new pix with the filtered components. (3) This filters components based on the width-to-height ratios. (4) Use L_SELECT_IF_LT or L_SELECT_IF_LTE to save components with less than the threshold ratio, and L_SELECT_IF_GT or L_SELECT_IF_GTE to remove them.
| LEPT_DLL l_int32 pixSelectedLocalExtrema | ( | PIX * | pixs, |
| l_int32 | mindist, | ||
| PIX ** | ppixmin, | ||
| PIX ** | ppixmax | ||
| ) |
Input: pixs (8 bpp)
mindist (-1 for keeping all pixels; >= 0 specifies distance)
&ppixmin (<return> mask of local minima)
&ppixmax (<return> mask of local maxima)
Return: 0 if OK, 1 on error
Notes: (1) This selects those local 3x3 minima that are at least a specified distance from the nearest local 3x3 maxima, and v.v. for the selected set of local 3x3 maxima. The local 3x3 minima is the set of pixels whose value equals the value after a 3x3 brick erosion, and the local 3x3 maxima is the set of pixels whose value equals the value after a 3x3 brick dilation. (2) mindist is the minimum distance allowed between local 3x3 minima and local 3x3 maxima, in an 8-connected sense. mindist == -1 keeps all pixels found in step 1. mindist == 0 removes all pixels from each mask that are both a local 3x3 minimum and a local 3x3 maximum. mindist == 1 removes any local 3x3 minimum pixel that touches a local 3x3 maximum pixel, and likewise for the local maxima. To make the decision, visualize each local 3x3 minimum pixel as being surrounded by a square of size (2 * mindist + 1) on each side, such that no local 3x3 maximum pixel is within that square; and v.v. (3) The generated masks can be used as markers for further operations.
| LEPT_DLL PIX* pixSelectiveConnCompFill | ( | PIX * | pixs, |
| l_int32 | connectivity, | ||
| l_int32 | minw, | ||
| l_int32 | minh | ||
| ) |
Input: pixs (binary)
connectivity (4 or 8)
minw (minimum width to consider; use 0 or 1 for any width)
minh (minimum height to consider; use 0 or 1 for any height)
Return: pix (with holes filled in selected c.c.), or null on error
| LEPT_DLL BOX* pixSelectLargeULComp | ( | PIX * | pixs, |
| l_float32 | areaslop, | ||
| l_int32 | yslop, | ||
| l_int32 | connectivity | ||
| ) |
Input: pixs (1 bpp)
areaslop (fraction near but less than 1.0)
yslop (number of pixels in y direction)
connectivity (4 or 8)
Return: box, or null on error
Notes: (1) This selects a box near the top (first) and left (second) of the image, from the set of all boxes that have area >= @areaslop * (area of biggest box), where @areaslop is some fraction; say ~ 0.9. (2) For all boxes satisfying the above condition, select the left-most box that is within @yslop (say, 20) pixels of the box nearest the top. (3) This can be used to reliably select a specific one of the largest regions in an image, for applications where there are expected to be small variations in region size and location. (4) See boxSelectLargeULBox() for implementation details.
Input: pixs (8 bpp)
pixm (1 bpp)
&pta (<return> pta of min pixel locations)
&nav (<optional return> numa of minima values)
Return: 0 if OK, 1 on error.
Notes: (1) For each 8 connected component in pixm, this finds a pixel in pixs that has the lowest value, and saves it in a Pta. If several pixels in pixs have the same minimum value, it picks the first one found. (2) For a mask pixm of true local minima, all pixels in each connected component have the same value in pixs, so it is fastest to select one of them using a special seedfill operation. Not yet implemented.
Input: pixs (all depths, colormap OK)
&data (<return> serialized data in memory)
&nbytes (<return> number of bytes in data string)
Return: 0 if OK, 1 on error
Notes: (1) This does a fast serialization of the principal elements of the pix, as follows: "spix" (4 bytes) – ID for file type w (4 bytes) h (4 bytes) d (4 bytes) wpl (4 bytes) ncolors (4 bytes) – in colormap; 0 if there is no colormap cdata (4 * ncolors) – size of serialized colormap array rdatasize (4 bytes) – size of serialized raster data = 4 * wpl * h rdata (rdatasize)
Input: pix (all depths; use cmapped with caution)
Return: 0 if OK, 1 on error
Notes: (1) Sets all data to 1. For 1 bpp, this is black; for grayscale or color, this is white. (2) Caution: for colormapped pix, this sets the pixel value to the maximum value supported by the colormap: 2^d - 1. However, this color may not be defined, because the colormap may not be full.
Input: pix (all depths; use cmapped with caution)
val (value to set all pixels)
Return: 0 if OK; 1 on error
Notes: (1) Caution! For colormapped pix, @val is used as an index into a colormap. Be sure that index refers to the intended color. If the color is not in the colormap, you should first add it and then call this function.
Input: pix (all depths, cmap ok)
grayval (in range 0 ... 255)
Return: 0 if OK; 1 on error
Notes: (1) N.B. For all images, @grayval == 0 represents black and @grayval == 255 represents white. (2) For depth < 8, we do our best to approximate the gray level. For 1 bpp images, any @grayval < 128 is black; >= 128 is white. For 32 bpp images, each r,g,b component is set to @grayval, and the alpha component is preserved. (3) If pix is colormapped, it adds the gray value, replicated in all components, to the colormap if it's not there and there is room. If the colormap is full, it finds the closest color in L2 distance of components. This index is written to all pixels.
Input: pixs (colormapped or 32 bpp rgb; no alpha)
Return: pixd (new pix with meaningful alpha component),
or null on error
Notes: (1) The generated alpha component is transparent over white (background) pixels in pixs, and quickly grades to opaque away from the transparent parts. This is a cheap and dirty alpha generator. The 2 pixel gradation is useful to blur the boundary between the transparent region (that will render entirely from a backing image) and the remainder which renders from pixs. (2) All alpha component bits in pixs are overwritten.
Input: pixs (all depths; cmap ok)
op (L_SET_BLACK, L_SET_WHITE)
Return: 0 if OK; 1 on error
Notes: (1) Function for setting all pixels in an image to either black or white. (2) If pixs is colormapped, it adds black or white to the colormap if it's not there and there is room. If the colormap is full, it finds the closest color in intensity. This index is written to all pixels.
Input: pixs (any depth, can be cmapped)
boxa (<optional> of boxes, to clear or set)
op (L_SET_BLACK, L_SET_WHITE)
Return: pixd (with boxes filled with white or black), or null on error
Input: pixs (any depth; cmap OK)
dist (distance from outside; must be > 0; first ring is 1)
val (value to set at each border pixel)
Return: 0 if OK; 1 on error
Notes: (1) The rings are single-pixel-wide rectangular sets of pixels at a given distance from the edge of the pix. This sets all pixels in a given ring to a value.
| LEPT_DLL l_int32 pixSetBorderVal | ( | PIX * | pixs, |
| l_int32 | left, | ||
| l_int32 | right, | ||
| l_int32 | top, | ||
| l_int32 | bot, | ||
| l_uint32 | val | ||
| ) |
Input: pixs (8, 16 or 32 bpp)
left, right, top, bot (amount to set)
val (value to set at each border pixel)
Return: 0 if OK; 1 on error
Notes: (1) The border region is defined to be the region in the image within a specific distance of each edge. Here, we allow the pixels within a specified distance of each edge to be set independently. This sets the pixels in the border region to the given input value. (2) For efficiency, use pixSetOrClearBorder() if you're setting the border to either black or white. (3) If d != 32, the input value should be masked off to the appropriate number of least significant bits. (4) The code is easily generalized for 2 or 4 bpp.
Input: pix
sampling (1 for subsampling; 0 for no subsampling)
Return: 0 if OK, 1 on error
Notes: (1) The default is for 2x2 chroma subsampling because the files are considerably smaller and the appearance is typically satisfactory. To get full resolution output in the chroma channels for jpeg writing, call this with @sampling == 0.
Input: pix
colormap (to be assigned)
Return: 0 if OK, 1 on error.
Notes: (1) Unlike with the pix data field, pixSetColormap() destroys any existing colormap before assigning the new one. Because colormaps are not ref counted, it is important that the new colormap does not belong to any other pix.
Input: pix (32 bpp)
comp (COLOR_RED, COLOR_GREEN, COLOR_BLUE, L_ALPHA_CHANNEL)
val (value to set this component)
Return: 0 if OK; 1 on error
Notes: (1) For example, this can be used to set the alpha component to opaque: pixSetComponentArbitrary(pix, L_ALPHA_CHANNEL, 255)
Notes: (1) This does not free any existing data. To free existing data, use pixFreeData() before pixSetData().
Input: pix
w, h, d (use 0 to skip the setting for any of these)
Return: 0 if OK, 1 on error
Input: pix (all depths, can be cmapped)
box (in which all pixels will be set)
Return: 0 if OK, 1 on error
Notes: (1) Sets all data in rect to 1. For 1 bpp, this is black; for grayscale or color, this is white. (2) Caution: for colormapped pix, this sets the pixel value to the maximum value supported by the colormap: 2^d - 1. However, this color may not be defined, because the colormap may not be full.
Input: pix (all depths; can be cmapped)
box (in which all pixels will be set to val)
val (value to set all pixels)
Return: 0 if OK; 1 on error
Notes: (1) For colormapped pix, be sure the value is the intended one in the colormap. (2) Caution: for colormapped pix, this sets each pixel in the rect to the color at the index equal to val. Be sure that this index exists in the colormap and that it is the intended one!
Input: pixs1 (8 bpp)
pixs2 (8 bpp)
mindiff (minimum difference to accept as valid)
Return: 0 if OK; 1 if no pixel diffs are large enough, or on error
Notes: (1) This compares corresponding pixels in pixs1 and pixs2. When they differ by less than @mindiff, set the pixel values to 0 in each. Each pixel typically represents a tile in a larger image, and a very small difference between the min and max in the tile indicates that the min and max values are not to be trusted. (2) If contrast (pixel difference) detection is expected to fail, caller should check return value.
Input: pixd (1, 2, 4, 8, 16 or 32 bpp; or colormapped)
pixm (<optional> 1 bpp mask; no operation if NULL)
val (value to set at each masked pixel)
Return: 0 if OK; 1 on error
Notes: (1) In-place operation. (2) NOTE: For cmapped images, this calls pixSetMaskedCmap(). @val must be the 32-bit color representation of the RGB pixel. It is not the index into the colormap! (3) If pixm == NULL, a warning is given. (4) This is an implicitly aligned operation, where the UL corners of pixd and pixm coincide. A warning is issued if the two image sizes differ significantly, but the operation proceeds. (5) Each pixel in pixd that co-locates with an ON pixel in pixm is set to the specified input value. Other pixels in pixd are not changed. (6) You can visualize this as painting the color through the mask, as a stencil. (7) If you do not want to have the UL corners aligned, use the function pixSetMaskedGeneral(), which requires you to input the UL corner of pixm relative to pixd. (8) Implementation details: see comments in pixPaintThroughMask() for when we use rasterop to do the painting.
| LEPT_DLL l_int32 pixSetMaskedCmap | ( | PIX * | pixs, |
| PIX * | pixm, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval | ||
| ) |
Input: pixs (2, 4 or 8 bpp, colormapped)
pixm (<optional> 1 bpp mask; no-op if NULL)
x, y (origin of pixm relative to pixs; can be negative)
rval, gval, bval (new color to set at each masked pixel)
Return: 0 if OK; 1 on error
Notes: (1) This is an in-place operation. (2) It paints a single color through the mask (as a stencil). (3) The mask origin is placed at (x,y) on pixs, and the operation is clipped to the intersection of the mask and pixs. (4) If pixm == NULL, a warning is given. (5) Typically, pixm is a small binary mask located somewhere on the larger pixs. (6) If the color is in the colormap, it is used. Otherwise, it is added if possible; an error is returned if the colormap is already full.
Input: pixd (8, 16 or 32 bpp)
pixm (<optional> 1 bpp mask; no operation if null)
val (value to set at each masked pixel)
x, y (location of UL corner of pixm relative to pixd;
can be negative)
Return: 0 if OK; 1 on error
Notes: (1) This is an in-place operation. (2) Alignment is explicit. If you want the UL corners of the two images to be aligned, use pixSetMasked(). (3) A typical use would be painting through the foreground of a small binary mask pixm, located somewhere on a larger pixd. Other pixels in pixd are not changed. (4) You can visualize this as painting the color through the mask, as a stencil. (5) This uses rasterop to handle clipping and different depths of pixd. (6) If pixd has a colormap, you should call pixPaintThroughMask(). (7) Why is this function here, if pixPaintThroughMask() does the same thing, and does it more generally? I've retained it here to show how one can paint through a mask using only full image rasterops, rather than pixel peeking in pixm and poking in pixd. It's somewhat baroque, but I found it amusing.
| LEPT_DLL l_int32 pixSetMirroredBorder | ( | PIX * | pixs, |
| l_int32 | left, | ||
| l_int32 | right, | ||
| l_int32 | top, | ||
| l_int32 | bot | ||
| ) |
Input: pixs (all depths; colormap ok)
left, right, top, bot (number of pixels to set)
Return: 0 if OK, 1 on error
Notes: (1) This applies what is effectively mirror boundary conditions to a border region in the image. It is in-place. (2) This is useful for setting pixels near the border to a value representative of the near pixels to the interior. (3) The general pixRasterop() is used for an in-place operation here because there is no overlap between the src and dest rectangles.
| LEPT_DLL l_int32 pixSetOrClearBorder | ( | PIX * | pixs, |
| l_int32 | left, | ||
| l_int32 | right, | ||
| l_int32 | top, | ||
| l_int32 | bot, | ||
| l_int32 | op | ||
| ) |
Input: pixs (all depths)
left, right, top, bot (amount to set or clear)
operation (PIX_SET or PIX_CLR)
Return: 0 if OK; 1 on error
Notes: (1) The border region is defined to be the region in the image within a specific distance of each edge. Here, we allow the pixels within a specified distance of each edge to be set independently. This either sets or clears all pixels in the border region. (2) For binary images, use PIX_SET for black and PIX_CLR for white. (3) For grayscale or color images, use PIX_SET for white and PIX_CLR for black.
Input: pix (1, 2, 4, 8, 16, 32 bpp)
val (0 or 1)
Return: 0 if OK; 1 on error
Notes: (1) The pad bits are the bits that expand each scanline to a multiple of 32 bits. They are usually not used in image processing operations. When boundary conditions are important, as in seedfill, they must be set properly. (2) This sets the value of the pad bits (if any) in the last 32-bit word in each scanline. (3) For 32 bpp pix, there are no pad bits, so this is a no-op.
Input: pix (1, 2, 4, 8, 16, 32 bpp)
by (starting y value of band)
bh (height of band)
val (0 or 1)
Return: 0 if OK; 1 on error
Notes: (1) The pad bits are the bits that expand each scanline to a multiple of 32 bits. They are usually not used in image processing operations. When boundary conditions are important, as in seedfill, they must be set properly. (2) This sets the value of the pad bits (if any) in the last 32-bit word in each scanline, within the specified band of raster lines. (3) For 32 bpp pix, there are no pad bits, so this is a no-op.
Input: pix
(x,y) pixel coords
val (value to be inserted)
Return: 0 if OK; 1 on error
Notes: (1) Warning: the input value is not checked for overflow with respect to the depth of @pix, and the sign bit (if any) is ignored.
Input: pix (8 bpp; not cmapped)
col (column index)
colvect (vector of floats)
Return: 0 if OK, 1 on error
Input: pix
xres, yres (use 0 to skip the setting for either of these)
Return: 0 if OK, 1 on error
Input: pixd (32 bpp)
pixs (8 bpp)
comp (one of the set: {COLOR_RED, COLOR_GREEN,
COLOR_BLUE, L_ALPHA_CHANNEL})
Return: 0 if OK; 1 on error
Notes: (1) This places the 8 bpp pixel in pixs into the specified component (properly interleaved) in pixd. (2) The two images are registered to the UL corner; the sizes need not be the same, but a warning is issued if they differ.
| LEPT_DLL l_int32 pixSetRGBPixel | ( | PIX * | pix, |
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval | ||
| ) |
Input: pix (32 bpp rgb)
(x,y) pixel coords
rval (red component)
gval (green component)
bval (blue component)
Return: 0 if OK; 1 on error
| LEPT_DLL l_int32 pixSetSelectCmap | ( | PIX * | pixs, |
| BOX * | box, | ||
| l_int32 | sindex, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval | ||
| ) |
Input: pixs (1, 2, 4 or 8 bpp, with colormap)
box (<optional> region to set color; can be NULL)
sindex (colormap index of pixels to be changed)
rval, gval, bval (new color to paint)
Return: 0 if OK, 1 on error
Note: (1) This is an in-place operation. (2) It sets all pixels in region that have the color specified by the colormap index 'sindex' to the new color. (3) sindex must be in the existing colormap; otherwise an error is returned. (4) If the new color exists in the colormap, it is used; otherwise, it is added to the colormap. If it cannot be added because the colormap is full, an error is returned. (5) If box is NULL, applies function to the entire image; otherwise, clips the operation to the intersection of the box and pix. (6) An example of use would be to set to a specific color all the light (background) pixels within a certain region of a 3-level 2 bpp image, while leaving light pixels outside this region unchanged.
| LEPT_DLL l_int32 pixSetSelectMaskedCmap | ( | PIX * | pixs, |
| PIX * | pixm, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 | sindex, | ||
| l_int32 | rval, | ||
| l_int32 | gval, | ||
| l_int32 | bval | ||
| ) |
Input: pixs (2, 4 or 8 bpp, with colormap)
pixm (<optional> 1 bpp mask; no-op if NULL)
x, y (UL corner of mask relative to pixs)
sindex (colormap index of pixels in pixs to be changed)
rval, gval, bval (new color to substitute)
Return: 0 if OK, 1 on error
Note: (1) This is an in-place operation. (2) This paints through the fg of pixm and replaces all pixels in pixs that have a particular value (sindex) with the new color. (3) If pixm == NULL, a warning is given. (4) sindex must be in the existing colormap; otherwise an error is returned. (5) If the new color exists in the colormap, it is used; otherwise, it is added to the colormap. If the colormap is full, an error is returned.
Input: pix
textstring (can be null)
Return: 0 if OK, 1 on error
Notes: (1) This removes any existing textstring and puts a copy of the input textstring there.
| LEPT_DLL l_int32 pixSetTextblock | ( | PIX * | pixs, |
| L_BMF * | bmf, | ||
| const char * | textstr, | ||
| l_uint32 | val, | ||
| l_int32 | x0, | ||
| l_int32 | y0, | ||
| l_int32 | wtext, | ||
| l_int32 | firstindent, | ||
| l_int32 * | poverflow | ||
| ) |
Input: pixs (input image)
bmf (bitmap font data)
textstr (block text string to be set)
val (color to set the text)
x0 (left edge for each line of text)
y0 (baseline location for the first text line)
wtext (max width of each line of generated text)
firstindent (indentation of first line, in x-widths)
&overflow (<optional return> 0 if text is contained in
input pix; 1 if it is clipped)
Return: 0 if OK, 1 on error
Notes: (1) This function paints a set of lines of text over an image. (2) @val is the pixel value to be painted through the font mask. It should be chosen to agree with the depth of pixs. If it is out of bounds, an intermediate value is chosen. For RGB, use hex notation: 0xRRGGBB00, where RR is the hex representation of the red intensity, etc. The last two hex digits are 00 (byte value 0), assigned to the A component. Note that, as usual, RGBA proceeds from left to right in the order from MSB to LSB (see pix.h for details). (3) If there is a colormap, this does the best it can to use the requested color, or something similar to it.
| LEPT_DLL l_int32 pixSetTextline | ( | PIX * | pixs, |
| L_BMF * | bmf, | ||
| const char * | textstr, | ||
| l_uint32 | val, | ||
| l_int32 | x0, | ||
| l_int32 | y0, | ||
| l_int32 * | pwidth, | ||
| l_int32 * | poverflow | ||
| ) |
Input: pixs (input image)
bmf (bitmap font data)
textstr (text string to be set on the line)
val (color to set the text)
x0 (left edge for first char)
y0 (baseline location for all text on line)
&width (<optional return> width of generated text)
&overflow (<optional return> 0 if text is contained in
input pix; 1 if it is clipped)
Return: 0 if OK, 1 on error
Notes: (1) This function paints a line of text over an image. (2) @val is the pixel value to be painted through the font mask. It should be chosen to agree with the depth of pixs. If it is out of bounds, an intermediate value is chosen. For RGB, use hex notation: 0xRRGGBB00, where RR is the hex representation of the red intensity, etc. The last two hex digits are 00 (byte value 0), assigned to the A component. Note that, as usual, RGBA proceeds from left to right in the order from MSB to LSB (see pix.h for details). (3) If there is a colormap, this does the best it can to use the requested color, or something similar to it.
Input: pixs (32 bpp rgba)
val (32 bit unsigned color to use where alpha == 0)
debug (displays layers of pixs)
Return: pixd (32 bpp rgba), or null on error
Notes: (1) This sets the r, g and b components under every fully transparent alpha component to @val. The alpha components are unchanged. (2) Full transparency is denoted by alpha == 0. Setting all pixels to a constant @val where alpha is transparent can improve compressibility by reducing the entropy. (3) The visual result depends on how the image is displayed. (a) For display devices that respect the use of the alpha layer, this will not affect the appearance. (b) For typical leptonica operations, alpha is ignored, so there will be a change in appearance because this resets the rgb values in the fully transparent region. (4) pixRead() and pixWrite() will, by default, read and write 4-component (rgba) pix in png format. To ignore the alpha component after reading, or omit it on writing, pixSetSpp(..., 3). (5) Here are some examples:
Input: pix (8 bpp, no colormap)
&w (<optional return> width)
&h (<optional return> height)
Return: line ptr array, or null on error
Notes: (1) This is a simple helper for processing 8 bpp images with direct byte access. It can swap byte order within each word. (2) After processing, you must call pixCleanupByteProcessing(), which frees the lineptr array and restores byte order. (3) Usage: l_uint8 **lineptrs = pixSetupByteProcessing(pix, &w, &h); for (i = 0; i < h; i++) { l_uint8 *line = lineptrs[i]; for (j = 0; j < w; j++) { val = line[j]; ... } } pixCleanupByteProcessing(pix, lineptrs);
Input: pix
compval (zlib compression value)
Return: 0 if OK, 1 on error
Notes: (1) Valid zlib compression values are in the interval [0 ... 9], where, as defined in zlib.h: 0 Z_NO_COMPRESSION 1 Z_BEST_SPEED (poorest compression) 9 Z_BEST_COMPRESSION For the default value, use either of these: 6 Z_DEFAULT_COMPRESSION -1 (resolves to Z_DEFAULT_COMPRESSION) (2) If you use the defined constants in zlib.h instead of the compression integers given above, you must include zlib.h.
| LEPT_DLL l_int32 pixShiftAndTransferAlpha | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| l_float32 | shiftx, | ||
| l_float32 | shifty | ||
| ) |
Input: pixd (32 bpp)
pixs (32 bpp)
shiftx, shifty
Return: 0 if OK; 1 on error
Input: pixd (<optional>; either NULL or equal to pixs for in-place)
pixs (32 bpp rgb)
srcval (source color: 0xrrggbb00)
dstval (target color: 0xrrggbb00)
Return: pixd (with all pixels mapped based on the srcval/destval
mapping), or pixd on error
Notes: (1) For each component (r, g, b) separately, this does a linear mapping of the colors in pixs to colors in pixd. Let rs and rd be the red src and dest components in @srcval and @dstval, and rval is the red component of the src pixel. Then for all pixels in pixs, the mapping for the red component from pixs to pixd is: if (rd <= rs) (shift toward black) rval --> (rd/rs) * rval if (rd > rs) (shift toward white) (255 - rval) --> ((255 - rd)/(255 - rs)) * (255 - rval) Thus if rd <= rs, the red component of all pixels is mapped by the same fraction toward black, and if rd > rs, they are mapped by the same fraction toward white. This is essentially a different linear TRC (gamma = 1) for each component. The source and target color inputs are just used to generate the three fractions. (2) Note that this mapping differs from that in pixLinearMapToTargetColor(), which maps rs --> rd and does a piecewise stretching in between. (3) For inplace operation, call it this way: pixShiftByComponent(pixs, pixs, ... ) (4) For generating a new pixd: pixd = pixShiftByComponent(NULL, pixs, ...) (5) A simple application is to color a grayscale image. A light background can be colored using srcval = 0xffffff00 and picking a target background color for dstval. A dark foreground can be colored by using srcval = 0x0 and choosing a target foreground color for dstval.
| LEPT_DLL PIX* pixSimpleCaptcha | ( | PIX * | pixs, |
| l_int32 | border, | ||
| l_int32 | nterms, | ||
| l_uint32 | seed, | ||
| l_uint32 | color, | ||
| l_int32 | cmapflag | ||
| ) |
Input: pixs (8 bpp; no colormap)
border (added white pixels on each side)
nterms (number of x and y harmonic terms)
seed (of random number generator)
color (for colorizing; in 0xrrggbb00 format; use 0 for black)
cmapflag (1 for colormap output; 0 for rgb)
Return: pixd (8 bpp cmap or 32 bpp rgb), or null on error
Notes: (1) This uses typical default values for generating captchas. The magnitudes of the harmonic warp are typically smaller when more terms are used, even though the phases are random. See, for example, prog/warptest.c.
| LEPT_DLL PIX* pixSimpleColorQuantize | ( | PIX * | pixs, |
| l_int32 | sigbits, | ||
| l_int32 | factor, | ||
| l_int32 | ncolors | ||
| ) |
pixSimpleColorQuantize() Input: pixs (32 bpp rgb) sigbits (2-4, significant bits retained in the quantizer for each component of the input image) factor (subsampling factor; use 1 for no subsampling) ncolors (the number of most populated colors to select) Return: pixd (8 bpp cmapped) or NULL on error
Notes: (1) If you want to do color quantization for real, use octcube or modified median cut. This function shows that it is easy to make a simple quantizer based solely on the population in cells of a given size in rgb color space. (2) The @ncolors most populated cells at the @sigbits level form the colormap for quantizing, and this uses octcube indexing under the covers to assign each pixel to the nearest color. (3) @sigbits is restricted to 2, 3 and 4. At the low end, the color discrimination is very crude; at the upper end, a set of similar colors can dominate the result. Interesting results are generally found for @sigbits = 3 and ncolors ~ 20. (4) See also pixColorSegment() for a method of quantizing the colors to generate regions of similar color.
Input: two pix
Return: 1 if the two pix have same {h, w, d}; 0 otherwise.
Input: pixs (8 bpp grayscale; no colormap)
pixm (<optional> 1 bpp; if null, this is a no-op)
factor (subsampling factor for getting average; >= 1)
Return: 0 if OK, 1 on error
Notes: (1) The pixels in pixs corresponding to those in each 8-connected region in the mask are set to the average value. (2) This is required for adaptive mapping to avoid the generation of stripes in the background map, due to variations in the pixel values near the edges of mask regions. (3) This function is optimized for background smoothing, where there are a relatively small number of components. It will be inefficient if used where there are many small components.
| LEPT_DLL PIX* pixSnapColor | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| l_uint32 | srcval, | ||
| l_uint32 | dstval, | ||
| l_int32 | diff | ||
| ) |
Input: pixd (<optional>; either NULL or equal to pixs for in-place)
pixs (colormapped or 8 bpp gray or 32 bpp rgb)
srcval (color center to be selected for change: 0xrrggbb00)
dstval (target color for pixels: 0xrrggbb00)
diff (max absolute difference, applied to all components)
Return: pixd (with all pixels within diff of srcval set to dstval),
or pixd on error
Notes: (1) For inplace operation, call it this way: pixSnapColor(pixs, pixs, ... ) (2) For generating a new pixd: pixd = pixSnapColor(NULL, pixs, ...) (3) If pixs has a colormap, it is handled by pixSnapColorCmap(). (4) All pixels within 'diff' of 'srcval', componentwise, will be changed to 'dstval'.
| LEPT_DLL PIX* pixSnapColorCmap | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| l_uint32 | srcval, | ||
| l_uint32 | dstval, | ||
| l_int32 | diff | ||
| ) |
Input: pixd (<optional>; either NULL or equal to pixs for in-place)
pixs (colormapped)
srcval (color center to be selected for change: 0xrrggbb00)
dstval (target color for pixels: 0xrrggbb00)
diff (max absolute difference, applied to all components)
Return: pixd (with all pixels within diff of srcval set to dstval),
or pixd on error
Notes: (1) For inplace operation, call it this way: pixSnapColorCmap(pixs, pixs, ... ) (2) For generating a new pixd: pixd = pixSnapColorCmap(NULL, pixs, ...) (3) pixs must have a colormap. (4) All colors within 'diff' of 'srcval', componentwise, will be changed to 'dstval'.
Input: pixs (8 bpp; no colormap)
orientflag (L_HORIZONTAL_EDGES, L_VERTICAL_EDGES, L_ALL_EDGES)
Return: pixd (8 bpp, edges are brighter), or null on error
Notes: (1) Invert pixd to see larger gradients as darker (grayscale). (2) To generate a binary image of the edges, threshold the result using pixThresholdToBinary(). If the high edge values are to be fg (1), invert after running pixThresholdToBinary(). (3) Label the pixels as follows: 1 4 7 2 5 8 3 6 9 Read the data incrementally across the image and unroll the loop. (4) This runs at about 45 Mpix/sec on a 3 GHz processor.
| LEPT_DLL BOXA* pixSplitComponentIntoBoxa | ( | PIX * | pix, |
| BOX * | box, | ||
| l_int32 | minsum, | ||
| l_int32 | skipdist, | ||
| l_int32 | delta, | ||
| l_int32 | maxbg, | ||
| l_int32 | maxcomps, | ||
| l_int32 | remainder | ||
| ) |
Input: pixs (1 bpp)
box (<optional> location of pixs w/rt an origin)
minsum (minimum pixels to trigger propagation)
skipdist (distance before computing sum for propagation)
delta (difference required to stop propagation)
maxbg (maximum number of allowed bg pixels in ref scan)
maxcomps (use 0 for unlimited number of subdivided components)
remainder (set to 1 to get b.b. of remaining stuff)
Return: boxa (of rectangles covering the fg of pixs), or null on error
Notes: (1) This generates a boxa of rectangles that covers the fg of a mask. It does so by a greedy partitioning of the mask, choosing the largest rectangle found from each of the four directions at each step. (2) The input parameters give some flexibility for boundary noise. The resulting set of rectangles must cover all the fg pixels and, in addition, may cover some bg pixels. Using small input parameters on a noiseless mask (i.e., one that has only large vertical and horizontal edges) will result in a proper covering of only the fg pixels of the mask. (3) The input is assumed to be a single connected component, that may have holes. From each side, sweep inward, counting the pixels. If the count becomes greater than @minsum, and we have moved forward a further amount @skipdist, record that count ('countref'), but don't accept if the scan contains more than @maxbg bg pixels. Continue the scan until we reach a count that differs from countref by at least @delta, at which point the propagation stops. The box swept out gets a score, which is the sum of fg pixels minus a penalty. The penalty is the number of bg pixels in the box. This is done from all four sides, and the side with the largest score is saved as a rectangle. The process repeats until there is either no rectangle left, or there is one that can't be captured from any direction. For the latter case, we simply accept the last rectangle. (4) The input box is only used to specify the location of the UL corner of pixs, with respect to an origin that typically represents the UL corner of an underlying image, of which pixs is one component. If @box is null, the UL corner is taken to be (0, 0). (5) The parameter @maxcomps gives the maximum number of allowed rectangles extracted from any single connected component. Use 0 if no limit is to be applied. (6) The flag @remainder specifies whether we take a final bounding box for anything left after the maximum number of allowed rectangles is extracted.
(7) So if @maxcomps > 0, it specifies that we want no more than the first @maxcomps rectangles that satisfy the input criteria. After this, we can get a final rectangle that bounds everything left over by setting @remainder == 1. If @remainder == 0, we only get rectangles that satisfy the input criteria. (8) It should be noted that the removal of rectangles can break the original c.c. into several c.c. (9) Summing up:
| LEPT_DLL BOXA* pixSplitComponentWithProfile | ( | PIX * | pixs, |
| l_int32 | delta, | ||
| l_int32 | mindel, | ||
| PIX ** | ppixdebug | ||
| ) |
pixSplitComponentWithProfile()
Input: pixs (1 bpp, exactly one connected component)
delta (distance used in extrema finding in a numa; typ. 10)
mindel (minimum required difference between profile minimum
and profile values +2 and -2 away; typ. 7)
&pixdebug (<optional return> debug image of splitting)
Return: boxa (of c.c. after splitting), or null on error
Notes: (1) This will split the most obvious cases of touching characters. The split points it is searching for are narrow and deep minima in the vertical pixel projection profile, after a large vertical closing has been applied to the component.
| LEPT_DLL l_int32 pixSplitDistributionFgBg | ( | PIX * | pixs, |
| l_float32 | scorefract, | ||
| l_int32 | factor, | ||
| l_int32 * | pthresh, | ||
| l_int32 * | pfgval, | ||
| l_int32 * | pbgval, | ||
| l_int32 | debugflag | ||
| ) |
Input: pixs (any depth; cmapped ok)
scorefract (fraction of the max score, used to determine
the range over which the histogram min is searched)
factor (subsampling factor; integer >= 1)
&thresh (<optional return> best threshold for separating)
&fgval (<optional return> average foreground value)
&bgval (<optional return> average background value)
debugflag (1 for plotting of distribution and split point)
Return: 0 if OK, 1 on error
Notes: (1) See numaSplitDistribution() for details on the underlying method of choosing a threshold.
| LEPT_DLL BOXA* pixSplitIntoBoxa | ( | PIX * | pixs, |
| l_int32 | minsum, | ||
| l_int32 | skipdist, | ||
| l_int32 | delta, | ||
| l_int32 | maxbg, | ||
| l_int32 | maxcomps, | ||
| l_int32 | remainder | ||
| ) |
Input: pixs (1 bpp)
minsum (minimum pixels to trigger propagation)
skipdist (distance before computing sum for propagation)
delta (difference required to stop propagation)
maxbg (maximum number of allowed bg pixels in ref scan)
maxcomps (use 0 for unlimited number of subdivided components)
remainder (set to 1 to get b.b. of remaining stuff)
Return: boxa (of rectangles covering the fg of pixs), or null on error
Notes: (1) This generates a boxa of rectangles that covers the fg of a mask. For each 8-connected component in pixs, it does a greedy partitioning, choosing the largest rectangle found from each of the four directions at each iter. See pixSplitComponentIntoBoxa() for details. (2) The input parameters give some flexibility for boundary noise. The resulting set of rectangles may cover some bg pixels. (3) This should be used when there are a small number of mask components, each of which has sides that are close to horizontal and vertical. The input parameters @delta and @maxbg determine whether or not holes in the mask are covered. (4) The parameter @maxcomps gives the maximum number of allowed rectangles extracted from any single connected component. Use 0 if no limit is to be applied. (5) The flag @remainder specifies whether we take a final bounding box for anything left after the maximum number of allowed rectangles is extracted.
| LEPT_DLL l_int32 pixSplitIntoCharacters | ( | PIX * | pixs, |
| l_int32 | minw, | ||
| l_int32 | minh, | ||
| BOXA ** | pboxa, | ||
| PIXA ** | ppixa, | ||
| PIX ** | ppixdebug | ||
| ) |
Input: pixs (1 bpp, contains only deskewed text)
minw (minimum component width for initial filtering; typ. 4)
minh (minimum component height for initial filtering; typ. 4)
&boxa (<optional return> character bounding boxes)
&pixa (<optional return> character images)
&pixdebug (<optional return> showing splittings)
Return: 0 if OK, 1 on error
Notes: (1) This is a simple function that attempts to find split points based on vertical pixel profiles. (2) It should be given an image that has an arbitrary number of text characters. (3) The returned pixa includes the boxes from which the (possibly split) components are extracted.
| LEPT_DLL PIX* pixStereoFromPair | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| l_float32 | rwt, | ||
| l_float32 | gwt, | ||
| l_float32 | bwt | ||
| ) |
Input: pix1 (32 bpp rgb)
pix2 (32 bpp rgb)
rwt, gwt, bwt (weighting factors used for each component in
pix1 to determine the output red channel)
Return: pixd (stereo enhanced), or null on error
Notes: (1) pix1 and pix2 are a pair of stereo images, ideally taken concurrently in the same plane, with some lateral translation. (2) The output red channel is determined from @pix1. The output green and blue channels are taken from the green and blue channels, respectively, of @pix2. (3) The weights determine how much of each component in @pix1 goes into the output red channel. The sum of weights must be 1.0. If it's not, we scale the weights to satisfy this criterion. (4) The most general pixel mapping allowed here is: rval = rwt * r1 + gwt * g1 + bwt * b1 (from pix1) gval = g2 (from pix2) bval = b2 (from pix2) (5) The simplest method is to use rwt = 1.0, gwt = 0.0, bwt = 0.0, but this causes unpleasant visual artifacts with red in the image. Use of green and blue from @pix1 in the red channel, instead of red, tends to fix that problem.
| LEPT_DLL PIX* pixStretchHorizontal | ( | PIX * | pixs, |
| l_int32 | dir, | ||
| l_int32 | type, | ||
| l_int32 | hmax, | ||
| l_int32 | operation, | ||
| l_int32 | incolor | ||
| ) |
| LEPT_DLL PIX* pixStretchHorizontalLI | ( | PIX * | pixs, |
| l_int32 | dir, | ||
| l_int32 | type, | ||
| l_int32 | hmax, | ||
| l_int32 | incolor | ||
| ) |
Input: pixs (1, 8 or 32 bpp)
dir (L_WARP_TO_LEFT or L_WARP_TO_RIGHT)
type (L_LINEAR_WARP or L_QUADRATIC_WARP)
hmax (horizontal displacement at edge)
incolor (L_BRING_IN_WHITE or L_BRING_IN_BLACK)
Return: pixd (stretched/compressed), or null on error
Notes: (1) See pixStretchHorizontal() for details.
| LEPT_DLL PIX* pixStretchHorizontalSampled | ( | PIX * | pixs, |
| l_int32 | dir, | ||
| l_int32 | type, | ||
| l_int32 | hmax, | ||
| l_int32 | incolor | ||
| ) |
Input: pixs (1, 8 or 32 bpp)
dir (L_WARP_TO_LEFT or L_WARP_TO_RIGHT)
type (L_LINEAR_WARP or L_QUADRATIC_WARP)
hmax (horizontal displacement at edge)
incolor (L_BRING_IN_WHITE or L_BRING_IN_BLACK)
Return: pixd (stretched/compressed), or null on error
Notes: (1) See pixStretchHorizontal() for details.
Input: pixs (1 bpp)
color (0 for white runs, 1 for black runs)
depth (of pixd: 8 or 16 bpp)
nangles (2, 4, 6 or 8)
Return: pixd (8 or 16 bpp), or null on error
Notes: (1) The dest Pix is 8 or 16 bpp, with the pixel values equal to the stroke width in which it is a member. The values are clipped to the max pixel value if necessary. (2) The color determines if we're labelling white or black strokes. (3) A pixel that is not a member of the chosen color gets value 0; it belongs to a width of length 0 of the chosen color. (4) This chooses, for each dest pixel, the minimum of sets of runlengths through each pixel. Here are the sets: nangles increment set
2 90 {0, 90} 4 45 {0, 45, 90, 135} 6 30 {0, 30, 60, 90, 120, 150} 8 22.5 {0, 22.5, 45, 67.5, 90, 112.5, 135, 157.5} (5) Runtime scales linearly with (nangles - 2).
Input: pixs (1 bpp, with only boundary pixels in fg)
skip (number to skip between samples as you traverse boundary)
Return: pta, or null on error
Notes: (1) If skip = 0, we take all the fg pixels. (2) We try to traverse the boundaries in a regular way. Some pixels may be missed, and these are then subsampled randomly with a fraction determined by 'skip'. (3) The most natural approach is to use a depth first (stack-based) method to find the fg pixels. However, the pixel runs are 4-connected and there are relatively few branches. So instead of doing a proper depth-first search, we get nearly the same result using two nested while loops: the outer one continues a raster-based search for the next fg pixel, and the inner one does a reasonable job running along each 4-connected contour.
Input: pixd (<optional>; this can be null, equal to pixs1,
equal to pixs2, or different from both pixs1 and pixs2)
pixs1 (can be == pixd)
pixs2 (can be == pixd)
Return: pixd always
Notes: (1) This gives the set subtraction of two images with equal depth, aligning them to the UL corner. pixs1 and pixs2 need not have the same width and height. (2) Source pixs2 is always subtracted from source pixs1. The result is pixs1 \ pixs2 = pixs1 & (~pixs2) (3) There are 4 cases: (a) pixd == null, (src1 - src2) --> new pixd (b) pixd == pixs1, (src1 - src2) --> src1 (in-place) (c) pixd == pixs2, (src1 - src2) --> src2 (in-place) (d) pixd != pixs1 && pixd != pixs2, (src1 - src2) --> input pixd (4) For clarity, if the case is known, use these patterns: (a) pixd = pixSubtract(NULL, pixs1, pixs2); (b) pixSubtract(pixs1, pixs1, pixs2); (c) pixSubtract(pixs2, pixs1, pixs2); (d) pixSubtract(pixd, pixs1, pixs2); (5) The size of the result is determined by pixs1. (6) The depths of pixs1 and pixs2 must be equal.
Input: pixd (<optional>; this can be null, equal to pixs1, or
different from pixs1)
pixs1 (can be == to pixd)
pixs2
Return: pixd always
Notes: (1) Arithmetic subtraction of two 8, 16 or 32 bpp images. (2) Source pixs2 is always subtracted from source pixs1. (3) Do explicit clipping to 0. (4) Alignment is to UL corner. (5) There are 3 cases. The result can go to a new dest, in-place to pixs1, or to an existing input dest: (a) pixd == null (src1 - src2) --> new pixd (b) pixd == pixs1 (src1 - src2) --> src1 (in-place) (c) pixd != pixs1 (src1 - src2) --> input pixd (6) pixs2 must be different from both pixd and pixs1.
Input: &pixd (<optional return> input pixd can be null,
and it must be different from pixs)
&pixs (will be nulled after the swap)
Return: 0 if OK, 1 on error
Notes: (1) Simple operation to change the handle name safely. After this operation, the original image in pixd has been destroyed, pixd points to what was pixs, and the input pixs ptr has been nulled. (2) This works safely whether or not pixs and pixd are cloned. If pixs is cloned, the other handles still point to the original image, with the ref count reduced by 1. (3) Usage example: Pix *pix1 = pixRead("..."); Pix *pix2 = function(pix1, ...); pixSwapAndDestroy(&pix1, &pix2); pixDestroy(&pix1); // holds what was in pix2 Example with clones ([] shows ref count of image generated by the function): Pix *pixs = pixRead("..."); Pix *pix1 = pixClone(pixs); Pix *pix2 = function(pix1, ...); [1] Pix *pix3 = pixClone(pix2); [1] --> [2] pixSwapAndDestroy(&pix1, &pix2); pixDestroy(&pixs); // still holds read image pixDestroy(&pix1); // holds what was in pix2 [2] --> [1] pixDestroy(&pix3); // holds what was in pix2 [1] --> [0]
Input: pixs (1 bpp)
&canclip (<return> 1 if fg does not extend to all four edges)
Return: 0 if OK; 1 on error
Notes: (1) This is a lightweight test to determine if a 1 bpp image can be further cropped without loss of fg pixels. If it cannot, canclip is set to 0. (2) It does not test for the existence of any fg pixels. If there are no fg pixels, it will return @canclip = 1. Check the output of the subsequent call to pixClipToForeground().
| LEPT_DLL l_int32 pixTestForSimilarity | ( | PIX * | pix1, |
| PIX * | pix2, | ||
| l_int32 | factor, | ||
| l_int32 | mindiff, | ||
| l_float32 | maxfract, | ||
| l_float32 | maxave, | ||
| l_int32 * | psimilar, | ||
| l_int32 | printstats | ||
| ) |
Input: pix1 (8 bpp gray or 32 bpp rgb, or colormapped)
pix2 (8 bpp gray or 32 bpp rgb, or colormapped)
factor (subsampling factor; use 0 or 1 for no subsampling)
mindiff (minimum pixel difference to be counted; > 0)
maxfract (maximum fraction of pixels allowed to have
diff greater than or equal to mindiff)
maxave (maximum average difference of pixels allowed for
pixels with diff greater than or equal to mindiff,
after subtracting mindiff)
&similar (<return> 1 if similar, 0 otherwise)
printstats (use 1 to print normalized histogram to stderr)
Return: 0 if OK, 1 on error
Notes: (1) This takes 2 pix that are the same size and determines using 3 input parameters if they are "similar". The first parameter @mindiff establishes a criterion of pixel-to-pixel similarity: two pixels are not similar if their difference in value is at least mindiff. Then @maxfract and @maxave are thresholds on the number and distribution of dissimilar pixels allowed for the two pix to be similar. If the pix are to be similar, neither threshold can be exceeded. (2) In setting the @maxfract and @maxave thresholds, you have these options: (a) Base the comparison only on @maxfract. Then set @maxave = 0.0 or 256.0. (If 0, we always ignore it.) (b) Base the comparison only on @maxave. Then set @maxfract = 1.0. (c) Base the comparison on both thresholds. (3) Example of values that can be expected at mindiff = 15 when comparing lossless png encoding with jpeg encoding, q=75: (smoothish bg) fractdiff = 0.01, avediff = 2.5 (natural scene) fractdiff = 0.13, avediff = 3.5 To identify these images as 'similar', select maxfract and maxave to be upper bounds of what you expect. (4) See pixGetDifferenceStats() for a discussion of why we subtract mindiff from the computed average diff of the nonsimilar pixels to get the 'avediff' returned by that function. (5) If there is a colormap, it is removed and the result is either gray or RGB depending on the colormap. (6) If RGB, the maximum difference between pixel components is saved in the histogram.
Input: pixs (1 bpp)
type (L_THIN_FG, L_THIN_BG)
connectivity (4 or 8)
maxiters (max number of iters allowed; use 0 to iterate
until completion)
Return: pixd, or null on error
Notes: (1) See "Connectivity-preserving morphological image transformations," Dan S. Bloomberg, in SPIE Visual Communications and Image Processing, Conference 1606, pp. 320-334, November 1991, Boston, MA. A web version is available at http://www.leptonica.com/papers/conn.pdf (2) We implement here two of the best iterative morphological thinning algorithms, for 4 c.c and 8 c.c. Each iteration uses a mixture of parallel operations (using several different 3x3 Sels) and serial operations. Specifically, each thinning iteration consists of four sequential thinnings from each of four directions. Each of these thinnings is a parallel composite operation, where the union of a set of HMTs is set-subtracted from the input. For 4-cc thinning, we use 3 HMTs in parallel, and for 8-cc thinning we use 4 HMTs. (3) A "good" thinning algorithm is one that generates a skeleton that is near the medial axis and has neither pruned real branches nor left extra dendritic branches. (4) To thin the foreground, which is the usual situation, use type == L_THIN_FG. Thickening the foreground is equivalent to thinning the background (type == L_THIN_BG), where the opposite connectivity gets preserved. For example, to thicken the fg using 4-connectivity, we thin the bg using Sels that preserve 8-connectivity.
| LEPT_DLL PIX* pixThinExamples | ( | PIX * | pixs, |
| l_int32 | type, | ||
| l_int32 | index, | ||
| l_int32 | maxiters, | ||
| const char * | selfile | ||
| ) |
Input: pixs (1 bpp)
type (L_THIN_FG, L_THIN_BG)
index (into specific examples; valid 1-9; see notes)
maxiters (max number of iters allowed; use 0 to iterate
until completion)
selfile (<optional> filename for output sel display)
Return: pixd, or null on error
Notes: (1) See notes in pixThin(). The examples are taken from the paper referenced there. (2) Here we allow specific sets of HMTs to be used in parallel for thinning from each of four directions. One iteration consists of four such parallel thins. (3) The examples are indexed as follows: Thinning (e.g., run to completion): index = 1 sel_4_1, sel_4_5, sel_4_6 index = 2 sel_4_1, sel_4_7, sel_4_7_rot index = 3 sel_48_1, sel_48_1_rot, sel_48_2 index = 4 sel_8_2, sel_8_3, sel_48_2 index = 5 sel_8_1, sel_8_5, sel_8_6 index = 6 sel_8_2, sel_8_3, sel_8_8, sel_8_9 index = 7 sel_8_5, sel_8_6, sel_8_7, sel_8_7_rot Thickening: index = 8 sel_4_2, sel_4_3 (e.g., do just a few iterations) index = 9 sel_8_4 (e.g., do just a few iterations)
Input: pixs (1 bpp)
type (L_THIN_FG, L_THIN_BG)
sela (of Sels for parallel composite HMTs)
maxiters (max number of iters allowed; use 0 to iterate
until completion)
Return: pixd, or null on error
Notes: (1) See notes in pixThin(). That function chooses among the best of the Sels for thinning. (2) This is a general function that takes a Sela of HMTs that are used in parallel for thinning from each of four directions. One iteration consists of four such parallel thins.
Input: pix (8 bpp grayscale)
d (destination depth: 1, 2, 4 or 8)
nlevels (number of levels to be used for colormap)
cmapflag (1 if makes colormap; 0 otherwise)
Return: pixd (thresholded with standard dest thresholds),
or null on error
Notes: (1) This uses, by default, equally spaced "target" values that depend on the number of levels, with thresholds halfway between. For N levels, with separation 255/(N-1), there are N-1 fixed thresholds. (2) For 1 bpp destination, the number of levels can only be 2 and if a cmap is made, black is (0,0,0) and white is (255,255,255), which is opposite to the convention without a colormap. (3) For 1, 2 and 4 bpp, the nlevels arg is used if a colormap is made; otherwise, we take the most significant bits from the src that will fit in the dest. (4) For 8 bpp, the input pixs is quantized to nlevels. The dest is quantized with that mapping, either through a colormap table or directly with 8 bit values. (5) Typically you should not make a colormap for 1 bpp dest. (6) This is not dithering. Each pixel is treated independently.
| LEPT_DLL l_int32 pixThresholdByConnComp | ( | PIX * | pixs, |
| PIX * | pixm, | ||
| l_int32 | start, | ||
| l_int32 | end, | ||
| l_int32 | incr, | ||
| l_float32 | thresh48, | ||
| l_float32 | threshdiff, | ||
| l_int32 * | pglobthresh, | ||
| PIX ** | ppixd, | ||
| l_int32 | debugflag | ||
| ) |
Input: pixs (depth > 1, colormap OK)
pixm (<optional> 1 bpp mask giving region to ignore by setting
pixels to white; use NULL if no mask)
start, end, incr (binarization threshold levels to test)
thresh48 (threshold on normalized difference between the
numbers of 4 and 8 connected components)
threshdiff (threshold on normalized difference between the
number of 4 cc at successive iterations)
&globthresh (<optional return> best global threshold; 0
if no threshold is found)
&pixd (<optional return> image thresholded to binary, or
null if no threshold is found)
debugflag (1 for plotted results)
Return: 0 if OK, 1 on error or if no threshold is found
Notes: (1) This finds a global threshold based on connected components. Although slow, it is reasonable to use it in a situation where (a) the background in the image is relatively uniform, and (b) the result will be fed to an OCR program that accepts 1 bpp images and works best with easily segmented characters. The reason for (b) is that this selects a threshold with a minimum number of both broken characters and merged characters. (2) If the pix has color, it is converted to gray using the max component. (3) Input 0 to use default values for any of these inputs: @start, @end, @incr, @thresh48, @threshdiff. (4) This approach can be understood as follows. When the binarization threshold is varied, the numbers of c.c. identify four regimes: (a) For low thresholds, text is broken into small pieces, and the number of c.c. is large, with the 4 c.c. significantly exceeding the 8 c.c. (b) As the threshold rises toward the optimum value, the text characters coalesce and there is very little difference between the numbers of 4 and 8 c.c, which both go through a minimum. (c) Above this, the image background gets noisy because some pixels are thresholded to foreground, and the numbers of c.c. quickly increase, with the 4 c.c. significantly larger than the 8 c.c. (d) At even higher thresholds, the image background noise coalesces as it becomes mostly foreground, and the number of c.c. drops quickly. (5) If there is no global threshold that distinguishes foreground text from background (e.g., weak text over a background that has significant variation and/or bleedthrough), this returns 1, which the caller should check.
| LEPT_DLL l_int32 pixThresholdForFgBg | ( | PIX * | pixs, |
| l_int32 | factor, | ||
| l_int32 | thresh, | ||
| l_int32 * | pfgval, | ||
| l_int32 * | pbgval | ||
| ) |
Input: pixs (any depth; cmapped ok)
factor (subsampling factor; integer >= 1)
thresh (threshold for generating foreground mask)
&fgval (<optional return> average foreground value)
&bgval (<optional return> average background value)
Return: 0 if OK, 1 on error
| LEPT_DLL PIX* pixThresholdGrayArb | ( | PIX * | pixs, |
| const char * | edgevals, | ||
| l_int32 | outdepth, | ||
| l_int32 | use_average, | ||
| l_int32 | setblack, | ||
| l_int32 | setwhite | ||
| ) |
Input: pixs (8 bpp grayscale; can have colormap)
edgevals (string giving edge value of each bin)
outdepth (0, 2, 4 or 8 bpp; 0 is default for min depth)
use_average (1 if use the average pixel value in colormap)
setblack (1 if darkest color is set to black)
setwhite (1 if lightest color is set to white)
Return: pixd (2, 4 or 8 bpp quantized image with colormap),
or null on error
Notes: (1) This function allows exact specification of the quantization bins. The string @edgevals is a space-separated set of values specifying the dividing points between output quantization bins. These threshold values are assigned to the bin with higher values, so that each of them is the smallest value in their bin. (2) The output image (pixd) depth is specified by @outdepth. The number of bins is the number of edgevals + 1. The relation between outdepth and the number of bins is: outdepth = 2 nbins <= 4 outdepth = 4 nbins <= 16 outdepth = 8 nbins <= 256 With @outdepth == 0, the minimum required depth for the given number of bins is used. The output pixd has a colormap. (3) The last 3 args determine the specific values that go into the colormap. (4) For @use_average:
Input: pixs (8 bpp, can have colormap)
nlevels (equally spaced; must be between 2 and 256)
cmapflag (1 to build colormap; 0 otherwise)
Return: pixd (8 bpp, optionally with colormap), or null on error
Notes: (1) Valid values for nlevels are in the set {2,...,256}. (2) Any colormap on the input pixs is removed to 8 bpp grayscale. (3) If cmapflag == 1, a colormap of size 'nlevels' is made, and the pixel values in pixs are replaced by their appropriate color indices. Otherwise, the pixel values are the actual thresholded (i.e., quantized) grayscale values. (4) If you don't want the thresholding to be equally spaced, first transform the input 8 bpp src using pixGammaTRC().
| LEPT_DLL l_int32 pixThresholdPixelSum | ( | PIX * | pix, |
| l_int32 | thresh, | ||
| l_int32 * | pabove, | ||
| l_int32 * | tab8 | ||
| ) |
Input: pix (1 bpp)
threshold
&above (<return> 1 if above threshold;
0 if equal to or less than threshold)
tab8 (<optional> 8-bit pixel lookup table)
Return: 0 if OK; 1 on error
Notes: (1) This sums the ON pixels and returns immediately if the count goes above threshold. It is therefore more efficient for matching images (by running this function on the xor of the 2 images) than using pixCountPixels(), which counts all pixels before returning.
| LEPT_DLL l_int32 pixThresholdSpreadNorm | ( | PIX * | pixs, |
| l_int32 | filtertype, | ||
| l_int32 | edgethresh, | ||
| l_int32 | smoothx, | ||
| l_int32 | smoothy, | ||
| l_float32 | gamma, | ||
| l_int32 | minval, | ||
| l_int32 | maxval, | ||
| l_int32 | targetthresh, | ||
| PIX ** | ppixth, | ||
| PIX ** | ppixb, | ||
| PIX ** | ppixd | ||
| ) |
Input: pixs (8 bpp grayscale; not colormapped)
filtertype (L_SOBEL_EDGE or L_TWO_SIDED_EDGE);
edgethresh (threshold on magnitude of edge filter; typ 10-20)
smoothx, smoothy (half-width of convolution kernel applied to
spread threshold: use 0 for no smoothing)
gamma (gamma correction; typ. about 0.7)
minval (input value that gives 0 for output; typ. -25)
maxval (input value that gives 255 for output; typ. 255)
targetthresh (target threshold for normalization)
&pixth (<optional return> computed local threshold value)
&pixb (<optional return> thresholded normalized image)
&pixd (<optional return> normalized image)
Return: 0 if OK, 1 on error
Notes: (1) The basis of this approach is the use of seed spreading on a (possibly) sparse set of estimates for the local threshold. The resulting dense estimates are smoothed by convolution and used to either threshold the input image or normalize it with a local transformation that linearly maps the pixels so that the local threshold estimate becomes constant over the resulting image. This approach is one of several that have been suggested (and implemented) by Ray Smith. (2) You can use either the Sobel or TwoSided edge filters. The results appear to be similar, using typical values of edgethresh in the range 10-20. (3) To skip the trc enhancement, use gamma = 1.0, minval = 0 and maxval = 255. (4) For the normalized image pixd, each pixel is linearly mapped in such a way that the local threshold is equal to targetthresh. (5) The full width and height of the convolution kernel are (2 * smoothx + 1) and (2 * smoothy + 1). (6) This function can be used with the pixtiling utility if the images are too large. See pixOtsuAdaptiveThreshold() for an example of this.
Input: pixs (8 bpp)
nlevels (equally spaced; must be between 2 and 4)
cmapflag (1 to build colormap; 0 otherwise)
Return: pixd (2 bpp, optionally with colormap), or null on error
Notes: (1) Valid values for nlevels are in the set {2, 3, 4}. (2) Any colormap on the input pixs is removed to 8 bpp grayscale. (3) This function is typically invoked with cmapflag == 1. In the situation where no colormap is desired, nlevels is ignored and pixs is thresholded to 4 levels. (4) The target output colors are equally spaced, with the darkest at 0 and the lightest at 255. The thresholds are chosen halfway between adjacent output values. A table is built that specifies the mapping from src to dest. (5) If cmapflag == 1, a colormap of size 'nlevels' is made, and the pixel values in pixs are replaced by their appropriate color indices. The number of holdouts, 4 - nlevels, will be between 0 and 2. (6) If you don't want the thresholding to be equally spaced, either first transform the 8 bpp src using pixGammaTRC(), or, if cmapflag == 1, after calling this function you can use pixcmapResetColor() to change any individual colors. (7) If a colormap is generated, it will specify (to display programs) exactly how each level is to be represented in RGB space. When representing text, 3 levels is far better than 2 because of the antialiasing of the single gray level, and 4 levels (black, white and 2 gray levels) is getting close to the perceptual quality of a (nearly continuous) grayscale image. With 2 bpp, you can set up a colormap and allocate from 2 to 4 levels to represent antialiased text. Any left over colormap entries can be used for coloring regions. For the same number of levels, the file size of a 2 bpp image is about 10% smaller than that of a 4 bpp result for the same number of levels. For both 2 bpp and 4 bpp, using 4 levels you get compression far better than that of jpeg, because the quantization to 4 levels will remove the jpeg ringing in the background near character edges.
Input: pixs (8 bpp, can have colormap)
nlevels (equally spaced; must be between 2 and 16)
cmapflag (1 to build colormap; 0 otherwise)
Return: pixd (4 bpp, optionally with colormap), or null on error
Notes: (1) Valid values for nlevels are in the set {2, ... 16}. (2) Any colormap on the input pixs is removed to 8 bpp grayscale. (3) This function is typically invoked with cmapflag == 1. In the situation where no colormap is desired, nlevels is ignored and pixs is thresholded to 16 levels. (4) The target output colors are equally spaced, with the darkest at 0 and the lightest at 255. The thresholds are chosen halfway between adjacent output values. A table is built that specifies the mapping from src to dest. (5) If cmapflag == 1, a colormap of size 'nlevels' is made, and the pixel values in pixs are replaced by their appropriate color indices. The number of holdouts, 16 - nlevels, will be between 0 and 14. (6) If you don't want the thresholding to be equally spaced, either first transform the 8 bpp src using pixGammaTRC(), or, if cmapflag == 1, after calling this function you can use pixcmapResetColor() to change any individual colors. (7) If a colormap is generated, it will specify, to display programs, exactly how each level is to be represented in RGB space. When representing text, 3 levels is far better than 2 because of the antialiasing of the single gray level, and 4 levels (black, white and 2 gray levels) is getting close to the perceptual quality of a (nearly continuous) grayscale image. Therefore, with 4 bpp, you can set up a colormap, allocate a relatively small fraction of the 16 possible values to represent antialiased text, and use the other colormap entries for other things, such as coloring text or background. Two other reasons for using a small number of gray values for antialiased text are (1) PNG compression gets worse as the number of levels that are used is increased, and (2) using a small number of levels will filter out most of the jpeg ringing that is typically introduced near sharp edges of text. This filtering is partly responsible for the improved compression.
Input: pixs (4 or 8 bpp)
threshold value
Return: pixd (1 bpp), or null on error
Notes: (1) If the source pixel is less than the threshold value, the dest will be 1; otherwise, it will be 0
Input: pixd (<optional>; if not null, must be equal to pixs)
pixs (8, 16, 32 bpp)
threshval
setval
Return: pixd always
Notes:
| LEPT_DLL PIXTILING* pixTilingCreate | ( | PIX * | pixs, |
| l_int32 | nx, | ||
| l_int32 | ny, | ||
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | xoverlap, | ||
| l_int32 | yoverlap | ||
| ) |
Input: pixs (pix to be tiled; any depth; colormap OK)
nx (number of tiles across image)
ny (number of tiles down image)
w (desired width of each tile)
h (desired height of each tile)
xoverlap (overlap into neighboring tiles on each side)
yoverlap (overlap into neighboring tiles above and below)
Return: pixtiling, or null on error
Notes: (1) We put a clone of pixs in the PixTiling. (2) The input to pixTilingCreate() for horizontal tiling can be either the number of tiles across the image or the approximate width of the tiles. If the latter, the actual width will be determined by making all tiles but the last of equal width, and making the last as close to the others as possible. The same consideration is applied independently to the vertical tiling. To specify tile width, set nx = 0; to specify the number of tiles horizontally across the image, set w = 0. (3) If pixs is to be tiled in one-dimensional strips, use ny = 1 for vertical strips and nx = 1 for horizontal strips. (4) The overlap must not be larger than the width or height of the leftmost or topmost tile(s).
Input: &pt (<will be set to null before returning>) Return: void
Input: pt (pixtiling)
&nx (<optional return> nx; can be null)
&ny (<optional return> ny; can be null)
Return: 0 if OK, 1 on error
Input: pt (pixtiling)
&w (<optional return> tile width; can be null)
&h (<optional return> tile height; can be null)
Return: 0 if OK, 1 on error
Input: pt (pixtiling)
i (tile row index)
j (tile column index)
Return: pixd (tile with appropriate boundary (overlap) pixels added),
or null on error
Input: pt (pixtiling) Return: 0 if OK, 1 on error
Notes: (1) The default for paint is to strip out the overlap pixels that are added by pixTilingGetTile(). However, some operations will generate an image with these pixels stripped off. This tells the paint operation not to strip the added boundary pixels when painting.
| LEPT_DLL l_int32 pixTilingPaintTile | ( | PIX * | pixd, |
| l_int32 | i, | ||
| l_int32 | j, | ||
| PIX * | pixs, | ||
| PIXTILING * | pt | ||
| ) |
Input: pixd (dest: paint tile onto this, without overlap)
i (tile row index)
j (tile column index)
pixs (source: tile to be painted from)
pt (pixtiling struct)
Return: 0 if OK, 1 on error
Input: pixs
hsize (of Sel; must be odd; origin implicitly in center)
vsize (ditto)
type (L_TOPHAT_WHITE: image - opening
L_TOPHAT_BLACK: closing - image)
Return: pixd, or null on error
Notes: (1) Sel is a brick with all elements being hits (2) If hsize = vsize = 1, returns an image with all 0 data. (3) The L_TOPHAT_WHITE flag emphasizes small bright regions, whereas the L_TOPHAT_BLACK flag emphasizes small dark regions. The L_TOPHAT_WHITE tophat can be accomplished by doing a L_TOPHAT_BLACK tophat on the inverse, or v.v.
| LEPT_DLL l_int32 pixTransferAllData | ( | PIX * | pixd, |
| PIX ** | ppixs, | ||
| l_int32 | copytext, | ||
| l_int32 | copyformat | ||
| ) |
Input: pixd (must be different from pixs)
&pixs (will be nulled if refcount goes to 0)
copytext (1 to copy the text field; 0 to skip)
copyformat (1 to copy the informat field; 0 to skip)
Return: 0 if OK, 1 on error
Notes: (1) This does a complete data transfer from pixs to pixd, followed by the destruction of pixs (refcount permitting). (2) If the refcount of pixs is 1, pixs is destroyed. Otherwise, the data in pixs is copied (rather than transferred) to pixd. (3) This operation, like all others with a pre-existing pixd, will side-effect any existing clones of pixd. The pixd refcount does not change. (4) When might you use this? Suppose you have an in-place Pix function (returning void) with the typical signature: void function-inplace(PIX *pix, ...) where "..." are non-pointer input parameters, and suppose further that you sometimes want to return an arbitrary Pix in place of the input Pix. There are two ways you can do this: (a) The straightforward way is to change the function signature to take the address of the Pix ptr: void function-inplace(PIX **ppix, ...) { PIX *pixt = function-makenew(*ppix); pixDestroy(ppix); *ppix = pixt; return; } Here, the input and returned pix are different, as viewed by the calling function, and the inplace function is expected to destroy the input pix to avoid a memory leak. (b) Keep the signature the same and use pixTransferAllData() to return the new Pix in the input Pix struct: void function-inplace(PIX *pix, ...) { PIX *pixt = function-makenew(pix); pixTransferAllData(pix, &pixt, 0, 0); // pixDestroy() is called on pixt return; } Here, the input and returned pix are the same, as viewed by the calling function, and the inplace function must never destroy the input pix, because the calling function maintains an unchanged handle to it.
| LEPT_DLL PIX* pixTranslate | ( | PIX * | pixd, |
| PIX * | pixs, | ||
| l_int32 | hshift, | ||
| l_int32 | vshift, | ||
| l_int32 | incolor | ||
| ) |
Input: pixd (<optional> destination: this can be null,
equal to pixs, or different from pixs)
pixs
hshift (horizontal shift; hshift > 0 is to right)
vshift (vertical shift; vshift > 0 is down)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK)
Return: pixd, or null on error.
Notes: (1) The general pattern is: pixd = pixTranslate(pixd, pixs, ...); For clarity, when you know the case, use one of these: pixd = pixTranslate(NULL, pixs, ...); // new pixTranslate(pixs, pixs, ...); // in-place pixTranslate(pixd, pixs, ...); // to existing pixd (2) If an existing pixd is not the same size as pixs, the image data will be reallocated.
Input: pixs (8 bpp grayscale or 32 bpp rgb; not colormapped)
pixm (<optional> 1 bpp mask)
na (mapping array)
Return: pixd, or null on error
Notes: (1) This operation is in-place on pixs. (2) For 32 bpp, this applies the same map to each of the r,g,b components. (3) The mapping array is of size 256, and it maps the input index into values in the range [0, 255]. (4) If defined, the optional 1 bpp mask pixm has its origin aligned with pixs, and the map function is applied only to pixels in pixs under the fg of pixm. (5) For 32 bpp, this does not save the alpha channel.
Input: pixs (8 bpp; no colormap)
orientflag (L_HORIZONTAL_EDGES, L_VERTICAL_EDGES)
Return: pixd (8 bpp, edges are brighter), or null on error
Notes: (1) For detecting vertical edges, this considers the difference of the central pixel from those on the left and right. For situations where the gradient is the same sign on both sides, this computes and stores the minimum (absolute value of the) difference. The reason for checking the sign is that we are looking for pixels within a transition. By contrast, for single pixel noise, the pixel value is either larger than or smaller than its neighbors, so the gradient would change direction on each side. Horizontal edges are handled similarly, looking for vertical gradients. (2) To generate a binary image of the edges, threshold the result using pixThresholdToBinary(). If the high edge values are to be fg (1), invert after running pixThresholdToBinary(). (3) This runs at about 60 Mpix/sec on a 3 GHz processor. It is about 30% faster than Sobel, and the results are similar.
Input: pixs (binary)
sela
type (L_MORPH_DILATE, etc.)
Return: pixd (union of the specified morphological operation
on pixs for each Sel in the Sela), or null on error
Input: pixs (1 bpp)
depth (of destination: 2, 4, 8, 16 or 32 bpp)
invert (0: binary 0 --> grayscale 0
binary 1 --> grayscale 0xff...
1: binary 0 --> grayscale 0xff...
binary 1 --> grayscale 0)
Return: pixd (2, 4, 8, 16 or 32 bpp), or null on error
Notes: (1) This function calls special cases of pixConvert1To*(), for 2, 4, 8, 16 and 32 bpp destinations.
Input: pixs (all depths except 1 bpp; with or without colormaps)
halfwidth ("half-width" of smoothing filter)
fract (fraction of edge added back into image)
Return: pixd, or null on error
Notes: (1) We use symmetric smoothing filters of odd dimension, typically of size 3, 5, 7, etc. The @halfwidth parameter for these is (size - 1)/2; i.e., 1, 2, 3, etc. (2) The fract parameter is typically taken in the range: 0.2 < fract < 0.7 (3) Returns a clone if no sharpening is requested.
| LEPT_DLL PIX* pixUnsharpMaskingFast | ( | PIX * | pixs, |
| l_int32 | halfwidth, | ||
| l_float32 | fract, | ||
| l_int32 | direction | ||
| ) |
Input: pixs (all depths except 1 bpp; with or without colormaps)
halfwidth ("half-width" of smoothing filter; 1 and 2 only)
fract (fraction of high frequency added to image)
direction (L_HORIZ, L_VERT, L_BOTH_DIRECTIONS)
Return: pixd, or null on error
Notes: (1) The fast version uses separable 1-D filters directly on the input image. The halfwidth is either 1 (full width = 3) or 2 (full width = 5). (2) The fract parameter is typically taken in the range: 0.2 < fract < 0.7 (3) To skip horizontal sharpening, use @fracth = 0.0; ditto for @fractv (4) For one dimensional filtering (as an example): For @halfwidth = 1, the low-pass filter is L: 1/3 1/3 1/3 and the high-pass filter is H = I - L: -1/3 2/3 -1/3 For @halfwidth = 2, the low-pass filter is L: 1/5 1/5 1/5 1/5 1/5 and the high-pass filter is H = I - L: -1/5 -1/5 4/5 -1/5 -1/5 The new sharpened pixel value is found by adding some fraction of the high-pass filter value (which sums to 0) to the initial pixel value: N = I + fract * H (5) For 2D, the sharpening filter is not separable, because the vertical filter depends on the horizontal location relative to the filter origin, and v.v. So we either do the full 2D filter (for @halfwidth == 1) or do the low-pass convolution separably and then compose with the original pix. (6) Returns a clone if no sharpening is requested.
Input: pixs (8 bpp; no colormap)
halfwidth ("half-width" of smoothing filter)
fract (fraction of edge added back into image)
Return: pixd, or null on error
Notes: (1) We use symmetric smoothing filters of odd dimension, typically of size 3, 5, 7, etc. The @halfwidth parameter for these is (size - 1)/2; i.e., 1, 2, 3, etc. (2) The fract parameter is typically taken in the range: 0.2 < fract < 0.7 (3) Returns a clone if no sharpening is requested.
| LEPT_DLL PIX* pixUnsharpMaskingGray1D | ( | PIX * | pixs, |
| l_int32 | halfwidth, | ||
| l_float32 | fract, | ||
| l_int32 | direction | ||
| ) |
Input: pixs (8 bpp; no colormap)
halfwidth ("half-width" of smoothing filter: 1 or 2)
fract (fraction of high frequency added to image)
direction (of filtering; use L_HORIZ or L_VERT)
Return: pixd, or null on error
Notes: (1) For usage and explanation of the algorithm, see notes in pixUnsharpMaskingFast(). (2) Returns a clone if no sharpening is requested.
Input: pixs (8 bpp; no colormap)
halfwidth ("half-width" of smoothing filter: 1 or 2)
fract (fraction of high frequency added to image)
Return: pixd, or null on error
Notes: (1) For halfwidth == 1, we implement the full sharpening filter directly. For halfwidth == 2, we implement the lowpass filter separably and then compute the sharpening result locally. (2) Returns a clone if no sharpening is requested.
| LEPT_DLL PIX* pixUnsharpMaskingGrayFast | ( | PIX * | pixs, |
| l_int32 | halfwidth, | ||
| l_float32 | fract, | ||
| l_int32 | direction | ||
| ) |
Input: pixs (8 bpp; no colormap)
halfwidth ("half-width" of smoothing filter: 1 or 2)
fract (fraction of high frequency added to image)
direction (L_HORIZ, L_VERT, L_BOTH_DIRECTIONS)
Return: pixd, or null on error
Notes: (1) For usage and explanation of the algorithm, see notes in pixUnsharpMaskingFast(). (2) Returns a clone if no sharpening is requested.
Input: pixs (1 bpp, deskewed, English text, 150 - 300 ppi)
&conf (<return> confidence that text is rightside-up)
mincount (min number of up + down; use 0 for default)
debug (1 for debug output; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) Special (typical, slightly faster) case, where the pixels identified through the HMT (hit-miss transform) are not clipped by a truncated word mask pixm. See pixOrientDetect() and pixUpDownDetectGeneral() for details. (2) The returned confidence is the normalized difference between the number of detected up and down ascenders, assuming that the text is either rightside-up or upside-down and not rotated at a 90 degree angle.
| LEPT_DLL l_int32 pixUpDownDetectDwa | ( | PIX * | pixs, |
| l_float32 * | pconf, | ||
| l_int32 | mincount, | ||
| l_int32 | debug | ||
| ) |
Input: pixs (1 bpp, deskewed, English text, 150 - 300 ppi)
&conf (<return> confidence that text is rightside-up)
mincount (min number of up + down; use 0 for default)
debug (1 for debug output; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) Faster (DWA) version of pixUpDownDetect(). (2) This is a special case (but typical and slightly faster) of pixUpDownDetectGeneralDwa(), where the pixels identified through the HMT (hit-miss transform) are not clipped by a truncated word mask pixm. See pixUpDownDetectGeneral() for usage and other details. (3) The returned confidence is the normalized difference between the number of detected up and down ascenders, assuming that the text is either rightside-up or upside-down and not rotated at a 90 degree angle.
| LEPT_DLL l_int32 pixUpDownDetectGeneral | ( | PIX * | pixs, |
| l_float32 * | pconf, | ||
| l_int32 | mincount, | ||
| l_int32 | npixels, | ||
| l_int32 | debug | ||
| ) |
Input: pixs (1 bpp, deskewed, English text, 150 - 300 ppi)
&conf (<return> confidence that text is rightside-up)
mincount (min number of up + down; use 0 for default)
npixels (number of pixels removed from each side of word box)
debug (1 for debug output; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) See pixOrientDetect() for other details. (2) @conf is the normalized difference between the number of detected up and down ascenders, assuming that the text is either rightside-up or upside-down and not rotated at a 90 degree angle. (3) The typical mode of operation is @npixels == 0. If @npixels > 0, this removes HMT matches at the beginning and ending of "words." This is useful for pages that may have mostly digits, because if npixels == 0, leading "1" and "3" digits can register as having ascenders or descenders, and "7" digits can match descenders. Consequently, a page image of only digits may register as being upside-down. (4) We want to count the number of instances found using the HMT. An expensive way to do this would be to count the number of connected components. A cheap way is to do a rank reduction cascade that reduces each component to a single pixel, and results (after two or three 2x reductions) in one pixel for each of the original components. After the reduction, you have a much smaller pix over which to count pixels. We do only 2 reductions, because this function is designed to work for input pix between 150 and 300 ppi, and an 8x reduction on a 150 ppi image is going too far – components will get merged.
| LEPT_DLL l_int32 pixUpDownDetectGeneralDwa | ( | PIX * | pixs, |
| l_float32 * | pconf, | ||
| l_int32 | mincount, | ||
| l_int32 | npixels, | ||
| l_int32 | debug | ||
| ) |
Input: pixs (1 bpp, deskewed, English text)
&conf (<return> confidence that text is rightside-up)
mincount (min number of up + down; use 0 for default)
npixels (number of pixels removed from each side of word box)
debug (1 for debug output; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) See the notes in pixUpDownDetectGeneral() for usage.
Input: pixs
&color (<return>)
Return: 0 if OK, 1 on error
Notes: (1) This returns color = TRUE if three things are obtained: (a) the pix has a colormap (b) the colormap has at least one color entry (c) a color entry is actually used (2) It is used in pixEqual() for comparing two images, in a situation where it is required to know if the colormap has color entries that are actually used in the image.
Input: pix (8 or 16 bpp; no colormap)
box (<optional> clipping box for variance; can be null)
Return: na of rmsdev by column, or null on error
Notes: (1) To resample for a bin size different from 1, use numaUniformSampling() on the result of this function. (2) We are actually computing the RMS deviation in each column. This is the square root of the variance.
Input: pix (8 or 16 bpp; no colormap)
box (<optional> clipping box for variance; can be null)
Return: na of rmsdev by row, or null on error
Notes: (1) To resample for a bin size different from 1, use numaUniformSampling() on the result of this function. (2) We are actually computing the RMS deviation in each row. This is the square root of the variance.
Input: pix (1, 2, 4, 8 bpp; not cmapped)
box (<optional> if null, use entire image)
&rootvar (<return> sqrt variance of pixel values in region)
Return: 0 if OK; 1 on error
| LEPT_DLL l_int32 pixVarianceInRectangle | ( | PIX * | pixs, |
| BOX * | box, | ||
| PIX * | pix_ma, | ||
| DPIX * | dpix_msa, | ||
| l_float32 * | pvar, | ||
| l_float32 * | prvar | ||
| ) |
Input: pix (8 bpp)
box (region to compute variance and/or root variance)
pix_ma (mean accumulator)
dpix_msa (mean square accumulator)
&var (<optional return> variance)
&rvar (<optional return> root variance)
Return: 0 if OK, 1 on error
Notes: (1) This function is intended to be used for many rectangles on the same image. It can find the variance and/or the square root of the variance within a rectangle in O(1), independent of the size of the rectangle.
Input: pixs (8 bpp)
pixg (8 bpp; contains threshold values for each pixel)
Return: pixd (1 bpp), or null on error
Notes: (1) If the pixel in pixs is less than the corresponding pixel in pixg, the dest will be 1; otherwise it will be 0.
Input: pixd (<optional>, this can be null, equal to pixs,
or different from pixs)
pixs (no restrictions on depth)
xloc (location of vertical line, measured from origin)
angle (in radians; not too close to +-(pi / 2))
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK);
Return: pixd, or null on error
Notes: (1) There are 3 cases: (a) pixd == null (make a new pixd) (b) pixd == pixs (in-place) (c) pixd != pixs (2) For these three cases, use these patterns, respectively: pixd = pixVShear(NULL, pixs, ...); pixVShear(pixs, pixs, ...); pixVShear(pixd, pixs, ...); (3) This shear leaves the vertical line of pixels at x = xloc invariant. For a positive shear angle, pixels to the right of this line are shoved downward, and pixels to the left of the line move upward. (4) With positive shear angle, this can be used, along with pixHShear(), to perform a cw rotation, either with 2 shears (for small angles) or in the general case with 3 shears. (5) Changing the value of xloc is equivalent to translating the result vertically. (6) This brings in 'incolor' pixels from outside the image. (7) For in-place operation, pixs cannot be colormapped, because the in-place operation only blits in 0 or 1 bits, not an arbitrary colormap index. (8) The angle is brought into the range [-pi, pi]. It is not permitted to be within MIN_DIFF_FROM_HALF_PI radians from either -pi/2 or pi/2.
Input: pixd (<optional>, if not null, must be equal to pixs)
pixs
angle (in radians)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK);
Return: pixd, or null on error.
Notes: (1) See pixVShear() for usage. (2) This does a vertical shear about the center, with (+) shear pushing increasingly downward (+y) with increasing x.
Input: pixd (<optional>, if not null, must be equal to pixs)
pixs
angle (in radians)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK);
Return: pixd, or null on error.
Notes: (1) See pixVShear() for usage. (2) This does a vertical shear about the UL corner, with (+) shear pushing increasingly downward (+y) with increasing x.
Input: pixs (all depths; not colormapped)
xloc (location of vertical line, measured from origin)
angle (in radians)
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK);
Return: 0 if OK; 1 on error
Notes: (1) This is an in-place version of pixVShear(); see comments there. (2) This brings in 'incolor' pixels from outside the image. (3) pixs cannot be colormapped, because the in-place operation only blits in 0 or 1 bits, not an arbitrary colormap index. (4) Does a vertical full-band shear about the line with (+) shear pushing increasingly downward (+y) with increasing x.
Input: pixs (8 bpp or 32 bpp, or colormapped)
xloc (location of vertical line, measured from origin)
angle (in radians, in range (-pi/2 ... pi/2))
incolor (L_BRING_IN_WHITE, L_BRING_IN_BLACK);
Return: pixd (sheared), or null on error
Notes: (1) This does vertical shear with linear interpolation for accurate results on 8 bpp gray, 32 bpp rgb, or cmapped images. It is relatively slow compared to the sampled version implemented by rasterop, but the result is much smoother. (2) This shear leaves the vertical line of pixels at x = xloc invariant. For a positive shear angle, pixels to the right of this line are shoved downward, and pixels to the left of the line move upward. (3) Any colormap is removed. (4) The angle is brought into the range [-pi/2 + del, pi/2 - del], where del == MIN_DIFF_FROM_HALF_PI.
| LEPT_DLL PIX* pixWarpStereoscopic | ( | PIX * | pixs, |
| l_int32 | zbend, | ||
| l_int32 | zshiftt, | ||
| l_int32 | zshiftb, | ||
| l_int32 | ybendt, | ||
| l_int32 | ybendb, | ||
| l_int32 | redleft | ||
| ) |
Input: pixs (any depth, colormap ok)
zbend (horizontal separation in pixels of red and cyan
at the left and right sides, that gives rise to
quadratic curvature out of the image plane)
zshiftt (uniform pixel translation difference between
red and cyan, that pushes the top of the image
plane away from the viewer (zshiftt > 0) or
towards the viewer (zshiftt < 0))
zshiftb (uniform pixel translation difference between
red and cyan, that pushes the bottom of the image
plane away from the viewer (zshiftb > 0) or
towards the viewer (zshiftb < 0))
ybendt (multiplicative parameter for in-plane vertical
displacement at the left or right edge at the top:
y = ybendt * (2x/w - 1)^2 )
ybendb (same as ybendt, except at the left or right edge
at the bottom)
redleft (1 if the red filter is on the left; 0 otherwise)
Return: pixd (32 bpp), or null on error
Notes: (1) This function splits out the red channel, mucks around with it, then recombines with the unmolested cyan channel. (2) By using a quadratically increasing shift of the red pixels horizontally and away from the vertical centerline, the image appears to bend quadratically out of the image plane, symmetrically with respect to the vertical center line. A positive value of @zbend causes the plane to be curved away from the viewer. We use linearly interpolated stretching to avoid the appearance of kinks in the curve. (3) The parameters @zshiftt and @zshiftb tilt the image plane about a horizontal line through the center, and at the same time move that line either in toward the viewer or away. This is implemented by a combination of horizontal shear about the center line (for the tilt) and horizontal translation (to move the entire plane in or out). A positive value of @zshiftt moves the top of the plane away from the viewer, and a positive value of @zshiftb moves the bottom of the plane away. We use linear interpolated shear to avoid visible vertical steps in the tilted image. (4) The image can be bent in the plane and about the vertical centerline. The centerline does not shift, and the parameter @ybend gives the relative shift at left and right edges, with a downward shift for positive values of @ybend. (6) When writing out a stereoscopic (red/cyan) image in jpeg, first call pixSetChromaSampling(pix, 0) to get sufficient resolution in the red channel. (7) Typical values are: zbend = 20 zshiftt = 15 zshiftb = -15 ybendt = 30 ybendb = 0 If the disparity z-values are too large, it is difficult for the brain to register the two images. (8) This function has been cleverly reimplemented by Jeff Breidenbach. The original implementation used two 32 bpp rgb images, and merged them at the end. The result is somewhat faded, and has a parameter "thresh" that controls the amount of color in the result. 
(The present implementation avoids these two problems, skipping both the colorization and the alpha blending at the end, and is about 3x faster) The basic operations with 32 bpp are as follows: // Immediate conversion to 32 bpp Pix *pixt1 = pixConvertTo32(pixs); // Do vertical shear Pix *pixr = pixQuadraticVerticalShear(pixt1, L_WARP_TO_RIGHT, ybendt, ybendb, L_BRING_IN_WHITE); // Colorize two versions, toward red and cyan Pix *pixc = pixCopy(NULL, pixr); l_int32 thresh = 150; // if higher, get less original color pixColorGray(pixr, NULL, L_PAINT_DARK, thresh, 255, 0, 0); pixColorGray(pixc, NULL, L_PAINT_DARK, thresh, 0, 255, 255); // Shift the red pixels; e.g., by stretching Pix *pixrs = pixStretchHorizontal(pixr, L_WARP_TO_RIGHT, L_QUADRATIC_WARP, zbend, L_INTERPOLATED, L_BRING_IN_WHITE); // Blend the shifted red and unshifted cyan 50:50 Pix *pixg = pixCreate(w, h, 8); pixSetAllArbitrary(pixg, 128); pixd = pixBlendWithGrayMask(pixrs, pixc, pixg, 0, 0);
| LEPT_DLL PIX* pixWindowedMean | ( | PIX * | pixs, |
| l_int32 | wc, | ||
| l_int32 | hc, | ||
| l_int32 | hasborder, | ||
| l_int32 | normflag | ||
| ) |
Input: pixs (8 or 32 bpp grayscale)
wc, hc (half width/height of convolution kernel)
hasborder (use 1 if it already has (wc + 1) border pixels
on left and right, and (hc + 1) on top and bottom;
use 0 to add kernel-dependent border)
normflag (1 for normalization to get average in window;
0 for the sum in the window (un-normalized))
Return: pixd (8 or 32 bpp, average over kernel window)
Notes: (1) The input and output depths are the same. (2) A set of border pixels of width (wc + 1) on left and right, and of height (hc + 1) on top and bottom, must be on the pix before the accumulator is found. The output pixd (after convolution) has this border removed. If @hasborder = 0, the required border is added. (3) Typically, @normflag == 1. However, if you want the sum within the window, rather than a normalized convolution, use @normflag == 0. (4) This builds a block accumulator pix, uses it here, and destroys it. (5) The added border, along with the use of an accumulator array, allows computation without special treatment of pixels near the image boundary, and runs in a time that is independent of the size of the convolution kernel.
Input: pixs (8 bpp grayscale)
wc, hc (half width/height of convolution kernel)
hasborder (use 1 if it already has (wc + 1) border pixels
on left and right, and (hc + 1) on top and bottom;
use 0 to add kernel-dependent border)
Return: pixd (32 bpp, average over rectangular window of
width = 2 * wc + 1 and height = 2 * hc + 1)
Notes: (1) A set of border pixels of width (wc + 1) on left and right, and of height (hc + 1) on top and bottom, must be on the pix before the accumulator is found. The output pixd (after convolution) has this border removed. If @hasborder = 0, the required border is added. (2) The advantage is that we are unaffected by the boundary, and it is not necessary to treat pixels within @wc and @hc of the border differently. This is because processing for pixd only takes place for pixels in pixs for which the kernel is entirely contained in pixs. (3) Why do we have an added border of width (@wc + 1) and height (@hc + 1), when we only need @wc and @hc pixels to satisfy this condition? Answer: the accumulators are asymmetric, requiring an extra row and column of pixels at top and left to work accurately. (4) The added border, along with the use of an accumulator array, allows computation without special treatment of pixels near the image boundary, and runs in a time that is independent of the size of the convolution kernel.
| LEPT_DLL l_int32 pixWindowedStats | ( | PIX * | pixs, |
| l_int32 | wc, | ||
| l_int32 | hc, | ||
| l_int32 | hasborder, | ||
| PIX ** | ppixm, | ||
| PIX ** | ppixms, | ||
| FPIX ** | pfpixv, | ||
| FPIX ** | pfpixrv | ||
| ) |
Input: pixs (8 bpp grayscale)
wc, hc (half width/height of convolution kernel)
hasborder (use 1 if it already has (wc + 1) border pixels
on left and right, and (hc + 1) on top and bottom;
use 0 to add kernel-dependent border)
&pixm (<optional return> 8 bpp mean value in window)
&pixms (<optional return> 32 bpp mean square value in window)
&fpixv (<optional return> float variance in window)
&fpixrv (<optional return> float rms deviation from the mean)
Return: 0 if OK, 1 on error
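Notes: (1) This is a high-level convenience function for calculating any or all of these derived images. (2) If @hasborder = 0, a border is added and the result is computed over all pixels in pixs. Otherwise, no border is added and the border pixels are removed from the output images. (3) These statistical measures over the pixels in the rectangular window are:
mean value: $\langle p \rangle$ (pixm)
mean square value: $\langle p \cdot p \rangle$ (pixms)
variance: $\langle (p - \langle p \rangle)(p - \langle p \rangle) \rangle = \langle p \cdot p \rangle - \langle p \rangle \cdot \langle p \rangle$ (pixv)
square root of the variance, i.e. the rms deviation from the mean: $\sqrt{\langle p \cdot p \rangle - \langle p \rangle \cdot \langle p \rangle}$ (pixrv)
where the brackets $\langle \cdot \rangle$ denote the average taken over the window.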
Input: pixm (mean over window; 8 or 32 bpp grayscale)
pixms (mean square over window; 32 bpp)
&fpixv (<optional return> float variance -- the ms deviation
from the mean)
&fpixrv (<optional return> float rms deviation from the mean)
Return: 0 if OK, 1 on error
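Notes: (1) The mean and mean square values are precomputed, using pixWindowedMean() and pixWindowedMeanSquare(). (2) Either or both of the variance and square-root of variance are returned as an fpix, where the variance is the average over the window of the mean square difference of the pixel value from the mean: $\langle (p - \langle p \rangle)(p - \langle p \rangle) \rangle = \langle p \cdot p \rangle - \langle p \rangle \cdot \langle p \rangle$, where the brackets $\langle \cdot \rangle$ denote the average taken over the window.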
(3) To visualize the results:
| LEPT_DLL l_int32 pixWindowedVarianceOnLine | ( | PIX * | pixs, |
| l_int32 | dir, | ||
| l_int32 | loc, | ||
| l_int32 | c1, | ||
| l_int32 | c2, | ||
| l_int32 | size, | ||
| NUMA ** | pnad | ||
| ) |
Input: pixs (8 bpp; no colormap)
dir (L_HORIZONTAL_LINE or L_VERTICAL_LINE)
loc (location of the constant coordinate for the line)
c1, c2 (end point coordinates for the line)
size (window size; must be > 1)
&nad (<return> windowed square root of variance)
Return: 0 if OK; 1 on error
Notes: (1) The returned variance array traverses the line starting from the smallest coordinate, min(c1,c2). (2) Line end points are clipped to pixs. (3) The reference point for the variance calculation is the center of the window. Therefore, the numa start parameter from pixExtractOnLine() is incremented by @size/2, to align the variance values with the pixel coordinate. (4) The square root of the variance is the RMS deviation from the mean.
| LEPT_DLL l_int32 pixWordBoxesByDilation | ( | PIX * | pixs, |
| l_int32 | maxdil, | ||
| l_int32 | minwidth, | ||
| l_int32 | minheight, | ||
| l_int32 | maxwidth, | ||
| l_int32 | maxheight, | ||
| BOXA ** | pboxa, | ||
| l_int32 * | psize | ||
| ) |
Input: pixs (1 bpp; typ. at 75 to 150 ppi)
maxdil (maximum dilation; 0 for default; warning if > 20)
minwidth, minheight (of saved components; smaller are discarded)
maxwidth, maxheight (of saved components; larger are discarded)
&boxa (<return> pruned word boxes derived from the dilated word mask)
&size (<optional return> size of optimal horiz Sel)
Return: 0 if OK, 1 on error
Notes: (1) Returns a pruned set of word boxes. (2) See pixWordMaskByDilation().
| LEPT_DLL l_int32 pixWordMaskByDilation | ( | PIX * | pixs, |
| l_int32 | maxdil, | ||
| PIX ** | ppixm, | ||
| l_int32 * | psize | ||
| ) |
Input: pixs (1 bpp; typ. at 75 to 150 ppi)
maxdil (maximum dilation; 0 for default; warning if > 20)
&mask (<optional return> dilated word mask)
&size (<optional return> size of optimal horiz Sel)
Return: 0 if OK, 1 on error
Notes: (1) This gives a crude estimate of the word masks. See pixWordBoxesByDilation() for further filtering of the word boxes. (2) For 75 to 150 ppi, the optimal dilation will be between 5 and 11. For 200 to 300 ppi, it is advisable to use a larger value for @maxdil, say between 10 and 20. Setting maxdil <= 0 results in a default dilation of 16. (3) The best size for dilating to get word masks is optionally returned.
Input: filename
pix
format (defined in imageio.h)
Return: 0 if OK; 1 on error
Notes: (1) Open for write using binary mode (with the "b" flag) to avoid having Windows automatically translate the NL into CRLF, which corrupts image files. On non-windows systems this flag should be ignored, per ISO C90. Thanks to Dave Bryan for pointing this out. (2) If the default image format IFF_DEFAULT is requested: use the input format if known; otherwise, use a lossless format. (3) There are two modes with respect to file naming. (a) The default code writes to @filename. (b) If WRITE_AS_NAMED is defined to 0, it's a bit fancier. Then, if @filename does not have a file extension, one is automatically appended, depending on the requested format. The original intent for providing option (b) was to insure that filenames on Windows have an extension that matches the image compression. However, this is not the default.
Input: filename
pix
Return: 0 if OK; 1 on error
| LEPT_DLL l_int32 pixWriteImpliedFormat | ( | const char * | filename, |
| PIX * | pix, | ||
| l_int32 | quality, | ||
| l_int32 | progressive | ||
| ) |
Input: filename
pix
quality (iff JPEG; 1 - 100, 0 for default)
progressive (iff JPEG; 0 for baseline seq., 1 for progressive)
Return: 0 if OK; 1 on error
Notes: (1) This determines the output format from the filename extension. (2) The last two args are ignored except for requests for jpeg files. (3) The jpeg default quality is 75.
| LEPT_DLL l_int32 pixWriteJp2k | ( | const char * | filename, |
| PIX * | pix, | ||
| l_int32 | quality, | ||
| l_int32 | nlevels, | ||
| l_int32 | hint, | ||
| l_int32 | debug | ||
| ) |
| LEPT_DLL l_int32 pixWriteJpeg | ( | const char * | filename, |
| PIX * | pix, | ||
| l_int32 | quality, | ||
| l_int32 | progressive | ||
| ) |
Input: filename
pix (any depth; cmap is OK)
quality (1 - 100; 75 is default)
progressive (0 for baseline sequential; 1 for progressive)
Return: 0 if OK; 1 on error
Input: &data (<return> data of image encoded in the requested format)
&size (<return> size of returned data)
pix
format (defined in imageio.h)
Return: 0 if OK, 1 on error
Notes: (1) On windows, this will only write tiff and PostScript to memory. For other formats, it requires open_memstream(3). (2) PostScript output is uncompressed, in hex ascii. Most printers support level 2 compression (tiff_g4 for 1 bpp, jpeg for 8 and 32 bpp).
Input: &data (<return> data of bmp image)
&size (<return> size of returned data)
pix
Return: 0 if OK, 1 on error
Notes: (1) See pixWriteStreamBmp() for usage. This version writes to memory instead of to a file stream.
| LEPT_DLL l_int32 pixWriteMemJp2k | ( | l_uint8 ** | pdata, |
| size_t * | psize, | ||
| PIX * | pix, | ||
| l_int32 | quality, | ||
| l_int32 | nlevels, | ||
| l_int32 | hint, | ||
| l_int32 | debug | ||
| ) |
| LEPT_DLL l_int32 pixWriteMemJpeg | ( | l_uint8 ** | pdata, |
| size_t * | psize, | ||
| PIX * | pix, | ||
| l_int32 | quality, | ||
| l_int32 | progressive | ||
| ) |
Input: &data (<return> data of jpeg compressed image)
&size (<return> size of returned data)
pix (any depth; cmap is OK)
quality (1 - 100; 75 is default value; 0 is also default)
progressive (0 for baseline sequential; 1 for progressive)
Return: 0 if OK, 1 on error
Notes: (1) See pixWriteStreamJpeg() for usage. This version writes to memory instead of to a file stream.
| LEPT_DLL l_int32 pixWriteMemPdf | ( | l_uint8 ** | pdata, |
| size_t * | pnbytes, | ||
| PIX * | pix, | ||
| l_int32 | res, | ||
| const char * | title | ||
| ) |
Input: &data (<return> pdf as byte array)
&nbytes (<return> number of bytes in pdf array)
pix (all depths, cmap OK)
res (override the resolution of the input image, in ppi;
use 0 to respect the resolution embedded in the input)
title (<optional> pdf title; taken from the first image
placed on a page; e.g., an input image filename)
Return: 0 if OK, 1 on error
Notes: (1) This is the simplest interface for writing a single image with pdf encoding to memory. It uses G4 encoding for 1 bpp, JPEG encoding for 8 bpp (no cmap) and 32 bpp, and FLATE encoding for everything else.
Input: &data (<return> data of png compressed image)
&size (<return> size of returned data)
pix
gamma (use 0.0 if gamma is not defined)
Return: 0 if OK, 1 on error
Notes: (1) See pixWriteStreamPng() for usage. This version writes to memory instead of to a file stream.
Input: &data (<return> data of pnm image)
&size (<return> size of returned data)
pix
Return: 0 if OK, 1 on error
Notes: (1) See pixWriteStreamPnm() for usage. This version writes to memory instead of to a file stream.
| LEPT_DLL l_int32 pixWriteMemPS | ( | l_uint8 ** | pdata, |
| size_t * | psize, | ||
| PIX * | pix, | ||
| BOX * | box, | ||
| l_int32 | res, | ||
| l_float32 | scale | ||
| ) |
Input: &data (<return> data of uncompressed PostScript image)
&size (<return> size of returned data)
pix
box (<optional>)
res (can use 0 for default of 300 ppi)
scale (to prevent scaling, use either 1.0 or 0.0)
Return: 0 if OK, 1 on error
Notes: (1) See pixWriteStringPS() for usage. (2) This is just a wrapper for pixWriteStringPS(), which writes uncompressed image data to memory.
Input: &data (<return> data of serialized, uncompressed pix)
&size (<return> size of returned data)
pix (all depths; colormap OK)
Return: 0 if OK, 1 on error
Input: &data (<return> data of tiff compressed image)
&size (<return> size of returned data)
pix
comptype (IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS,
IFF_TIFF_G3, IFF_TIFF_G4,
IFF_TIFF_LZW, IFF_TIFF_ZIP)
Return: 0 if OK, 1 on error
Usage: (1) See pixWriteTiff(). This version writes to memory instead of to a file.
| LEPT_DLL l_int32 pixWriteMemTiffCustom | ( | l_uint8 ** | pdata, |
| size_t * | psize, | ||
| PIX * | pix, | ||
| l_int32 | comptype, | ||
| NUMA * | natags, | ||
| SARRAY * | savals, | ||
| SARRAY * | satypes, | ||
| NUMA * | nasizes | ||
| ) |
Input: &data (<return> data of tiff compressed image)
&size (<return> size of returned data)
pix
comptype (IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS,
IFF_TIFF_G3, IFF_TIFF_G4,
IFF_TIFF_LZW, IFF_TIFF_ZIP)
natags (<optional> NUMA of custom tiff tags)
savals (<optional> SARRAY of values)
satypes (<optional> SARRAY of types)
nasizes (<optional> NUMA of sizes)
Return: 0 if OK, 1 on error
Usage: (1) See pixWriteTiffCustom(). This version writes to memory instead of to a file. (2) Use TIFFClose(); TIFFCleanup() doesn't free internal memstream.
| LEPT_DLL l_int32 pixWriteMemWebP | ( | l_uint8 ** | pencdata, |
| size_t * | pencsize, | ||
| PIX * | pixs, | ||
| l_int32 | quality, | ||
| l_int32 | lossless | ||
| ) |
| LEPT_DLL l_int32 pixWriteMixedToPS | ( | PIX * | pixb, |
| PIX * | pixc, | ||
| l_float32 | scale, | ||
| l_int32 | pageno, | ||
| const char * | fileout | ||
| ) |
Input: filename
pix
gamma
Return: 0 if OK; 1 on error
Notes: (1) Special version for writing png with a specified gamma. When using pixWrite(), no field is given for gamma.
Input: filein (input file, all depths, colormap OK)
fileout (output ps file)
Return: 0 if OK, 1 on error
Notes: (1) This is a simple wrapper function that generates an uncompressed PS file, with a bounding box. (2) The bounding box is required when a program such as TeX (through epsf) places and rescales the image. (3) The bounding box is sized for fitting the image to an 8.5 x 11.0 inch page.
| LEPT_DLL l_int32 pixWriteSegmentedPageToPS | ( | PIX * | pixs, |
| PIX * | pixm, | ||
| l_float32 | textscale, | ||
| l_float32 | imagescale, | ||
| l_int32 | threshold, | ||
| l_int32 | pageno, | ||
| const char * | fileout | ||
| ) |
Input: stream
pix
format
Return: 0 if OK; 1 on error.
Input: stream opened for write
pix
Return: 0 if OK; 1 on error
Writes "ascii" format only: 1 bpp --> pbm (P1) 2, 4, 8, 16 bpp, no colormap or grayscale colormap --> pgm (P2) 2, 4, 8 bpp with color-valued colormap, or rgb --> rgb ppm (P3)
Input: stream opened for write
pix (1, 4, 8, 32 bpp)
Return: 0 if OK, 1 on error
Notes: (1) We position fp at the beginning of the stream, so it truncates any existing data. (2) 2 bpp Bmp files are apparently not valid! We can write and read them, but nobody else can read ours.
| LEPT_DLL l_int32 pixWriteStreamJp2k | ( | FILE * | fp, |
| PIX * | pix, | ||
| l_int32 | quality, | ||
| l_int32 | nlevels, | ||
| l_int32 | hint, | ||
| l_int32 | debug | ||
| ) |
Input: stream
pixs (any depth; cmap is OK)
quality (1 - 100; 75 is default value; 0 is also default)
progressive (0 for baseline sequential; 1 for progressive)
Return: 0 if OK, 1 on error
Notes: (1) Progressive encoding gives better compression, at the expense of slower encoding and decoding. (2) Standard chroma subsampling is 2x2 on both the U and V channels. For highest quality, use no subsampling; this option is set by pixSetChromaSampling(pix, 0). (3) The only valid pixel depths in leptonica are 1, 2, 4, 8, 16 and 32 bpp. However, it is possible, and in some cases desirable, to write out a jpeg file using an rgb pix that has 24 bpp. This can be created by appending the raster data for a 24 bpp image (with proper scanline padding) directly to a 24 bpp pix that was created without a data array. (4) There are two compression paths in this function:
Input: fp (stream opened for writing)
pix (all depths, cmap OK)
res (override the resolution of the input image, in ppi;
use 0 to respect the resolution embedded in the input)
title (<optional> pdf title; taken from the first image
placed on a page; e.g., an input image filename)
Return: 0 if OK, 1 on error
Notes: (1) This is the simplest interface for writing a single image with pdf encoding to a stream. It uses G4 encoding for 1 bpp, JPEG encoding for 8 bpp (no cmap) and 32 bpp, and FLATE encoding for everything else.
Input: stream
pix
gamma (use 0.0 if gamma is not defined)
Return: 0 if OK; 1 on error
Notes: (1) If called from pixWriteStream(), the stream is positioned at the beginning of the file. (2) To do sequential writes of png format images to a stream, use pixWriteStreamPng() directly. (3) gamma is an optional png chunk. If no gamma value is to be placed into the file, use gamma = 0.0. Otherwise, if gamma > 0.0, its value is written into the header. (4) The use of gamma in png is highly problematic. For an illuminating discussion, see: http://hsivonen.iki.fi/png-gamma/ (5) What is the effect/meaning of gamma in the png file? This gamma, which we can call the 'source' gamma, is the inverse of the gamma that was used in enhance.c to brighten or darken images. The 'source' gamma is supposed to indicate the intensity mapping that was done at the time the image was captured. Display programs typically apply a 'display' gamma of 2.2 to the output, which is intended to linearize the intensity based on the response of thermionic tubes (CRTs). Flat panel LCDs have typically been designed to give a similar response as CRTs (call it "backward compatibility"). The 'display' gamma is in some sense the inverse of the 'source' gamma. jpeg encoders attached to scanners and cameras will lighten the pixels, applying a gamma corresponding to approximately a square-root relation of output vs input: output = input^(gamma) where gamma is often set near 0.4545 (1/gamma is 2.2). This is stored in the image file. Then if the display program reads the gamma, it will apply a display gamma, typically about 2.2; the product is 1.0, and the display program produces a linear output. This works because the dark colors were appropriately boosted by the scanner, as described by the 'source' gamma, so they should not be further boosted by the display program. (6) As an example, with xv and display, if no gamma is stored, the program acts as if gamma were 0.4545, multiplies this by 2.2, and does a linear rendering. 
Taking this as a baseline brightness, if the stored gamma is: > 0.4545, the image is rendered lighter than baseline < 0.4545, the image is rendered darker than baseline In contrast, gqview seems to ignore the gamma chunk in png. (7) The only valid pixel depths in leptonica are 1, 2, 4, 8, 16 and 32. However, it is possible, and in some cases desirable, to write out a png file using an rgb pix that has 24 bpp. For example, the open source xpdf SplashBitmap class generates 24 bpp rgb images. Consequently, we enable writing 24 bpp pix. To generate such a pix, you can make a 24 bpp pix without data and assign the data array to the pix; e.g., pix = pixCreateHeader(w, h, 24); pixSetData(pix, rgbdata); See pixConvert32To24() for an example, where we get rgbdata from the 32 bpp pix. Caution: do not call pixSetPadBits(), because the alignment is wrong and you may erase part of the last pixel on each line. (8) If the pix has a colormap, it is written to file. In most situations, the alpha component is 255 for each colormap entry, which is opaque and indicates that it should be ignored. However, if any alpha component is not 255, it is assumed that the alpha values are valid, and they are written to the png file in a tRNS segment. On readback, the tRNS segment is identified, and the colormapped image with alpha is converted to a 4 spp rgba image.
Input: stream opened for write
pix
Return: 0 if OK; 1 on error
Notes: (1) This writes "raw" packed format only: 1 bpp --> pbm (P4) 2, 4, 8, 16 bpp, no colormap or grayscale colormap --> pgm (P5) 2, 4, 8 bpp with color-valued colormap, or rgb --> rgb ppm (P6) (2) 24 bpp rgb are not supported in leptonica, but this will write them out as a packed array of bytes (3 to a pixel).
Input: stream
pix
box (<optional>)
res (can use 0 for default of 300 ppi)
scale (to prevent scaling, use either 1.0 or 0.0)
Return: 0 if OK; 1 on error
Notes: (1) This writes image in PS format, optionally scaled, adjusted for the printer resolution, and with a bounding box. (2) For details on use of parameters, see pixWriteStringPS().
Input: stream
pix
Return: 0 if OK; 1 on error
Input: stream (opened for append or write)
pix
comptype (IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS,
IFF_TIFF_G3, IFF_TIFF_G4,
IFF_TIFF_LZW, IFF_TIFF_ZIP)
Return: 0 if OK, 1 on error
Notes: (1) For images with bpp > 1, this resets the comptype, if necessary, to write uncompressed data. (2) G3 and G4 are only defined for 1 bpp. (3) We only allow PACKBITS for bpp = 1, because for bpp > 1 it typically expands images that are not synthetically generated. (4) G4 compression is typically about twice as good as G3. G4 is excellent for binary compression of text/line-art, but terrible for halftones and dithered patterns. (In fact, G4 on halftones can give a file that is larger than uncompressed!) If a binary image has dithered regions, it is usually better to compress with png.
Input: pixs: all depths, colormap OK
box: (a) If box == null, image is placed, optionally scaled,
in a standard b.b. at the center of the page.
This is to be used when another program like
TeX (through epsf) places the image.
(b) If box != null, image is placed without a
b.b. at the specified page location and with
(optional) scaling. This is to be used when
you want to specify exactly where (and optionally
how big) you want the image to be.
Note that all coordinates are in PS convention,
with (0,0) at LL corner of the page:
(x,y) location of LL corner of image, in mils.
(w,h) scaled size, in mils. Use 0 to
scale with "scale" and "res" input.
res: resolution, in printer ppi. Use 0 for default (300 ppi).
scale: scale factor. If no scaling is desired, use
either 1.0 or 0.0. Scaling just resets the resolution
parameter; the actual scaling is done in the
interpreter at rendering time. This is important:
it allows you to scale the image up without
increasing the file size.
Return: ps string if OK, or null on error
Notes: (1) OK, this seems a bit complicated, because there are various ways to scale and not to scale. Here's a summary: (2) If you don't want any scaling at all:
| LEPT_DLL l_int32 pixWriteTempfile | ( | const char * | dir, |
| const char * | tail, | ||
| PIX * | pix, | ||
| l_int32 | format, | ||
| char ** | pfilename | ||
| ) |
Input: dir (directory name; use '.' for local dir; no trailing '/')
tail (<optional> tailname, including extension if any)
pix
format
&filename (<optional> return actual filename used; use
null to skip)
Return: 0 if OK; 1 on error
Notes: (1) This generates a temp filename, writes the pix to it, and optionally returns the temp filename. (2) If the filename is returned to a windows program from a DLL, use lept_free() to free it. (3) See genTempFilename() for details. We omit the time and pid here.
| LEPT_DLL l_int32 pixWriteTiff | ( | const char * | filename, |
| PIX * | pix, | ||
| l_int32 | comptype, | ||
| const char * | modestring | ||
| ) |
Input: filename (to write to)
pix
comptype (IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS,
IFF_TIFF_G3, IFF_TIFF_G4,
IFF_TIFF_LZW, IFF_TIFF_ZIP)
modestring ("a" or "w")
Return: 0 if OK, 1 on error
Notes: (1) For multi-page tiff, write the first pix with mode "w" and all subsequent pix with mode "a".
| LEPT_DLL l_int32 pixWriteTiffCustom | ( | const char * | filename, |
| PIX * | pix, | ||
| l_int32 | comptype, | ||
| const char * | modestring, | ||
| NUMA * | natags, | ||
| SARRAY * | savals, | ||
| SARRAY * | satypes, | ||
| NUMA * | nasizes | ||
| ) |
Input: filename (to write to)
pix
comptype (IFF_TIFF, IFF_TIFF_RLE, IFF_TIFF_PACKBITS,
IFF_TIFF_G3, IFF_TIFF_G4,
IFF_TIFF_LZW, IFF_TIFF_ZIP)
modestring ("a" or "w")
natags (<optional> NUMA of custom tiff tags)
savals (<optional> SARRAY of values)
satypes (<optional> SARRAY of types)
nasizes (<optional> NUMA of sizes)
Return: 0 if OK, 1 on error
Usage: (1) This writes a page image to a tiff file, with optional extra tags defined in tiff.h (2) For multi-page tiff, write the first pix with mode "w" and all subsequent pix with mode "a". (3) For the custom tiff tags: (a) The three arrays {natags, savals, satypes} must all be either NULL or defined and of equal size. (b) If they are defined, the tags are an array of integers, the vals are an array of values in string format, and the types are an array of types in string format. (c) All valid tags are defined in tiff.h. (d) The types allowed are the set of strings: "char*" "l_uint8*" "l_uint16" "l_uint32" "l_int32" "l_float64" "l_uint16-l_uint16" (note the dash; use it between the two l_uint16 vals in the val string) Of these, "char*" and "l_uint16" are the most commonly used. (e) The last array, nasizes, is also optional. It is for tags that take an array of bytes for a value, a number of elements in the array, and a type that is either "char*" or "l_uint8*" (probably either will work). Use NULL if there are no such tags. (f) VERY IMPORTANT: if there are any tags that require the extra size value, stored in nasizes, they must be written first!
| LEPT_DLL l_int32 pixWriteWebP | ( | const char * | filename, |
| PIX * | pixs, | ||
| l_int32 | quality, | ||
| l_int32 | lossless | ||
| ) |
Input: pixd (<optional>; this can be null, equal to pixs1,
or different from pixs1)
pixs1 (can be == pixd)
pixs2 (must be != pixd)
Return: pixd always
Notes: (1) This gives the XOR of two images with equal depth, aligning them to the UL corner. pixs1 and pixs2 need not have the same width and height. (2) There are 3 cases: (a) pixd == null, (src1 ^ src2) --> new pixd (b) pixd == pixs1, (src1 ^ src2) --> src1 (in-place) (c) pixd != pixs1, (src1 ^ src2) --> input pixd (3) For clarity, if the case is known, use these patterns: (a) pixd = pixXor(NULL, pixs1, pixs2); (b) pixXor(pixs1, pixs1, pixs2); (c) pixXor(pixd, pixs1, pixs2); (4) The size of the result is determined by pixs1. (5) The depths of pixs1 and pixs2 must be equal. (6) Note carefully that the order of pixs1 and pixs2 only matters for the in-place case. For in-place, you must have pixd == pixs1. Setting pixd == pixs2 gives an incorrect result: the copy puts pixs1 image data in pixs2, and the rasterop is then between pixs2 and pixs2 (a no-op).
Input: pix (all depths; colormap OK)
&empty (<return> 1 if all bits in image data field are 0;
0 otherwise)
Return: 0 if OK; 1 on error
Notes: (1) For a binary image, if there are no fg (black) pixels, empty = 1. (2) For a grayscale image, if all pixels are black (0), empty = 1. (3) For an RGB image, if all 4 components in every pixel are 0, empty = 1. (4) For a colormapped image, if all pixel values are 0, empty = 1. The colormap is ignored.
| LEPT_DLL l_int32 pmsCreate | ( | size_t | minsize, |
| size_t | smallest, | ||
| NUMA * | numalloc, | ||
| const char * | logfile | ||
| ) |
Input: minsize (of data chunk that can be supplied by pms)
smallest (bytes of the smallest pre-allocated data chunk)
numalloc (array with the number of data chunks for each
size that are in the memory store)
logfile (use for debugging; null otherwise)
Return: 0 if OK, 1 on error
Notes: (1) This computes the size of the block of memory required and allocates it. Each chunk starts on a 32-bit word boundary. The chunk sizes are in powers of 2, starting at @smallest, and the number of levels and chunks at each level is specified by @numalloc. (2) This is intended to manage the image data for a small number of relatively large pix. The system malloc is expected to handle very large numbers of small chunks efficiently. (3) Important: set the allocators and call this function before any pix have been allocated. Destroy all the pix in the normal way before calling pmsDestroy(). (4) The pms struct is stored in a static global, so this function is not thread-safe. When used, there must be only one thread per process.
| LEPT_DLL void* pmsCustomAlloc | ( | size_t | nbytes | ) |
Input: nbytes (min number of bytes in the chunk to be retrieved) Return: data (ptr to chunk)
Notes: (1) This attempts to find a suitable pre-allocated chunk. If not found, it dynamically allocates the chunk. (2) If logging is turned on, the allocations that are not taken from the memory store, and are at least as large as the minimum size the store can handle, are logged to file.
| LEPT_DLL void pmsCustomDealloc | ( | void * | data | ) |
Input: data (to be freed or returned to the storage) Return: void
| LEPT_DLL void pmsDestroy | ( | ) |
Input: (none) Return: void
Notes: (1) Important: call this function at the end of the program, after the last pix has been destroyed.
| LEPT_DLL void* pmsGetAlloc | ( | size_t | nbytes | ) |
Input: nbytes Return: data
Notes: (1) This is called when a request for pix data cannot be obtained from the preallocated memory store. After use it is freed like normal memory. (2) If logging is on, only write out allocs that are as large as the minimum size handled by the memory store. (3) size_t is lu on 64 bit platforms and u on 32 bit platforms. The C99 platform-independent format specifier for size_t is zu, but windows hasn't conformed, so we are forced to go back to C89, use lu, and cast to get platform-independence. Ugh.
Input: nbytes (min number of bytes in the chunk to be retrieved)
&level (<return>; -1 if either too small or too large)
Return: 0 if OK, 1 on error
Input: data (ptr to memory chunk)
&level (<return> level in memory store; -1 if allocated
outside the store)
Return: 0 if OK, 1 on error
| LEPT_DLL void pmsLogInfo | ( | ) |
Input: (none) Return: void
| LEPT_DLL l_int32 projectiveXformPt | ( | l_float32 * | vc, |
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_float32 * | pxp, | ||
| l_float32 * | pyp | ||
| ) |
Input: vc (vector of 8 coefficients)
(x, y) (initial point)
(&xp, &yp) (<return> transformed point)
Return: 0 if OK; 1 on error
Notes: (1) This computes the floating point location of the transformed point. (2) It does not check ptrs for returned data!
| LEPT_DLL l_int32 projectiveXformSampledPt | ( | l_float32 * | vc, |
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 * | pxp, | ||
| l_int32 * | pyp | ||
| ) |
Input: vc (vector of 8 coefficients)
(x, y) (initial point)
(&xp, &yp) (<return> transformed point)
Return: 0 if OK; 1 on error
Notes: (1) This finds the nearest pixel coordinates of the transformed point. (2) It does not check ptrs for returned data!
Input: ptaa
ipta (to the i-th pta)
x,y (point coordinates)
Return: 0 if OK; 1 on error
Input: ptaa
pta (to be added)
copyflag (L_INSERT, L_COPY, L_CLONE)
Return: 0 if OK, 1 on error
Input: n (initial number of ptrs) Return: ptaa, or null on error
Input: pta
x, y
Return: 0 if OK, 1 on error
Input: &ptaa <to be nulled> Return: void
Input: ptas (for initial points)
mat (3x3 transform matrix; canonical form)
Return: ptad (transformed points), or null on error
| LEPT_DLL PTAA* ptaaGetBoundaryPixels | ( | PIX * | pixs, |
| l_int32 | type, | ||
| l_int32 | connectivity, | ||
| BOXA ** | pboxa, | ||
| PIXA ** | ppixa | ||
| ) |
Input: pixs (1 bpp)
type (L_BOUNDARY_FG, L_BOUNDARY_BG)
connectivity (4 or 8)
&boxa (<optional return> bounding boxes of the c.c.)
&pixa (<optional return> pixa of the c.c.)
Return: ptaa, or null on error
Notes: (1) This generates a ptaa of either fg or bg boundary pixels, where each pta has the boundary pixels for a connected component. (2) We can't simply find all the boundary pixels and then select those within the bounding box of each component, because bounding boxes can overlap. It is necessary to extract and dilate or erode each component separately. Note also that special handling is required for bg pixels when the component touches the pix boundary.
Input: ptaa Return: count, or 0 if no ptaa
| LEPT_DLL l_int32 ptaaGetPt | ( | PTAA * | ptaa, |
| l_int32 | ipta, | ||
| l_int32 | jpt, | ||
| l_float32 * | px, | ||
| l_float32 * | py | ||
| ) |
Input: ptaa
ipta (to the i-th pta)
jpt (index to the j-th pt in the pta)
&x (<optional return> float x value)
&y (<optional return> float y value)
Return: 0 if OK; 1 on error
Input: ptaa
index (to the i-th pta)
accessflag (L_COPY or L_CLONE)
Return: pta, or null on error
Input: pixs (32 bpp, of indices of c.c.)
&ncc (<optional return> number of connected components)
Return: ptaa, or null on error
Notes: (1) The pixel values in @pixs are the index of the connected component to which the pixel belongs; @pixs is typically generated from a 1 bpp pix by pixConnCompTransform(). Background pixels in the generating 1 bpp pix are represented in @pixs by 0. We do not check that the pixel values are correctly labelled. (2) Each pta in the returned ptaa gives the pixel locations corresponding to a connected component, with the label of each given by the index of the pta into the ptaa. (3) Initialize with the first pta in ptaa being empty and representing the background value (index 0) in the pix.
Input: ptaa (can have non-null ptrs in the ptr array)
pta (to be replicated into the entire ptr array)
Return: 0 if OK; 1 on error
Input: ptaad (dest ptaa; add to this one)
ptaas (source ptaa; add from this one)
istart (starting index in ptaas)
iend (ending index in ptaas; use -1 to cat all)
Return: 0 if OK, 1 on error
Notes: (1) istart < 0 is taken to mean 'read from the start' (istart = 0) (2) iend < 0 means 'read to the end' (3) if ptaas == NULL, this is a no-op
Input: filename Return: ptaa, or null on error
Input: stream Return: ptaa, or null on error
Input: ptaa
index (to the index-th pta)
pta (insert and replace any existing one)
Return: 0 if OK, 1 on error
Notes: (1) Any existing pta is destroyed, and the input one is inserted in its place. (2) If the index is invalid, return 1 (error)
Input: ptaas
naindex (na that maps from the new ptaa to the input ptaa)
Return: ptaad (sorted), or null on error
Input: ptaa Return: 0 if OK, 1 on error
Notes: (1) This identifies the largest index containing a pta that has any points within it, destroys all pta above that index, and resets the count.
Input: filename
ptaa
type (0 for float values; 1 for integer values)
Return: 0 if OK, 1 on error
Input: stream
ptaa
type (0 for float values; 1 for integer values)
Return: 0 if OK; 1 on error
Input: pta Return: ptr to same pta, or null on error
Input: pta
x, y (point)
Return: 1 if contained, 0 otherwise or on error
Input: pta Return: box (minimum containing all points in the pta), or null on error
Notes: (1) For 2 corners, the order of the 2 points is UL, LR. For 4 corners, the order of points is UL, UR, LL, LR.
Input: pta
ncorners (2 or 4 for the representation of each box)
Return: boxa (with one box for each 2 or 4 points in the pta),
or null on error
Notes: (1) For 2 corners, the order of the 2 points is UL, LR. For 4 corners, the order of points is UL, UR, LL, LR. (2) Each derived box is the minimum size containing all corners.
Input: ptas
istart (starting index in ptas)
iend (ending index in ptas; use 0 to copy to end)
Return: 0 if OK, 1 on error
Input: n (initial array sizes) Return: pta, or null on error.
Input: nax (<optional> can be null)
nay
Return: pta, or null on error.
Input: ptas
xs, ys (start point; must be in ptas)
Return: ptad (cyclic permutation, starting and ending at (xs, ys)),
or null on error
Notes: (1) Check to ensure that (a) ptas is a closed path where the first and last points are identical, and (b) the resulting pta also starts and ends on the same point (which in this case is (xs, ys)).
Input: &pta (<to be nulled>) Return: void
Note:
| LEPT_DLL l_int32 ptaFindPtByHash | ( | PTA * | pta, |
| L_DNAHASH * | dahash, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_int32 * | pindex | ||
| ) |
Input: pta
dahash (built from pta)
x, y (arbitrary points)
&index (<return> index into pta if (x,y) is in pta;
-1 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) Fast lookup in dnaHash associated with a pta, to see if a random point (x,y) is already stored in the hash table.
Input: pta
&nax (<optional return> numa of x array)
&nay (<optional return> numa of y array)
Return: 0 if OK; 1 on error or if pta is empty
Notes: (1) This copies the internal arrays into new Numas.
Input: pixs (1 bpp)
type (L_BOUNDARY_FG, L_BOUNDARY_BG)
Return: pta, or null on error
Notes: (1) This generates a pta of either fg or bg boundary pixels. (2) See also pixGeneratePtaBoundary() for rendering of fg boundary pixels.
Input: pta Return: box, or null on error
Notes: (1) This is used when the pta represents a set of points in a two-dimensional image. It returns the box of minimum size containing the pts in the pta.
Input: pta Return: count, or 0 if no pta
| LEPT_DLL l_int32 ptaGetCubicLSF | ( | PTA * | pta, |
| l_float32 * | pa, | ||
| l_float32 * | pb, | ||
| l_float32 * | pc, | ||
| l_float32 * | pd, | ||
| NUMA ** | pnafit | ||
| ) |
Input: pta
&a (<optional return> coeff a of LSF: y = ax^3 + bx^2 + cx + d)
&b (<optional return> coeff b of LSF)
&c (<optional return> coeff c of LSF)
&d (<optional return> coeff d of LSF)
&nafit (<optional return> numa of least square fit)
Return: 0 if OK, 1 on error
Notes: (1) This does a cubic least square fit to the set of points in @pta. That is, it finds coefficients a, b, c and d that minimize:
sum over i of (yi - a*xi*xi*xi - b*xi*xi - c*xi - d)^2
Differentiate this expression w/rt a, b, c and d, and solve the resulting four equations for these coefficients in terms of various sums over the input data (xi, yi). The four equations are in the form: f[0][0]a + f[0][1]b + f[0][2]c + f[0][3]d = g[0] f[1][0]a + f[1][1]b + f[1][2]c + f[1][3]d = g[1] f[2][0]a + f[2][1]b + f[2][2]c + f[2][3]d = g[2] f[3][0]a + f[3][1]b + f[3][2]c + f[3][3]d = g[3] (2) If @nafit is defined, this returns an array of fitted values, corresponding to the two implicit Numa arrays (nax and nay) in pta. Thus, just as you can plot the data in pta as nay vs. nax, you can plot the cubic least square fit as nafit vs. nax.
Input: ptas (input pts)
box
Return: ptad (of pts in ptas that are inside the box), or null on error
Input: pta
index (into arrays)
&x (<optional return> integer x value)
&y (<optional return> integer y value)
Return: 0 if OK; 1 on error
Input: pta
&a (<optional return> slope a of least square fit: y = ax + b)
&b (<optional return> intercept b of least square fit)
&nafit (<optional return> numa of least square fit)
Return: 0 if OK, 1 on error
Notes: (1) Either or both &a and &b must be input. They determine the type of line that is fit. (2) If both &a and &b are defined, this returns a and b that minimize:
sum (yi - axi -b)^2
i
The method is simple: differentiate this expression w/rt a and b,
and solve the resulting two equations for a and b in terms of
various sums over the input data (xi, yi).
(3) We also allow two special cases, where either a = 0 or b = 0: (a) If &a is given and &b = null, find the linear LSF that goes through the origin (b = 0). (b) If &b is given and &a = null, find the linear LSF with zero slope (a = 0). (4) If @nafit is defined, this returns an array of fitted values, corresponding to the two implicit Numa arrays (nax and nay) in pta. Thus, just as you can plot the data in pta as nay vs. nax, you can plot the linear least square fit as nafit vs. nax.
Input: pixs (any depth)
x, y (pixel from which we search for nearest neighbors)
conn (4 or 8 connectivity)
Return: pta, or null on error
Notes: (1) Generates a pta of all valid neighbor pixel locations, or null on error.
Input: pixs (1 bpp)
box (<optional> can be null)
Return: pta, or null on error
Notes: (1) Generates a pta of fg pixels in the pix, within the box. If box == NULL, it uses the entire pix.
Input: pta
index (into arrays)
&x (<optional return> float x value)
&y (<optional return> float y value)
Return: 0 if OK; 1 on error
| LEPT_DLL l_int32 ptaGetQuadraticLSF | ( | PTA * | pta, |
| l_float32 * | pa, | ||
| l_float32 * | pb, | ||
| l_float32 * | pc, | ||
| NUMA ** | pnafit | ||
| ) |
Input: pta
&a (<optional return> coeff a of LSF: y = ax^2 + bx + c)
&b (<optional return> coeff b of LSF: y = ax^2 + bx + c)
&c (<optional return> coeff c of LSF: y = ax^2 + bx + c)
&nafit (<optional return> numa of least square fit)
Return: 0 if OK, 1 on error
Notes: (1) This does a quadratic least square fit to the set of points in @pta. That is, it finds coefficients a, b and c that minimize:
sum over i of (yi - a*xi*xi - b*xi - c)^2
The method is simple: differentiate this expression w/rt a, b and c, and solve the resulting three equations for these coefficients in terms of various sums over the input data (xi, yi). The three equations are in the form: f[0][0]a + f[0][1]b + f[0][2]c = g[0] f[1][0]a + f[1][1]b + f[1][2]c = g[1] f[2][0]a + f[2][1]b + f[2][2]c = g[2] (2) If @nafit is defined, this returns an array of fitted values, corresponding to the two implicit Numa arrays (nax and nay) in pta. Thus, just as you can plot the data in pta as nay vs. nax, you can plot the quadratic least square fit as nafit vs. nax.
| LEPT_DLL l_int32 ptaGetQuarticLSF | ( | PTA * | pta, |
| l_float32 * | pa, | ||
| l_float32 * | pb, | ||
| l_float32 * | pc, | ||
| l_float32 * | pd, | ||
| l_float32 * | pe, | ||
| NUMA ** | pnafit | ||
| ) |
Input: pta
&a (<optional return> coeff a of LSF:
y = ax^4 + bx^3 + cx^2 + dx + e)
&b (<optional return> coeff b of LSF)
&c (<optional return> coeff c of LSF)
&d (<optional return> coeff d of LSF)
&e (<optional return> coeff e of LSF)
&nafit (<optional return> numa of least square fit)
Return: 0 if OK, 1 on error
Notes: (1) This does a quartic least square fit to the set of points in @pta. That is, it finds coefficients a, b, c, d and e that minimize:
sum over i of (yi - a*xi*xi*xi*xi - b*xi*xi*xi - c*xi*xi - d*xi - e)^2
Differentiate this expression w/rt a, b, c, d and e, and solve the resulting five equations for these coefficients in terms of various sums over the input data (xi, yi). The five equations are in the form: f[0][0]a + f[0][1]b + f[0][2]c + f[0][3]d + f[0][4]e = g[0] f[1][0]a + f[1][1]b + f[1][2]c + f[1][3]d + f[1][4]e = g[1] f[2][0]a + f[2][1]b + f[2][2]c + f[2][3]d + f[2][4]e = g[2] f[3][0]a + f[3][1]b + f[3][2]c + f[3][3]d + f[3][4]e = g[3] f[4][0]a + f[4][1]b + f[4][2]c + f[4][3]d + f[4][4]e = g[4] (2) If @nafit is defined, this returns an array of fitted values, corresponding to the two implicit Numa arrays (nax and nay) in pta. Thus, just as you can plot the data in pta as nay vs. nax, you can plot the quartic least square fit as nafit vs. nax.
| LEPT_DLL l_int32 ptaGetRange | ( | PTA * | pta, |
| l_float32 * | pminx, | ||
| l_float32 * | pmaxx, | ||
| l_float32 * | pminy, | ||
| l_float32 * | pmaxy | ||
| ) |
Input: pta
&minx (<optional return> min value of x)
&maxx (<optional return> max value of x)
&miny (<optional return> min value of y)
&maxy (<optional return> max value of y)
Return: 0 if OK, 1 on error
Notes: (1) We can use pts to represent pairs of floating values, that are not necessarily tied to a two-dimension region. For example, the pts can represent a general function y(x).
| LEPT_DLL l_int32 ptaGetSortIndex | ( | PTA * | ptas, |
| l_int32 | sorttype, | ||
| l_int32 | sortorder, | ||
| NUMA ** | pnaindex | ||
| ) |
Input: ptas
sorttype (L_SORT_BY_X, L_SORT_BY_Y)
sortorder (L_SORT_INCREASING, L_SORT_DECREASING)
&naindex (<return> index of sorted order into
original array)
Return: 0 if OK, 1 on error
Input: pta
index (at which pt is to be inserted)
x, y (point values)
Return: 0 if OK; 1 on error
Input: pta1, pta2 Return: ptad (intersection of the point sets), or null on error
Notes: (1) See sarrayIntersectionByAset() for the approach. (2) The key is a 64-bit hash from the (x,y) pair. (3) This is slower than ptaIntersectionByHash(), mostly because of the nlogn sort to build up the rbtree. Do not use for large numbers of points (say, > 1M).
Input: pta1, pta2 Return: ptad (intersection of the point sets), or null on error
Notes: (1) This is faster than ptaIntersectionByAset(), because the bucket lookup is O(n). It should be used if the pts are integers (e.g., representing pixel positions).
Input: ptad (dest pta; add to this one)
ptas (source pta; add from this one)
istart (starting index in ptas)
iend (ending index in ptas; use -1 to cat all)
Return: 0 if OK, 1 on error
Notes: (1) istart < 0 is taken to mean 'read from the start' (istart = 0) (2) iend < 0 means 'read to the end' (3) if ptas == NULL, this is a no-op
| LEPT_DLL l_int32 ptaNoisyLinearLSF | ( | PTA * | pta, |
| l_float32 | factor, | ||
| PTA ** | pptad, | ||
| l_float32 * | pa, | ||
| l_float32 * | pb, | ||
| l_float32 * | pmederr, | ||
| NUMA ** | pnafit | ||
| ) |
Input: pta
factor (reject outliers with error greater than this
number of medians; typically ~ 3)
&ptad (<optional return> with outliers removed)
&a (<optional return> slope a of least square fit: y = ax + b)
&b (<optional return> intercept b of least square fit)
&mederr (<optional return> median error)
&nafit (<optional return> numa of least square fit to ptad)
Return: 0 if OK, 1 on error
Notes: (1) This does a linear least square fit to the set of points in @pta. It then evaluates the errors and removes points whose error is >= factor * median_error. It then re-runs the linear LSF on the resulting points. (2) Either or both &a and &b must be input. They determine the type of line that is fit. (3) The median error can give an indication of how good the fit is likely to be.
| LEPT_DLL l_int32 ptaNoisyQuadraticLSF | ( | PTA * | pta, |
| l_float32 | factor, | ||
| PTA ** | pptad, | ||
| l_float32 * | pa, | ||
| l_float32 * | pb, | ||
| l_float32 * | pc, | ||
| l_float32 * | pmederr, | ||
| NUMA ** | pnafit | ||
| ) |
Input: pta
factor (reject outliers with error greater than this
number of medians; typically ~ 3)
&ptad (<optional return> with outliers removed)
&a (<optional return> coeff a of LSF: y = ax^2 + bx + c)
&b (<optional return> coeff b of LSF: y = ax^2 + bx + c)
&c (<optional return> coeff c of LSF: y = ax^2 + bx + c)
&mederr (<optional return> median error)
&nafit (<optional return> numa of least square fit to ptad)
Return: 0 if OK, 1 on error
Notes: (1) This does a quadratic least square fit to the set of points in @pta. It then evaluates the errors and removes points whose error is >= factor * median_error. It then re-runs a quadratic LSF on the resulting points.
Input: pta (vertices of a polygon)
x, y (point to be tested)
&inside (<return> 1 if inside; 0 if outside or on boundary)
Return: 0 if OK, 1 on error
The abs value of the sum of the angles subtended from a point by the sides of a polygon, when taken in order traversing the polygon, is 0 if the point is outside the polygon and 2*pi if inside. The sign will be positive if traversed cw and negative if ccw.
Input: filename Return: pta, or null on error
Input: stream Return: pta, or null on error
Input: ptas (assumed to be integer values) Return: ptad (with duplicates removed), or null on error
Notes: (1) This is slower than ptaRemoveDupsByHash(), mostly because of the nlogn sort to build up the rbtree. Do not use for large numbers of points (say, > 1M).
Input: ptas (assumed to be integer values)
&ptad (<return> unique set of pts; duplicates removed)
&dahash (<optional return> dnahash used for lookup)
Return: 0 if OK, 1 on error
Notes: (1) Generates a pta with unique values. (2) The dnahash is built up with ptad to assure uniqueness. It can be used to find if a point is in the set: ptaFindPtByHash(ptad, dahash, x, y, &index) (3) The hash of the (x,y) location is simple and fast. It scales up with the number of buckets to insure a fairly random bucket selection for adjacent points. (4) A Dna is used rather than a Numa because we need accurate representation of 32-bit integers that are indices into ptas. Integer --> float --> integer conversion makes errors for integers larger than 10M. (5) This is faster than ptaRemoveDupsByAset(), because the bucket lookup is O(n), although there is a double-loop lookup within the dna in each bucket.
Input: pta
index (of point to be removed)
Return: 0 if OK, 1 on error
Notes: (1) This shifts pta[i] --> pta[i - 1] for all i > index. (2) It should not be used repeatedly on large arrays, because the function is O(n).
| LEPT_DLL PTA* ptaReplicatePattern | ( | PTA * | ptas, |
| PIX * | pixp, | ||
| PTA * | ptap, | ||
| l_int32 | cx, | ||
| l_int32 | cy, | ||
| l_int32 | w, | ||
| l_int32 | h | ||
| ) |
Input: ptas ("sparse" input pta)
pixp (<optional> 1 bpp pattern, to be replicated in output pta)
ptap (<optional> set of pts, to be replicated in output pta)
cx, cy (reference point in pattern)
w, h (clipping sizes for output pta)
Return: ptad (with all points of replicated pattern), or null on error
Notes: (1) You can use either the image @pixp or the set of pts @ptap. (2) The pattern is placed with its reference point at each point in ptas, and all the fg pixels are collected into ptad. For @pixp, this is equivalent to blitting pixp at each point in ptas, and then converting the resulting pix to a pta.
Input: ptas
type (0 for float values; 1 for integer values)
Return: ptad (reversed pta), or null on error
Input: ptas (for initial points)
(xc, yc) (location of center of rotation)
angle (rotation in radians; clockwise is positive)
Return: 0 if OK; 1 on error
Notes: (1) See createMatrix2dRotate() for details of transform. (2) This transform can be thought of as composed of the sum of two parts: (a) an (x,y)-dependent rotation about the origin: xr = x * cosa - y * sina yr = x * sina + y * cosa (b) an (x,y)-independent translation that depends on the rotation center and the angle: xt = xc - xc * cosa + yc * sina yt = yc - xc * sina - yc * cosa The translation part (xt,yt) is equal to the difference between the center (xc,yc) and the location of the center after it is rotated about the origin.
Input: ptas (for initial points)
scalex (horizontal scale factor)
scaley (vertical scale factor)
Return: 0 if OK; 1 on error
Notes: (1) See createMatrix2dScale() for details of transform.
Input: pta
index (into arrays)
x, y
Return: 0 if OK; 1 on error
Input: ptas
sorttype (L_SORT_BY_X, L_SORT_BY_Y)
sortorder (L_SORT_INCREASING, L_SORT_DECREASING)
&naindex (<optional return> index of sorted order into
original array)
Return: ptad (sorted version of ptas), or null on error
Input: ptas
naindex (na that maps from the new pta to the input pta)
Return: ptad (sorted), or null on error
Input: ptas
subfactor (subsample factor, >= 1)
Return: ptad (evenly sampled pt values from ptas), or null on error
Input: pta1, pta2
Return: bval which is 1 if they have any elements in common;
0 otherwise or on error.
| LEPT_DLL PTA* ptaTransform | ( | PTA * | ptas, |
| l_int32 | shiftx, | ||
| l_int32 | shifty, | ||
| l_float32 | scalex, | ||
| l_float32 | scaley | ||
| ) |
Input: pta
shiftx, shifty
scalex, scaley
Return: pta, or null on error
Notes: (1) Shift first, then scale.
Input: ptas (for initial points)
transx (x component of translation wrt. the origin)
transy (y component of translation wrt. the origin)
Return: ptad (translated points), or null on error
Notes: (1) See createMatrix2dTranslate() for details of transform.
Input: ptas Return: ptad (with x and y values swapped), or null on error
Input: pta1, pta2 Return: ptad (with the union of the set of points), or null on error
Notes: (1) See sarrayRemoveDupsByAset() for the approach. (2) The key is a 64-bit hash from the (x,y) pair. (3) This is slower than ptaUnionByHash(), mostly because of the nlogn sort to build up the rbtree. Do not use for large numbers of points (say, > 1M). (4) The *Aset() functions use the sorted l_Aset, which is just an rbtree in disguise.
Input: pta1, pta2 Return: ptad (with the union of the set of points), or null on error
Notes: (1) This is faster than ptaUnionByAset(), because the bucket lookup is O(n). It should be used if the pts are integers (e.g., representing pixel positions).
Input: filename
pta
type (0 for float values; 1 for integer values)
Return: 0 if OK, 1 on error
Input: stream
pta
type (0 for float values; 1 for integer values)
Return: 0 if OK; 1 on error
Input: size of ptr array to be alloc'd Return: paa, or null on error
Notes: (1) The ptraa is generated with a fixed size that cannot change. The ptra can be generated and inserted randomly into this array.
Input: ptra
item (generic ptr to a struct)
Return: 0 if OK, 1 on error
Notes: (1) This adds the element to the next location beyond imax, which is the largest occupied ptr in the array. This is what you expect from a stack, where all ptrs up to and including imax are occupied, but here the occupation of items in the array is entirely arbitrary.
Input: &paa (<to be nulled>)
freeflag (TRUE to free each remaining item in each ptra)
warnflag (TRUE to warn if any remaining items are not destroyed)
Return: void
Notes: (1) See ptraDestroy() for use of @freeflag and @warnflag. (2) To destroy the ptraa, we destroy each ptra, then the ptr array, then the ptraa, and then null the contents of the input ptr.
Input: ptraa Return: ptra, or null on error
Notes: (1) This 'flattens' the ptraa to a ptra, taking the items in each ptra, in order, starting with the first ptra, etc. (2) As a side-effect, the ptra are all removed from the ptraa and destroyed, leaving an empty ptraa.
Input: ptraa
index (location in array)
accessflag (L_HANDLE_ONLY, L_REMOVE)
Return: ptra (at index location), or NULL on error or if there
is no ptra there.
Notes: (1) This returns the ptra ptr. If @accessflag == L_HANDLE_ONLY, the ptra is left on the ptraa. If @accessflag == L_REMOVE, the ptr in the ptraa is set to NULL, and the caller is responsible for disposing of the ptra (either putting it back on the ptraa, or destroying it). (2) This returns NULL if there is no Ptra at the index location.
Input: ptraa
&size (<return> size of ptr array)
Return: 0 if OK; 1 on error
Input: ptraa
index (location in array for insertion)
ptra (to be inserted)
Return: 0 if OK; 1 on error
Notes: (1) Caller should check return value. On success, the Ptra is inserted in the Ptraa and is owned by it. However, on error, the Ptra remains owned by the caller.
Input: ptra Return: 0 if OK, 1 on error
Notes: (1) This compacts the items on the array, filling any empty ptrs. (2) This does not change the size of the array of ptrs.
Input: ptra (array of pdf strings, each for a single-page pdf file)
fileout (concatenated pdf file)
Return: 0 if OK, 1 on error
Notes: (1) This only works with leptonica-formatted single-page pdf files.
| LEPT_DLL l_int32 ptraConcatenatePdfToData | ( | L_PTRA * | pa_data, |
| SARRAY * | sa, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
Input: ptra (array of pdf strings, each for a single-page pdf file)
sarray (<optional> of pathnames for input pdf files)
&data (<return> concatenated pdf data in memory)
&nbytes (<return> number of bytes in pdf data)
Return: 0 if OK, 1 on error
Notes: (1) This only works with leptonica-formatted single-page pdf files. pdf files generated by other programs will have unpredictable (and usually bad) results. The requirements for each pdf file: (a) The Catalog and Info objects are the first two. (b) Object 3 is Pages (c) Object 4 is Page (d) The remaining objects are Contents, XObjects, and ColorSpace (2) We remove trailers from each page, and append the full trailer for all pages at the end. (3) For all but the first file, remove the ID and the first 3 objects (catalog, info, pages), so that each subsequent file has only objects of these classes: Page, Contents, XObject, ColorSpace (Indexed RGB). For those objects, we substitute these refs to objects in the local file: Page: Parent(object 3), Contents, XObject(typically multiple) XObject: [ColorSpace if indexed] The Pages object on the first page (object 3) has a Kids array of references to all the Page objects, with a Count equal to the number of pages. Each Page object refers back to this parent.
Input: size of ptr array to be alloc'd (0 for default) Return: pa, or null on error
Input: &ptra (<to be nulled>)
freeflag (TRUE to free each remaining item in the array)
warnflag (TRUE to warn if any remaining items are not destroyed)
Return: void
Notes: (1) If @freeflag == TRUE, frees each item in the array. (2) If @freeflag == FALSE and warnflag == TRUE, and there are items on the array, this gives a warning and destroys the array. If these items are not owned elsewhere, this will cause a memory leak of all the items that were on the array. So if the items are not owned elsewhere and require their own destroy function, they must be destroyed before the ptra. (3) If warnflag == FALSE, no warnings will be issued. This is useful if the items are owned elsewhere, such as a PixMemoryStore(). (4) To destroy the ptra, we destroy the ptr array, then the ptra, and then null the contents of the input ptr.
Input: ptra
&count (<return> actual number of items on the ptr array)
Return: 0 if OK; 1 on error
Notes: (1) The actual number of items on the ptr array, pa->nactual, will be smaller than pa->n if the array is not compacted.
Input: ptra
&maxindex (<return> index of last item in the array)
Return: 0 if OK; 1 on error
Notes: (1) The largest index to an item in the array is @maxindex. @maxindex is one less than the number of items that would be in the array if there were no null pointers between 0 and @maxindex - 1. However, because the internal ptr array need not be compacted, there may be null pointers at indices below @maxindex; for example, if items have been removed. (2) When an item is added to the end of the array, it goes into pa->array[maxindex + 1], and maxindex is then incremented by 1. (3) If there are no items in the array, this returns @maxindex = -1.
Input: ptra
index (of element to be retrieved)
Return: a ptr to the element, or null on error
Notes: (1) This returns a ptr to the item. You must cast it to the type of item. Do not destroy it; the item belongs to the Ptra. (2) This can access all possible items on the ptr array. If an item doesn't exist, it returns null.
Input: ptra
index (location in ptra to insert new value)
item (generic ptr to a struct; can be null)
shiftflag (L_AUTO_DOWNSHIFT, L_MIN_DOWNSHIFT, L_FULL_DOWNSHIFT)
Return: 0 if OK, 1 on error
Notes: (1) This checks first to see if the location is valid, and then if there is presently an item there. If there is not, it is simply inserted into that location. (2) If there is an item at the insert location, items must be moved down to make room for the insert. In the downward shift there are three options, given by @shiftflag.
Input: ptra1 (add to this one)
ptra2 (appended to ptra1, and emptied of items; can be null)
Return: 0 if OK, 1 on error
Input: ptra
index (element to be removed)
flag (L_NO_COMPACTION, L_COMPACTION)
Return: item, or null on error
Notes: (1) If flag == L_NO_COMPACTION, this removes the item and nulls the ptr on the array. If it takes the last item in the array, pa->n is reduced to the next item. (2) If flag == L_COMPACTION, this compacts the array for all i >= index. It should not be used repeatedly on large arrays, because compaction is O(n). (3) The ability to remove without automatic compaction allows removal with cost O(1).
Input: ptra Return: item, or null on error or if the array is empty
Input: ptra
index (element to be replaced)
item (new generic ptr to a struct; can be null)
freeflag (TRUE to free old item; FALSE to return it)
Return: item (old item, if it exists and is not freed),
or null on error
Input: ptra Return: 0 if OK, 1 on error
Input: ptra
index1
index2
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 quadtreeGetChildren | ( | FPIXA * | fpixa, |
| l_int32 | level, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_float32 * | pval00, | ||
| l_float32 * | pval10, | ||
| l_float32 * | pval01, | ||
| l_float32 * | pval11 | ||
| ) |
Input: fpixa (mean, variance or root variance)
level, x, y (of current pixel)
&val00, val10, val01, val11 (<return> child pixel values)
Return: 0 if OK, 1 on error
Notes: (1) Check return value for error. On error, all return vals are 0.0. (2) The returned child pixels are located at: level + 1 (2x, 2y), (2x+1, 2y), (2x, 2y+1), (2x+1, 2y+1)
| LEPT_DLL l_int32 quadtreeGetParent | ( | FPIXA * | fpixa, |
| l_int32 | level, | ||
| l_int32 | x, | ||
| l_int32 | y, | ||
| l_float32 * | pval | ||
| ) |
Input: fpixa (mean, variance or root variance)
level, x, y (of current pixel)
&val (<return> parent pixel value), or 0.0 on error.
Return: 0 if OK, 1 on error
Notes: (1) Check return value for error. On error, val is returned as 0.0. (2) The parent is located at: level - 1 (x/2, y/2)
Input: w, h (of image) Return: maxlevels (maximum number of levels allowed), or -1 on error
Notes: (1) The criterion for maxlevels is that the subdivision not go down below the single pixel level. The 1.5 factor is intended to keep any rectangle from accidentally having zero dimension due to integer truncation.
| LEPT_DLL void rasteropHipLow | ( | l_uint32 * | data, |
| l_int32 | pixh, | ||
| l_int32 | depth, | ||
| l_int32 | wpl, | ||
| l_int32 | y, | ||
| l_int32 | h, | ||
| l_int32 | shift | ||
| ) |
Input: data (ptr to image data)
pixh (height)
depth (depth)
wpl (wpl)
y (y val of UL corner of rectangle)
h (height of rectangle)
shift (+ shifts data to the left in a horizontal column)
Return: void
Notes: (1) This clears the pixels that are left exposed after the rasterop. Therefore, for Pix with depth > 1, these pixels become black, and must be subsequently SET if they are to be white. For example, see pixRasteropHip(). (2) This function performs clipping and calls shiftDataHorizontalLine() to do the in-place rasterop on each line.
| LEPT_DLL void rasteropLow | ( | l_uint32 * | datad, |
| l_int32 | dpixw, | ||
| l_int32 | dpixh, | ||
| l_int32 | depth, | ||
| l_int32 | dwpl, | ||
| l_int32 | dx, | ||
| l_int32 | dy, | ||
| l_int32 | dw, | ||
| l_int32 | dh, | ||
| l_int32 | op, | ||
| l_uint32 * | datas, | ||
| l_int32 | spixw, | ||
| l_int32 | spixh, | ||
| l_int32 | swpl, | ||
| l_int32 | sx, | ||
| l_int32 | sy | ||
| ) |
Input: datad (ptr to dest image data)
dpixw (width of dest)
dpixh (height of dest)
depth (depth of src and dest)
dwpl (wpl of dest)
dx (x val of UL corner of dest rectangle)
dy (y val of UL corner of dest rectangle)
dw (width of dest rectangle)
dh (height of dest rectangle)
op (op code)
datas (ptr to src image data)
spixw (width of src)
spixh (height of src)
swpl (wpl of src)
sx (x val of UL corner of src rectangle)
sy (y val of UL corner of src rectangle)
Return: void
Action: Scales width, performs clipping, checks alignment, and dispatches for the rasterop.
Warning: the two images must have equal depth. This is not checked.
| LEPT_DLL void rasteropUniLow | ( | l_uint32 * | datad, |
| l_int32 | dpixw, | ||
| l_int32 | dpixh, | ||
| l_int32 | depth, | ||
| l_int32 | dwpl, | ||
| l_int32 | dx, | ||
| l_int32 | dy, | ||
| l_int32 | dw, | ||
| l_int32 | dh, | ||
| l_int32 | op | ||
| ) |
Input: datad (ptr to dest image data)
dpixw (width of dest)
dpixh (height of dest)
depth (depth of dest)
dwpl (wpl of dest)
dx (x val of UL corner of dest rectangle)
dy (y val of UL corner of dest rectangle)
dw (width of dest rectangle)
dh (height of dest rectangle)
op (op code)
Return: void
Action: scales width, performs clipping, checks alignment, and dispatches for the rasterop.
| LEPT_DLL void rasteropVipLow | ( | l_uint32 * | data, |
| l_int32 | pixw, | ||
| l_int32 | pixh, | ||
| l_int32 | depth, | ||
| l_int32 | wpl, | ||
| l_int32 | x, | ||
| l_int32 | w, | ||
| l_int32 | shift | ||
| ) |
Input: data (ptr to image data)
pixw (width)
pixh (height)
depth (depth)
wpl (wpl)
x (x val of UL corner of rectangle)
w (width of rectangle)
shift (+ shifts data downward in vertical column)
Return: void
Notes: (1) This clears the pixels that are left exposed after the translation. You can consider them as pixels that are shifted in from outside the image. This can be later overridden by the incolor parameter in higher-level functions that call this. For example, for images with depth > 1, these pixels are cleared to black; to be white they must later be SET to white. See, e.g., pixRasteropVip(). (2) This function scales the width to accommodate any depth, performs clipping, and then does the in-place rasterop.
Input: &rcha Return: void
| LEPT_DLL l_int32 rchaExtract | ( | L_RCHA * | rcha, |
| NUMA ** | pnaindex, | ||
| NUMA ** | pnascore, | ||
| SARRAY ** | psatext, | ||
| NUMA ** | pnasample, | ||
| NUMA ** | pnaxloc, | ||
| NUMA ** | pnayloc, | ||
| NUMA ** | pnawidth | ||
| ) |
Input: rcha
&naindex (<optional return> indices of best templates)
&nascore (<optional return> correl scores of best templates)
&satext (<optional return> character strings of best templates)
&nasample (<optional return> indices of best samples)
&naxloc (<optional return> x-locations of templates)
&nayloc (<optional return> y-locations of templates)
&nawidth (<optional return> widths of best templates)
Return: 0 if OK, 1 on error
Notes: (1) This returns clones of the number and string arrays. They must be destroyed by the caller.
Input: &rch Return: void
| LEPT_DLL l_int32 rchExtract | ( | L_RCH * | rch, |
| l_int32 * | pindex, | ||
| l_float32 * | pscore, | ||
| char ** | ptext, | ||
| l_int32 * | psample, | ||
| l_int32 * | pxloc, | ||
| l_int32 * | pyloc, | ||
| l_int32 * | pwidth | ||
| ) |
Input: rch
&index (<optional return> index of best template)
&score (<optional return> correlation score of best template)
&text (<optional return> character string of best template)
&sample (<optional return> index of best sample)
&xloc (<optional return> x-location of template)
&yloc (<optional return> y-location of template)
&width (<optional return> width of best template)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 readHeaderJp2k | ( | const char * | filename, |
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pbps, | ||
| l_int32 * | pspp | ||
| ) |
Input: filename
&w (<optional return>)
&h (<optional return>)
&bps (<optional return>, bits/sample)
&spp (<optional return>, samples/pixel)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 readHeaderJpeg | ( | const char * | filename, |
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pspp, | ||
| l_int32 * | pycck, | ||
| l_int32 * | pcmyk | ||
| ) |
Input: filename
&w (<optional return>)
&h (<optional return>)
&spp (<optional return>, samples/pixel)
&ycck (<optional return>, 1 if ycck color space; 0 otherwise)
&cmyk (<optional return>, 1 if cmyk color space; 0 otherwise)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 readHeaderMemJp2k | ( | const l_uint8 * | data, |
| size_t | size, | ||
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pbps, | ||
| l_int32 * | pspp | ||
| ) |
Input: data
size (at least 80)
&w (<optional return>)
&h (<optional return>)
&bps (<optional return>, bits/sample)
&spp (<optional return>, samples/pixel)
Return: 0 if OK, 1 on error
Notes: (1) The ISO/IEC reference for jpeg2000 is http://www.jpeg.org/public/15444-1annexi.pdf and the file format syntax begins at page 127. (2) The Image Header Box begins with 'ihdr' = 0x69686472 in big-endian order. This typically, but not always, starts byte 44, with the big-endian data fields beginning at byte 48: h: 4 bytes w: 4 bytes spp: 2 bytes bps: 1 byte (contains bps - 1)
| LEPT_DLL l_int32 readHeaderMemJpeg | ( | const l_uint8 * | data, |
| size_t | size, | ||
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pspp, | ||
| l_int32 * | pycck, | ||
| l_int32 * | pcmyk | ||
| ) |
Input: data (const; jpeg-encoded)
size (of data)
&w (<optional return>)
&h (<optional return>)
&spp (<optional return>, samples/pixel)
&ycck (<optional return>, 1 if ycck color space; 0 otherwise)
&cmyk (<optional return>, 1 if cmyk color space; 0 otherwise)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 readHeaderMemPng | ( | const l_uint8 * | data, |
| size_t | size, | ||
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pbps, | ||
| l_int32 * | pspp, | ||
| l_int32 * | piscmap | ||
| ) |
Input: data
size (40 bytes is sufficient)
&w (<optional return>)
&h (<optional return>)
&bps (<optional return>, bits/sample)
&spp (<optional return>, samples/pixel)
&iscmap (<optional return>; input NULL to ignore)
Return: 0 if OK, 1 on error
Notes: (1) See readHeaderPng(). (2) png colortypes (see png.h: PNG_COLOR_TYPE_*): 0: gray; fully transparent (with tRNS) (1 spp) 2: RGB (3 spp) 3: colormap; colormap+alpha (with tRNS) (1 spp) 4: gray + alpha (2 spp) 6: RGBA (4 spp) Note: 0 and 3 have the alpha information in a tRNS chunk 4 and 6 have separate alpha samples with each pixel.
| LEPT_DLL l_int32 readHeaderMemPnm | ( | const l_uint8 * | cdata, |
| size_t | size, | ||
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pd, | ||
| l_int32 * | ptype, | ||
| l_int32 * | pbps, | ||
| l_int32 * | pspp | ||
| ) |
Input: cdata (const; pnm-encoded)
size (of data)
&w (<optional return>)
&h (<optional return>)
&d (<optional return>)
&type (<optional return> pnm type)
&bps (<optional return>, bits/sample)
&spp (<optional return>, samples/pixel)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 readHeaderMemTiff | ( | const l_uint8 * | cdata, |
| size_t | size, | ||
| l_int32 | n, | ||
| l_int32 * | pwidth, | ||
| l_int32 * | pheight, | ||
| l_int32 * | pbps, | ||
| l_int32 * | pspp, | ||
| l_int32 * | pres, | ||
| l_int32 * | pcmap, | ||
| l_int32 * | pformat | ||
| ) |
Input: cdata (const; tiff-encoded)
size (size of data)
n (page image number: 0-based)
&width (<return>)
&height (<return>)
&bps (<return> bits per sample -- 1, 2, 4 or 8)
&spp (<return>; samples per pixel -- 1 or 3)
&res (<optional return>; resolution in x dir; NULL to ignore)
&cmap (<optional return>; colormap exists; input NULL to ignore)
&format (<optional return>; tiff format; input NULL to ignore)
Return: 0 if OK, 1 on error
Notes: (1) Use TIFFClose(); TIFFCleanup() doesn't free internal memstream.
| LEPT_DLL l_int32 readHeaderMemWebP | ( | const l_uint8 * | data, |
| size_t | size, | ||
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pspp | ||
| ) |
| LEPT_DLL l_int32 readHeaderPng | ( | const char * | filename, |
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pbps, | ||
| l_int32 * | pspp, | ||
| l_int32 * | piscmap | ||
| ) |
Input: filename
&w (<optional return>)
&h (<optional return>)
&bps (<optional return>, bits/sample)
&spp (<optional return>, samples/pixel)
&iscmap (<optional return>)
Return: 0 if OK, 1 on error
Notes: (1) If there is a colormap, iscmap is returned as 1; else 0. (2) For gray+alpha, although the png records bps = 16, we consider this as two 8 bpp samples (gray and alpha). When a gray+alpha is read, it is converted to 32 bpp RGBA.
| LEPT_DLL l_int32 readHeaderPnm | ( | const char * | filename, |
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pd, | ||
| l_int32 * | ptype, | ||
| l_int32 * | pbps, | ||
| l_int32 * | pspp | ||
| ) |
Input: filename
&w (<optional return>)
&h (<optional return>)
&d (<optional return>)
&type (<optional return> pnm type)
&bps (<optional return>, bits/sample)
&spp (<optional return>, samples/pixel)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 readHeaderSpix | ( | const char * | filename, |
| l_int32 * | pwidth, | ||
| l_int32 * | pheight, | ||
| l_int32 * | pbps, | ||
| l_int32 * | pspp, | ||
| l_int32 * | piscmap | ||
| ) |
Input: filename
&width (<return>)
&height (<return>)
&bps (<return>, bits/sample)
&spp (<return>, samples/pixel)
&iscmap (<optional return>; input NULL to ignore)
Return: 0 if OK, 1 on error
Notes: (1) If there is a colormap, iscmap is returned as 1; else 0.
| LEPT_DLL l_int32 readHeaderTiff | ( | const char * | filename, |
| l_int32 | n, | ||
| l_int32 * | pwidth, | ||
| l_int32 * | pheight, | ||
| l_int32 * | pbps, | ||
| l_int32 * | pspp, | ||
| l_int32 * | pres, | ||
| l_int32 * | pcmap, | ||
| l_int32 * | pformat | ||
| ) |
Input: filename
n (page image number: 0-based)
&width (<return>)
&height (<return>)
&bps (<return> bits per sample -- 1, 2, 4 or 8)
&spp (<return>; samples per pixel -- 1 or 3)
&res (<optional return>; resolution in x dir; NULL to ignore)
&cmap (<optional return>; colormap exists; input NULL to ignore)
&format (<optional return>; tiff format; input NULL to ignore)
Return: 0 if OK, 1 on error
Notes: (1) If there is a colormap, cmap is returned as 1; else 0. (2) If @n
is equal to or greater than the number of images, returns 1.
| LEPT_DLL l_int32 readHeaderWebP | ( | const char * | filename, |
| l_int32 * | pw, | ||
| l_int32 * | ph, | ||
| l_int32 * | pspp | ||
| ) |
Input: &indata (<optional>; nulls indata)
oldsize (size of input data to be copied, in bytes)
newsize (size of data to be reallocated in bytes)
Return: ptr to new data, or null on error
Action: !N.B. (3) and (4)! (1) Allocates memory, initialized to 0 (2) Copies as much of the input data as possible to the new block, truncating the copy if necessary (3) Frees the input data (4) Zeroes the input data ptr
Notes: (1) If newsize <=0, just frees input data and nulls ptr (2) If input ptr is null, just callocs new memory (3) This differs from realloc in that it always allocates new memory (if newsize > 0) and initializes it to 0, it requires the amount of old data to be copied, and it takes the address of the input ptr and nulls the handle.
Input: recoga
recog (to be added and owned by the recoga; not a copy)
Return: recoga, or null on error
Input: n (initial number of recog ptrs) Return: recoga, or null on error
| LEPT_DLL L_RECOGA* recogaCreateFromPixaa | ( | PIXAA * | paa, |
| l_int32 | scalew, | ||
| l_int32 | scaleh, | ||
| l_int32 | templ_type, | ||
| l_int32 | threshold, | ||
| l_int32 | maxyshift | ||
| ) |
Input: paa (of labelled, 1 bpp images)
scalew (scale all widths to this; use 0 for no scaling)
scaleh (scale all heights to this; use 0 for no scaling)
templ_type (L_USE_AVERAGE or L_USE_ALL)
threshold (for binarization; typically ~128)
maxyshift (from nominal centroid alignment; typically 0 or 1)
Return: recoga, or null on error
Notes: (1) This is a convenience function for training from labelled data. (2) Each pixa in the paa is a set of labelled data that is used to train a recognizer (e.g., for a set of characters in a font). Each image example in the pixa is put into a class in its recognizer, defined by its character label. All examples in the same class should be similar. (3) The pixaa can be written by recogaWritePixaa(), and must contain the unscaled bitmaps used for training.
Input: recog Return: recoga, or null on error
Notes: (1) This is a convenience function for making a recoga after you have a recog. The recog is owned by the recoga. (2) For splitting connected components, the input recog must be from the material to be identified, and not a generic bootstrap recog. Those can be added later.
| LEPT_DLL l_int32 recogAddSamples | ( | L_RECOG * | recog, |
| PIXA * | pixa, | ||
| l_int32 | classindex, | ||
| l_int32 | debug | ||
| ) |
Input: recog
pixa (1 or more characters)
classindex (use -1 if not forcing into a specified class)
debug
Return: 0 if OK, 1 on error
Notes: (1) The pix in the pixa are all 1 bpp, and the character string labels are embedded in the pix. (2) Note: this function decides what class each pix belongs in. When input is from a multifont pixaa, with a valid value for @classindex, the character string label in each pix is ignored, and @classindex is used as the class index for all the pix in the pixa. Thus, for that situation we use this class index to avoid making the decision through a lookup based on the character strings embedded in the pix. (3) When a recog is initially filled with samples, the pixaa_u array is initialized to accept up to 256 different classes. When training is finished, the arrays are truncated to the actual number of classes. To pad an existing recog from the boot recognizers, training is started again; if samples from a new class are added, the pixaa_u array must be extended by adding a pixa to hold them.
Input: &recoga (<will be set to null before returning>) Return: void
Notes: (1) If a recog has a parent, the parent owns it. To destroy a recog, it must first be "orphaned".
| LEPT_DLL SARRAY* recogaExtractNumbers | ( | L_RECOGA * | recoga, |
| BOXA * | boxas, | ||
| l_float32 | scorethresh, | ||
| l_int32 | spacethresh, | ||
| BOXAA ** | pbaa, | ||
| NUMAA ** | pnaa | ||
| ) |
Input: recoga
boxas (location of components)
scorethresh (min score for which we accept a component)
spacethresh (max horizontal distance allowed between digits,
use -1 for default)
&baa (<optional return> bounding boxes of identified numbers)
&naa (<optional return> scores of identified digits)
Return: sa (of identified numbers), or null on error
Notes: (1) This extracts digit data after recogaIdentifyMultiple() or lower-level identification has taken place. (2) Each string in the returned sa contains a sequence of ascii digits in a number. (3) The horizontal distance between boxes (limited by @spacethresh) is the negative of the horizontal overlap. (4) Components with a score less than @scorethresh, which may be hyphens or other small characters, will signal the end of the current sequence of digits in the number. A typical value for @scorethresh is 0.60. (5) We allow two digits to be combined if these conditions apply: (a) the first is to the left of the second (b) the second has a horizontal separation less than @spacethresh (c) the vertical overlap >= 0 (vertical separation < 0) (d) both have a score that exceeds @scorethresh (6) Each numa in the optionally returned naa contains the digit scores of a number. Each boxa in the optionally returned baa contains the bounding boxes of the digits in the number.
Input: recoga Return: 0 if OK, 1 on error
Input: recoga Return: count of recog in array; 0 if no recog or on error
Input: recoga
index (to the index-th recog)
Return: recog, or null on error
Notes: (1) This returns a ptr to the recog, which is still owned by the recoga. Do not destroy it.
| LEPT_DLL l_int32 recogaIdentifyMultiple | ( | L_RECOGA * | recoga, |
| PIX * | pixs, | ||
| l_int32 | nitems, | ||
| l_int32 | minw, | ||
| l_int32 | minh, | ||
| BOXA ** | pboxa, | ||
| PIXA ** | ppixa, | ||
| PIX ** | ppixdb, | ||
| l_int32 | debugsplit | ||
| ) |
Input: recoga (with training finished)
pixs (containing typically a small number of characters)
nitems (to be identified in pix; use 0 if not known)
minw (remove components with width less than this;
use -1 for removing all noise components)
minh (remove components with height less than this;
use -1 for removing all noise components)
&boxa (<optional return> locations of identified components)
&pixa (<optional return> images of identified components)
&pixdb (<optional return> debug pix: inputs and best fits)
debugsplit (1 returns pix split debugging images)
Return: 0 if OK; 1 if nothing is found; 2 for other errors.
(Get a warning if nitems and the number found are both > 0,
but not equal to each other.)
Notes: (1) This filters the input pixa, looking for @nitems if requested. Set @nitems == 0 if you don't know how many chars to expect. (2) This bundles the filtered components into a pixa and calls recogIdentifyPixa(). If @nitems > 0, use @minw = -1 and @minh = -1 to remove all noise components. (3) Set @minw = 0 and @minh = 0 to get all noise components. Set @minw > 0 and/or @minh > 0 to retain selected noise components. All noise components are recognized as an empty string with a score of 0.0. (4) An attempt is made to order the (optionally) returned images and boxes in 2-dimensional sorted order. These can then be used to aggregate identified characters into numbers or words. One typically wants the pixa, which contains a boxa of the extracted subimages.
Input: recoga
pixa (of 1 bpp images to match)
naid (<optional> indices of components to identify; can be null)
&pixdb (<optional return> pix showing inputs and best fits)
Return: 0 if OK, 1 on error
Notes: (1) See recogIdentifyPixa(). This does the same operation for each recog, returning the arrays of results (scores, class index and character string) for the best correlation match.
Input: recog1
recog2 (gets added to recog1)
Return: 0 if OK, 1 on error
Notes: (1) This is used to make a training recognizer from more than one trained recognizer source. It should only be used when the bitmaps for corresponding character classes are very similar. That constraint does not arise when the character classes are disjoint; e.g., if recog1 is digits and recog2 is alphabetical. (2) This is done by appending recog2 to recog1. Averages are computed for each recognizer, if necessary, before appending. (3) Non-array fields are combined using the appropriate min and max.
Input: filename Return: recoga, or null on error
Notes: (1) This allows serialization of an array of recognizers, each of which can be used for different fonts, font styles, etc.
Input: stream Return: recog, or null on error
Input: stream
recoga
display (1 for showing template images, 0 otherwise)
Return: 0 if OK, 1 on error
Input: recoga
&done (<return> 1 if training finished on all recog; else 0)
Return: 0 if OK, 1 on error
Input: recog
debug
Return: 0 on success, 1 on failure
Notes: (1) This is called when training is finished, and after outliers have been removed. Both unscaled and scaled inputs are averaged. Averages must be computed before any identification is done. (2) Set debug = 1 to view the resulting templates and their centroids.
Input: filename
recoga
Return: 0 if OK, 1 on error
Input: filename
recoga
Return: 0 if OK, 1 on error
Notes: (1) For each recognizer, this generates a pixa of all the unscaled images. They are combined into a pixaa for the set of recognizers. Each pix has its character string in the pix text field. (2) As a side-effect, the character class label is written into each pix in recog.
Input: stream (opened for "wb")
recoga
filename (output serialized filename; embedded in file)
Return: 0 if OK, 1 on error
| LEPT_DLL l_int32 recogBestCorrelForPadding | ( | L_RECOG * | recog, |
| L_RECOGA * | recoga, | ||
| NUMA ** | pnaset, | ||
| NUMA ** | pnaindex, | ||
| NUMA ** | pnascore, | ||
| NUMA ** | pnasum, | ||
| PIXA * | pixadb | ||
| ) |
Input: recog (typically the recog to be padded)
recoga (array of recogs for potentially providing the padding)
&naset (<return> of indices into the sets to be matched)
&naindex (<return> of matching indices into the best set)
&nascore (<return> of best correlation scores)
&naave (<return> average of correlation scores from each recog)
pixadb (<optional> debug images; use NULL for no debug)
Return: 0 if OK, 1 on error
Notes: (1) This finds, for each class in recog, the best matching template in the recoga. For that best match, it returns:
| LEPT_DLL l_int32 recogCorrelationBestChar | ( | L_RECOG * | recog, |
| PIX * | pixs, | ||
| BOX ** | pbox, | ||
| l_float32 * | pscore, | ||
| l_int32 * | pindex, | ||
| char ** | pcharstr, | ||
| PIX ** | ppixdb | ||
| ) |
Input: recog (with LUT's pre-computed)
pixs (can be of multiple touching characters, 1 bpp)
&box (<return> bounding box of best fit character)
&score (<return> correlation score)
&index (<optional return> index of class)
&charstr (<optional return> character string of class)
&pixdb (<optional return> debug pix showing input and best fit)
Return: 0 if OK, 1 on error
Notes: (1) Basic matching character splitter. Finds the best match among all templates to some region of the image. This can result in splitting the image into two parts. This is "image decoding" without dynamic programming, because we don't use a setwidth and compute the best matching score for the entire image. (2) Matching is to the average templates, without character scaling.
| LEPT_DLL l_int32 recogCorrelationBestRow | ( | L_RECOG * | recog, |
| PIX * | pixs, | ||
| BOXA ** | pboxa, | ||
| NUMA ** | pnascore, | ||
| NUMA ** | pnaindex, | ||
| SARRAY ** | psachar, | ||
| l_int32 | debug | ||
| ) |
Input: recog (with LUT's pre-computed)
pixs (typically of multiple touching characters, 1 bpp)
&boxa (<return> bounding boxes of best fit character)
&nascore (<optional return> correlation scores)
&naindex (<optional return> indices of classes)
&sachar (<optional return> array of character strings)
debug (1 for results written to pixadb_split)
Return: 0 if OK, 1 on error
Notes: (1) Supervises character matching for (in general) a c.c with multiple touching characters. Finds the best match greedily. Rejects small parts that are left over after splitting. (2) Matching is to the average, and without character scaling.
| LEPT_DLL l_int32 recogCorrelAverages | ( | L_RECOG * | recog1, |
| L_RECOG * | recog2, | ||
| NUMA ** | pnaindex, | ||
| NUMA ** | pnascore, | ||
| PIXA * | pixadb | ||
| ) |
Input: recog1 (typically the recog to be padded)
recog2 (potentially providing the padding)
&naindex (<return> of classes in 2 with respect to classes in 1)
&nascore (<return> correlation scores of corresponding classes)
pixadb (<optional> debug images)
Return: 0 if OK, 1 on error
Notes: (1) Use this for potentially padding recog1 with instances in recog2. The recog have been generated with isotropic scaling to the same fixed height (e.g., 30). The training has been "finished" in the sense that all arrays have been computed and they could potentially be used as they are. This is necessary for doing the correlation between scaled images. However, this function is called when there is a request to augment some of the examples in classes in recog1. (2) Iterate over classes in recog1, finding the corresponding class in recog2 and computing the correlation score between the average templates of the two. naindex is a LUT between the index of a class in recog1 and the corresponding one in recog2. (3) For classes in recog1 that do not exist in recog2, the index -1 is stored in naindex, and 0.0 is stored in the score.
| LEPT_DLL L_RECOG* recogCreate | ( | l_int32 | scalew, |
| l_int32 | scaleh, | ||
| l_int32 | templ_type, | ||
| l_int32 | threshold, | ||
| l_int32 | maxyshift | ||
| ) |
Input: scalew (scale all widths to this; use 0 for no scaling)
scaleh (scale all heights to this; use 0 for no scaling)
templ_type (L_USE_AVERAGE or L_USE_ALL)
threshold (for binarization; typically ~128)
maxyshift (from nominal centroid alignment; typically 0 or 1)
Return: recog, or null on error
Notes: (1) For a set trained on one font, such as numbers in a book, it is sensible to set scalew = scaleh = 0. (2) For a mixed training set, scaling to a fixed height, such as 32 pixels, but leaving the width unscaled, is effective. (3) The storage for most of the arrays is allocated when training is finished.
Input: recog
pixs (of 1 bpp image to match)
Return: 0 if OK, 1 on error
| LEPT_DLL L_RECOG* recogCreateFromPixa | ( | PIXA * | pixa, |
| l_int32 | scalew, | ||
| l_int32 | scaleh, | ||
| l_int32 | templ_type, | ||
| l_int32 | threshold, | ||
| l_int32 | maxyshift | ||
| ) |
Input: pixa (of labelled, 1 bpp images)
scalew (scale all widths to this; use 0 for no scaling)
scaleh (scale all heights to this; use 0 for no scaling)
templ_type (L_USE_AVERAGE or L_USE_ALL)
threshold (for binarization; typically ~128)
maxyshift (from nominal centroid alignment; typically 0 or 1)
Return: recog, or null on error
Notes: (1) This is a convenience function for training from labelled data. The pixa can be read from file. (2) The pixa should contain the unscaled bitmaps used for training. (3) The characters here should work as a single "font", because each image example is put into a class defined by its character label. All examples in the same class should be similar.
| LEPT_DLL L_RECOG* recogCreateFromRecog | ( | L_RECOG * | recs, |
| l_int32 | scalew, | ||
| l_int32 | scaleh, | ||
| l_int32 | templ_type, | ||
| l_int32 | threshold, | ||
| l_int32 | maxyshift | ||
| ) |
Input: recs (source recog with arbitrary input parameters)
scalew (scale all widths to this; use 0 for no scaling)
scaleh (scale all heights to this; use 0 for no scaling)
templ_type (L_USE_AVERAGE or L_USE_ALL)
threshold (for binarization; typically ~128)
maxyshift (from nominal centroid alignment; typically 0 or 1)
Return: recd, or null on error
Notes: (1) This is a convenience function that generates a recog using the unscaled training data in an existing recog.
Input: recog
debug (0 no output; 1 for images; 2 for text; 3 for both)
Return: 0 if OK, 1 on error
Notes: (1) Generates an image that pairs each of the input images used in training with the average template that it is best correlated to. This is written into the recog. (2) It also generates pixa_tr of all the input training images, which can be used, e.g., in recogShowMatchesInRange().
Input: recog (with LUT's pre-computed)
pixs (typically of multiple touching characters, 1 bpp)
nlevels (of templates; 2 for now)
&pixdb (<optional return> debug result; can be null)
Return: 0 if OK, 1 on error
Input: &recog (<will be set to null before returning>) Return: void
Notes: (1) If a recog has a parent, the parent owns it. A recogDestroy() will fail if there is a parent.
Input: recog Return: 0 if OK, 1 on error
Notes: (1) As the signature indicates, this is owned by the recog, and can only be destroyed using this function.
Input: recog Return: 1 if recog->did exists; 0 if not or on error.
Input: recog (with LUT's pre-computed)
val (integer value; can be up to 3 bytes for UTF-8)
text (text from which @val was derived; used if not found)
&index (<return> index into dna_tochar)
Return: 0 if found; 1 if not found and added; 2 on error.
Notes: (1) This is used during training. It searches the dna character array for @val. If not found, it increments the setsize by 1, augmenting both the index and text arrays. (2) Returns the index in &index, except on error. (3) Caller must check the function return value.
Input: recog
index (into array of char types)
&charstr (<return> string representation;
returns an empty string on error)
Return: 0 if found, 1 on error
Notes: (1) Extracts a copy of the string from sa_text, which the caller must free. (2) Caller must check the function return value.
Input: recog Return: count of classes in recog; 0 if no recog or on error
Input: recog Return: did (still owned by the recog), or null on error
Notes: (1) This also makes sure the arrays are defined.
Input: recog
&index (into the parent recoga; -1 if no parent)
Return: 0 if OK, 1 on error
Input: recog Return: recoga (back-pointer to parent); can be null
Input: recog (with LUT's pre-computed)
pixs (of a single character, 1 bpp)
&pixdb (<optional return> debug pix showing input and best fit)
Return: 0 if OK, 1 on error
Notes: (1) Basic recognition function for a single character. (2) If L_USE_ALL, matching is attempted to every bitmap in the recog, and the identity of the best match is returned. However, if L_USE_AVERAGE, the matching is only to the averaged bitmaps, and the index of the bestsample is meaningless (0 is returned if requested). (3) The score is related to the confidence (probability of correct identification), in that a higher score is correlated with a higher probability. However, the actual relation between the correlation (score) and the probability is not known; we call this a "score" because "confidence" can be misinterpreted as an actual probability.
Input: recog
pixa (of 1 bpp images to match)
naid (<optional> indices of components to identify; can be null)
&pixdb (<optional return> pix showing inputs and best fits)
Return: 0 if OK, 1 on error
Notes: (1) See recogIdentifyPix(). This does the same operation for each pix in a pixa, and optionally returns the arrays of results (scores, class index and character string) for the best correlation match.
Input: recog (with LUT's pre-computed)
pixs (typically of multiple touching characters, 1 bpp)
debug (1 for debug output; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) Generates the bit-and sum arrays for each character template along pixs. These are used in the dynamic programming step. (2) Previous arrays are destroyed and the new arrays are allocated. (3) The values are saved in the scoring arrays at the left edge of the template. They are used in the viterbi process at the setwidth position (which is near the RHS of the template as it is positioned on pixs) in the generated trellis.
Input: &recog (to be replaced if padding or more drastic measures
are necessary; otherwise, it is unchanged.)
debug (1 for debug output saved to recog; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) This function does either padding of the recognizer, or its complete replacement. In both cases, we use a "boot" recognizer to provide the sample images. (2) Before calling this, call recogSetPadParams() if you want non-default values for the character set type, min_nopad and max_afterpad values, paths for labelled bitmap character sets that can be used to augment an input recognizer, and optional augmentation of the input training set using eroded versions of the bitmaps. (3) If all classes in @recog have at least min_nopad samples, nothing is done. If the total number of samples in @recog is very small, @recog is replaced in its entirety by a "boot" recog, either from the specified bootpath, or by a default boot recognizer for that character type. Otherwise (the intermediate case), @recog is replaced by one with scaling to fixed height, where an array of recog are used to augment the input recog. (4) If padding or total replacement is done, this destroys the input recog and replaces it by a new one. If the recog belongs to a recoga, the replacement is also done in the recoga.
| LEPT_DLL PIX* recogPreSplittingFilter | ( | L_RECOG * | recog, |
| PIX * | pixs, | ||
| l_float32 | maxasp, | ||
| l_float32 | minaf, | ||
| l_int32 | debug | ||
| ) |
Input: recog
pixs (1 bpp, single connected component)
maxasp (maximum aspect ratio (width/height) to be retained)
minaf (minimum area fraction (|fg|/(w*h)) to be retained)
debug (1 to output indicator arrays)
Return: pixd (with filtered components removed) or null on error
| LEPT_DLL l_int32 recogProcessMultLabelled | ( | L_RECOG * | recog, |
| PIX * | pixs, | ||
| BOX * | box, | ||
| char * | text, | ||
| PIXA ** | ppixa, | ||
| l_int32 | debug | ||
| ) |
Input: recog (in training mode)
pixs (if depth > 1, will be thresholded to 1 bpp)
box (<optional> cropping box)
text (<optional> if null, use text field in pix)
&pixa (<return> of split and thresholded characters)
debug (1 to display images of samples not captured)
Return: 0 if OK, 1 on error
Notes: (1) This crops and segments one or more labelled and contiguous ascii characters, for input in training. It is a special case. (2) The character images are bundled into a pixa with the character text data embedded in each pix. (3) Where there is more than one character, this does some noise reduction and extracts the resulting character images from left to right. No scaling is performed.
| LEPT_DLL l_int32 recogProcessSingleLabelled | ( | L_RECOG * | recog, |
| PIX * | pixs, | ||
| BOX * | box, | ||
| char * | text, | ||
| PIXA ** | ppixa | ||
| ) |
Input: recog (in training mode)
pixs (if depth > 1, will be thresholded to 1 bpp)
box (<optional> cropping box)
text (<optional> if null, use text field in pix)
&pixa (<return> one pix, 1 bpp, labelled)
Return: 0 if OK, 1 on error
Notes: (1) This crops and binarizes the input image, generating a pix of one character where the charval is inserted into the pix.
Input: recog (with LUT's pre-computed)
pixs (typ. single character, possibly d > 1 and uncropped)
pad (extra pixels added to left and right sides)
Return: pixd (1 bpp, clipped to foreground), or null if there
are no fg pixels or on error.
Notes: (1) This is a lightweight operation to ensure that the input image is 1 bpp, properly cropped, and padded on each side. If bpp > 1, the image is thresholded.
Input: filename Return: recog, or null on error
Notes: (1) Serialization can be applied to any recognizer, including one with more than one "font". That is, it can have multiple character classes with the same character set description, where each of those classes contains characters that are very similar in size and shape. Each pixa in the serialized pixaa contains images for a single character class.
Input: stream Return: recog, or null on error
| LEPT_DLL l_int32 recogRemoveOutliers | ( | L_RECOG * | recog, |
| l_float32 | targetscore, | ||
| l_float32 | minfract, | ||
| l_int32 | debug | ||
| ) |
Input: recog (after training samples are entered)
targetscore (keep everything with at least this score)
minfract (minimum fraction to retain)
debug (1 for debug output)
Return: 0 if OK, 1 on error
Notes: (1) Removing outliers is particularly important when recognition goes against all the samples in the training set, as opposed to the averages for each class. The reason is that we get an identification error if a mislabeled sample is a best match for an input bitmap. (2) However, the score values depend strongly on the quality of the character images. To avoid losing too many samples, we supplement a target score for retention with a minimum fraction that we must keep. With poor quality images, we may keep samples with a score less than the targetscore, in order to satisfy the @minfract requirement. (3) We always require that at least one sample will be retained. (4) Where the training set is from the same source (e.g., the same book), use a relatively large targetscore; say, ~0.8. (5) Method: for each class, generate the averages and match each scaled sample against the average. Decide which samples will be ejected, and throw out both the scaled and unscaled samples and associated data. Recompute the average without the poor matches.
Input: &recog1 (old recog, to be destroyed)
recog2 (new recog, to be inserted in place of @recog1)
Return: 0 if OK, 1 on error
Notes: (1) This always destroys recog1. (2) If recog1 belongs to a recoga, this inserts recog2 into the slot that recog1 previously occupied.
Input: recog
size (of font; even integer between 4 and 20; default is 6)
Return: 0 if OK, 1 on error
Notes: (1) Use this to reset the size of the font used for debug labelling.
Input: recog (with LUT's pre-computed)
&pixdb (<optional return> debug result; can be null)
Return: 0 if OK, 1 on error
Notes: (1) This is recursive, in that (a) we compute the score successively at all pixel positions x, (b) to compute the score at x in the trellis, for each template we look backwards to (x - setwidth) to get the score if that template were to be printed with its setwidth location at x. We save at x the template and score that maximizes the sum of the score at (x - setwidth) and the log-likelihood for the template to be printed with its LHS there.
Input: recog
pixs (1 bpp, to be scaled)
Return: pixd (scaled) if OK, null on error
Input: recog Return: 0 if OK, 1 on error
Notes: (1) This must be set for any bootstrap recog, where the samples are not from the media being identified. (2) It is used to enforce scaled bitmaps for identification, and to prevent the recog from being used to split touching characters (which requires unscaled samples from the material being identified).
Input: recog
nlevels
Return: 0 if OK, 1 on error
Notes: (1) This converts the independent bit-flip probabilities in the "channel" into log-likelihood coefficients on image sums. These coefficients are only defined for the non-background template levels. Thus for nlevels = 2 (one fg, one bg), only beta[1] and gamma[1] are used. For nlevels = 4 (three fg templates), we use beta[1-3] and gamma[1-3].
| LEPT_DLL l_int32 recogSetPadParams | ( | L_RECOG * | recog, |
| const char * | bootdir, | ||
| const char * | bootpattern, | ||
| const char * | bootpath, | ||
| l_int32 | boot_iters, | ||
| l_int32 | type, | ||
| l_int32 | min_nopad, | ||
| l_int32 | max_afterpad, | ||
| l_int32 | min_samples | ||
| ) |
Input: recog (to be padded, if necessary)
bootdir (<optional> directory to bootstrap labelled pixa)
bootpattern (<optional> pattern for bootstrap labelled pixa)
bootpath (<optional> path to single bootstrap labelled pixa)
boot_iters (number of 2x2 erosions for extension of boot pixa)
type (character set type; -1 for default; see enum in recog.h)
size (character set size; -1 for default)
min_nopad (min number in a class without padding; -1 default)
max_afterpad (max number of samples in padded classes;
-1 for default)
min_samples (use boot if total num samples is less than this;
-1 for default)
Return: 0 if OK, 1 on error
Notes: (1) This is used to augment or replace a book-adapted recognizer (BAR). It is called when the recognizer is created, and must be called again before recogPadTrainingSet() if non-default values are to be used. (2) Default values allow for some padding. To disable padding, set @min_nopad = 0. (3) Constraint on @min_nopad and @max_afterpad guarantees that padding will be allowed if requested. (4) The file directory (@bootdir) and tail pattern (@bootpattern) are used to identify serialized pixa, from which we can generate an array of recog. These can be used to augment an input but incomplete BAR (book adapted recognizer). (5) The boot recog can be extended using erosions. Set boot_iters to the number of 2x2 erosions desired. For a typical font size, @boot_iters <= 2. (6) If the BAR is very sparse, with num_samples < min_samples, we will destroy it and use the generic bootstrap recognizer given at @bootpath.
Input: recog
scalew (scale all widths to this; use 0 for no scaling)
scaleh (scale all heights to this; use 0 for no scaling)
Return: 0 if OK, 1 on error
Input: recog
templ_type (L_USE_AVERAGE or L_USE_ALL)
Return: 0 if OK, 1 on error
Input: recog Return: 0 on success, 1 on failure
Notes: (1) This debug routine generates a display of the averaged templates, both scaled and unscaled, with the centroid visible in red.
Input: stream
recog
display (1 for showing template images, 0 otherwise)
Return: 0 if OK, 1 on error
| LEPT_DLL PIX* recogShowMatch | ( | L_RECOG * | recog, |
| PIX * | pix1, | ||
| PIX * | pix2, | ||
| BOX * | box, | ||
| l_int32 | index, | ||
| l_float32 | score | ||
| ) |
Input: recog
pix1 (input pix; several possibilities)
pix2 (<optional> matching template)
box (<optional> region in pix1 for which pix2 matches)
index (index of matching template; use -1 to disable printing)
score (score of match)
Return: pixd (pair of images, showing input pix and best template,
optionally with matching information), or null on error.
Notes: (1) pix1 can be one of these: (a) The input pix alone, which can be either a single character (box == NULL) or several characters that need to be segmented. If more than one character is present, the box region is displayed with an outline. (b) Both the input pix and the matching template. In this case, pix2 and box will both be null. (2) If the bmf has been made (by a call to recogMakeBmf()) and the index >= 0, the text field, match score and index will be rendered; otherwise their values will be ignored.
| LEPT_DLL l_int32 recogShowMatchesInRange | ( | L_RECOG * | recog, |
| PIXA * | pixa, | ||
| l_float32 | minscore, | ||
| l_float32 | maxscore, | ||
| l_int32 | display | ||
| ) |
Input: recog
pixa (of 1 bpp images to match)
minscore, maxscore (range to include output)
display (to display the result)
Return: 0 if OK, 1 on error
Notes: (1) This gives a visual output of the best matches for a given range of scores. Each pair of images can optionally be labelled with the index of the best match and the correlation. If the bmf has been previously made, it will be used here. (2) To use this, save a set of 1 bpp images (labelled or unlabelled) that can be given to a recognizer in a pixa. Then call this function with the pixa and parameters to filter a range of score.
Input: recog Return: 0 if OK, 1 on error
Notes: (1) This just writes a "dummy" result with 0 score and empty string id into the rch.
| LEPT_DLL l_int32 recogSplitIntoCharacters | ( | L_RECOG * | recog, |
| PIX * | pixs, | ||
| l_int32 | minw, | ||
| l_int32 | minh, | ||
| BOXA ** | pboxa, | ||
| PIXA ** | ppixa, | ||
| NUMA ** | pnaid, | ||
| l_int32 | debug | ||
| ) |
Input: recog
pixs (1 bpp, contains only mostly deskewed text)
minw (remove components with width less than this;
use -1 for default removing out of band components)
minh (remove components with height less than this;
use -1 for default removing out of band components)
&boxa (<return> character bounding boxes)
&pixa (<return> character images)
&naid (<return> indices of components to identify)
debug (1 for results written to pixadb_split)
Return: 0 if OK, 1 on error or if no components are returned
Notes: (1) This can be given an image that has an arbitrary number of text characters. It does splitting of connected components based on greedy correlation matching in recogCorrelationBestRow(). The returned pixa includes the boxes from which the (possibly split) components are extracted. (2) If either @minw < 0 or @minh < 0, noise components are filtered out, and the returned @naid array is all 1. Otherwise, some noise components whose dimensions (w,h) satisfy w >= @minw and h >= @minh are allowed through, but they are identified in the returned @naid, where they are labelled by 0 to indicate that they are not to be run through identification. Retaining the noise components provides spatial information that can help applications interpret the results. (3) In addition to optional filtering of the noise, the resulting components are put in row-major (2D) order, and the smaller of overlapping components are removed if they satisfy conditions of relative size and fractional overlap. (4) Note that the splitting function uses unscaled templates and does not bother returning the class results and scores. These are more accurately found later using the scaled templates.
| LEPT_DLL l_int32 recogSplittingFilter | ( | L_RECOG * | recog, |
| PIX * | pixs, | ||
| l_float32 | maxasp, | ||
| l_float32 | minaf, | ||
| l_int32 * | premove, | ||
| l_int32 | debug | ||
| ) |
Input: recog
pixs (1 bpp, single connected component)
maxasp (maximum asperity ratio (width/height) to be retained)
minaf (minimum area fraction (|fg|/(w*h)) to be retained)
&remove (<return> 0 to save, 1 to remove)
debug (1 to output indicator arrays)
Return: 0 if OK, 1 on error
Input: recog
text (text string for some class)
&index (<return> index for that class; -1 if not found)
Return: 0 if OK, 1 on error (not finding the string is an error)
Input: recog
debug
Return: 0 if OK, 1 on error
Notes: (1) This must be called after all training samples have been added. (2) Set debug = 1 to view the resulting templates and their centroids. (3) The following things are done here: (a) Allocate (or reallocate) storage for (possibly) scaled bitmaps, centroids, and fg areas. (b) Generate the (possibly) scaled bitmaps. (c) Compute centroid and fg area data for both unscaled and scaled bitmaps. (d) Compute the averages for both scaled and unscaled bitmaps (e) Truncate the pixaa, ptaa and numaa arrays down from 256 to the actual size. (4) Putting these operations here makes it simple to recompute the recog with different scaling on the bitmaps. (5) Removal of outliers must happen after this is called.
| LEPT_DLL l_int32 recogTrainLabelled | ( | L_RECOG * | recog, |
| PIX * | pixs, | ||
| BOX * | box, | ||
| char * | text, | ||
| l_int32 | multflag, | ||
| l_int32 | debug | ||
| ) |
Input: recog (in training mode)
pixs (if depth > 1, will be thresholded to 1 bpp)
box (<optional> cropping box)
text (<optional> if null, use text field in pix)
multflag (1 if one or more contiguous ascii characters;
0 for a single arbitrary character)
debug (1 to display images of samples not captured)
Return: 0 if OK, 1 on error
Notes: (1) Training is restricted to the addition of either: (a) multflag == 0: a single character in an arbitrary (e.g., UTF8) charset (b) multflag == 1: one or more ascii characters rendered contiguously in pixs (2) If box != null, it should represent the cropped location of the character image. (3) If multflag == 1, samples will be rejected if the number of connected components does not equal the number of ascii characters in the textstring. In that case, if debug == 1, the rejected samples will be displayed.
| LEPT_DLL l_int32 recogTrainUnlabelled | ( | L_RECOG * | recog, |
| L_RECOG * | recogboot, | ||
| PIX * | pixs, | ||
| BOX * | box, | ||
| l_int32 | singlechar, | ||
| l_float32 | minscore, | ||
| l_int32 | debug | ||
| ) |
Input: recog (in training mode: the input characters in pixs are
inserted after labelling)
recogboot (labels the input)
pixs (if depth > 1, will be thresholded to 1 bpp)
box (<optional> cropping box)
singlechar (1 if pixs is a single character; 0 otherwise)
minscore (min score for accepting the example; e.g., 0.75)
debug (1 for debug output saved to recog; 0 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) This trains on one or several characters of unlabelled data, using a bootstrap recognizer to apply the labels. In this way, we can build a recognizer using a source of unlabelled data. (2) The input pix can have several (non-touching) characters. If box != NULL, we treat the region in the box as a single char. If box == NULL, use all of pixs: if singlechar == 0, we identify each c.c. as a single character; if singlechar == 1, we treat pixs as a single character. Multiple chars are identified separately by recogboot and inserted into recog. (3) recogboot is a trained recognizer. It would typically be constructed from a variety of sources, and use the individual templates (not the averages) for scoring. It must be used in scaled mode; typically with width = 20 and height = 32. (4) For debugging, if bmf is defined in the recog, the correlation scores are generated and saved (by adding to the pixadb_boot field) with the matching images.
Input: filename
recog
Return: 0 if OK, 1 on error
Input: filename
recog
Return: 0 if OK, 1 on error
Notes: (1) This generates a pixa of all the unscaled images in the recognizer, where each one has its character string in the pix text field, by flattening pixaa_u to a pixa. (2) As a side-effect, the character class label is written into each pix in recog.
Input: stream (opened for "wb")
recog
filename (output serialized filename; embedded in file)
Return: 0 if OK, 1 on error
| LEPT_DLL char* reformatPacked64 | ( | char * | inarray, |
| l_int32 | insize, | ||
| l_int32 | leadspace, | ||
| l_int32 | linechars, | ||
| l_int32 | addquotes, | ||
| l_int32 * | poutsize | ||
| ) |
Input: inarray (base64 encoded string with newlines)
insize (number of bytes in input array)
leadspace (number of spaces in each line before the data)
linechars (number of bytes of data in each line; multiple of 4)
addquotes (1 to add quotes to each line of data; 0 to skip)
&outsize (<return> number of bytes in output char array)
Return: outarray (ascii)
Notes: (1) Each line in the output array has @leadspace space characters, followed optionally by a double-quote, followed by @linechars bytes of base64 data, followed optionally by a double-quote, followed by a newline. (2) This can be used to convert a base64 encoded string to a string formatted for inclusion in a C source file.
| LEPT_DLL l_int32 regTestCheckFile | ( | L_REGPARAMS * | rp, |
| const char * | localname | ||
| ) |
Input: rp (regtest parameters)
localname (name of output file from reg test)
Return: 0 if OK, 1 on error (a failure in comparison is not an error)
Notes: (1) This function does one of three things, depending on the mode:
| LEPT_DLL l_int32 regTestCleanup | ( | L_REGPARAMS * | rp | ) |
Input: rp (regression test parameters) Return: 0 if OK, 1 on error
Notes: (1) This copies anything written to the temporary file to the output file /tmp/lept/reg_results.txt.
| LEPT_DLL l_int32 regTestCompareFiles | ( | L_REGPARAMS * | rp, |
| l_int32 | index1, | ||
| l_int32 | index2 | ||
| ) |
Input: rp (regtest parameters)
index1 (of one output file from reg test)
index2 (of another output file from reg test)
Return: 0 if OK, 1 on error (a failure in comparison is not an error)
Notes: (1) This only does something in "compare" mode. (2) The canonical format of the golden filenames is: /tmp/lept/golden/<root of main name>_golden.<index>.<ext of localname> e.g., /tmp/lept/golden/maze_golden.0.png
| LEPT_DLL l_int32 regTestComparePix | ( | L_REGPARAMS * | rp, |
| PIX * | pix1, | ||
| PIX * | pix2 | ||
| ) |
Input: rp (regtest parameters)
pix1, pix2 (to be tested for equality)
Return: 0 if OK, 1 on error (a failure in comparison is not an error)
Notes: (1) This function compares two pix for equality. On failure, this writes to stderr.
| LEPT_DLL l_int32 regTestCompareSimilarPix | ( | L_REGPARAMS * | rp, |
| PIX * | pix1, | ||
| PIX * | pix2, | ||
| l_int32 | mindiff, | ||
| l_float32 | maxfract, | ||
| l_int32 | printstats | ||
| ) |
Input: rp (regtest parameters)
pix1, pix2 (to be tested for near equality)
mindiff (minimum pixel difference to be counted; > 0)
maxfract (maximum fraction of pixels allowed to have
diff greater than or equal to mindiff)
printstats (use 1 to print normalized histogram to stderr)
Return: 0 if OK, 1 on error (a failure in similarity comparison
is not an error)
Notes: (1) This function compares two pix for near equality. On failure, this writes to stderr. (2) The pix are similar if the fraction of non-conforming pixels does not exceed @maxfract. Pixels are non-conforming if the difference in pixel values equals or exceeds @mindiff. Typical values might be @mindiff = 15 and @maxfract = 0.01. (3) The input images must have the same size and depth. The pixels for comparison are typically subsampled from the images. (4) Normally, use @printstats = 0. In debugging mode, to see the relation between @mindiff and the minimum value of @maxfract for success, set this to 1.
| LEPT_DLL l_int32 regTestCompareStrings | ( | L_REGPARAMS * | rp, |
| l_uint8 * | string1, | ||
| size_t | bytes1, | ||
| l_uint8 * | string2, | ||
| size_t | bytes2 | ||
| ) |
Input: rp (regtest parameters)
string1 (typ. the expected string)
bytes1 (size of string1)
string2 (typ. the computed string)
bytes2 (size of string2)
Return: 0 if OK, 1 on error (a failure in comparison is not an error)
| LEPT_DLL l_int32 regTestCompareValues | ( | L_REGPARAMS * | rp, |
| l_float32 | val1, | ||
| l_float32 | val2, | ||
| l_float32 | delta | ||
| ) |
Input: rp (regtest parameters)
val1 (typ. the golden value)
val2 (typ. the value computed)
delta (allowed max absolute difference)
Return: 0 if OK, 1 on error (a failure in comparison is not an error)
| LEPT_DLL l_int32 regTestSetup | ( | l_int32 | argc, |
| char ** | argv, | ||
| L_REGPARAMS ** | prp | ||
| ) |
Input: argc (from invocation; can be either 1 or 2)
argv (to regtest: @argv[1] is one of these:
"generate", "compare", "display")
&rp (<return> all regression params)
Return: 0 if OK, 1 on error
Notes: (1) Call this function with the args to the reg test. The first arg is the name of the reg test. There are three cases: Case 1: There is either only one arg, or the second arg is "compare". This is the mode in which you run a regression test (or a set of them), looking for failures and logging the results to a file. The output, which includes logging of all reg test failures plus a SUCCESS or FAILURE summary for each test, is appended to the file "/tmp/lept/reg_results.txt". For this case, as in Case 2, the display field in rp is set to FALSE, preventing image display. Case 2: The second arg is "generate". This will cause generation of new golden files for the reg test. The results of the reg test are not recorded, and the display field in rp is set to FALSE. Case 3: The second arg is "display". The test will run and files will be written. Comparisons with golden files will not be carried out, so the only notion of success or failure is with tests that do not involve golden files. The display field in rp is TRUE, and this is used by pixDisplayWithTitle(). (2) See regutils.h for examples of usage.
| LEPT_DLL l_int32 regTestWritePixAndCheck | ( | L_REGPARAMS * | rp, |
| PIX * | pix, | ||
| l_int32 | format | ||
| ) |
Input: rp (regtest parameters)
pix (to be written)
format (of output pix)
Return: 0 if OK, 1 on error (a failure in comparison is not an error)
Notes: (1) This function makes it easy to write the pix in a numbered sequence of files, and either to: (a) write the golden file ("generate" arg to regression test) (b) make a local file and "compare" with the golden file (c) make a local file and "display" the results (2) The canonical format of the local filename is: /tmp/lept/regout/<root of main name>.<count>.<format extension> e.g., for scale_reg, /tmp/lept/regout/scale.0.png
Input: bc (SYMMETRIC_MORPH_BC, ASYMMETRIC_MORPH_BC) Return: void
Input: msg (error message)
procname
fval (return val)
Return: fval
Input: msg (error message)
procname
ival (return val)
Return: ival (typically 1 for an error return)
| LEPT_DLL void* returnErrorPtr | ( | const char * | msg, |
| const char * | procname, | ||
| void * | pval | ||
| ) |
Input: msg (error message)
procname
pval (return val)
Return: pval (typically null)
| LEPT_DLL void rotateAMColorCornerLow | ( | l_uint32 * | datad, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_float32 | angle, | ||
| l_uint32 | colorval | ||
| ) |
| LEPT_DLL void rotateAMColorFastLow | ( | l_uint32 * | datad, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_float32 | angle, | ||
| l_uint32 | colorval | ||
| ) |
This is a special simplification of area mapping with division of each pixel into 16 sub-pixels. The exact coefficients that should be used are the same as for the 4x linear interpolation scaling case, and are given there. I tried to approximate these as weighted coefficients with a maximum sum of 4, which allows us to do the arithmetic in parallel for the R, G and B components in a 32 bit pixel. However, there are three reasons for not doing that: (1) the loss of accuracy in the parallel implementation is visually significant (2) the parallel implementation (described below) is slower (3) the parallel implementation requires allocation of a temporary color image
There are 16 cases for the choice of the subpixel, and for each, the mapping to the relevant source pixels is as follows:
subpixel    src pixel weights
--------    -----------------
   0        sp1
   1        (3 * sp1 + sp2) / 4
   2        (sp1 + sp2) / 2
   3        (sp1 + 3 * sp2) / 4
   4        (3 * sp1 + sp3) / 4
   5        (9 * sp1 + 3 * sp2 + 3 * sp3 + sp4) / 16
   6        (3 * sp1 + 3 * sp2 + sp3 + sp4) / 8
   7        (3 * sp1 + 9 * sp2 + sp3 + 3 * sp4) / 16
   8        (sp1 + sp3) / 2
   9        (3 * sp1 + sp2 + 3 * sp3 + sp4) / 8
   10       (sp1 + sp2 + sp3 + sp4) / 4
   11       (sp1 + 3 * sp2 + sp3 + 3 * sp4) / 8
   12       (sp1 + 3 * sp3) / 4
   13       (3 * sp1 + sp2 + 9 * sp3 + 3 * sp4) / 16
   14       (sp1 + sp2 + 3 * sp3 + 3 * sp4) / 8
   15       (sp1 + 3 * sp2 + 3 * sp3 + 9 * sp4) / 16
Another way to visualize this is to consider the area mapping (or linear interpolation) coefficients for the pixel sp1. Expressed in fourths, they can be written as a symmetric matrix:
    4      3      2      1
    3      2.25   1.5    0.75
    2      1.5    1      0.5
    1      0.75   0.5    0.25
The coefficients for the three neighboring pixels can be similarly written.
This is implemented here, where, for each color component, we inline its extraction from each participating word, construct the linear combination, and combine the results into the destination 32 bit RGB pixel, using the appropriate shifts.
It is interesting to note that an alternative method, where we do the arithmetic on the 32 bit pixels directly (after shifting the components so they won't overflow into each other) is significantly inferior. Because we have only 8 bits for internal overflows, which can be distributed as 2, 3, 3, it is impossible to add these with the correct linear interpolation coefficients, which require a sum of up to 16. Rounding off to a sum of 4 causes appreciable visual artifacts in the rotated image. The code for the inferior method can be found in prog/rotatefastalt.c, for reference.
*** Warning: explicit assumption about RGB component ordering ***
| LEPT_DLL void rotateAMColorLow | ( | l_uint32 * | datad, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_float32 | angle, | ||
| l_uint32 | colorval | ||
| ) |
| LEPT_DLL void rotateAMGrayCornerLow | ( | l_uint32 * | datad, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_float32 | angle, | ||
| l_uint8 | grayval | ||
| ) |
| LEPT_DLL void rotateAMGrayLow | ( | l_uint32 * | datad, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_float32 | angle, | ||
| l_uint8 | grayval | ||
| ) |
| LEPT_DLL l_int32 runlengthMembershipOnLine | ( | l_int32 * | buffer, |
| l_int32 | size, | ||
| l_int32 | depth, | ||
| l_int32 * | start, | ||
| l_int32 * | end, | ||
| l_int32 | n | ||
| ) |
Input: buffer (into which full line of data is placed)
size (full size of line; w or h)
depth (8 or 16 bpp)
start (array of start positions for fg runs)
end (array of end positions for fg runs)
n (the number of runs)
Return: 0 if OK; 1 on error
Notes: (1) Converts a set of runlengths into a buffer of runlength membership values. (2) Initialization of the array gives pixels that are not within a run the value 0.
Input: sarray (of pathnames for single-page pdf files)
fileout (concatenated pdf file)
Return: 0 if OK, 1 on error
Notes: (1) This only works with leptonica-formatted single-page pdf files.
Input: sarray (of pathnames for single-page pdf files)
&data (<return> concatenated pdf data in memory)
&nbytes (<return> number of bytes in pdf data)
Return: 0 if OK, 1 on error
Notes: (1) This only works with leptonica-formatted single-page pdf files.
| LEPT_DLL l_int32 saConvertFilesToPdf | ( | SARRAY * | sa, |
| l_int32 | res, | ||
| l_float32 | scalefactor, | ||
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | title, | ||
| const char * | fileout | ||
| ) |
Input: sarray (of pathnames for images)
res (input resolution of all images)
scalefactor (scaling factor applied to each image; > 0.0)
type (encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
L_FLATE_ENCODE, or 0 for default)
quality (used for JPEG only; 0 for default (75))
title (<optional> pdf title; if null, taken from the first
image filename)
fileout (pdf file of all images)
Return: 0 if OK, 1 on error
Notes: (1) See convertFilesToPdf().
| LEPT_DLL l_int32 saConvertFilesToPdfData | ( | SARRAY * | sa, |
| l_int32 | res, | ||
| l_float32 | scalefactor, | ||
| l_int32 | type, | ||
| l_int32 | quality, | ||
| const char * | title, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
Input: sarray (of pathnames for images)
res (input resolution of all images)
scalefactor (scaling factor applied to each image; > 0.0)
type (encoding type (L_JPEG_ENCODE, L_G4_ENCODE,
L_FLATE_ENCODE, or 0 for default)
quality (used for JPEG only; 0 for default (75))
title (<optional> pdf title; if null, taken from the first
image filename)
&data (<return> output pdf data (of all images)
&nbytes (<return> size of output pdf data)
Return: 0 if OK, 1 on error
Notes: (1) See convertFilesToPdf().
| LEPT_DLL l_int32 saConvertUnscaledFilesToPdf | ( | SARRAY * | sa, |
| const char * | title, | ||
| const char * | fileout | ||
| ) |
Input: sarray (of pathnames for images)
title (<optional> pdf title; if null, taken from the first
image filename)
fileout (pdf file of all images)
Return: 0 if OK, 1 on error
Notes: (1) See convertUnscaledFilesToPdf().
| LEPT_DLL l_int32 saConvertUnscaledFilesToPdfData | ( | SARRAY * | sa, |
| const char * | title, | ||
| l_uint8 ** | pdata, | ||
| size_t * | pnbytes | ||
| ) |
saConvertUnscaledFilesToPdfData()
Input: sarray (of pathnames for images)
title (<optional> pdf title; if null, taken from the first
image filename)
&data (<return> output pdf data (of all images)
&nbytes (<return> size of output pdf data)
Return: 0 if OK, 1 on error
Input: sarray
string (string to be added)
copyflag (L_INSERT, L_COPY)
Return: 0 if OK, 1 on error
Notes: (1) Legacy usage decrees that we always use 0 to insert a string directly and 1 to insert a copy of the string. The enums for L_INSERT and L_COPY agree with this convention, and will not change in the future. (2) See usage comments at the top of this file.
Input: filename
sarray
Return: 0 if OK; 1 on error
Input: sa1 (to be added to)
sa2 (append specified range of strings in sa2 to sa1)
start (index of first string of sa2 to append)
end (index of last string of sa2 to append; -1 to end of array)
Return: 0 if OK, 1 on error
Notes: (1) Copies of the strings in sarray2 are added to sarray1. (2) The [start ... end] range is truncated if necessary. (3) Use end == -1 to append to the end of sa2.
sarrayChangeRefCount()
Input: sarray
delta (change to be applied)
Return: 0 if OK, 1 on error
Input: sarray Return: 0 if OK; 1 on error
Input: sarray Return: ptr to same sarray, or null on error
| LEPT_DLL l_int32 sarrayConvertFilesFittedToPS | ( | SARRAY * | sa, |
| l_float32 | xpts, | ||
| l_float32 | ypts, | ||
| const char * | fileout | ||
| ) |
Input: sa (sa of individual words)
linesize (max num of chars in each line)
Return: saout (sa of formatted lines), or null on error
This is useful for re-typesetting text to a specific maximum line length. The individual words in the input sarray are concatenated into textlines. An input word string of zero length is taken to be a paragraph separator. Each time such a string is found, the current line is ended and a new line is also produced that contains just the string of zero length (""). When the output sarray of lines is eventually converted to a string with newlines (typically) appended to each line string, the empty strings are just converted to newlines, producing the visible paragraph separation.
What happens when a word is larger than linesize? We write it out as a single line anyway! Words preceding or following this long word are placed on lines preceding or following the line with the long word. Why this choice? Long "words" found in text documents are typically URLs, and it's often desirable not to put newlines in the middle of a URL. The text display program (e.g., text editor) will typically wrap the long "word" to fit in the window.
Input: sarray Return: copy of sarray, or null on error
Input: size of string ptr array to be alloc'd
(use 0 for default)
Return: sarray, or null on error
Input: n (size of string ptr array to be alloc'd)
initstr (string to be initialized on the full array)
Return: sarray, or null on error
Input: string
blankflag (0 to exclude blank lines; 1 to include)
Return: sarray, or null on error
Notes: (1) This finds the number of line substrings, each of which ends with a newline, and puts a copy of each substring in a new sarray. (2) The newline characters are removed from each substring.
Input: string Return: sarray, or null on error
Notes: (1) This finds the number of word substrings, creates an sarray of this size, and puts copies of each substring into the sarray.
Input: &sarray <to be nulled> Return: void
Notes: (1) Decrements the ref count and, if 0, destroys the sarray. (2) Always nulls the input ptr.
| LEPT_DLL l_int32 sarrayFindStringByHash | ( | SARRAY * | sa, |
| L_DNAHASH * | dahash, | ||
| const char * | str, | ||
| l_int32 * | pindex | ||
| ) |
Input: sa
dahash (built from sa)
str (arbitrary string)
&index (<return> index into @sa if @str is in @sa;
-1 otherwise)
Return: 0 if OK, 1 on error
Notes: (1) Fast lookup in dnaHash associated with a sarray, to see if a random string @str is already stored in the hash table.
Input: sarray
&nalloc (<optional return> number allocated string ptrs)
&n (<optional return> number allocated strings)
Return: ptr to string array, or null on error
Notes: (1) Caution: the returned array is not a copy, so caller must not destroy it!
Input: sarray Return: count, or 0 if no strings or on error
sarrayGetRefCount()
Input: sarray Return: refcount, or UNDEF on error
Input: sarray
index (to the index-th string)
copyflag (L_NOCOPY or L_COPY)
Return: string, or null on error
Notes: (1) Legacy usage decrees that we always use 0 to get the pointer to the string itself, and 1 to get a copy of the string. (2) See usage comments at the top of this file. (3) To get a pointer to the string itself, use for copyflag: L_NOCOPY or 0 or FALSE To get a copy of the string, use for copyflag: L_COPY or 1 or TRUE The const values of L_NOCOPY and L_COPY are guaranteed not to change.
Input: sa1, sa2 Return: sad (with the intersection of the string set), or null on error
Notes: (1) Algorithm: put the smaller sarray into a set, using the string hashes as the key values. Then run through the larger sarray, building an output sarray and a second set from the strings in the larger array: if a string is in the first set but not in the second, add the string to the output sarray and hash it into the second set. The second set is required to make sure only one instance of each string is put into the output sarray. This is O(mlogn), {m,n} = sizes of {smaller,larger} input arrays.
Input: sa1, sa2 Return: sad (intersection of the strings), or null on error
Notes: (1) This is faster than sarrayIntersectionByAset(), because the bucket lookup is O(n).
Input: sa1 (to be added to)
sa2 (append to sa1)
Return: 0 if OK, 1 on error
Notes: (1) Copies of the strings in sarray2 are added to sarray1.
Input: sa1, sa2
padstring
Return: 0 if OK, 1 on error
Notes: (1) If two sarrays have different size, this adds enough instances of @padstring to the smaller so that they are the same size. It is useful when two or more sarrays are being sequenced in parallel, and it is necessary to find a valid string at each index.
| LEPT_DLL l_int32 sarrayParseRange | ( | SARRAY * | sa, |
| l_int32 | start, | ||
| l_int32 * | pactualstart, | ||
| l_int32 * | pend, | ||
| l_int32 * | pnewstart, | ||
| const char * | substr, | ||
| l_int32 | loc | ||
| ) |
Input: sa (input sarray)
start (index to start range search)
&actualstart (<return> index of actual start; may be > 'start')
&end (<return> index of end)
&newstart (<return> index of start of next range)
substr (substring for matching at beginning of string)
loc (byte offset within the string for the pattern; use
-1 if the location does not matter);
Return: 0 if valid range found; 1 otherwise
Notes: (1) This finds the range of the next set of strings in SA, beginning the search at 'start', that does NOT have the substring 'substr' either at the indicated location in the string or anywhere in the string. The input variable 'loc' is the specified offset within the string; use -1 to indicate 'anywhere in the string'. (2) Always check the return value to verify that a valid range was found. (3) If a valid range is not found, the values of actstart, end and newstart are all set to the size of sa. (4) If this is the last valid range, newstart returns the value n. In use, this should be tested before calling the function. (5) Usage example. To find all the valid ranges in a file where the invalid lines begin with two dashes, copy each line in the file to a string in an sarray, and do: start = 0; while (!sarrayParseRange(sa, start, &actstart, &end, &start, "--", 0)) fprintf(stderr, "start = %d, end = %d\n", actstart, end);
Input: filename Return: sarray, or null on error
Input: stream Return: sarray, or null on error
Notes: (1) We store the size of each string along with the string. (2) This allows a string to have embedded newlines. By reading the entire string, as determined by its size, we are not affected by any number of embedded newlines.
Input: sas Return: sad (with duplicates removed), or null on error
Notes: (1) This is O(nlogn), considerably slower than sarrayRemoveDupsByHash() for large string arrays. (2) The key for each string is a 64-bit hash. (3) Build a set, using hashed strings as keys. As the set is built, first do a find; if not found, add the key to the set and add the string to the output sarray.
Input: sas
&sad (<return> unique set of strings; duplicates removed)
&dahash (<optional return> dnahash used for lookup)
Return: 0 if OK, 1 on error
Notes: (1) Generates a sarray with unique values. (2) The dnahash is built up with sad to assure uniqueness. It can be used to find if a string is in the set: sarrayFindStringByHash(sad, dahash, str, &index) (3) The hash of the string location is simple and fast. It scales up with the number of buckets to ensure a fairly random bucket selection for input strings. (4) This is faster than sarrayRemoveDupsByAset(), because the bucket lookup is O(n), although there is a double-loop lookup within the dna in each bucket.
Input: sarray
index (of string within sarray)
Return: removed string, or null on error
| LEPT_DLL l_int32 sarrayReplaceString | ( | SARRAY * | sa, |
| l_int32 | index, | ||
| char * | newstr, | ||
| l_int32 | copyflag | ||
| ) |
Input: sarray
index (of string within sarray to be replaced)
newstr (string to replace existing one)
copyflag (L_INSERT, L_COPY)
Return: 0 if OK, 1 on error
Notes: (1) This destroys an existing string and replaces it with the new string or a copy of it. (2) By design, an sarray is always compacted, so there are never any holes (null ptrs) in the ptr array up to the current count.
Input: sain (input sarray)
first (index of first string to be selected)
last (index of last string to be selected; use 0 to go to the
end of the sarray)
Return: saout (output sarray), or null on error
Notes: (1) This makes @saout consisting of copies of all strings in @sain in the index set [first ... last]. Use @last == 0 to get all strings from @first to the last string in the sarray.
Input: sain (input sarray)
substr (<optional> substring for matching; can be NULL)
Return: saout (output sarray, filtered with substring) or null on error
Notes: (1) This selects all strings in sain that have substr as a substring. Note that we can't use strncmp() because we're looking for a match to the substring anywhere within each filename. (2) If substr == NULL, returns a copy of the sarray.
Input: saout (output sarray; can be NULL or equal to sain)
sain (input sarray)
sortorder (L_SORT_INCREASING or L_SORT_DECREASING)
Return: saout (output sarray, sorted by ascii value), or null on error
Notes: (1) Set saout = sain for in-place; otherwise, set saout = NULL. (2) Shell sort, modified from K&R, 2nd edition, p.62. Slow but simple O(n logn) sort.
Input: sain
naindex (na that maps from the new sarray to the input sarray)
Return: saout (sorted), or null on error
Input: sarray
addnlflag (flag: 0 adds nothing to each substring
1 adds '\n' to each substring
2 adds ' ' to each substring)
Return: dest string, or null on error
Notes: (1) Concatenates all the strings in the sarray, preserving all white space. (2) If addnlflag != 0, adds either a '\n' or a ' ' after each substring. (3) This function was NOT implemented as: for (i = 0; i < n; i++) strcat(dest, sarrayGetString(sa, i, L_NOCOPY)); Do you see why?
| LEPT_DLL char* sarrayToStringRange | ( | SARRAY * | sa, |
| l_int32 | first, | ||
| l_int32 | nstrings, | ||
| l_int32 | addnlflag | ||
| ) |
Input: sarray
first (index of first string to use; starts with 0)
nstrings (number of strings to append into the result; use
0 to append to the end of the sarray)
addnlflag (flag: 0 adds nothing to each substring
1 adds '\n' to each substring
2 adds ' ' to each substring)
Return: dest string, or null on error
Notes: (1) Concatenates the specified strings in the sarray, preserving all white space. (2) If addnlflag != 0, adds either a '\n' or a ' ' after each substring. (3) If the sarray is empty, this returns a string with just the character corresponding to @addnlflag.
Input: sa1, sa2 Return: sad (with the union of the string set), or null on error
Notes: (1) Duplicates are removed from the concatenation of the two arrays. (2) The key for each string is a 64-bit hash. (3) Algorithm: Concatenate the two sarrays. Then build a set, using hashed strings as keys. As the set is built, first do a find; if not found, add the key to the set and add the string to the output sarray. This is O(nlogn).
Input: filename
sarray
Return: 0 if OK; 1 on error
Input: stream
sarray
Return: 0 if OK; 1 on error
Notes: (1) This appends a '\n' to each string, which is stripped off by sarrayReadStream().
| LEPT_DLL void scaleAreaMapLow2 | ( | l_uint32 * | datad, |
| l_int32 | wd, | ||
| l_int32 | hd, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | d, | ||
| l_int32 | wpls | ||
| ) |
Note: This function is called with either 8 bpp gray or 32 bpp RGB. The result is a 2x reduced dest.
| LEPT_DLL l_int32 scaleBinaryLow | ( | l_uint32 * | datad, |
| l_int32 | wd, | ||
| l_int32 | hd, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | ws, | ||
| l_int32 | hs, | ||
| l_int32 | wpls | ||
| ) |
| LEPT_DLL l_int32 scaleBySamplingLow | ( | l_uint32 * | datad, |
| l_int32 | wd, | ||
| l_int32 | hd, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | ws, | ||
| l_int32 | hs, | ||
| l_int32 | d, | ||
| l_int32 | wpls | ||
| ) |
Notes: (1) The dest must be cleared prior to this operation, and we clear it here in the low-level code. (2) We reuse dest pixels and dest pixel rows whenever possible. This speeds the upscaling; downscaling is done by strict subsampling and is unaffected. (3) Because we are sampling and not interpolating, this routine works directly, without conversion to full RGB color, for 2, 4 or 8 bpp palette color images.
| LEPT_DLL void scaleColor2xLILineLow | ( | l_uint32 * | lined, |
| l_int32 | wpld, | ||
| l_uint32 * | lines, | ||
| l_int32 | ws, | ||
| l_int32 | wpls, | ||
| l_int32 | lastlineflag | ||
| ) |
Input: lined (ptr to top destline, to be made from current src line)
wpld
lines (ptr to current src line)
ws
wpls
lastlineflag (1 if last src line; 0 otherwise)
Return: void
*** Warning: implicit assumption about RGB component ordering ***
| LEPT_DLL void scaleColor2xLILow | ( | l_uint32 * | datad, |
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | ws, | ||
| l_int32 | hs, | ||
| l_int32 | wpls | ||
| ) |
This is a special case of 2x expansion by linear interpolation. Each src pixel contains 4 dest pixels. The 4 dest pixels in src pixel 1 are numbered at their UL corners. The 4 dest pixels in src pixel 1 are related to that src pixel and its 3 neighboring src pixels as follows:
            1-----2-----|-----|-----|
            |     |     |     |     |
            |     |     |     |     |
 src 1 -->  3-----4-----|     |     |  <-- src 2
            |     |     |     |     |
            |     |     |     |     |
            |-----|-----|-----|-----|
            |     |     |     |     |
            |     |     |     |     |
 src 3 -->  |     |     |     |     |  <-- src 4
            |     |     |     |     |
            |     |     |     |     |
            |-----|-----|-----|-----|

          dest      src
          ----      ---
          dp1    =  sp1
          dp2    =  (sp1 + sp2) / 2
          dp3    =  (sp1 + sp3) / 2
          dp4    =  (sp1 + sp2 + sp3 + sp4) / 4
We iterate over the src pixels, and unroll the calculation for each set of 4 dest pixels corresponding to that src pixel, caching pixels for the next src pixel whenever possible. The method is exactly analogous to the one we use for scaleGray2xLILow() and its line version.
P3 speed is about 5 x 10^7 dst pixels/sec/GHz
| LEPT_DLL void scaleColorAreaMapLow | ( | l_uint32 * | datad, |
| l_int32 | wd, | ||
| l_int32 | hd, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | ws, | ||
| l_int32 | hs, | ||
| l_int32 | wpls | ||
| ) |
This should only be used for downscaling. We choose to divide each pixel into 16 x 16 sub-pixels. This is much slower than scaleSmoothLow(), but it gives a better representation, esp. for downscaling factors between 1.5 and 5. All src pixels are subdivided into 256 sub-pixels, and are weighted by the number of sub-pixels covered by the dest pixel. This is about 2x slower than scaleSmoothLow(), but the results are significantly better on small text.
| LEPT_DLL void scaleColorLILow | ( | l_uint32 * | datad, |
| l_int32 | wd, | ||
| l_int32 | hd, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | ws, | ||
| l_int32 | hs, | ||
| l_int32 | wpls | ||
| ) |
We choose to divide each pixel into 16 x 16 sub-pixels. Linear interpolation is equivalent to finding the fractional area (i.e., number of sub-pixels divided by 256) associated with each of the four nearest src pixels, and weighting each pixel value by this fractional area.
P3 speed is about 7 x 10^6 dst pixels/sec/GHz
| LEPT_DLL void scaleGray2xLILineLow | ( | l_uint32 * | lined, |
| l_int32 | wpld, | ||
| l_uint32 * | lines, | ||
| l_int32 | ws, | ||
| l_int32 | wpls, | ||
| l_int32 | lastlineflag | ||
| ) |
Input: lined (ptr to top destline, to be made from current src line)
wpld
lines (ptr to current src line)
ws
wpls
lastlineflag (1 if last src line; 0 otherwise)
Return: void
| LEPT_DLL void scaleGray2xLILow | ( | l_uint32 * | datad, |
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | ws, | ||
| l_int32 | hs, | ||
| l_int32 | wpls | ||
| ) |
This is a special case of 2x expansion by linear interpolation. Each src pixel contains 4 dest pixels. The 4 dest pixels in src pixel 1 are numbered at their UL corners. The 4 dest pixels in src pixel 1 are related to that src pixel and its 3 neighboring src pixels as follows:
            1-----2-----|-----|-----|
            |     |     |     |     |
            |     |     |     |     |
 src 1 -->  3-----4-----|     |     |  <-- src 2
            |     |     |     |     |
            |     |     |     |     |
            |-----|-----|-----|-----|
            |     |     |     |     |
            |     |     |     |     |
 src 3 -->  |     |     |     |     |  <-- src 4
            |     |     |     |     |
            |     |     |     |     |
            |-----|-----|-----|-----|

          dest      src
          ----      ---
          dp1    =  sp1
          dp2    =  (sp1 + sp2) / 2
          dp3    =  (sp1 + sp3) / 2
          dp4    =  (sp1 + sp2 + sp3 + sp4) / 4
We iterate over the src pixels, and unroll the calculation for each set of 4 dest pixels corresponding to that src pixel, caching pixels for the next src pixel whenever possible.
| LEPT_DLL void scaleGray4xLILineLow | ( | l_uint32 * | lined, |
| l_int32 | wpld, | ||
| l_uint32 * | lines, | ||
| l_int32 | ws, | ||
| l_int32 | wpls, | ||
| l_int32 | lastlineflag | ||
| ) |
Input: lined (ptr to top destline, to be made from current src line)
wpld
lines (ptr to current src line)
ws
wpls
lastlineflag (1 if last src line; 0 otherwise)
Return: void
| LEPT_DLL void scaleGray4xLILow | ( | l_uint32 * | datad, |
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | ws, | ||
| l_int32 | hs, | ||
| l_int32 | wpls | ||
| ) |
This is a special case of 4x expansion by linear interpolation. Each src pixel contains 16 dest pixels. The 16 dest pixels in src pixel 1 are numbered at their UL corners. The 16 dest pixels in src pixel 1 are related to that src pixel and its 3 neighboring src pixels as follows:
            1---2---3---4---|---|---|---|---|
            |   |   |   |   |   |   |   |   |
            5---6---7---8---|---|---|---|---|
            |   |   |   |   |   |   |   |   |
 src 1 -->  9---a---b---c---|---|---|---|---|  <-- src 2
            |   |   |   |   |   |   |   |   |
            d---e---f---g---|---|---|---|---|
            |   |   |   |   |   |   |   |   |
            |===|===|===|===|===|===|===|===|
            |   |   |   |   |   |   |   |   |
            |---|---|---|---|---|---|---|---|
            |   |   |   |   |   |   |   |   |
 src 3 -->  |---|---|---|---|---|---|---|---|  <-- src 4
            |   |   |   |   |   |   |   |   |
            |---|---|---|---|---|---|---|---|
            |   |   |   |   |   |   |   |   |
            |---|---|---|---|---|---|---|---|

          dest      src
          ----      ---
          dp1    =  sp1
          dp2    =  (3 * sp1 + sp2) / 4
          dp3    =  (sp1 + sp2) / 2
          dp4    =  (sp1 + 3 * sp2) / 4
          dp5    =  (3 * sp1 + sp3) / 4
          dp6    =  (9 * sp1 + 3 * sp2 + 3 * sp3 + sp4) / 16
          dp7    =  (3 * sp1 + 3 * sp2 + sp3 + sp4) / 8
          dp8    =  (3 * sp1 + 9 * sp2 + 1 * sp3 + 3 * sp4) / 16
          dp9    =  (sp1 + sp3) / 2
          dp10   =  (3 * sp1 + sp2 + 3 * sp3 + sp4) / 8
          dp11   =  (sp1 + sp2 + sp3 + sp4) / 4
          dp12   =  (sp1 + 3 * sp2 + sp3 + 3 * sp4) / 8
          dp13   =  (sp1 + 3 * sp3) / 4
          dp14   =  (3 * sp1 + sp2 + 9 * sp3 + 3 * sp4) / 16
          dp15   =  (sp1 + sp2 + 3 * sp3 + 3 * sp4) / 8
          dp16   =  (sp1 + 3 * sp2 + 3 * sp3 + 9 * sp4) / 16
We iterate over the src pixels, and unroll the calculation for each set of 16 dest pixels corresponding to that src pixel, caching pixels for the next src pixel whenever possible.
| LEPT_DLL void scaleGrayAreaMapLow | ( | l_uint32 * | datad, |
| l_int32 | wd, | ||
| l_int32 | hd, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | ws, | ||
| l_int32 | hs, | ||
| l_int32 | wpls | ||
| ) |
This should only be used for downscaling. We choose to divide each pixel into 16 x 16 sub-pixels. This is about 2x slower than scaleSmoothLow(), but the results are significantly better on small text, esp. for downscaling factors between 1.5 and 5. All src pixels are subdivided into 256 sub-pixels, and are weighted by the number of sub-pixels covered by the dest pixel.
| LEPT_DLL void scaleGrayLILow | ( | l_uint32 * | datad, |
| l_int32 | wd, | ||
| l_int32 | hd, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | ws, | ||
| l_int32 | hs, | ||
| l_int32 | wpls | ||
| ) |
We choose to divide each pixel into 16 x 16 sub-pixels. Linear interpolation is equivalent to finding the fractional area (i.e., number of sub-pixels divided by 256) associated with each of the four nearest src pixels, and weighting each pixel value by this fractional area.
| LEPT_DLL l_int32 scaleMipmapLow | ( | l_uint32 * | datad, |
| l_int32 | wd, | ||
| l_int32 | hd, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas1, | ||
| l_int32 | wpls1, | ||
| l_uint32 * | datas2, | ||
| l_int32 | wpls2, | ||
| l_float32 | red | ||
| ) |
See notes in scale.c for pixScaleToGrayMipmap(). This function is here for pedagogical reasons. It gives poor results on document images because of aliasing.
| LEPT_DLL void scaleRGBToGray2Low | ( | l_uint32 * | datad, |
| l_int32 | wd, | ||
| l_int32 | hd, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_float32 | rwt, | ||
| l_float32 | gwt, | ||
| l_float32 | bwt | ||
| ) |
Notes: (1) This function is called with 32 bpp RGB src and 8 bpp, half-resolution dest. The weights should add to 1.0.
| LEPT_DLL l_int32 scaleSmoothLow | ( | l_uint32 * | datad, |
| l_int32 | wd, | ||
| l_int32 | hd, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | ws, | ||
| l_int32 | hs, | ||
| l_int32 | d, | ||
| l_int32 | wpls, | ||
| l_int32 | size | ||
| ) |
Notes: (1) This function is called on 8 or 32 bpp src and dest images. (2) size is the full width of the lowpass smoothing filter. It is correlated with the reduction ratio, being the nearest integer such that size is approximately equal to hs / hd.
| LEPT_DLL void scaleToGray16Low | ( | l_uint32 * | datad, |
| l_int32 | wd, | ||
| l_int32 | hd, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_int32 * | tab8 | ||
| ) |
Input: usual image variables
tab8 (made from makePixelSumTab8())
Return: void
The output is processed one dest byte at a time, corresponding to 16 rows consisting each of 2 src bytes in the input image. This uses one lookup table, tab8, which gives the sum of ON pixels in a byte. After summing for all ON pixels in the 32 src bytes, which is between 0 and 256, this is converted to an 8 bpp grayscale value between 0 (for 255 or 256 bits ON) and 255 (for 0 bits ON).
| LEPT_DLL void scaleToGray2Low | ( | l_uint32 * | datad, |
| l_int32 | wd, | ||
| l_int32 | hd, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_uint32 * | sumtab, | ||
| l_uint8 * | valtab | ||
| ) |
Input: usual image variables
sumtab (made from makeSumTabSG2())
valtab (made from makeValTabSG2())
Return: void
The output is processed in sets of 4 output bytes on a row, corresponding to 4 2x2 bit-blocks in the input image. Two lookup tables are used. The first, sumtab, gets the sum of ON pixels in 4 sets of two adjacent bits, storing the result in 4 adjacent bytes. After sums from two rows have been added, the second table, valtab, converts from the sum of ON pixels in the 2x2 block to an 8 bpp grayscale value between 0 (for 4 bits ON) and 255 (for 0 bits ON).
| LEPT_DLL void scaleToGray3Low | ( | l_uint32 * | datad, |
| l_int32 | wd, | ||
| l_int32 | hd, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_uint32 * | sumtab, | ||
| l_uint8 * | valtab | ||
| ) |
Input: usual image variables
sumtab (made from makeSumTabSG3())
valtab (made from makeValTabSG3())
Return: void
Each set of 8 3x3 bit-blocks in the source image, which consist of 72 pixels arranged 24 pixels wide by 3 scanlines, is converted to a row of 8 8-bit pixels in the dest image. These 72 pixels of the input image are runs of 24 pixels in three adjacent scanlines. Each run of 24 pixels is stored in the 24 LSbits of a 32-bit word. We use 2 LUTs. The first, sumtab, takes 6 of these bits and stores sum, taken 3 bits at a time, in two bytes. (See makeSumTabSG3). This is done for each of the 3 scanlines, and the results are added. We now have the sum of ON pixels in the first two 3x3 blocks in two bytes. The valtab LUT then converts these values (which go from 0 to 9) to grayscale values between 255 and 0. (See makeValTabSG3). This process is repeated for each of the other 3 sets of 6x3 input pixels, giving 8 output pixels in total.
Note: because the input image is processed in groups of 24 x 3 pixels, the process clips the input height to (h - h % 3) and the input width to (w - w % 24).
| LEPT_DLL void scaleToGray4Low | ( | l_uint32 * | datad, |
| l_int32 | wd, | ||
| l_int32 | hd, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_uint32 * | sumtab, | ||
| l_uint8 * | valtab | ||
| ) |
Input: usual image variables
sumtab (made from makeSumTabSG4())
valtab (made from makeValTabSG4())
Return: void
The output is processed in sets of 2 output bytes on a row, corresponding to 2 4x4 bit-blocks in the input image. Two lookup tables are used. The first, sumtab, gets the sum of ON pixels in two sets of four adjacent bits, storing the result in 2 adjacent bytes. After sums from four rows have been added, the second table, valtab, converts from the sum of ON pixels in the 4x4 block to an 8 bpp grayscale value between 0 (for 16 bits ON) and 255 (for 0 bits ON).
| LEPT_DLL void scaleToGray6Low | ( | l_uint32 * | datad, |
| l_int32 | wd, | ||
| l_int32 | hd, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_int32 * | tab8, | ||
| l_uint8 * | valtab | ||
| ) |
Input: usual image variables
tab8 (made from makePixelSumTab8())
valtab (made from makeValTabSG6())
Return: void
Each set of 4 6x6 bit-blocks in the source image, which consist of 144 pixels arranged 24 pixels wide by 6 scanlines, is converted to a row of 4 8-bit pixels in the dest image. These 144 pixels of the input image are runs of 24 pixels in six adjacent scanlines. Each run of 24 pixels is stored in the 24 LSbits of a 32-bit word. We use 2 LUTs. The first, tab8, takes 6 of these bits and stores sum in one byte. This is done for each of the 6 scanlines, and the results are added. We now have the sum of ON pixels in the first 6x6 block. The valtab LUT then converts these values (which go from 0 to 36) to grayscale values between 255 and 0. (See makeValTabSG6). This process is repeated for each of the other 3 sets of 6x6 input pixels, giving 4 output pixels in total.
Note: because the input image is processed in groups of 24 x 6 pixels, the process clips the input height to (h - h % 6) and the input width to (w - w % 24).
| LEPT_DLL void scaleToGray8Low | ( | l_uint32 * | datad, |
| l_int32 | wd, | ||
| l_int32 | hd, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_int32 * | tab8, | ||
| l_uint8 * | valtab | ||
| ) |
Input: usual image variables
tab8 (made from makePixelSumTab8())
valtab (made from makeValTabSG8())
Return: void
The output is processed one dest byte at a time, corresponding to 8 rows of src bytes in the input image. Two lookup tables are used. The first, tab8, gets the sum of ON pixels in a byte. After sums from 8 rows have been added, the second table, valtab, converts from this value (which is between 0 and 64) to an 8 bpp grayscale value between 0 (for all 64 bits ON) and 255 (for 0 bits ON).
| LEPT_DLL void seedfillBinaryLow | ( | l_uint32 * | datas, |
| l_int32 | hs, | ||
| l_int32 | wpls, | ||
| l_uint32 * | datam, | ||
| l_int32 | hm, | ||
| l_int32 | wplm, | ||
| l_int32 | connectivity | ||
| ) |
Notes: (1) This is an in-place fill, where the seed image is filled, clipping to the filling mask, in one full cycle of UL -> LR and LR -> UL raster scans. (2) Assume the mask is a filling mask, not a blocking mask. (3) Assume that the RHS pad bits of the mask are properly set to 0. (4) Clip to the smallest dimensions to avoid invalid reads.
| LEPT_DLL void seedfillGrayInvLow | ( | l_uint32 * | datas, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpls, | ||
| l_uint32 * | datam, | ||
| l_int32 | wplm, | ||
| l_int32 | connectivity | ||
| ) |
Notes: (1) The pixels are numbered as follows: 1 2 3 4 x 5 6 7 8 This low-level filling operation consists of two scans, raster and anti-raster, covering the entire seed image. During the anti-raster scan, every pixel p such that its current value could still be propagated during the next raster scanning is put into the FIFO-queue. The next step is the propagation step, where we update and propagate the values using the FIFO structure created in the anti-raster scan. (2) The "Inv" signifies the fact that in this case, filling of the seed only takes place when the seed value is greater than the mask value. The mask will act to stop the fill when it is higher than the seed level. (This is in contrast to conventional grayscale filling where the seed always fills below the mask.) (3) An example of use is a basin, described by the mask (pixm), where within the basin, the seed pix (pixs) gets filled to the height of the highest seed pixel that is above its corresponding max pixel. Filling occurs while the propagating seed pixels in pixs are larger than the corresponding mask values in pixm. (4) Reference paper : L. Vincent, Morphological grayscale reconstruction in image analysis: applications and efficient algorithms, IEEE Transactions on Image Processing, vol. 2, no. 2, pp. 176-201, 1993.
| LEPT_DLL void seedfillGrayInvLowSimple | ( | l_uint32 * | datas, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpls, | ||
| l_uint32 * | datam, | ||
| l_int32 | wplm, | ||
| l_int32 | connectivity | ||
| ) |
Notes: (1) The pixels are numbered as follows: 1 2 3 4 x 5 6 7 8 This low-level filling operation consists of two scans, raster and anti-raster, covering the entire seed image. The caller typically iterates until the filling is complete. (2) The "Inv" signifies the fact that in this case, filling of the seed only takes place when the seed value is greater than the mask value. The mask will act to stop the fill when it is higher than the seed level. (This is in contrast to conventional grayscale filling where the seed always fills below the mask.) (3) An example of use is a basin, described by the mask (pixm), where within the basin, the seed pix (pixs) gets filled to the height of the highest seed pixel that is above its corresponding max pixel. Filling occurs while the propagating seed pixels in pixs are larger than the corresponding mask values in pixm.
| LEPT_DLL void seedfillGrayLow | ( | l_uint32 * | datas, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpls, | ||
| l_uint32 * | datam, | ||
| l_int32 | wplm, | ||
| l_int32 | connectivity | ||
| ) |
Notes: (1) The pixels are numbered as follows: 1 2 3 4 x 5 6 7 8 This low-level filling operation consists of two scans, raster and anti-raster, covering the entire seed image. This is followed by a breadth-first propagation operation to complete the fill. During the anti-raster scan, every pixel p whose current value could still be propagated after the anti-raster scan is put into the FIFO queue. The propagation step is a breadth-first fill to completion. Unlike the simple grayscale seedfill pixSeedfillGraySimple(), where at least two full raster/anti-raster iterations are required for completion and verification, the hybrid method uses only a single raster/anti-raster set of scans. (2) The filling action can be visualized from the following example. Suppose the mask, which clips the fill, is a sombrero-shaped surface, where the highest point is 200 and the low pixels around the rim are 30. Beyond the rim, the mask goes up a bit. Suppose the seed, which is filled, consists of a single point of height 150, located below the max of the mask, with the rest 0. Then in the raster scan, nothing happens until the high seed point is encountered, and then this value is propagated right and down, until it hits the side of the sombrero. The seed can never exceed the mask, so it fills to the rim, going lower along the mask surface. When it passes the rim, the seed continues to fill at the rim height to the edge of the seed image. Then on the anti-raster scan, the seed fills flat inside the sombrero to the upper and left, and then out from the rim as before. The final result has a seed that is flat outside the rim, and inside it fills the sombrero but only up to 150. If the rim height varies, the filled seed outside the rim will be at the highest point on the rim, which is a saddle point on the rim. (3) Reference paper : L. Vincent, Morphological grayscale reconstruction in image analysis: applications and efficient algorithms, IEEE Transactions on Image Processing, vol. 2, no. 2, pp. 176-201, 1993.
| LEPT_DLL void seedfillGrayLowSimple | ( | l_uint32 * | datas, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpls, | ||
| l_uint32 * | datam, | ||
| l_int32 | wplm, | ||
| l_int32 | connectivity | ||
| ) |
Notes: (1) The pixels are numbered as follows: 1 2 3 4 x 5 6 7 8 This low-level filling operation consists of two scans, raster and anti-raster, covering the entire seed image. The caller typically iterates until the filling is complete. (2) The filling action can be visualized from the following example. Suppose the mask, which clips the fill, is a sombrero-shaped surface, where the highest point is 200 and the low pixels around the rim are 30. Beyond the rim, the mask goes up a bit. Suppose the seed, which is filled, consists of a single point of height 150, located below the max of the mask, with the rest 0. Then in the raster scan, nothing happens until the high seed point is encountered, and then this value is propagated right and down, until it hits the side of the sombrero. The seed can never exceed the mask, so it fills to the rim, going lower along the mask surface. When it passes the rim, the seed continues to fill at the rim height to the edge of the seed image. Then on the anti-raster scan, the seed fills flat inside the sombrero to the upper and left, and then out from the rim as before. The final result has a seed that is flat outside the rim, and inside it fills the sombrero but only up to 150. If the rim height varies, the filled seed outside the rim will be at the highest point on the rim, which is a saddle point on the rim.
| LEPT_DLL void seedspreadLow | ( | l_uint32 * | datad, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datat, | ||
| l_int32 | wplt, | ||
| l_int32 | connectivity | ||
| ) |
See pixSeedspread() for a brief description of the algorithm here.
Input: sela (<optional>) Return: sela with additional sels, or null on error
Notes: (1) Adds the following sels:
| LEPT_DLL SELA* selaAddCrossJunctions | ( | SELA * | sela, |
| l_float32 | hlsize, | ||
| l_float32 | mdist, | ||
| l_int32 | norient, | ||
| l_int32 | debugflag | ||
| ) |
Input: sela (<optional>)
hlsize (length of each line of hits from origin)
mdist (distance of misses from the origin)
norient (number of orientations; max of 8)
debugflag (1 for debug output)
Return: sela with additional sels, or null on error
Notes: (1) Adds hitmiss Sels for the intersection of two lines. If the lines are very thin, they must be nearly orthogonal to register. (2) The number of Sels generated is equal to @norient. (3) If @norient == 2, this generates 2 Sels of crosses, each with two perpendicular lines of hits. One Sel has horizontal and vertical hits; the other has hits along lines at +-45 degrees. Likewise, if @norient == 3, this generates 3 Sels of crosses oriented at 30 degrees with each other. (4) It is suggested that @hlsize be chosen at least 1 greater than @mdist. Try values of (@hlsize, @mdist) such as (6,5), (7,6), (8,7), (9,7), etc.
Input: sela (<optional>) Return: sela with additional sels, or null on error
Notes: (1) Adds all comb (horizontal, vertical) Sels that are used in composite linear morphological operations up to 63 pixels in length, which are the sizes over which dwa code can be generated.
Input: sela (<optional>) Return: sela with additional sels, or null on error
Notes: (1) Adds all linear (horizontal, vertical) sels from 2 to 63 pixels in length, which are the sizes over which dwa code can be generated.
Input: sela (<optional>) Return: sela with additional sels, or null on error
Input: sela
sel to be added
selname (ignored if already defined in sel;
req'd in sel when added to a sela)
copyflag (for sel: 0 inserts, 1 copies)
Return: 0 if OK; 1 on error
Notes: (1) This adds a sel, either inserting or making a copy. (2) Because every sel in a sela must have a name, it copies the input name if necessary. You can input NULL for selname if the sel already has a name.
| LEPT_DLL SELA* selaAddTJunctions | ( | SELA * | sela, |
| l_float32 | hlsize, | ||
| l_float32 | mdist, | ||
| l_int32 | norient, | ||
| l_int32 | debugflag | ||
| ) |
Input: sela (<optional>)
hlsize (length of each line of hits from origin)
mdist (distance of misses from the origin)
norient (number of orientations; max of 8)
debugflag (1 for debug output)
Return: sela with additional sels, or null on error
Notes: (1) Adds hitmiss Sels for the T-junction of two lines. If the lines are very thin, they must be nearly orthogonal to register. (2) The number of Sels generated is 4 * @norient. (3) It is suggested that @hlsize be chosen at least 1 greater than @mdist. Try values of (@hlsize, @mdist) such as (6,5), (7,6), (8,7), (9,7), etc.
Input: n (initial number of sel ptrs; use 0 for default) Return: sela, or null on error
Input: filename Return: sela, or null on error
Notes: (1) The file contains a sequence of Sel descriptions. (2) Each Sel is formatted as follows:
sel_5diag
"x    "
" x   "
"  X  "
"   x "
"    x"
Input: &sela (<to be nulled>) Return: void
| LEPT_DLL PIX* selaDisplayInPix | ( | SELA * | sela, |
| l_int32 | size, | ||
| l_int32 | gthick, | ||
| l_int32 | spacing, | ||
| l_int32 | ncols | ||
| ) |
Input: sela
size (of grid interiors; odd; minimum size of 13 is enforced)
gthick (grid thickness; minimum size of 2 is enforced)
spacing (between sels, both horizontally and vertically)
ncols (number of sels per "line")
Return: pix (display of all sels in sela), or null on error
Notes: (1) This gives a visual representation of all the sels in a sela. (2) See notes in selDisplayInPix() for display params of each sel. (3) This gives the nicest results when all sels in the sela are the same size.
| LEPT_DLL l_int32 selaFindSelByName | ( | SELA * | sela, |
| const char * | name, | ||
| l_int32 * | pindex, | ||
| SEL ** | psel | ||
| ) |
Input: sela
sel name
&index (<optional, return>)
&sel (<optional, return> sel (not a copy))
Return: 0 if OK; 1 on error
Input: sela
hsize, vsize (of brick sel)
Return: sel name (new string), or null if no name or on error
Input: sela
size (the product of sizes of the brick and comb parts)
direction (L_HORIZ, L_VERT)
Return: sel name (new string), or null if name not found or on error
Notes: (1) Combs are by definition 1-dimensional, either horiz or vert. (2) Use this with comb Sels; e.g., from selaAddDwaCombs().
Input: sela Return: count, or 0 on error
Input: sela
index of sel to be retrieved (not copied)
Return: sel, or null on error
Notes: (1) This returns a ptr to the sel, not a copy, so the caller must not destroy it!
Input: sela Return: sa (of all sel names), or null on error
Input: filename Return: sela, or null on error
Input: stream Return: sela, or null on error
Input: filename
sela
Return: 0 if OK, 1 on error
Input: stream
sela
Return: 0 if OK, 1 on error
Input: height, width
name (<optional> sel name; can be null)
Return: sel, or null on error
Notes: (1) selCreate() initializes all values to 0. (2) After this call, (cy,cx) and nonzero data values must be assigned. If a text name is not assigned here, it will be needed later when the sel is put into a sela.
Input: height, width
cy, cx (origin, relative to UL corner at 0,0)
type (SEL_HIT, SEL_MISS, or SEL_DONT_CARE)
Return: sel, or null on error
Notes: (1) This is a rectangular sel of all hits, misses or don't cares.
Input: factor1 (contiguous space between comb tines)
factor2 (number of comb tines)
direction (L_HORIZ, L_VERT)
Return: sel, or null on error
Notes: (1) This generates a comb Sel of hits with the origin as near the center as possible.
Input: pixs (cmapped or rgb)
selname (<optional> sel name; can be null)
Return: sel if OK, null on error
Notes: (1) The sel size is given by the size of pixs. (2) In pixs, hits are represented by green pixels, misses by red pixels, and don't-cares by white pixels. (3) In pixs, there may be no misses, but there must be at least 1 hit. (4) At most there can be only one origin pixel, which is optionally specified by using a lower-intensity pixel: if a hit: dark green if a miss: dark red if a don't care: gray If there is no such pixel, the origin defaults to the approximate center of the sel.
Input: pix
cy, cx (origin of sel)
name (<optional> sel name; can be null)
Return: sel, or null on error
Notes: (1) The origin must be positive.
Input: pta
cy, cx (origin of sel)
name (<optional> sel name; can be null)
Return: sel (of minimum required size), or null on error
Notes: (1) The origin and all points in the pta must be positive.
Input: text
height, width
name (<optional> sel name; can be null)
Return: sel of the given size, or null on error
Notes: (1) The text is an array of chars (in row-major order) where each char can be one of the following: 'x': hit 'o': miss ' ': don't-care (2) Use an upper case char to indicate the origin of the Sel. When the origin falls on a don't-care, use 'C' as the uppercase for ' '. (3) The text can be input in a format that shows the 2D layout; e.g.,
static const char *seltext = "x    "
                             "x Oo "
                             "x    "
                             "xxxxx";
Input: &sel (<to be nulled>) Return: void
Input: sel
size (of grid interiors; odd; minimum size of 13 is enforced)
gthick (grid thickness; minimum size of 2 is enforced)
Return: pix (display of sel), or null on error
Notes: (1) This gives a visual representation of a general (hit-miss) sel. (2) The empty sel is represented by a grid of intersecting lines. (3) Three different patterns are generated for the sel elements:
| LEPT_DLL l_int32 selectComposableSels | ( | l_int32 | size, |
| l_int32 | direction, | ||
| SEL ** | psel1, | ||
| SEL ** | psel2 | ||
| ) |
Input: size (of sel to be decomposed)
&factor1 (<return> larger factor)
&factor2 (<return> smaller factor)
Return: 0 if OK, 1 on error
Notes: (1) This works for Sel sizes up to 62500, which seems sufficient. (2) The composable sel size is typically within +- 1 of the requested size. Up to size = 300, the maximum difference is +- 2. (3) We choose an overall cost function where the penalty for the size difference between input and actual is 4 times the penalty for additional rasterops. (4) Returned values: factor1 >= factor2 If size > 1, then factor1 > 1.
Input: pix
&type (<return> L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE)
Notes: (1) This attempts to choose an encoding for the pix that results in the smallest file, assuming that if jpeg encoded, it will use quality = 75. The decision is approximate, in that (a) all colormapped images will be losslessly encoded with gzip (flate), and (b) an image with less than about 20 colors is likely to be smaller if flate encoded than if encoded as a jpeg (dct). For example, an image made by pixScaleToGray3() will have 10 colors, and flate encoding will give about twice the compression as jpeg with quality = 75.
| LEPT_DLL l_int32 selFindMaxTranslations | ( | SEL * | sel, |
| l_int32 * | pxp, | ||
| l_int32 * | pyp, | ||
| l_int32 * | pxn, | ||
| l_int32 * | pyn | ||
| ) |
Input: sel
&xp, &yp, &xn, &yn (<return> max shifts)
Return: 0 if OK; 1 on error
Note: these are the maximum shifts for the erosion operation. For example, when j < cx, the shift of the image is +x to the cx. This is a positive xp shift.
Input: sel
row
col
&type (<return> SEL_HIT, SEL_MISS, SEL_DONT_CARE)
Return: 0 if OK; 1 on error
Input: sel Return: sel name (not copied), or null if no name or on error
| LEPT_DLL l_int32 selGetParameters | ( | SEL * | sel, |
| l_int32 * | psy, | ||
| l_int32 * | psx, | ||
| l_int32 * | pcy, | ||
| l_int32 * | pcx | ||
| ) |
Input: sel
&sy, &sx, &cy, &cx (<optional return>; each can be null)
Return: 0 if OK, 1 on error
Input: sel
&type (<return> SEL_HIT, SEL_MISS, SEL_DONT_CARE)
Return: 0 if OK; 1 on error or if origin is not found
Input: sel Return: str (string; caller must free)
Notes: (1) This is an inverse function of selCreateFromString. It prints a textual representation of the SEL to a malloc'd string. The format is the same as selCreateFromString except that newlines are inserted into the output between rows. (2) This is useful for debugging. However, if you want to save some Sels in a file, put them in a Sela and write them out with selaWrite(). They can then be read in with selaRead().
Input: pathname Return: sel if OK; null on error
Notes: (1) Loads an image from a file and creates a (hit-miss) sel. (2) The sel name is taken from the pathname without the directory and extension.
Input: stream Return: sel, or null on error
Input: sel
quads (0 - 4; number of 90 degree cw rotations)
Return: seld, or null on error
Input: sel
row
col
type (SEL_HIT, SEL_MISS, SEL_DONT_CARE)
Return: 0 if OK; 1 on error
Notes: (1) Because we use row and column to index into an array, they are always non-negative. The location of the origin (and the type of operation) determine the actual direction of the rasterop.
Input: sel
name (<optional>; can be null)
Return: 0 if OK, 1 on error
Notes: (1) Always frees the existing sel name, if defined. (2) If name is not defined, just clears any existing sel name.
Input: sel
cy, cx
Return: 0 if OK; 1 on error
Input: filename
sel
Return: 0 if OK, 1 on error
Input: stream
sel
Return: 0 if OK, 1 on error
Input: newsev Return: oldsev
Notes: (1) setMsgSeverity() allows the user to specify the desired message severity threshold. Messages of equal or greater severity will be output. The previous message severity is returned when the new severity is set. (2) If L_SEVERITY_EXTERNAL is passed, then the severity will be obtained from the LEPT_MSG_SEVERITY environment variable. If the environment variable is not set, a warning is issued.
Input: line (ptr to beginning of line),
x (pixel location in line)
depth (bpp)
val (to be inserted)
Return: void
Notes: (1) Caution: input variables are not checked!
| LEPT_DLL void setPixMemoryManager | ( | void * | (*allocator)(size_t), |
| void((*deallocator)(void *)) | |||
| ) |
Input: allocator (<optional>; use null to skip)
deallocator (<optional>; use null to skip)
Return: void
Notes: (1) Use this to change the alloc and/or dealloc functions; e.g., setPixMemoryManager(my_malloc, my_free). (2) The C99 standard (section 6.7.5.3, par. 8) says: A declaration of a parameter as "function returning type" shall be adjusted to "pointer to function returning type" so that it can be in either of these two forms: (a) type (function-ptr(type, ...)) (b) type ((*function-ptr)(type, ...)) because form (a) is implicitly converted to form (b), as in the definition of struct PixMemoryManager above. So, for example, we should be able to declare either of these: (a) void *(allocator(size_t)) (b) void *((*allocator)(size_t)) However, MSVC++ only accepts the second version.
| LEPT_DLL void shiftDataHorizontalLow | ( | l_uint32 * | datad, |
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_int32 | shift | ||
| ) |
Input: datad (ptr to beginning of dest line)
wpld (wpl of dest)
datas (ptr to beginning of src line)
wpls (wpl of src)
shift (horizontal shift of block; >0 is to right)
Return: void
Notes: (1) This can also be used for in-place operation; see, e.g., rasteropHipLow(). (2) We are clearing the pixels that are shifted in from outside the image. This can be overridden by the incolor parameter in higher-level functions that call this.
Input: pathname (full path; can be a directory)
&dir (<optional return> root directory name of
input path, including trailing '/')
&tail (<optional return> path tail, which is either
the file name within the root directory or
the last sub-directory in the path)
Return: 0 if OK, 1 on error
Notes: (1) If you only want the tail, input null for the root directory ptr. (2) If you only want the root directory name, input null for the tail ptr. (3) This function makes decisions based only on the lexical structure of the input. Examples: /usr/tmp/abc --> dir: /usr/tmp/ tail: abc /usr/tmp/ --> dir: /usr/tmp/ tail: [empty string] /usr/tmp --> dir: /usr/ tail: tmp abc --> dir: [empty string] tail: abc (4) The input can have either forward (unix) or backward (win) slash separators. The output has unix separators. Note that Win32 pathname functions generally accept both slash forms, but the windows command line interpreter only accepts backward slashes, because forward slashes are used to demarcate switches (vs. dashes in unix).
| LEPT_DLL l_int32 splitPathAtExtension | ( | const char * | pathname, |
| char ** | pbasename, | ||
| char ** | pextension | ||
| ) |
Input: pathname (full path; can be a directory)
&basename (<optional return> pathname not including the
last dot and characters after that)
&extension (<optional return> path extension, which is
the last dot and the characters after it. If
there is no extension, it returns the empty string)
Return: 0 if OK, 1 on error
Notes: (1) If you only want the extension, input null for the basename ptr. (2) If you only want the basename without extension, input null for the extension ptr. (3) This function makes decisions based only on the lexical structure of the input. Examples: /usr/tmp/abc.jpg --> basename: /usr/tmp/abc ext: .jpg /usr/tmp/.jpg --> basename: /usr/tmp/ ext: .jpg /usr/tmp.jpg/ --> basename: /usr/tmp.jpg/ ext: [empty str] ./.jpg --> basename: ./ ext: .jpg (4) The input can have either forward (unix) or backward (win) slash separators. The output has unix separators.
Input: textstring
splitting flag (see enum in bmf.h; valid values in {1,2,3})
Return: sarray (where each string is a paragraph of the input),
or null on error.
| LEPT_DLL l_int32 sreadHeaderSpix | ( | const l_uint32 * | data, |
| l_int32 * | pwidth, | ||
| l_int32 * | pheight, | ||
| l_int32 * | pbps, | ||
| l_int32 * | pspp, | ||
| l_int32 * | piscmap | ||
| ) |
Input: data
&width (<return>)
&height (<return>)
&bps (<return>, bits/sample)
&spp (<return>, samples/pixel)
&iscmap (<optional return>; input NULL to ignore)
Return: 0 if OK, 1 on error
Notes: (1) If there is a colormap, iscmap is returned as 1; else 0.
| LEPT_DLL void startTimer | ( | void | ) |
Notes: (1) These measure the cpu time elapsed between the two calls: startTimer(); .... fprintf(stderr, "Elapsed time = %7.3f sec\n", stopTimer());
startTimerNested(), stopTimerNested()
Example of usage:
L_TIMER t1 = startTimerNested(); .... L_TIMER t2 = startTimerNested(); .... fprintf(stderr, "Elapsed time 2 = %7.3f sec\n", stopTimerNested(t2)); .... fprintf(stderr, "Elapsed time 1 = %7.3f sec\n", stopTimerNested(t1));
| LEPT_DLL L_WALLTIMER* startWallTimer | ( | void | ) |
startWallTimer() Input: void Return: walltimer-ptr
stopWallTimer() Input: &walltimer-ptr Return: time (wall time elapsed in seconds)
Notes: (1) These measure the wall clock time elapsed between the two calls: L_WALLTIMER *timer = startWallTimer(); .... fprintf(stderr, "Elapsed time = %f sec\n", stopWallTimer(&timer)); (2) Note that the timer object is destroyed by stopWallTimer().
| LEPT_DLL l_float32 stopWallTimer | ( | L_WALLTIMER ** | ptimer | ) |
Input: fileno (integer that labels the two output files) Return: initialized L_StrCode, or null on error
Notes: (1) This struct exists to build two files containing code for any number of data objects. The two files are named autogen.<fileno>.c autogen.<fileno>.h
Input: filein (containing filenames of serialized data)
fileno (integer that labels the two output files)
outdir (<optional> if null, files are made in /tmp/lept/auto)
Return: 0 if OK, 1 on error
Notes: (1) The @filein has one filename on each line. Comment lines begin with "#". (2) The output is 2 files: autogen.<fileno>.c autogen.<fileno>.h
Input: &strcode (destroys after .c and .h files have been generated)
outdir (<optional> if null, files are made in /tmp/lept/auto)
Return: void
Input: strcode (for accumulating data)
filein (input file with serialized data)
type (of data; use the typedef string)
Return: 0 if OK, 1 on error.
Notes: (1) The generated function name is l_autodecode_<fileno>() where <fileno> is the index label for the pair of output files. (2) To deserialize this data, the function is called with the argument 'ifunc', which increments each time strcodeGenerate() is called.
Input: dest (null-terminated byte buffer)
size (size of dest)
src string (can be null or null-terminated string)
Return: number of bytes added to dest; -1 on error
Notes: (1) Alternative implementation of strncat, that checks the input, is easier to use (since the size of the dest buffer is specified rather than the number of bytes to copy), and does not complain if @src is null. (2) Never writes past end of dest. (3) If it can't append src (an error), it does nothing. (4) N.B. The order of 2nd and 3rd args is reversed from that in strncat, as in the Windows function strcat_s().
Input: str1
str2
Return: 1 if str1 > str2 (lexically); 0 otherwise
Notes: (1) If the lexical values are identical, return a 0, to indicate that no swapping is required to sort the strings.
| LEPT_DLL char* stringConcatNew | ( | const char * | first, |
| ... | |||
| ) |
Input: first (first string in list)
varargs (NULL-terminated list of strings)
Return: result (new string concatenating the input strings), or
NULL if first == NULL
Notes: (1) The last arg in the list of strings must be NULL. (2) Caller must free the returned string.
Input: dest (existing byte buffer)
src string (<optional> can be null)
n (max number of characters to copy)
Return: 0 if OK, 1 on error
Notes: (1) Relatively safe wrapper for strncpy, that checks the input, and does not complain if @src is null or @n < 1. If @n < 1, this is a no-op. (2) @dest needs to be at least @n + 1 bytes in size. (3) We don't call strncpy() because valgrind complains about use of uninitialized values.
Input: src (input string; can be of zero length)
sub (substring to be searched for)
&loc (<return optional> location of substring in src)
Return: 1 if found; 0 if not found or on error
Notes: (1) This is a wrapper around strstr(). (2) Both @src and @sub must be defined, and @sub must have length of at least 1. (3) If the substring is not found and loc is returned, it has the value -1.
| LEPT_DLL char* stringJoin | ( | const char * | src1, |
| const char * | src2 | ||
| ) |
Input: src1 string (<optional> can be null)
src2 string (<optional> can be null)
Return: concatenated string, or null on error
Notes: (1) This is a safe version of strcat; it makes a new string. (2) It is not an error if either or both of the strings are empty, or if either or both of the pointers are null.
Input: &src1 string (address of src1; cannot be on the stack)
src2 string (<optional> can be null)
Return: 0 if OK, 1 on error
Notes: (1) This is a safe in-place version of strcat. The contents of src1 are replaced by the concatenation of src1 and src2. (2) It is not an error if either or both of the strings are empty (""), or if the pointers to the strings (*psrc1, src2) are null. (3) src1 should be initialized to null or an empty string before the first call. Use one of these: char *src1 = NULL; char *src1 = stringNew(""); Then call with: stringJoinIP(&src1, src2); (4) This can also be implemented as a macro: #define stringJoinIP(src1, src2) \ {tmpstr = stringJoin((src1),(src2)); \ LEPT_FREE(src1); \ (src1) = tmpstr;} (5) Another function to consider for joining many strings is stringConcatNew().
Input: src string (can be null or null-terminated string)
size (size of src buffer)
Return: length of src in bytes.
Notes: (1) Safe implementation of strlen that only checks size bytes for trailing NUL. (2) Valid returned string lengths are between 0 and size - 1. If size bytes are checked without finding a NUL byte, then an error is indicated by returning size.
| LEPT_DLL char* stringNew | ( | const char * | src | ) |
Input: src string Return: dest copy of src string, or null on error
| LEPT_DLL char* stringRemoveChars | ( | const char * | src, |
| const char * | remchars | ||
| ) |
Input: src (input string; can be of zero length)
remchars (string of chars to be removed from src)
Return: dest (string with specified chars removed), or null on error
Input: &dest string (<return> copy)
src string (<optional> can be null)
Return: 0 if OK; 1 on error
Notes: (1) Frees any existing dest string (2) Puts a copy of src string in the dest (3) If either or both strings are null, does something reasonable.
| LEPT_DLL char* stringReplaceEachSubstr | ( | const char * | src, |
| const char * | sub1, | ||
| const char * | sub2, | ||
| l_int32 * | pcount | ||
| ) |
Input: src (input string; can be of zero length)
sub1 (substring to be replaced)
sub2 (substring to put in; can be "")
&count (<optional return > the number of times that sub1
is found in src; 0 if not found)
Return: dest (string with substring replaced), or null if the
substring not found or on error.
Notes: (1) Replaces every instance. (2) To only remove each instance of sub1, use "" for sub2 (3) Returns NULL if sub1 and sub2 are the same.
| LEPT_DLL char* stringReplaceSubstr | ( | const char * | src, |
| const char * | sub1, | ||
| const char * | sub2, | ||
| l_int32 * | pfound, | ||
| l_int32 * | ploc | ||
| ) |
Input: src (input string; can be of zero length)
sub1 (substring to be replaced)
sub2 (substring to put in; can be "")
&found (<return optional> 1 if sub1 is found; 0 otherwise)
&loc (<return optional> location of ptr after replacement)
Return: dest (string with substring replaced), or null if the
substring not found or on error.
Notes: (1) Replaces the first instance. (2) To only remove sub1, use "" for sub2 (3) Returns a new string if sub1 and sub2 are the same. (4) The optional loc is input as the byte offset within the src from which the search starts, and after the search it is the char position in the string of the next character after the substituted string. (5) N.B. If ploc is not null, loc must always be initialized. To search the string from the beginning, set loc = 0.
| LEPT_DLL char* stringReverse | ( | const char * | src | ) |
Input: src (string) Return: dest (newly-allocated reversed string)
| LEPT_DLL l_int32 stringSplitOnToken | ( | char * | cstr, |
| const char * | seps, | ||
| char ** | phead, | ||
| char ** | ptail | ||
| ) |
Input: cstr (input string to be split; not altered)
seps (a string of character separators)
&head (<return> ptr to copy of the input string, up to
the first separator token encountered)
&tail (<return> ptr to copy of the part of the input string
starting with the first non-separator character
that occurs after the first separator is found)
Return: 0 if OK, 1 on error
Notes: (1) The input string is not altered; all split parts are new strings. (2) The split occurs around the first consecutive sequence of tokens encountered. (3) The head goes from the beginning of the string up to but not including the first token found. (4) The tail contains the second part of the string, starting with the first char in that part that is NOT a token. (5) If no separator token is found, 'head' contains a copy of the input string and 'tail' is null.
| LEPT_DLL char* strtokSafe | ( | char * | cstr, |
| const char * | seps, | ||
| char ** | psaveptr | ||
| ) |
Input: cstr (input string to be sequentially parsed;
use NULL after the first call)
seps (a string of character separators)
&saveptr (<return> ptr to the next char after
the last encountered separator)
Return: substr (a new string that is copied from the previous
saveptr up to but not including the next
separator character), or NULL if end of cstr.
Notes: (1) This is a thread-safe implementation of strtok. (2) It has the same interface as strtok_r. (3) It differs from strtok_r in usage in two respects: (a) the input string is not altered (b) each returned substring is newly allocated and must be freed after use. (4) Let me repeat that. This is "safe" because the input string is not altered and because each returned string is newly allocated on the heap. (5) It is here because, surprisingly, some C libraries don't include strtok_r. (6) Important usage points:
Input: array (of 81 numbers, 9 rows of 9 numbers each) Return: l_sudoku, or null on error
Notes: (1) The input array has 0 for the unknown values, and 1-9 for the known initial values. It is generated from a file using sudokuReadInput(), which checks that the file data has 81 numbers in 9 rows.
Input: &l_sudoku (<to be nulled>) Return: void
| LEPT_DLL L_SUDOKU* sudokuGenerate | ( | l_int32 * | array, |
| l_int32 | seed, | ||
| l_int32 | minelems, | ||
| l_int32 | maxtries | ||
| ) |
Input: array (of 81 numbers, 9 rows of 9 numbers each)
seed (random number)
minelems (min non-zero elements allowed; <= 80)
maxtries (max tries to remove a number and get a valid sudoku)
Return: l_sudoku, or null on error
Notes: (1) This is a brute force generator. It starts with a completed sudoku solution and, by removing elements (setting them to 0), generates a valid (unique) sudoku initial condition. (2) The process stops when either @minelems, the minimum number of non-zero elements, is reached, or when the number of attempts to remove the next element exceeds @maxtries. (3) No sudoku is known with less than 17 nonzero elements.
Input: l_sudoku (at any stage)
arraytype (L_SUDOKU_INIT, L_SUDOKU_STATE)
Return: void
Notes: (1) Prints either the initial array or the current state of the solution.
Input: filename (of formatted sudoku file) Return: array (of 81 numbers), or null on error
Notes: (1) The file format has:
Input: str (of input data) Return: array (of 81 numbers), or null on error
Notes: (1) The string is formatted as 81 single digits, each separated from the next by a single space.
Input: l_sudoku (starting in initial state)
Return: 1 on success, 0 on failure to solve (note reversal of
typical unix returns)
Input: array (of 81 numbers, 9 lines of 9 numbers each)
&punique (<return> 1 if unique, 0 if not)
Return: 0 if OK, 1 on error
Notes: (1) This applies the brute force method to all four 90 degree rotations. If there is more than one solution, it is highly unlikely that all four results will be the same; consequently, if they are the same, the solution is most likely to be unique.
| LEPT_DLL void thresholdTo2bppLow | ( | l_uint32 * | datad, |
| l_int32 | h, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_int32 * | tab | ||
| ) |
| LEPT_DLL void thresholdTo4bppLow | ( | l_uint32 * | datad, |
| l_int32 | h, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | wpls, | ||
| l_int32 * | tab | ||
| ) |
| LEPT_DLL void thresholdToBinaryLineLow | ( | l_uint32 * | lined, |
| l_int32 | w, | ||
| l_uint32 * | lines, | ||
| l_int32 | d, | ||
| l_int32 | thresh | ||
| ) |
| LEPT_DLL void thresholdToBinaryLow | ( | l_uint32 * | datad, |
| l_int32 | w, | ||
| l_int32 | h, | ||
| l_int32 | wpld, | ||
| l_uint32 * | datas, | ||
| l_int32 | d, | ||
| l_int32 | wpls, | ||
| l_int32 | thresh | ||
| ) |
| LEPT_DLL l_int32 writeImageCompressedToPSFile | ( | const char * | filein, |
| const char * | fileout, | ||
| l_int32 | res, | ||
| l_int32 * | pfirstfile, | ||
| l_int32 * | pindex | ||
| ) |
| LEPT_DLL l_int32 writeMultipageTiff | ( | const char * | dirin, |
| const char * | substr, | ||
| const char * | fileout | ||
| ) |
Input: wshed (generated from wshedCreate()) Return: 0 if OK, 1 on error
Important note: (1) This is buggy. It seems to locate watersheds that are duplicates. The watershed extraction after complete fill grabs some regions belonging to existing watersheds. See prog/watershedtest.c for testing.
Input: wshed
&pixa (<optional return> mask of watershed basins)
&nalevels (<optional return> watershed levels)
Return: 0 if OK, 1 on error
Input: pixs (8 bpp source)
pixm (1 bpp 'marker' seed)
mindepth (minimum depth; anything less is not saved)
debugflag (1 for debug output)
Return: WShed, or null on error
Notes: (1) It is not necessary for the fg pixels in the seed image to be at minima, or that they be isolated. We extract a single pixel from each connected component, and a seed anywhere in a watershed will eventually label the watershed when the filling level reaches it. (2) Set mindepth to some value to ignore noise in pixs that can create small local minima. Any watershed shallower than mindepth, even if it has a seed, will not be saved; it will either be incorporated in another watershed or eliminated.
Input: &wshed (<will be set to null before returning>) Return: void
Input: wshed Return: pixd (initial image with all basins filled), or null on error
Input: wshed Return: pixd (initial image with all basins filled), or null on error
Input: datain (byte buffer with input data)
nin (number of bytes of input data)
&nout (<return> number of bytes of output data)
Return: dataout (compressed data), or null on error
Notes: (1) We repeatedly read in and fill up an input buffer, compress the data, and read it back out. zlib uses two byte buffers internally in the z_stream data structure. We use the bbuffers to feed data into the fixed bufferin, and feed it out of bufferout, in the same way that a pair of streams would normally be used if the data were being read from one file and written to another. This is done iteratively, compressing L_BUF_SIZE bytes of input data at a time.
Input: datain (byte buffer with compressed input data)
nin (number of bytes of input data)
&nout (<return> number of bytes of output data)
Return: dataout (uncompressed data), or null on error
Notes: (1) See zlibCompress().