Leptonica 1.83.1
Image processing and image analysis suite
Loading...
Searching...
No Matches
sarray2.c
Go to the documentation of this file.
1/*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
70
71#ifdef HAVE_CONFIG_H
72#include <config_auto.h>
73#endif /* HAVE_CONFIG_H */
74
75#include <string.h>
76#include "allheaders.h"
77#include "array_internal.h"
78
79/*----------------------------------------------------------------------*
80 * Sort *
81 *----------------------------------------------------------------------*/
97SARRAY *
99 SARRAY *sain,
100 l_int32 sortorder)
101{
102char **array;
103char *tmp;
104l_int32 n, i, j, gap;
105
106 if (!sain)
107 return (SARRAY *)ERROR_PTR("sain not defined", __func__, NULL);
108
109 /* Make saout if necessary; otherwise do in-place */
110 if (!saout)
111 saout = sarrayCopy(sain);
112 else if (sain != saout)
113 return (SARRAY *)ERROR_PTR("invalid: not in-place", __func__, NULL);
114 array = saout->array; /* operate directly on the array */
115 n = sarrayGetCount(saout);
116
117 /* Shell sort */
118 for (gap = n/2; gap > 0; gap = gap / 2) {
119 for (i = gap; i < n; i++) {
120 for (j = i - gap; j >= 0; j -= gap) {
121 if ((sortorder == L_SORT_INCREASING &&
122 stringCompareLexical(array[j], array[j + gap])) ||
123 (sortorder == L_SORT_DECREASING &&
124 stringCompareLexical(array[j + gap], array[j])))
125 {
126 tmp = array[j];
127 array[j] = array[j + gap];
128 array[j + gap] = tmp;
129 }
130 }
131 }
132 }
133
134 return saout;
135}
136
137
145SARRAY *
147 NUMA *naindex)
148{
149char *str;
150l_int32 i, n, index;
151SARRAY *saout;
152
153 if (!sain)
154 return (SARRAY *)ERROR_PTR("sain not defined", __func__, NULL);
155 if (!naindex)
156 return (SARRAY *)ERROR_PTR("naindex not defined", __func__, NULL);
157
158 n = sarrayGetCount(sain);
159 saout = sarrayCreate(n);
160 for (i = 0; i < n; i++) {
161 numaGetIValue(naindex, i, &index);
162 str = sarrayGetString(sain, index, L_COPY);
163 sarrayAddString(saout, str, L_INSERT);
164 }
165
166 return saout;
167}
168
169
183l_int32
184stringCompareLexical(const char *str1,
185 const char *str2)
186{
187l_int32 i, len1, len2, len;
188
189 if (!str1)
190 return ERROR_INT("str1 not defined", __func__, 1);
191 if (!str2)
192 return ERROR_INT("str2 not defined", __func__, 1);
193
194 len1 = strlen(str1);
195 len2 = strlen(str2);
196 len = L_MIN(len1, len2);
197
198 for (i = 0; i < len; i++) {
199 if (str1[i] == str2[i])
200 continue;
201 if (str1[i] > str2[i])
202 return 1;
203 else
204 return 0;
205 }
206
207 if (len1 > len2)
208 return 1;
209 else
210 return 0;
211}
212
213
214/*----------------------------------------------------------------------*
215 * Set operations using aset (rbtree) *
216 *----------------------------------------------------------------------*/
223L_ASET *
225{
226char *str;
227l_int32 i, n;
228l_uint64 hash;
229L_ASET *set;
230RB_TYPE key;
231
232 if (!sa)
233 return (L_ASET *)ERROR_PTR("sa not defined", __func__, NULL);
234
235 set = l_asetCreate(L_UINT_TYPE);
236 n = sarrayGetCount(sa);
237 for (i = 0; i < n; i++) {
238 str = sarrayGetString(sa, i, L_NOCOPY);
239 l_hashStringToUint64Fast(str, &hash);
240 key.utype = hash;
241 l_asetInsert(set, key);
242 }
243
244 return set;
245}
246
247
265l_ok
267 SARRAY **psad)
268{
269char *str;
270l_int32 i, n;
271l_uint64 hash;
272L_ASET *set;
273RB_TYPE key;
274SARRAY *sad;
275
276 if (!psad)
277 return ERROR_INT("&sad not defined", __func__, 1);
278 *psad = NULL;
279 if (!sas)
280 return ERROR_INT("sas not defined", __func__, 1);
281
282 set = l_asetCreate(L_UINT_TYPE);
283 sad = sarrayCreate(0);
284 *psad = sad;
285 n = sarrayGetCount(sas);
286 for (i = 0; i < n; i++) {
287 str = sarrayGetString(sas, i, L_NOCOPY);
288 l_hashStringToUint64Fast(str, &hash);
289 key.utype = hash;
290 if (!l_asetFind(set, key)) {
291 sarrayAddString(sad, str, L_COPY);
292 l_asetInsert(set, key);
293 }
294 }
295
296 l_asetDestroy(&set);
297 return 0;
298}
299
300
319l_ok
321 SARRAY *sa2,
322 SARRAY **psad)
323{
324SARRAY *sa3;
325
326 if (!psad)
327 return ERROR_INT("&sad not defined", __func__, 1);
328 *psad = NULL;
329 if (!sa1)
330 return ERROR_INT("sa1 not defined", __func__, 1);
331 if (!sa2)
332 return ERROR_INT("sa2 not defined", __func__, 1);
333
334 /* Join */
335 sa3 = sarrayCopy(sa1);
336 if (sarrayJoin(sa3, sa2) == 1) {
337 sarrayDestroy(&sa3);
338 return ERROR_INT("join failed for sa3", __func__, 1);
339 }
340
341 /* Eliminate duplicates */
342 sarrayRemoveDupsByAset(sa3, psad);
343 sarrayDestroy(&sa3);
344 return 0;
345}
346
347
368l_ok
370 SARRAY *sa2,
371 SARRAY **psad)
372{
373char *str;
374l_int32 n1, n2, i, n;
375l_uint64 hash;
376L_ASET *set1, *set2;
377RB_TYPE key;
378SARRAY *sa_small, *sa_big, *sad;
379
380 if (!psad)
381 return ERROR_INT("&sad not defined", __func__, 1);
382 *psad = NULL;
383 if (!sa1)
384 return ERROR_INT("sa1 not defined", __func__, 1);
385 if (!sa2)
386 return ERROR_INT("sa2 not defined", __func__, 1);
387
388 /* Put the elements of the biggest array into a set */
389 n1 = sarrayGetCount(sa1);
390 n2 = sarrayGetCount(sa2);
391 sa_small = (n1 < n2) ? sa1 : sa2; /* do not destroy sa_small */
392 sa_big = (n1 < n2) ? sa2 : sa1; /* do not destroy sa_big */
393 set1 = l_asetCreateFromSarray(sa_big);
394
395 /* Build up the intersection of strings */
396 sad = sarrayCreate(0);
397 *psad = sad;
398 n = sarrayGetCount(sa_small);
399 set2 = l_asetCreate(L_UINT_TYPE);
400 for (i = 0; i < n; i++) {
401 str = sarrayGetString(sa_small, i, L_NOCOPY);
402 l_hashStringToUint64Fast(str, &hash);
403 key.utype = hash;
404 if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
405 sarrayAddString(sad, str, L_COPY);
406 l_asetInsert(set2, key);
407 }
408 }
409
410 l_asetDestroy(&set1);
411 l_asetDestroy(&set2);
412 return 0;
413}
414
415
416/*----------------------------------------------------------------------*
417 * Hashmap operations *
418 *----------------------------------------------------------------------*/
425L_HASHMAP *
427{
428l_int32 i, n;
429l_uint64 key;
430char *str;
431L_HASHITEM *hitem;
432L_HASHMAP *hmap;
433
434 if (!sa)
435 return (L_HASHMAP *)ERROR_PTR("sa not defined", __func__, NULL);
436
437 n = sarrayGetCount(sa);
438 if ((hmap = l_hmapCreate(0.51 * n, 2)) == NULL)
439 return (L_HASHMAP *)ERROR_PTR("hmap not made", __func__, NULL);
440 for (i = 0; i < n; i++) {
441 str = sarrayGetString(sa, i, L_NOCOPY);
442 l_hashStringToUint64Fast(str, &key);
443 hitem = l_hmapLookup(hmap, key, i, L_HMAP_CREATE);
444 }
445 return hmap;
446}
447
448
457l_ok
459 SARRAY **psad,
460 L_HASHMAP **phmap)
461{
462l_int32 i, tabsize;
463char *str;
464SARRAY *sad;
465L_HASHITEM *hitem;
466L_HASHMAP *hmap;
467
468 if (phmap) *phmap = NULL;
469 if (!psad)
470 return ERROR_INT("&sad not defined", __func__, 1);
471 *psad = NULL;
472 if (!sas)
473 return ERROR_INT("sas not defined", __func__, 1);
474
475 /* Traverse the hashtable lists */
476 if ((hmap = l_hmapCreateFromSarray(sas)) == NULL)
477 return ERROR_INT("hmap not made", __func__, 1);
478 sad = sarrayCreate(0);
479 *psad = sad;
480 tabsize = hmap->tabsize;
481 for (i = 0; i < tabsize; i++) {
482 hitem = hmap->hashtab[i];
483 while (hitem) {
484 str = sarrayGetString(sas, hitem->val, L_COPY);
485 sarrayAddString(sad, str, L_INSERT);
486 hitem = hitem->next;
487 }
488 }
489
490 if (phmap)
491 *phmap = hmap;
492 else
493 l_hmapDestroy(&hmap);
494 return 0;
495}
496
497
506l_ok
508 SARRAY *sa2,
509 SARRAY **psad)
510{
511SARRAY *sa3;
512
513 if (!psad)
514 return ERROR_INT("&sad not defined", __func__, 1);
515 *psad = NULL;
516 if (!sa1)
517 return ERROR_INT("sa1 not defined", __func__, 1);
518 if (!sa2)
519 return ERROR_INT("sa2 not defined", __func__, 1);
520
521 sa3 = sarrayCopy(sa1);
522 if (sarrayJoin(sa3, sa2) == 1) {
523 sarrayDestroy(&sa3);
524 return ERROR_INT("sa3 join failed", __func__, 1);
525 }
526 sarrayRemoveDupsByHmap(sa3, psad, NULL);
527 sarrayDestroy(&sa3);
528 return 0;
529}
530
531
540l_ok
542 SARRAY *sa2,
543 SARRAY **psad)
544{
545l_int32 i, n1, n2, n;
546l_uint64 key;
547char *str;
548SARRAY *sa_small, *sa_big, *sa3, *sad;
549L_HASHITEM *hitem;
550L_HASHMAP *hmap;
551
552 if (!psad)
553 return ERROR_INT("&sad not defined", __func__, 1);
554 *psad = NULL;
555 if (!sa1)
556 return ERROR_INT("sa1 not defined", __func__, 1);
557 if (!sa2)
558 return ERROR_INT("sa2 not defined", __func__, 1);
559
560 /* Make a hashmap for the elements of the biggest array */
561 n1 = sarrayGetCount(sa1);
562 n2 = sarrayGetCount(sa2);
563 sa_small = (n1 < n2) ? sa1 : sa2; /* do not destroy sa_small */
564 sa_big = (n1 < n2) ? sa2 : sa1; /* do not destroy sa_big */
565 if ((hmap = l_hmapCreateFromSarray(sa_big)) == NULL)
566 return ERROR_INT("hmap not made", __func__, 1);
567
568 /* Remove duplicates from the smallest array. Alternatively,
569 * we can skip this step and avoid counting duplicates in
570 * sa_small by modifying the count fields in the sa_big hashitems;
571 * e.g., see l_hmapIntersectionDna(). */
572 sarrayRemoveDupsByHmap(sa_small, &sa3, NULL);
573
574 /* Go through sa3, the set of strings derived from the smallest array,
575 * hashing into the big array table. Any string found belongs to both,
576 * so add it to the output array. */
577 sad = sarrayCreate(0);
578 *psad = sad;
579 n = sarrayGetCount(sa3);
580 for (i = 0; i < n; i++) {
581 str = sarrayGetString(sa3, i, L_NOCOPY);
582 l_hashStringToUint64Fast(str, &key);
583 hitem = l_hmapLookup(hmap, key, i, L_HMAP_CHECK);
584 if (hitem)
585 sarrayAddString(sad, str, L_COPY);
586 }
587 l_hmapDestroy(&hmap);
588 sarrayDestroy(&sa3);
589 return 0;
590}
591
592
593/*----------------------------------------------------------------------*
594 * Miscellaneous operations *
595 *----------------------------------------------------------------------*/
602SARRAY *
604{
605char buf[32];
606l_int32 i;
607SARRAY *sa;
608
609 if ((sa = sarrayCreate(n)) == NULL)
610 return (SARRAY *)ERROR_PTR("sa not made", __func__, NULL);
611 for (i = 0; i < n; i++) {
612 snprintf(buf, sizeof(buf), "%d", i);
613 sarrayAddString(sa, buf, L_COPY);
614 }
615 return sa;
616}
617
618
641l_ok
643 const char *keystring,
644 char **pvalstring)
645{
646char *key, *val, *str;
647l_int32 i, n;
648SARRAY *sa1;
649
650 if (!pvalstring)
651 return ERROR_INT("&valstring not defined", __func__, 1);
652 *pvalstring = NULL;
653 if (!sa)
654 return ERROR_INT("sa not defined", __func__, 1);
655 if (!keystring)
656 return ERROR_INT("keystring not defined", __func__, 1);
657
658 n = sarrayGetCount(sa);
659 for (i = 0; i < n; i++) {
660 str = sarrayGetString(sa, i, L_NOCOPY);
661 sa1 = sarrayCreate(2);
662 sarraySplitString(sa1, str, ",");
663 if (sarrayGetCount(sa1) != 2) {
664 sarrayDestroy(&sa1);
665 continue;
666 }
667 key = sarrayGetString(sa1, 0, L_NOCOPY);
668 val = sarrayGetString(sa1, 1, L_NOCOPY);
669 if (!strcmp(key, keystring)) {
670 *pvalstring = stringNew(val);
671 sarrayDestroy(&sa1);
672 return 0;
673 }
674 sarrayDestroy(&sa1);
675 }
676
677 return 0;
678}
struct Numa NUMA
Definition array.h:66
struct Sarray SARRAY
Definition array.h:81
@ L_COPY
Definition pix.h:505
@ L_NOCOPY
Definition pix.h:503
@ L_INSERT
Definition pix.h:504
@ L_SORT_DECREASING
Definition pix.h:523
@ L_SORT_INCREASING
Definition pix.h:522
L_HASHMAP * l_hmapCreateFromSarray(SARRAY *sa)
l_hmapCreateFromSarray()
Definition sarray2.c:426
l_ok sarrayRemoveDupsByAset(SARRAY *sas, SARRAY **psad)
sarrayRemoveDupsByAset()
Definition sarray2.c:266
l_ok sarrayIntersectionByHmap(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayIntersectionByHmap()
Definition sarray2.c:541
SARRAY * sarraySortByIndex(SARRAY *sain, NUMA *naindex)
sarraySortByIndex()
Definition sarray2.c:146
l_ok sarrayUnionByAset(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayUnionByAset()
Definition sarray2.c:320
l_int32 stringCompareLexical(const char *str1, const char *str2)
stringCompareLexical()
Definition sarray2.c:184
l_ok sarrayLookupCSKV(SARRAY *sa, const char *keystring, char **pvalstring)
sarrayLookupCSKV()
Definition sarray2.c:642
SARRAY * sarrayGenerateIntegers(l_int32 n)
sarrayGenerateIntegers()
Definition sarray2.c:603
l_ok sarrayUnionByHmap(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayUnionByHmap()
Definition sarray2.c:507
l_ok sarrayIntersectionByAset(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayIntersectionByAset()
Definition sarray2.c:369
L_ASET * l_asetCreateFromSarray(SARRAY *sa)
l_asetCreateFromSarray()
Definition sarray2.c:224
l_ok sarrayRemoveDupsByHmap(SARRAY *sas, SARRAY **psad, L_HASHMAP **phmap)
sarrayRemoveDupsByHmap()
Definition sarray2.c:458
SARRAY * sarraySort(SARRAY *saout, SARRAY *sain, l_int32 sortorder)
sarraySort()
Definition sarray2.c:98
l_uint64 val
Definition hashmap.h:117
struct L_Hashitem * next
Definition hashmap.h:119
struct L_Hashitem ** hashtab
Definition hashmap.h:106
l_int32 tabsize
Definition hashmap.h:107
char ** array