Blender  V2.93
string_utf8.c
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or
3  * modify it under the terms of the GNU General Public License
4  * as published by the Free Software Foundation; either version 2
5  * of the License, or (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software Foundation,
14  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15  *
16  * The Original Code is Copyright (C) 2011 Blender Foundation.
17  * Code from gutf8.c Copyright (C) 1999 Tom Tromey
18  * Copyright (C) 2000 Red Hat, Inc.
19  * All rights reserved.
20  */
21 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <wchar.h>
30 #include <wctype.h>
31 #include <wcwidth.h>
32 
33 #include "BLI_utildefines.h"
34 
35 #include "BLI_string_utf8.h" /* own include */
36 #ifdef WIN32
37 # include "utfconv.h"
38 #endif
39 #ifdef __GNUC__
40 # pragma GCC diagnostic error "-Wsign-conversion"
41 #endif
42 
43 // #define DEBUG_STRSIZE
44 
/* Length in bytes of a utf8 sequence, indexed by the value of its first byte
 * (array copied from glib's gutf8.c).
 *
 * Note: the last two values (0xfe and 0xff) are forbidden in utf-8,
 * so they are considered 1 byte length too. */
static const size_t utf8_skip_data[256] = {
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xc0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xd0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xe0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1,
};
58 
59 /* from libswish3, originally called u8_isvalid(),
60  * modified to return the index of the bad character (byte index not utf).
61  * http://svn.swish-e.org/libswish3/trunk/src/libswish3/utf8.c r3044 - campbell */
62 
63 /* based on the valid_utf8 routine from the PCRE library by Philip Hazel
64  *
65  * length is in bytes, since without knowing whether the string is valid
66  * it's hard to know how many characters there are! */
67 
73 ptrdiff_t BLI_utf8_invalid_byte(const char *str, size_t length)
74 {
75  const unsigned char *p, *perr, *pend = (const unsigned char *)str + length;
76  unsigned char c;
77  int ab;
78 
79  for (p = (const unsigned char *)str; p < pend; p++, length--) {
80  c = *p;
81  perr = p; /* Erroneous char is always the first of an invalid utf8 sequence... */
82  if (ELEM(c, 0xfe, 0xff, 0x00)) {
83  /* Those three values are not allowed in utf8 string. */
84  goto utf8_error;
85  }
86  if (c < 128) {
87  continue;
88  }
89  if ((c & 0xc0) != 0xc0) {
90  goto utf8_error;
91  }
92 
93  /* Note that since we always increase p (and decrease length) by one byte in main loop,
94  * we only add/subtract extra utf8 bytes in code below
95  * (ab number, aka number of bytes remaining in the utf8 sequence after the initial one). */
96  ab = (int)utf8_skip_data[c] - 1;
97  if (length <= ab) {
98  goto utf8_error;
99  }
100 
101  /* Check top bits in the second byte */
102  p++;
103  length--;
104  if ((*p & 0xc0) != 0x80) {
105  goto utf8_error;
106  }
107 
108  /* Check for overlong sequences for each different length */
109  switch (ab) {
110  case 1:
111  /* Check for xx00 000x */
112  if ((c & 0x3e) == 0) {
113  goto utf8_error;
114  }
115  continue; /* We know there aren't any more bytes to check */
116 
117  case 2:
118  /* Check for 1110 0000, xx0x xxxx */
119  if (c == 0xe0 && (*p & 0x20) == 0) {
120  goto utf8_error;
121  }
122  /* Some special cases, see section 5 of utf-8 decoder stress-test by Markus Kuhn
123  * (https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt). */
124  /* From section 5.1 (and 5.2) */
125  if (c == 0xed) {
126  if (*p == 0xa0 && *(p + 1) == 0x80) {
127  goto utf8_error;
128  }
129  if (*p == 0xad && *(p + 1) == 0xbf) {
130  goto utf8_error;
131  }
132  if (*p == 0xae && *(p + 1) == 0x80) {
133  goto utf8_error;
134  }
135  if (*p == 0xaf && *(p + 1) == 0xbf) {
136  goto utf8_error;
137  }
138  if (*p == 0xb0 && *(p + 1) == 0x80) {
139  goto utf8_error;
140  }
141  if (*p == 0xbe && *(p + 1) == 0x80) {
142  goto utf8_error;
143  }
144  if (*p == 0xbf && *(p + 1) == 0xbf) {
145  goto utf8_error;
146  }
147  }
148  /* From section 5.3 */
149  if (c == 0xef) {
150  if (*p == 0xbf && *(p + 1) == 0xbe) {
151  goto utf8_error;
152  }
153  if (*p == 0xbf && *(p + 1) == 0xbf) {
154  goto utf8_error;
155  }
156  }
157  break;
158 
159  case 3:
160  /* Check for 1111 0000, xx00 xxxx */
161  if (c == 0xf0 && (*p & 0x30) == 0) {
162  goto utf8_error;
163  }
164  break;
165 
166  case 4:
167  /* Check for 1111 1000, xx00 0xxx */
168  if (c == 0xf8 && (*p & 0x38) == 0) {
169  goto utf8_error;
170  }
171  break;
172 
173  case 5:
174  /* Check for 1111 1100, xx00 00xx */
175  if (c == 0xfc && (*p & 0x3c) == 0) {
176  goto utf8_error;
177  }
178  break;
179  }
180 
181  /* Check for valid bytes after the 2nd, if any; all must start 10 */
182  while (--ab > 0) {
183  p++;
184  length--;
185  if ((*p & 0xc0) != 0x80) {
186  goto utf8_error;
187  }
188  }
189  }
190 
191  return -1;
192 
193 utf8_error:
194 
195  return ((const char *)perr - (const char *)str);
196 }
197 
/* Remove every byte that BLI_utf8_invalid_byte() reports as invalid from `str`, in place.
 *
 * `length` is the size of the string in bytes, `str[length]` must be the NUL terminator.
 * Returns the number of bytes that were stripped. */
int BLI_utf8_invalid_strip(char *str, size_t length)
{
  ptrdiff_t bad_char;
  int tot = 0;

  BLI_assert(str[length] == '\0');

  while ((bad_char = BLI_utf8_invalid_byte(str, length)) != -1) {
    /* Continue from the bad byte, everything before it has been validated. */
    str += bad_char;
    /* Number of bytes that follow the bad byte. */
    length -= (size_t)(bad_char + 1);

    if (length == 0) {
      /* last character bad, strip it */
      *str = '\0';
      tot++;
      break;
    }
    /* strip, keep looking */
    memmove(str, str + 1, length + 1); /* +1 for NULL char! */
    tot++;
  }

  return tot;
}
227 
/* Copy whole utf8 characters from `src` to `dst` while the next character needs
 * fewer than `maxncpy` bytes, then NUL terminate `dst`.
 *
 * `dst`, `src` and `maxncpy` must be modifiable lvalues: the pointers are advanced past
 * the copied bytes and `maxncpy` is reduced by the size of each copied character.
 *
 * - The lead byte is converted to `unsigned char` before indexing the skip table,
 *   a plain `char` may be signed and would index before the start of the table.
 * - A sequence that is cut short by the string terminator is copied up to (excluding)
 *   the terminator, so `src` is never read past its end.
 * - Other invalid utf8 sequences are not detected, they are copied as they are. */
#define BLI_STR_UTF8_CPY(dst, src, maxncpy) \
  do { \
    size_t utf8_size; \
    while (*(src) != '\0' && \
           (utf8_size = utf8_skip_data[(unsigned char)*(src)]) < (maxncpy)) { \
      (maxncpy) -= utf8_size; \
      do { \
        *(dst)++ = *(src)++; \
      } while (--utf8_size != 0 && *(src) != '\0'); \
    } \
    *(dst) = '\0'; \
  } while (0)
257 
/* Copy `src` into `dst` without splitting a multi-byte utf8 character,
 * `maxncpy` is the size of `dst` in bytes (terminator included, must not be zero).
 * Returns the start of `dst`. */
char *BLI_strncpy_utf8(char *__restrict dst, const char *__restrict src, size_t maxncpy)
{
  /* The copy macro advances `dst`, remember where the buffer starts. */
  char *const dst_start = dst;

  BLI_assert(maxncpy != 0);

#ifdef DEBUG_STRSIZE
  memset(dst, 0xff, sizeof(*dst) * maxncpy);
#endif

  /* Invalid utf8 characters are currently not dealt with. */
  BLI_STR_UTF8_CPY(dst, src, maxncpy);

  return dst_start;
}
273 
/* Same as BLI_strncpy_utf8() but returns the number of bytes written to `dst`
 * (the terminator is not counted). */
size_t BLI_strncpy_utf8_rlen(char *__restrict dst, const char *__restrict src, size_t maxncpy)
{
  /* The copy macro advances `dst`, remember where the buffer starts. */
  const char *const dst_start = dst;

  BLI_assert(maxncpy != 0);

#ifdef DEBUG_STRSIZE
  memset(dst, 0xff, sizeof(*dst) * maxncpy);
#endif

  /* Invalid utf8 characters are currently not dealt with. */
  BLI_STR_UTF8_CPY(dst, src, maxncpy);

  /* `dst` now points at the terminator that was just written. */
  return (size_t)(dst - dst_start);
}
289 
290 #undef BLI_STR_UTF8_CPY
291 
292 /* --------------------------------------------------------------------------*/
293 /* wchar_t / utf8 functions */
294 
295 size_t BLI_strncpy_wchar_as_utf8(char *__restrict dst,
296  const wchar_t *__restrict src,
297  const size_t maxncpy)
298 {
299  const size_t maxlen = maxncpy - 1;
300  /* 6 is max utf8 length of an unicode char. */
301  const int64_t maxlen_secured = (int64_t)maxlen - 6;
302  size_t len = 0;
303 
304  BLI_assert(maxncpy != 0);
305 
306 #ifdef DEBUG_STRSIZE
307  memset(dst, 0xff, sizeof(*dst) * maxncpy);
308 #endif
309 
310  while (*src && len <= maxlen_secured) {
311  len += BLI_str_utf8_from_unicode((uint)*src++, dst + len);
312  }
313 
314  /* We have to be more careful for the last six bytes,
315  * to avoid buffer overflow in case utf8-encoded char would be too long for our dst buffer. */
316  while (*src) {
317  char t[6];
318  size_t l = BLI_str_utf8_from_unicode((uint)*src++, t);
319  BLI_assert(l <= 6);
320  if (len + l > maxlen) {
321  break;
322  }
323  memcpy(dst + len, t, l);
324  len += l;
325  }
326 
327  dst[len] = '\0';
328 
329  return len;
330 }
331 
332 /* wchar len in utf8 */
333 size_t BLI_wstrlen_utf8(const wchar_t *src)
334 {
335  size_t len = 0;
336 
337  while (*src) {
338  len += BLI_str_utf8_from_unicode((uint)*src++, NULL);
339  }
340 
341  return len;
342 }
343 
/* Count the utf8 characters in `strc`, the length in bytes is written to `r_len_bytes`.
 * Bytes that are not a valid lead byte are counted as one character each. */
size_t BLI_strlen_utf8_ex(const char *strc, size_t *r_len_bytes)
{
  const char *cursor = strc;
  size_t count = 0;

  while (*cursor) {
    cursor += BLI_str_utf8_size_safe(cursor);
    count++;
  }

  *r_len_bytes = (size_t)(cursor - strc);
  return count;
}
356 
/* Count the utf8 characters in `strc`, see BLI_strlen_utf8_ex(). */
size_t BLI_strlen_utf8(const char *strc)
{
  size_t len_bytes_unused;
  const size_t len = BLI_strlen_utf8_ex(strc, &len_bytes_unused);
  return len;
}
362 
/* Count the utf8 characters in `strc` that fit entirely in the first `maxlen` bytes.
 * The number of bytes used by the counted characters is written to `r_len_bytes`. */
size_t BLI_strnlen_utf8_ex(const char *strc, const size_t maxlen, size_t *r_len_bytes)
{
  const char *const limit = strc + maxlen;
  const char *cursor = strc;
  size_t count = 0;

  for (;;) {
    size_t step;
    if (*cursor == '\0') {
      break;
    }
    step = (size_t)BLI_str_utf8_size_safe(cursor);
    /* Never count a character that would cross the byte limit. */
    if (cursor + step > limit) {
      break;
    }
    cursor += step;
    count++;
  }

  *r_len_bytes = (size_t)(cursor - strc);
  return count;
}
381 
/* Count the utf8 characters in the first `maxlen` bytes of `strc`,
 * see BLI_strnlen_utf8_ex(). */
size_t BLI_strnlen_utf8(const char *strc, const size_t maxlen)
{
  size_t len_bytes_unused;
  const size_t len = BLI_strnlen_utf8_ex(strc, maxlen, &len_bytes_unused);
  return len;
}
392 
/* Convert the utf8 string `src_c` into the wide string `dst_w`.
 * With WIN32 defined the conversion goes through conv_utf_8_to_16(),
 * otherwise `dst_w` is filled as utf32 by BLI_str_utf8_as_utf32(). */
size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w,
                                   const char *__restrict src_c,
                                   const size_t maxncpy)
{
  size_t len;

#ifdef WIN32
  len = conv_utf_8_to_16(src_c, dst_w, maxncpy);
#else
  len = BLI_str_utf8_as_utf32((char32_t *)dst_w, src_c, maxncpy);
#endif

  return len;
}
403 
404 /* end wchar_t / utf8 functions */
405 /* --------------------------------------------------------------------------*/
406 
407 /* count columns that character/string occupies, based on wcwidth.c */
408 
/* Column width of the code-point `ucs`, forwarded to mk_wcwidth(). */
int BLI_wcwidth(char32_t ucs)
{
  const int columns = mk_wcwidth(ucs);
  return columns;
}
413 
/* Column width of the utf32 string `pwcs` with size argument `n`,
 * forwarded to mk_wcswidth(). */
int BLI_wcswidth(const char32_t *pwcs, size_t n)
{
  const int columns = mk_wcswidth(pwcs, n);
  return columns;
}
418 
419 int BLI_str_utf8_char_width(const char *p)
420 {
421  uint unicode = BLI_str_utf8_as_unicode(p);
422  if (unicode == BLI_UTF8_ERR) {
423  return -1;
424  }
425 
426  return BLI_wcwidth((char32_t)unicode);
427 }
428 
430 {
431  int columns;
432 
433  uint unicode = BLI_str_utf8_as_unicode(p);
434  if (unicode == BLI_UTF8_ERR) {
435  return 1;
436  }
437 
438  columns = BLI_wcwidth((char32_t)unicode);
439 
440  return (columns < 0) ? 1 : columns;
441 }
442 
443 /* --------------------------------------------------------------------------*/
444 
445 /* copied from glib's gutf8.c, added 'Err' arg */
446 
447 /* note, glib uses uint for unicode, best we do the same,
448  * though we don't typedef it - campbell */
449 
/* Classify the lead byte `Char` of a utf8 sequence.
 *
 * `Len` is set to the length of the sequence in bytes and `Mask` to the bit-mask that
 * extracts the payload bits of the lead byte. When `Char` is not a valid lead byte,
 * `Len` is set to `Err` and `Mask` is left untouched.
 *
 * Wrapped in `do {} while (0)` so the macro behaves as a single statement,
 * also as the body of an unbraced `if`/`else`. */
#define UTF8_COMPUTE(Char, Mask, Len, Err) \
  do { \
    if ((Char) < 128) { \
      (Len) = 1; \
      (Mask) = 0x7f; \
    } \
    else if (((Char)&0xe0) == 0xc0) { \
      (Len) = 2; \
      (Mask) = 0x1f; \
    } \
    else if (((Char)&0xf0) == 0xe0) { \
      (Len) = 3; \
      (Mask) = 0x0f; \
    } \
    else if (((Char)&0xf8) == 0xf0) { \
      (Len) = 4; \
      (Mask) = 0x07; \
    } \
    else if (((Char)&0xfc) == 0xf8) { \
      (Len) = 5; \
      (Mask) = 0x03; \
    } \
    else if (((Char)&0xfe) == 0xfc) { \
      (Len) = 6; \
      (Mask) = 0x01; \
    } \
    else { \
      (Len) = (Err); /* -1 is the typical error value or 1 to skip */ \
    } \
  } while (0)
479 
/* Same as the glib define but with an added 'Err' argument.
 *
 * Decode the utf8 sequence of `Len` bytes at `Chars` into `Result`, `Mask` selects the
 * payload bits of the lead byte and `Count` is used as the loop counter.
 * `Result` is set to `Err` as soon as a byte after the first is not a continuation byte.
 *
 * Wrapped in `do {} while (0)` so the macro behaves as a single statement,
 * also as the body of an unbraced `if`/`else`. */
#define UTF8_GET(Result, Chars, Count, Mask, Len, Err) \
  do { \
    (Result) = (Chars)[0] & (Mask); \
    for ((Count) = 1; (Count) < (Len); ++(Count)) { \
      if (((Chars)[(Count)] & 0xc0) != 0x80) { \
        (Result) = (Err); \
        break; \
      } \
      (Result) <<= 6; \
      (Result) |= ((Chars)[(Count)] & 0x3f); \
    } \
  } while (0)
492 
/* Size in bytes of the utf8 character starting at `p`, based on its lead byte only.
 * Returns -1 when the byte at `p` can not start a utf8 sequence. */
int BLI_str_utf8_size(const char *p)
{
  const unsigned char lead = (unsigned char)*p;

  if (lead < 128) {
    return 1;
  }
  if ((lead & 0xe0) == 0xc0) {
    return 2;
  }
  if ((lead & 0xf0) == 0xe0) {
    return 3;
  }
  if ((lead & 0xf8) == 0xf0) {
    return 4;
  }
  if ((lead & 0xfc) == 0xf8) {
    return 5;
  }
  if ((lead & 0xfe) == 0xfc) {
    return 6;
  }
  /* Continuation byte, 0xfe or 0xff. */
  return -1;
}
506 
/* Size in bytes of the utf8 character starting at `p`, based on its lead byte only.
 * Use when errors should be skipped: a byte that can not start a utf8 sequence
 * counts as a single byte instead of being reported. */
int BLI_str_utf8_size_safe(const char *p)
{
  const unsigned char lead = (unsigned char)*p;

  if (lead < 128) {
    return 1;
  }
  if ((lead & 0xe0) == 0xc0) {
    return 2;
  }
  if ((lead & 0xf0) == 0xe0) {
    return 3;
  }
  if ((lead & 0xf8) == 0xf0) {
    return 4;
  }
  if ((lead & 0xfc) == 0xf8) {
    return 5;
  }
  if ((lead & 0xfe) == 0xfc) {
    return 6;
  }
  /* Continuation byte, 0xfe or 0xff: step over it. */
  return 1;
}
519 
520 /* was g_utf8_get_char */
534 {
535  int i, len;
536  uint mask = 0;
537  uint result;
538  const unsigned char c = (unsigned char)*p;
539 
540  UTF8_COMPUTE(c, mask, len, -1);
541  if (UNLIKELY(len == -1)) {
542  return BLI_UTF8_ERR;
543  }
544  UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
545 
546  return result;
547 }
548 
549 /* variant that increments the length */
550 uint BLI_str_utf8_as_unicode_and_size(const char *__restrict p, size_t *__restrict index)
551 {
552  int i, len;
553  uint mask = 0;
554  uint result;
555  const unsigned char c = (unsigned char)*p;
556 
557  UTF8_COMPUTE(c, mask, len, -1);
558  if (UNLIKELY(len == -1)) {
559  return BLI_UTF8_ERR;
560  }
561  UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
562  *index += (size_t)len;
563  return result;
564 }
565 
566 uint BLI_str_utf8_as_unicode_and_size_safe(const char *__restrict p, size_t *__restrict index)
567 {
568  int i, len;
569  uint mask = 0;
570  uint result;
571  const unsigned char c = (unsigned char)*p;
572 
573  UTF8_COMPUTE(c, mask, len, -1);
574  if (UNLIKELY(len == -1)) {
575  *index += 1;
576  return c;
577  }
578  UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
579  *index += (size_t)len;
580  return result;
581 }
582 
583 /* another variant that steps over the index,
584  * note, currently this also falls back to latin1 for text drawing. */
585 uint BLI_str_utf8_as_unicode_step(const char *__restrict p, size_t *__restrict index)
586 {
587  int i, len;
588  uint mask = 0;
589  uint result;
590  unsigned char c;
591 
592  p += *index;
593  c = (unsigned char)*p;
594 
595  UTF8_COMPUTE(c, mask, len, -1);
596  if (UNLIKELY(len == -1)) {
597  /* when called with NULL end, result will never be NULL,
598  * checks for a NULL character */
599  const char *p_next = BLI_str_find_next_char_utf8(p, NULL);
600  /* will never return the same pointer unless '\0',
601  * eternal loop is prevented */
602  *index += (size_t)(p_next - p);
603  return BLI_UTF8_ERR;
604  }
605 
606  /* this is tricky since there are a few ways we can bail out of bad unicode
607  * values, 3 possible solutions. */
608 #if 0
609  UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
610 #elif 1
611  /* WARNING: this is NOT part of glib, or supported by similar functions.
612  * this is added for text drawing because some filepaths can have latin1
613  * characters */
614  UTF8_GET(result, p, i, mask, len, BLI_UTF8_ERR);
615  if (result == BLI_UTF8_ERR) {
616  len = 1;
617  result = *p;
618  }
619  /* end warning! */
620 #else
621  /* without a fallback like '?', text drawing will stop on this value */
622  UTF8_GET(result, p, i, mask, len, '?');
623 #endif
624 
625  *index += (size_t)len;
626  return result;
627 }
628 
629 /* was g_unichar_to_utf8 */
641 size_t BLI_str_utf8_from_unicode(uint c, char *outbuf)
642 {
643  /* If this gets modified, also update the copy in g_string_insert_unichar() */
644  uint len = 0;
645  uint first;
646  uint i;
647 
648  if (c < 0x80) {
649  first = 0;
650  len = 1;
651  }
652  else if (c < 0x800) {
653  first = 0xc0;
654  len = 2;
655  }
656  else if (c < 0x10000) {
657  first = 0xe0;
658  len = 3;
659  }
660  else if (c < 0x200000) {
661  first = 0xf0;
662  len = 4;
663  }
664  else if (c < 0x4000000) {
665  first = 0xf8;
666  len = 5;
667  }
668  else {
669  first = 0xfc;
670  len = 6;
671  }
672 
673  if (outbuf) {
674  for (i = len - 1; i > 0; i--) {
675  outbuf[i] = (c & 0x3f) | 0x80;
676  c >>= 6;
677  }
678  outbuf[0] = c | first;
679  }
680 
681  return len;
682 }
683 
684 size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w,
685  const char *__restrict src_c,
686  const size_t maxncpy)
687 {
688  const size_t maxlen = maxncpy - 1;
689  size_t len = 0;
690 
691  BLI_assert(maxncpy != 0);
692 
693 #ifdef DEBUG_STRSIZE
694  memset(dst_w, 0xff, sizeof(*dst_w) * maxncpy);
695 #endif
696 
697  while (*src_c && len != maxlen) {
698  size_t step = 0;
699  uint unicode = BLI_str_utf8_as_unicode_and_size(src_c, &step);
700  if (unicode != BLI_UTF8_ERR) {
701  *dst_w = unicode;
702  src_c += step;
703  }
704  else {
705  *dst_w = '?';
706  src_c = BLI_str_find_next_char_utf8(src_c, NULL);
707  }
708  dst_w++;
709  len++;
710  }
711 
712  *dst_w = 0;
713 
714  return len;
715 }
716 
717 size_t BLI_str_utf32_as_utf8(char *__restrict dst,
718  const char32_t *__restrict src,
719  const size_t maxncpy)
720 {
721  const size_t maxlen = maxncpy - 1;
722  /* 6 is max utf8 length of an unicode char. */
723  const int64_t maxlen_secured = (int64_t)maxlen - 6;
724  size_t len = 0;
725 
726  BLI_assert(maxncpy != 0);
727 
728 #ifdef DEBUG_STRSIZE
729  memset(dst, 0xff, sizeof(*dst) * maxncpy);
730 #endif
731 
732  while (*src && len <= maxlen_secured) {
733  len += BLI_str_utf8_from_unicode((uint)*src++, dst + len);
734  }
735 
736  /* We have to be more careful for the last six bytes,
737  * to avoid buffer overflow in case utf8-encoded char would be too long for our dst buffer. */
738  while (*src) {
739  char t[6];
740  size_t l = BLI_str_utf8_from_unicode((uint)*src++, t);
741  BLI_assert(l <= 6);
742  if (len + l > maxlen) {
743  break;
744  }
745  memcpy(dst + len, t, l);
746  len += l;
747  }
748 
749  dst[len] = '\0';
750 
751  return len;
752 }
753 
754 /* utf32 len in utf8 */
755 size_t BLI_str_utf32_as_utf8_len(const char32_t *src)
756 {
757  size_t len = 0;
758 
759  while (*src) {
760  len += BLI_str_utf8_from_unicode((uint)*src++, NULL);
761  }
762 
763  return len;
764 }
765 
/* was g_utf8_find_prev_char */
/* Search backwards from `p` (exclusive) for the closest byte that is not a utf8
 * continuation byte, never looking before `str`.
 * Returns a pointer to that byte, or NULL when there is none in `[str, p)`. */
char *BLI_str_find_prev_char_utf8(const char *str, const char *p)
{
  /* Compare before stepping back, so no pointer before the start of `str` is formed. */
  while (p > str) {
    p--;
    if ((*p & 0xc0) != 0x80) {
      return (char *)p;
    }
  }
  return NULL;
}
791 
/* was g_utf8_find_next_char */
/* Step from `p` to the next byte that is not a utf8 continuation byte.
 * With a non-NULL `end` the search stops at `end` and NULL is returned when `end` is
 * reached. With a NULL `end` the string must be NUL terminated and the result is never
 * NULL. When `p` already points at a NUL byte it is not advanced. */
char *BLI_str_find_next_char_utf8(const char *p, const char *end)
{
  if (*p != '\0') {
    p++;
    while ((end == NULL || p < end) && (*p & 0xc0) == 0x80) {
      p++;
    }
  }
  return (p == end) ? NULL : (char *)p;
}
823 
/* was g_utf8_prev_char */
/* Step back from `p` to the closest previous byte that is not a utf8 continuation byte.
 * No bounds are checked: such a byte must exist before `p`. */
char *BLI_str_prev_char_utf8(const char *p)
{
  do {
    p--;
  } while ((*p & 0xc0) == 0x80);

  return (char *)p;
}
847 /* end glib copy */
848 
849 size_t BLI_str_partition_utf8(const char *str,
850  const uint delim[],
851  const char **sep,
852  const char **suf)
853 {
854  return BLI_str_partition_ex_utf8(str, NULL, delim, sep, suf, false);
855 }
856 
857 size_t BLI_str_rpartition_utf8(const char *str,
858  const uint delim[],
859  const char **sep,
860  const char **suf)
861 {
862  return BLI_str_partition_ex_utf8(str, NULL, delim, sep, suf, true);
863 }
864 
865 size_t BLI_str_partition_ex_utf8(const char *str,
866  const char *end,
867  const uint delim[],
868  const char **sep,
869  const char **suf,
870  const bool from_right)
871 {
872  const uint *d;
873  const size_t str_len = end ? (size_t)(end - str) : strlen(str);
874  size_t index;
875 
876  /* Note that here, we assume end points to a valid utf8 char! */
877  BLI_assert(end == NULL || (end >= str && (BLI_str_utf8_as_unicode(end) != BLI_UTF8_ERR)));
878 
879  *suf = (char *)(str + str_len);
880 
881  for (*sep = (char *)(from_right ? BLI_str_find_prev_char_utf8(str, str + str_len) : str),
882  index = 0;
883  *sep >= str && (!end || *sep < end) && **sep != '\0';
884  *sep = (char *)(from_right ? BLI_str_find_prev_char_utf8(str, *sep) : str + index)) {
885  const uint c = BLI_str_utf8_as_unicode_and_size(*sep, &index);
886 
887  if (c == BLI_UTF8_ERR) {
888  *suf = *sep = NULL;
889  break;
890  }
891 
892  for (d = delim; *d != '\0'; d++) {
893  if (*d == c) {
894  /* *suf is already correct in case from_right is true. */
895  if (!from_right) {
896  *suf = (char *)(str + index);
897  }
898  return (size_t)(*sep - str);
899  }
900  }
901 
902  *suf = *sep; /* Useful in 'from_right' case! */
903  }
904 
905  *suf = *sep = NULL;
906  return str_len;
907 }
908 
909 /* -------------------------------------------------------------------- */
/* Convert the byte `offset` into `str` to a character index.
 * `offset` is expected to be on a character boundary of valid utf8 text,
 * the loop only ends when the byte position lands exactly on it. */
int BLI_str_utf8_offset_to_index(const char *str, int offset)
{
  int index = 0, pos = 0;
  while (pos != offset) {
    /* Step over one whole character, without this the loop can never end. */
    pos += BLI_str_utf8_size(str + pos);
    index++;
  }
  return index;
}
922 
/* Convert the character `index` into `str` to a byte offset,
 * by stepping over `index` characters from the start. */
int BLI_str_utf8_offset_from_index(const char *str, int index)
{
  int offset = 0;

  for (int pos = 0; pos != index; pos++) {
    offset += BLI_str_utf8_size(str + offset);
  }

  return offset;
}
932 
/* Convert the byte `offset` into `str` to a column:
 * the summed display width of all characters that start before `offset`. */
int BLI_str_utf8_offset_to_column(const char *str, int offset)
{
  int column = 0, pos = 0;
  while (pos < offset) {
    column += BLI_str_utf8_char_width_safe(str + pos);
    /* Step over the character that was just measured, without this the loop never ends.
     * The safe variant always advances by at least one byte. */
    pos += BLI_str_utf8_size_safe(str + pos);
  }
  return column;
}
942 
/* Convert `column` to a byte offset into `str`: the offset just past the last character
 * that still ends at or before `column`. Stops early at the end of the string. */
int BLI_str_utf8_offset_from_column(const char *str, int column)
{
  int offset = 0, pos = 0;
  while (*(str + offset) && pos < column) {
    /* Width of the character at the current offset (must be set before it is tested). */
    const int col = BLI_str_utf8_char_width_safe(str + offset);
    if (pos + col > column) {
      /* The character would extend past the requested column. */
      break;
    }
    offset += BLI_str_utf8_size_safe(str + offset);
    pos += col;
  }
  return offset;
}
956 
#define BLI_assert(a)
Definition: BLI_assert.h:58
#define BLI_UTF8_ERR
unsigned int uint
Definition: BLI_sys_types.h:83
#define UNLIKELY(x)
#define ELEM(...)
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET 
_GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble t
ATTR_WARN_UNUSED_RESULT const BMLoop * l
SIMD_FORCE_INLINE btScalar length(const btQuaternion &q)
Return the length of a quaternion.
Definition: btQuaternion.h:895
#define str(s)
uint pos
uint col
static unsigned c
Definition: RandGen.cpp:97
__int64 int64_t
Definition: stdint.h:92
#define UTF8_COMPUTE(Char, Mask, Len, Err)
Definition: string_utf8.c:450
int BLI_wcwidth(char32_t ucs)
Definition: string_utf8.c:409
char * BLI_strncpy_utf8(char *__restrict dst, const char *__restrict src, size_t maxncpy)
Definition: string_utf8.c:258
size_t BLI_strncpy_utf8_rlen(char *__restrict dst, const char *__restrict src, size_t maxncpy)
Definition: string_utf8.c:274
size_t BLI_strlen_utf8_ex(const char *strc, size_t *r_len_bytes)
Definition: string_utf8.c:344
size_t BLI_strnlen_utf8_ex(const char *strc, const size_t maxlen, size_t *r_len_bytes)
Definition: string_utf8.c:363
size_t BLI_str_partition_ex_utf8(const char *str, const char *end, const uint delim[], const char **sep, const char **suf, const bool from_right)
Definition: string_utf8.c:865
size_t BLI_strnlen_utf8(const char *strc, const size_t maxlen)
Definition: string_utf8.c:387
ptrdiff_t BLI_utf8_invalid_byte(const char *str, size_t length)
Definition: string_utf8.c:73
size_t BLI_str_utf32_as_utf8(char *__restrict dst, const char32_t *__restrict src, const size_t maxncpy)
Definition: string_utf8.c:717
int BLI_str_utf8_offset_from_column(const char *str, int column)
Definition: string_utf8.c:943
size_t BLI_strlen_utf8(const char *strc)
Definition: string_utf8.c:357
size_t BLI_str_utf32_as_utf8_len(const char32_t *src)
Definition: string_utf8.c:755
#define BLI_STR_UTF8_CPY(dst, src, maxncpy)
Definition: string_utf8.c:229
size_t BLI_str_utf8_from_unicode(uint c, char *outbuf)
Definition: string_utf8.c:641
static const size_t utf8_skip_data[256]
Definition: string_utf8.c:48
size_t BLI_strncpy_wchar_as_utf8(char *__restrict dst, const wchar_t *__restrict src, const size_t maxncpy)
Definition: string_utf8.c:295
int BLI_str_utf8_offset_to_index(const char *str, int offset)
Definition: string_utf8.c:913
size_t BLI_str_partition_utf8(const char *str, const uint delim[], const char **sep, const char **suf)
Definition: string_utf8.c:849
size_t BLI_wstrlen_utf8(const wchar_t *src)
Definition: string_utf8.c:333
int BLI_str_utf8_char_width(const char *p)
Definition: string_utf8.c:419
size_t BLI_str_rpartition_utf8(const char *str, const uint delim[], const char **sep, const char **suf)
Definition: string_utf8.c:857
uint BLI_str_utf8_as_unicode_and_size_safe(const char *__restrict p, size_t *__restrict index)
Definition: string_utf8.c:566
int BLI_utf8_invalid_strip(char *str, size_t length)
Definition: string_utf8.c:203
#define UTF8_GET(Result, Chars, Count, Mask, Len, Err)
Definition: string_utf8.c:481
uint BLI_str_utf8_as_unicode_step(const char *__restrict p, size_t *__restrict index)
Definition: string_utf8.c:585
int BLI_wcswidth(const char32_t *pwcs, size_t n)
Definition: string_utf8.c:414
int BLI_str_utf8_size(const char *p)
Definition: string_utf8.c:495
uint BLI_str_utf8_as_unicode(const char *p)
Definition: string_utf8.c:533
int BLI_str_utf8_size_safe(const char *p)
Definition: string_utf8.c:508
char * BLI_str_find_next_char_utf8(const char *p, const char *end)
Definition: string_utf8.c:807
int BLI_str_utf8_offset_to_column(const char *str, int offset)
Definition: string_utf8.c:933
uint BLI_str_utf8_as_unicode_and_size(const char *__restrict p, size_t *__restrict index)
Definition: string_utf8.c:550
int BLI_str_utf8_char_width_safe(const char *p)
Definition: string_utf8.c:429
size_t BLI_str_utf8_as_utf32(char32_t *__restrict dst_w, const char *__restrict src_c, const size_t maxncpy)
Definition: string_utf8.c:684
char * BLI_str_find_prev_char_utf8(const char *str, const char *p)
Definition: string_utf8.c:782
size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w, const char *__restrict src_c, const size_t maxncpy)
Definition: string_utf8.c:393
char * BLI_str_prev_char_utf8(const char *p)
Definition: string_utf8.c:838
int BLI_str_utf8_offset_from_index(const char *str, int index)
Definition: string_utf8.c:923
int conv_utf_8_to_16(const char *in8, wchar_t *out16, size_t size16)
Definition: utfconv.c:189
ccl_device_inline float4 mask(const int4 &mask, const float4 &a)
uint len