Blender  V2.93
Macros | Functions | Variables
string_utf8.c File Reference
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
#include <wcwidth.h>
#include "BLI_utildefines.h"
#include "BLI_string_utf8.h"

Go to the source code of this file.

Macros

#define BLI_STR_UTF8_CPY(dst, src, maxncpy)
 
#define UTF8_COMPUTE(Char, Mask, Len, Err)
 
#define UTF8_GET(Result, Chars, Count, Mask, Len, Err)
 

Functions

ptrdiff_t BLI_utf8_invalid_byte (const char *str, size_t length)
 
int BLI_utf8_invalid_strip (char *str, size_t length)
 
char * BLI_strncpy_utf8 (char *__restrict dst, const char *__restrict src, size_t maxncpy)
 
size_t BLI_strncpy_utf8_rlen (char *__restrict dst, const char *__restrict src, size_t maxncpy)
 
size_t BLI_strncpy_wchar_as_utf8 (char *__restrict dst, const wchar_t *__restrict src, const size_t maxncpy)
 
size_t BLI_wstrlen_utf8 (const wchar_t *src)
 
size_t BLI_strlen_utf8_ex (const char *strc, size_t *r_len_bytes)
 
size_t BLI_strlen_utf8 (const char *strc)
 
size_t BLI_strnlen_utf8_ex (const char *strc, const size_t maxlen, size_t *r_len_bytes)
 
size_t BLI_strnlen_utf8 (const char *strc, const size_t maxlen)
 
size_t BLI_strncpy_wchar_from_utf8 (wchar_t *__restrict dst_w, const char *__restrict src_c, const size_t maxncpy)
 
int BLI_wcwidth (char32_t ucs)
 
int BLI_wcswidth (const char32_t *pwcs, size_t n)
 
int BLI_str_utf8_char_width (const char *p)
 
int BLI_str_utf8_char_width_safe (const char *p)
 
int BLI_str_utf8_size (const char *p)
 
int BLI_str_utf8_size_safe (const char *p)
 
uint BLI_str_utf8_as_unicode (const char *p)
 
uint BLI_str_utf8_as_unicode_and_size (const char *__restrict p, size_t *__restrict index)
 
uint BLI_str_utf8_as_unicode_and_size_safe (const char *__restrict p, size_t *__restrict index)
 
uint BLI_str_utf8_as_unicode_step (const char *__restrict p, size_t *__restrict index)
 
size_t BLI_str_utf8_from_unicode (uint c, char *outbuf)
 
size_t BLI_str_utf8_as_utf32 (char32_t *__restrict dst_w, const char *__restrict src_c, const size_t maxncpy)
 
size_t BLI_str_utf32_as_utf8 (char *__restrict dst, const char32_t *__restrict src, const size_t maxncpy)
 
size_t BLI_str_utf32_as_utf8_len (const char32_t *src)
 
char * BLI_str_find_prev_char_utf8 (const char *str, const char *p)
 
char * BLI_str_find_next_char_utf8 (const char *p, const char *end)
 
char * BLI_str_prev_char_utf8 (const char *p)
 
size_t BLI_str_partition_utf8 (const char *str, const uint delim[], const char **sep, const char **suf)
 
size_t BLI_str_rpartition_utf8 (const char *str, const uint delim[], const char **sep, const char **suf)
 
size_t BLI_str_partition_ex_utf8 (const char *str, const char *end, const uint delim[], const char **sep, const char **suf, const bool from_right)
 
Offset Conversion in Strings
int BLI_str_utf8_offset_to_index (const char *str, int offset)
 
int BLI_str_utf8_offset_from_index (const char *str, int index)
 
int BLI_str_utf8_offset_to_column (const char *str, int offset)
 
int BLI_str_utf8_offset_from_column (const char *str, int column)
 

Variables

static const size_t utf8_skip_data [256]
 

Macro Definition Documentation

◆ BLI_STR_UTF8_CPY

#define BLI_STR_UTF8_CPY (   dst,
  src,
  maxncpy 
)
Value:
{ \
size_t utf8_size; \
while (*src != '\0' && (utf8_size = utf8_skip_data[*src]) < maxncpy) { \
maxncpy -= utf8_size; \
switch (utf8_size) { \
case 6: \
*dst++ = *src++; \
ATTR_FALLTHROUGH; \
case 5: \
*dst++ = *src++; \
ATTR_FALLTHROUGH; \
case 4: \
*dst++ = *src++; \
ATTR_FALLTHROUGH; \
case 3: \
*dst++ = *src++; \
ATTR_FALLTHROUGH; \
case 2: \
*dst++ = *src++; \
ATTR_FALLTHROUGH; \
case 1: \
*dst++ = *src++; \
} \
} \
*dst = '\0'; \
} \
(void)0
static const size_t utf8_skip_data[256]
Definition: string_utf8.c:48

Compatible with BLI_strncpy, but ensures that no partial UTF-8 characters are copied.

Definition at line 229 of file string_utf8.c.

◆ UTF8_COMPUTE

#define UTF8_COMPUTE (   Char,
  Mask,
  Len,
  Err 
)
Value:
if (Char < 128) { \
Len = 1; \
Mask = 0x7f; \
} \
else if ((Char & 0xe0) == 0xc0) { \
Len = 2; \
Mask = 0x1f; \
} \
else if ((Char & 0xf0) == 0xe0) { \
Len = 3; \
Mask = 0x0f; \
} \
else if ((Char & 0xf8) == 0xf0) { \
Len = 4; \
Mask = 0x07; \
} \
else if ((Char & 0xfc) == 0xf8) { \
Len = 5; \
Mask = 0x03; \
} \
else if ((Char & 0xfe) == 0xfc) { \
Len = 6; \
Mask = 0x01; \
} \
else { \
Len = Err; /* -1 is the typical error value or 1 to skip */ \
} \
(void)0

Definition at line 450 of file string_utf8.c.

◆ UTF8_GET

#define UTF8_GET (   Result,
  Chars,
  Count,
  Mask,
  Len,
  Err 
)
Value:
(Result) = (Chars)[0] & (Mask); \
for ((Count) = 1; (Count) < (Len); ++(Count)) { \
if (((Chars)[(Count)] & 0xc0) != 0x80) { \
(Result) = Err; \
break; \
} \
(Result) <<= 6; \
(Result) |= ((Chars)[(Count)] & 0x3f); \
} \
(void)0
struct Mask Mask

Definition at line 481 of file string_utf8.c.

Function Documentation

◆ BLI_str_find_next_char_utf8()

char* BLI_str_find_next_char_utf8 ( const char *  p,
const char *  end 
)

BLI_str_find_next_char_utf8:

Parameters
p: a pointer to a position within a UTF-8 encoded string.
end: a pointer to the byte following the end of the string, or NULL to indicate that the string is nul-terminated.

Finds the start of the next UTF-8 character in the string after p

p does not have to be at the beginning of a UTF-8 character. No check is made to see if the character found is actually valid other than it starts with an appropriate byte.

Return value: a pointer to the found character or NULL

Definition at line 807 of file string_utf8.c.

References NULL.

Referenced by BLI_str_cursor_step_next_utf8(), BLI_str_utf8_as_unicode_step(), BLI_str_utf8_as_utf32(), ui_text_clip_give_next_off(), and ui_text_position_from_hidden().

◆ BLI_str_find_prev_char_utf8()

char* BLI_str_find_prev_char_utf8 ( const char *  str,
const char *  p 
)

BLI_str_find_prev_char_utf8:

Parameters
str: pointer to the beginning of a UTF-8 encoded string.
p: pointer to some position within str.

Given a position p within a UTF-8 encoded string str, find the start of the previous UTF-8 character starting before p. Returns NULL if no UTF-8 characters are present in str before p.

p does not have to be at the beginning of a UTF-8 character. No check is made to see if the character found is actually valid other than it starts with an appropriate byte.

Return value: a pointer to the found character or NULL.

Definition at line 782 of file string_utf8.c.

References NULL, and str.

Referenced by blf_font_width_to_rstrlen(), BLI_str_cursor_step_prev_utf8(), BLI_str_partition_ex_utf8(), ui_text_clip_cursor(), ui_text_clip_give_prev_off(), and ui_text_clip_right_label().

◆ BLI_str_partition_ex_utf8()

size_t BLI_str_partition_ex_utf8 ( const char *  str,
const char *  end,
const uint  delim[],
const char **  sep,
const char **  suf,
const bool  from_right 
)

◆ BLI_str_partition_utf8()

size_t BLI_str_partition_utf8 ( const char *  str,
const uint  delim[],
const char **  sep,
const char **  suf 
)

Definition at line 849 of file string_utf8.c.

References BLI_str_partition_ex_utf8(), NULL, and str.

Referenced by TEST().

◆ BLI_str_prev_char_utf8()

char* BLI_str_prev_char_utf8 ( const char *  p)

BLI_str_prev_char_utf8:

Parameters
p: a pointer to a position within a UTF-8 encoded string.

Finds the previous UTF-8 character in the string before p

p does not have to be at the beginning of a UTF-8 character. No check is made to see if the character found is actually valid other than that it starts with an appropriate byte. If p might be the first character of the string, you must use BLI_str_find_prev_char_utf8() (the counterpart of GLib's g_utf8_find_prev_char()) instead.

Return value: a pointer to the found character.

Definition at line 838 of file string_utf8.c.

Referenced by txt_backspace_char(), txt_move_left(), txt_wrap_move_eol(), and unit_find_str().

◆ BLI_str_rpartition_utf8()

size_t BLI_str_rpartition_utf8 ( const char *  str,
const uint  delim[],
const char **  sep,
const char **  suf 
)

Definition at line 857 of file string_utf8.c.

References BLI_str_partition_ex_utf8(), NULL, and str.

Referenced by TEST().

◆ BLI_str_utf32_as_utf8()

size_t BLI_str_utf32_as_utf8 ( char *__restrict  dst,
const char32_t *__restrict  src,
const size_t  maxncpy 
)

◆ BLI_str_utf32_as_utf8_len()

size_t BLI_str_utf32_as_utf8_len ( const char32_t *  src)

Definition at line 755 of file string_utf8.c.

References BLI_str_utf8_from_unicode(), len, and NULL.

Referenced by BKE_vfont_clipboard_set(), and ED_curve_editfont_load().

◆ BLI_str_utf8_as_unicode()

uint BLI_str_utf8_as_unicode ( const char *  p)

BLI_str_utf8_as_unicode:

Parameters
p: a pointer to a Unicode character encoded as UTF-8.

Converts a sequence of bytes encoded as UTF-8 to a Unicode character. If p does not point to a valid UTF-8 encoded character, results are undefined. If you are not sure that the bytes are complete valid Unicode characters, you should use g_utf8_get_char_validated() instead.

Return value: the resulting character

Definition at line 533 of file string_utf8.c.

References BLI_UTF8_ERR, Freestyle::c, len, mask(), result, UNLIKELY, UTF8_COMPUTE, and UTF8_GET.

Referenced by BLI_str_partition_ex_utf8(), BLI_str_utf8_char_width(), BLI_str_utf8_char_width_safe(), cursor_delim_type_utf8(), blender::string_search::extract_normalized_words(), blender::string_search::get_fuzzy_match_errors(), insert_text_invoke(), key_event_glyph_or_text(), and text_autocomplete_build().

◆ BLI_str_utf8_as_unicode_and_size()

uint BLI_str_utf8_as_unicode_and_size ( const char *__restrict  p,
size_t *__restrict  index 
)

◆ BLI_str_utf8_as_unicode_and_size_safe()

uint BLI_str_utf8_as_unicode_and_size_safe ( const char *__restrict  p,
size_t *__restrict  index 
)

Definition at line 566 of file string_utf8.c.

References BLI_UTF8_ERR, Freestyle::c, len, mask(), result, UNLIKELY, UTF8_COMPUTE, and UTF8_GET.

Referenced by text_autocomplete_build().

◆ BLI_str_utf8_as_unicode_step()

uint BLI_str_utf8_as_unicode_step ( const char *__restrict  p,
size_t *__restrict  index 
)

◆ BLI_str_utf8_as_utf32()

size_t BLI_str_utf8_as_utf32 ( char32_t *__restrict  dst_w,
const char *__restrict  src_c,
const size_t  maxncpy 
)

◆ BLI_str_utf8_char_width()

int BLI_str_utf8_char_width ( const char *  p)

Definition at line 419 of file string_utf8.c.

References BLI_str_utf8_as_unicode(), BLI_UTF8_ERR, and BLI_wcwidth().

◆ BLI_str_utf8_char_width_safe()

int BLI_str_utf8_char_width_safe ( const char *  p)

◆ BLI_str_utf8_from_unicode()

size_t BLI_str_utf8_from_unicode ( uint  c,
char *  outbuf 
)

BLI_str_utf8_from_unicode:

Parameters
c: a Unicode character code.
outbuf: output buffer, must have at least 6 bytes of space. If NULL, the length will be computed and returned and nothing will be written to outbuf.

Converts a single character to UTF-8.

Returns
number of bytes written

Definition at line 641 of file string_utf8.c.

References Freestyle::c, and len.

Referenced by BLI_str_utf32_as_utf8(), BLI_str_utf32_as_utf8_len(), BLI_strncpy_wchar_as_utf8(), BLI_wstrlen_utf8(), console_insert_invoke(), find_family_object(), text_insert_invoke(), txt_add_char_intern(), txt_extended_ascii_as_utf8(), and txt_replace_char().

◆ BLI_str_utf8_offset_from_column()

int BLI_str_utf8_offset_from_column ( const char *  str,
int  column 
)

◆ BLI_str_utf8_offset_from_index()

int BLI_str_utf8_offset_from_index ( const char *  str,
int  index 
)

Definition at line 923 of file string_utf8.c.

References BLI_str_utf8_size(), pos, and str.

Referenced by txt_sel_set().

◆ BLI_str_utf8_offset_to_column()

int BLI_str_utf8_offset_to_column ( const char *  str,
int  offset 
)

◆ BLI_str_utf8_offset_to_index()

int BLI_str_utf8_offset_to_index ( const char *  str,
int  offset 
)

Definition at line 913 of file string_utf8.c.

References BLI_str_utf8_size(), pos, and str.

◆ BLI_str_utf8_size()

int BLI_str_utf8_size ( const char *  p)

◆ BLI_str_utf8_size_safe()

int BLI_str_utf8_size_safe ( const char *  p)

◆ BLI_strlen_utf8()

size_t BLI_strlen_utf8 ( const char *  strc)

◆ BLI_strlen_utf8_ex()

size_t BLI_strlen_utf8_ex ( const char *  strc,
size_t *  r_len_bytes 
)

Definition at line 344 of file string_utf8.c.

References BLI_str_utf8_size_safe(), and len.

Referenced by BLI_strlen_utf8(), and txt_add_object().

◆ BLI_strncpy_utf8()

char* BLI_strncpy_utf8 ( char *__restrict  dst,
const char *__restrict  src,
size_t  maxncpy 
)

◆ BLI_strncpy_utf8_rlen()

size_t BLI_strncpy_utf8_rlen ( char *__restrict  dst,
const char *__restrict  src,
size_t  maxncpy 
)

Definition at line 274 of file string_utf8.c.

References BLI_assert, and BLI_STR_UTF8_CPY.

Referenced by BLI_uniquename_cb(), and WM_keymap_item_raw_to_string().

◆ BLI_strncpy_wchar_as_utf8()

size_t BLI_strncpy_wchar_as_utf8 ( char *__restrict  dst,
const wchar_t *__restrict  src,
const size_t  maxncpy 
)

◆ BLI_strncpy_wchar_from_utf8()

size_t BLI_strncpy_wchar_from_utf8 ( wchar_t *__restrict  dst_w,
const char *__restrict  src_c,
const size_t  maxncpy 
)

Definition at line 393 of file string_utf8.c.

References BLI_str_utf8_as_utf32(), and conv_utf_8_to_16().

Referenced by fsmenu_read_system().

◆ BLI_strnlen_utf8()

size_t BLI_strnlen_utf8 ( const char *  strc,
const size_t  maxlen 
)
Parameters
strc: the string whose length is measured.
maxlen: the string length (in bytes).
Returns
the unicode length (not in bytes!)

Definition at line 387 of file string_utf8.c.

References BLI_strnlen_utf8_ex().

Referenced by blender::string_search::count_utf8_code_points(), and ui_text_position_to_hidden().

◆ BLI_strnlen_utf8_ex()

size_t BLI_strnlen_utf8_ex ( const char *  strc,
const size_t  maxlen,
size_t *  r_len_bytes 
)

Definition at line 363 of file string_utf8.c.

References BLI_str_utf8_size_safe(), and len.

Referenced by BLI_strnlen_utf8(), and ui_textedit_insert_buf().

◆ BLI_utf8_invalid_byte()

ptrdiff_t BLI_utf8_invalid_byte ( const char *  str,
size_t  length 
)

Find the first invalid UTF-8 byte in the given str, of length bytes.

Returns
the offset of the first invalid byte.

Definition at line 73 of file string_utf8.c.

References Freestyle::c, ELEM, length(), str, and utf8_skip_data.

Referenced by BLI_utf8_invalid_strip(), and txt_extended_ascii_as_utf8().

◆ BLI_utf8_invalid_strip()

int BLI_utf8_invalid_strip ( char *  str,
size_t  length 
)

Remove any invalid UTF-8 bytes (taking multi-byte sequences into account).

Returns
number of stripped bytes.

Definition at line 203 of file string_utf8.c.

References BLI_assert, BLI_utf8_invalid_byte(), length(), and str.

Referenced by BKE_id_new_name_validate(), id_name_final_build(), outputNumInput(), seq_add_set_name(), TEST(), ui_textedit_copypaste(), and ui_textedit_end().

◆ BLI_wcswidth()

int BLI_wcswidth ( const char32_t *  pwcs,
size_t  n 
)

Definition at line 414 of file string_utf8.c.

◆ BLI_wcwidth()

int BLI_wcwidth ( char32_t  ucs)

◆ BLI_wstrlen_utf8()

size_t BLI_wstrlen_utf8 ( const wchar_t *  src)

Definition at line 333 of file string_utf8.c.

References BLI_str_utf8_from_unicode(), len, and NULL.

Variable Documentation

◆ utf8_skip_data

const size_t utf8_skip_data[256]
static
Initial value:
= {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1,
}

Definition at line 48 of file string_utf8.c.

Referenced by BLI_utf8_invalid_byte().