Blender  V2.93
string_cursor_utf8.c
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or
3  * modify it under the terms of the GNU General Public License
4  * as published by the Free Software Foundation; either version 2
5  * of the License, or (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software Foundation,
14  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15  *
16  * The Original Code is Copyright (C) 2011 Blender Foundation.
17  * All rights reserved.
18  */
19 
24 #include <stdio.h>
25 #include <stdlib.h>
26 
27 #include "BLI_string_utf8.h"
28 #include "BLI_utildefines.h"
29 
30 #include "BLI_string_cursor_utf8.h" /* own include */
31 
32 #ifdef __GNUC__
33 # pragma GCC diagnostic error "-Wsign-conversion"
34 #endif
35 
36 typedef enum eStrCursorDelimType {
46 
48 {
49  switch (uch) {
50  case ',':
51  case '.':
52  return STRCUR_DELIM_PUNCT;
53 
54  case '{':
55  case '}':
56  case '[':
57  case ']':
58  case '(':
59  case ')':
60  return STRCUR_DELIM_BRACE;
61 
62  case '+':
63  case '-':
64  case '=':
65  case '~':
66  case '%':
67  case '/':
68  case '<':
69  case '>':
70  case '^':
71  case '*':
72  case '&':
73  case '|':
74  return STRCUR_DELIM_OPERATOR;
75 
76  case '\'':
77  case '\"':
78  return STRCUR_DELIM_QUOTE;
79 
80  case ' ':
81  case '\t':
82  case '\n':
84 
85  case '\\':
86  case '@':
87  case '#':
88  case '$':
89  case ':':
90  case ';':
91  case '?':
92  case '!':
93  case 0xA3: /* pound */
94  case 0x80: /* euro */
95  /* case '_': */ /* special case, for python */
96  return STRCUR_DELIM_OTHER;
97 
98  default:
99  break;
100  }
101  return STRCUR_DELIM_ALPHANUMERIC; /* Not quite true, but ok for now */
102 }
103 
104 static eStrCursorDelimType cursor_delim_type_utf8(const char *ch_utf8)
105 {
106  /* for full unicode support we really need to have large lookup tables to figure
107  * out what's what in every possible char set - and python, glib both have these. */
108  uint uch = BLI_str_utf8_as_unicode(ch_utf8);
109  return cursor_delim_type_unicode(uch);
110 }
111 
/**
 * Advance the byte offset \a pos to the start of the next UTF-8 character.
 *
 * \param str: The UTF-8 string the cursor moves through.
 * \param maxlen: Upper bound for \a pos; the result is clamped to this value.
 * \param pos: In/out byte offset into \a str.
 * \return True when BLI_str_find_next_char_utf8() found a next position and
 * \a pos was updated, false when \a pos is outside `[0, maxlen)` or no next
 * position was found.
 */
bool BLI_str_cursor_step_next_utf8(const char *str, size_t maxlen, int *pos)
{
  /* A negative offset would form a pointer before the buffer, and an offset at
   * or past `maxlen` can never advance: previously such a call could still
   * report success while the clamp below left `*pos` unchanged. */
  if ((*pos) < 0 || (size_t)(*pos) >= maxlen) {
    return false;
  }

  /* The search limit is one past index `maxlen`, exactly as before. */
  const char *str_end = str + (maxlen + 1);
  const char *str_pos = str + (*pos);
  const char *str_next = BLI_str_find_next_char_utf8(str_pos, str_end);
  if (!str_next) {
    return false;
  }

  /* Explicit narrowing: the pointer difference is a `ptrdiff_t`. */
  (*pos) += (int)(str_next - str_pos);
  if ((*pos) > (int)maxlen) {
    (*pos) = (int)maxlen;
  }
  return true;
}
127 
/**
 * Move the byte offset \a pos back to the start of the previous UTF-8
 * character.
 *
 * \param str: The UTF-8 string the cursor moves through.
 * \param maxlen: Unused.
 * \param pos: In/out byte offset into \a str.
 * \return True when \a pos was moved back, false when \a pos is not positive
 * or BLI_str_find_prev_char_utf8() found no previous position.
 */
bool BLI_str_cursor_step_prev_utf8(const char *str, size_t UNUSED(maxlen), int *pos)
{
  /* Nothing precedes the start of the string. */
  if ((*pos) <= 0) {
    return false;
  }

  const char *cursor = str + (*pos);
  const char *previous = BLI_str_find_prev_char_utf8(str, cursor);
  if (!previous) {
    return false;
  }

  /* Step back by the byte distance between the two positions. */
  (*pos) -= (cursor - previous);
  return true;
}
141 
/**
 * Move the byte offset \a pos through the UTF-8 string \a str in \a direction.
 *
 * When `jump != STRCUR_JUMP_NONE` the cursor keeps stepping one character at a
 * time until the class returned by cursor_delim_type_utf8() for the character
 * at the new position differs from the class sampled before the loop, or until
 * the boundary (`maxlen` going forward, 0 going backward) is reached or a step
 * fails. With STRCUR_JUMP_ALL the class comparison is skipped, so only the
 * boundary or a failed step ends the loop.
 *
 * For STRCUR_DIR_PREV, when the class changes and the cursor has moved at
 * least one byte from its value on entry (`pos_orig`), \a pos is restored to
 * the offset it had before the last step (`pos_prev`).
 *
 * Any other \a direction reaches `BLI_assert(0)`.
 *
 * NOTE(review): this listing appears to have lost lines during extraction
 * (the embedded numbering skips 146, 153, 156, 161, 179, 182 and 188):
 * presumably the `jump` parameter declaration, the bodies of the
 * `use_init_step` branches and the else-operands of the `delim_type`
 * conditionals. Verify against the original file before relying on it.
 */
142 void BLI_str_cursor_step_utf8(const char *str,
143  size_t maxlen,
144  int *pos,
145  eStrCursorJumpDirection direction,
147  bool use_init_step)
148 {
 /* Entry offset, used by the backward branch to detect that the cursor moved. */
149  const int pos_orig = *pos;
150 
151  if (direction == STRCUR_DIR_NEXT) {
152  if (use_init_step) {
154  }
155  else {
157  }
158 
159  if (jump != STRCUR_JUMP_NONE) {
 /* `*pos` is an int compared against the size_t `maxlen`, so it is converted
  * to unsigned first; a negative `*pos` would compare as a very large value. */
160  const eStrCursorDelimType delim_type = (*pos) < maxlen ? cursor_delim_type_utf8(&str[*pos]) :
162  /* jump between special characters (/,\,_,-, etc.),
163  * look at function cursor_delim_type() for complete
164  * list of special character, ctr -> */
165  while ((*pos) < maxlen) {
166  if (BLI_str_cursor_step_next_utf8(str, maxlen, pos)) {
 /* NOTE(review): after the step `*pos` may equal `maxlen`, so `str[maxlen]`
  * is read here; presumably a terminator is stored there -- confirm. */
167  if ((jump != STRCUR_JUMP_ALL) && (delim_type != cursor_delim_type_utf8(&str[*pos]))) {
168  break;
169  }
170  }
171  else {
172  break; /* unlikely but just in case */
173  }
174  }
175  }
176  }
177  else if (direction == STRCUR_DIR_PREV) {
178  if (use_init_step) {
180  }
181  else {
183  }
184 
185  if (jump != STRCUR_JUMP_NONE) {
 /* NOTE(review): this samples the *byte* before the cursor, not the previous
  * character; for a multi-byte character that is a trailing byte -- confirm
  * how BLI_str_utf8_as_unicode() treats such input. */
186  const eStrCursorDelimType delim_type = (*pos) > 0 ?
187  cursor_delim_type_utf8(&str[(*pos) - 1]) :
189  /* jump between special characters (/,\,_,-, etc.),
190  * look at function cursor_delim_type() for complete
191  * list of special character, ctr -> */
192  while ((*pos) > 0) {
 /* Remember the offset before stepping so it can be restored below. */
193  const int pos_prev = *pos;
194  if (BLI_str_cursor_step_prev_utf8(str, maxlen, pos)) {
195  if ((jump != STRCUR_JUMP_ALL) && (delim_type != cursor_delim_type_utf8(&str[*pos]))) {
196  /* left only: compensate for index/change in direction */
197  if ((pos_orig - (*pos)) >= 1) {
198  *pos = pos_prev;
199  }
200  break;
201  }
202  }
203  else {
204  break;
205  }
206  }
207  }
208  }
209  else {
 /* Only STRCUR_DIR_NEXT and STRCUR_DIR_PREV are handled. */
210  BLI_assert(0);
211  }
212 }
213 
214 /* UTF-32 version of BLI_str_cursor_step_utf8 (keep the two in sync!).
215  * It is simpler because each character is one array element, so no multi-byte stepping is needed.
216  */
217 
218 /* helper funcs so we can match BLI_str_cursor_step_utf8 */
/**
 * Advance \a pos by one element unless it has already reached \a maxlen.
 *
 * \param str: Unused; present so the signature mirrors the UTF-8 variant.
 * \param maxlen: Upper bound for \a pos.
 * \param pos: In/out element index.
 * \return True when \a pos was incremented, false when it was already at or
 * past \a maxlen.
 */
static bool cursor_step_next_utf32(const char32_t *UNUSED(str), size_t maxlen, int *pos)
{
  const bool can_advance = (*pos) < (int)maxlen;
  if (can_advance) {
    (*pos) += 1;
  }
  return can_advance;
}
227 
228 static bool cursor_step_prev_utf32(const char32_t *UNUSED(str), size_t UNUSED(maxlen), int *pos)
229 {
230  if ((*pos) <= 0) {
231  return false;
232  }
233  (*pos)--;
234  return true;
235 }
236 
/**
 * Move the element index \a pos through the UTF-32 string \a str in
 * \a direction.
 *
 * With \a use_init_step set, one unconditional step is attempted first (its
 * result is ignored). When `jump != STRCUR_JUMP_NONE` the cursor then keeps
 * stepping one element at a time until the class returned by
 * cursor_delim_type_unicode() for the element at the new index differs from
 * the class sampled before the loop, or until the boundary (`maxlen` going
 * forward, 0 going backward) is reached or a step fails. With STRCUR_JUMP_ALL
 * the class comparison is skipped.
 *
 * For STRCUR_DIR_PREV, when the class changes and the cursor has moved at
 * least one element from its value on entry (`pos_orig`), \a pos is restored
 * to the index it had before the last step (`pos_prev`).
 *
 * Any other \a direction reaches `BLI_assert(0)`.
 *
 * NOTE(review): this listing appears to have lost lines during extraction
 * (the embedded numbering skips 241, 251, 256-257, 279 and 284-285):
 * presumably the `jump` parameter declaration, the `else` bodies of the
 * `use_init_step` branches and the operands of the `delim_type` conditionals.
 * Verify against the original file before relying on it.
 */
237 void BLI_str_cursor_step_utf32(const char32_t *str,
238  size_t maxlen,
239  int *pos,
240  eStrCursorJumpDirection direction,
242  bool use_init_step)
243 {
 /* Entry index, used by the backward branch to detect that the cursor moved. */
244  const int pos_orig = *pos;
245 
246  if (direction == STRCUR_DIR_NEXT) {
247  if (use_init_step) {
248  cursor_step_next_utf32(str, maxlen, pos);
249  }
250  else {
252  }
253 
254  if (jump != STRCUR_JUMP_NONE) {
 /* `*pos` is an int compared against the size_t `maxlen`, so it is converted
  * to unsigned first; a negative `*pos` would compare as a very large value. */
255  const eStrCursorDelimType delim_type = (*pos) < maxlen ?
258  /* jump between special characters (/,\,_,-, etc.),
259  * look at function cursor_delim_type_unicode() for complete
260  * list of special character, ctr -> */
261  while ((*pos) < maxlen) {
262  if (cursor_step_next_utf32(str, maxlen, pos)) {
 /* NOTE(review): after the step `*pos` may equal `maxlen`, so `str[maxlen]`
  * is read here; presumably a terminator is stored there -- confirm. */
263  if ((jump != STRCUR_JUMP_ALL) &&
264  (delim_type != cursor_delim_type_unicode((uint)str[*pos]))) {
265  break;
266  }
267  }
268  else {
269  break; /* unlikely but just in case */
270  }
271  }
272  }
273  }
274  else if (direction == STRCUR_DIR_PREV) {
275  if (use_init_step) {
276  cursor_step_prev_utf32(str, maxlen, pos);
277  }
278  else {
280  }
281 
282  if (jump != STRCUR_JUMP_NONE) {
283  const eStrCursorDelimType delim_type = (*pos) > 0 ?
286  /* jump between special characters (/,\,_,-, etc.),
287  * look at function cursor_delim_type() for complete
288  * list of special character, ctr -> */
289  while ((*pos) > 0) {
 /* Remember the index before stepping so it can be restored below. */
290  const int pos_prev = *pos;
291  if (cursor_step_prev_utf32(str, maxlen, pos)) {
292  if ((jump != STRCUR_JUMP_ALL) &&
293  (delim_type != cursor_delim_type_unicode((uint)str[*pos]))) {
294  /* left only: compensate for index/change in direction */
295  if ((pos_orig - (*pos)) >= 1) {
296  *pos = pos_prev;
297  }
298  break;
299  }
300  }
301  else {
302  break;
303  }
304  }
305  }
306  }
307  else {
 /* Only STRCUR_DIR_NEXT and STRCUR_DIR_PREV are handled. */
308  BLI_assert(0);
309  }
310 }
#define BLI_assert(a)
Definition: BLI_assert.h:58
eStrCursorJumpDirection
@ STRCUR_DIR_NEXT
@ STRCUR_DIR_PREV
eStrCursorJumpType
@ STRCUR_JUMP_ALL
@ STRCUR_JUMP_NONE
@ STRCUR_JUMP_DELIM
char * BLI_str_find_prev_char_utf8(const char *str, const char *p) ATTR_NONNULL()
Definition: string_utf8.c:782
char * BLI_str_find_next_char_utf8(const char *p, const char *end) ATTR_NONNULL(1)
Definition: string_utf8.c:807
unsigned int BLI_str_utf8_as_unicode(const char *p) ATTR_NONNULL()
Definition: string_utf8.c:533
unsigned int uint
Definition: BLI_sys_types.h:83
#define UNUSED(x)
void jump(const btVector3 &v=btVector3(0, 0, 0))
#define str(s)
uint pos
void BLI_str_cursor_step_utf8(const char *str, size_t maxlen, int *pos, eStrCursorJumpDirection direction, eStrCursorJumpType jump, bool use_init_step)
static eStrCursorDelimType cursor_delim_type_unicode(const uint uch)
static bool cursor_step_prev_utf32(const char32_t *UNUSED(str), size_t UNUSED(maxlen), int *pos)
bool BLI_str_cursor_step_next_utf8(const char *str, size_t maxlen, int *pos)
eStrCursorDelimType
@ STRCUR_DELIM_OTHER
@ STRCUR_DELIM_BRACE
@ STRCUR_DELIM_QUOTE
@ STRCUR_DELIM_WHITESPACE
@ STRCUR_DELIM_ALPHANUMERIC
@ STRCUR_DELIM_PUNCT
@ STRCUR_DELIM_NONE
@ STRCUR_DELIM_OPERATOR
void BLI_str_cursor_step_utf32(const char32_t *str, size_t maxlen, int *pos, eStrCursorJumpDirection direction, eStrCursorJumpType jump, bool use_init_step)
static bool cursor_step_next_utf32(const char32_t *UNUSED(str), size_t maxlen, int *pos)
static eStrCursorDelimType cursor_delim_type_utf8(const char *ch_utf8)
bool BLI_str_cursor_step_prev_utf8(const char *str, size_t UNUSED(maxlen), int *pos)