Blender  V2.93
utfconv.c
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or
3  * modify it under the terms of the GNU General Public License
4  * as published by the Free Software Foundation; either version 2
5  * of the License, or (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software Foundation,
14  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15  *
16  * The Original Code is Copyright (C) 2012 Blender Foundation.
17  * All rights reserved.
18  *
19  */
20 
21 #include "utfconv.h"
22 
/*
 * Count the bytes needed to store `string16` as UTF-8.
 *
 * string16: NUL-terminated sequence of UTF-16 code units, may be NULL.
 *
 * Returns the byte count including the terminating NUL, or 0 when
 * `string16` is NULL.
 *
 * Code units that do not form a valid character contribute nothing:
 * - a lone low surrogate (0xDC00..0xDFFF) is skipped;
 * - a high surrogate (0xD800..0xDBFF) always consumes the unit that follows
 *   it, and the pair is only counted (4 bytes) when that unit is a low
 *   surrogate.
 */
size_t count_utf_8_from_16(const wchar_t *string16)
{
  size_t count = 0;
  wchar_t u;

  if (!string16) {
    return 0;
  }

  /* Walk the pointer instead of using an `int` index, so the position can
   * never overflow on very long inputs. */
  for (; (u = *string16) != 0; string16++) {
    if (u < 0x0080) {
      count += 1;
    }
    else if (u < 0x0800) {
      count += 2;
    }
    else if (u < 0xD800) {
      count += 3;
    }
    else if (u < 0xDC00) {
      /* High surrogate: look at (and consume) the next unit. */
      u = *++string16;
      if (u == 0) {
        /* String ends in the middle of a pair. */
        break;
      }
      if (u >= 0xDC00 && u < 0xE000) {
        count += 4;
      }
      /* Otherwise the pair is malformed and is not counted. */
    }
    else if (u < 0xE000) {
      /* Lone low surrogate: illegal, not counted. */
    }
    else {
      count += 3;
    }
  }

  /* One extra byte for the terminating NUL. */
  return count + 1;
}
69 
/*
 * Count the UTF-16 code units needed to store `string8`.
 *
 * string8: NUL-terminated byte string decoded as UTF-8, may be NULL.
 *
 * Returns the number of code units including the terminating NUL, or 0
 * when `string8` is NULL.
 *
 * Bytes that do not decode to a usable code point contribute nothing:
 * stray continuation bytes, unknown lead bytes, sequences interrupted by a
 * non-continuation byte (that byte is dropped as well), and decoded values
 * that are 0, inside 0xD800..0xDFFF, or at/above 0x110000.
 */
size_t count_utf_16_from_8(const char *string8)
{
  size_t units = 0;
  unsigned int codepoint = 0;
  char pending = 0; /* Continuation bytes still expected. */
  char byte;

  if (string8 == NULL) {
    return 0;
  }

  while ((byte = *string8++) != 0) {
    if (pending == 0) {
      /* Expecting the first byte of a character. */
      if ((byte & 0x80) == 0) {
        /* Single byte (ASCII). */
        units++;
        codepoint = 0;
      }
      else if ((byte & 0xE0) == 0xC0) {
        /* Lead byte of a 2-byte sequence. */
        pending = 1;
        codepoint = byte & 0x1F;
      }
      else if ((byte & 0xF0) == 0xE0) {
        /* Lead byte of a 3-byte sequence. */
        pending = 2;
        codepoint = byte & 0x0F;
      }
      else if ((byte & 0xF8) == 0xF0) {
        /* Lead byte of a 4-byte sequence. */
        pending = 3;
        codepoint = byte & 0x07;
      }
      /* Anything else is skipped. */
      continue;
    }

    if ((byte & 0xC0) != 0x80) {
      /* Sequence interrupted: discard what was gathered so far. */
      pending = 0;
      codepoint = 0;
      continue;
    }

    codepoint = (codepoint << 6) | (byte & 0x3F);
    if (--pending != 0) {
      continue;
    }

    /* A complete sequence has been decoded. */
    if (codepoint >= 0x10000) {
      if (codepoint < 0x110000) {
        /* Needs a surrogate pair. */
        units += 2;
      }
    }
    else if (codepoint != 0 && (codepoint < 0xD800 || codepoint >= 0xE000)) {
      units++;
    }
    codepoint = 0;
  }

  /* One extra unit for the terminating NUL. */
  return units + 1;
}
126 
127 int conv_utf_16_to_8(const wchar_t *in16, char *out8, size_t size8)
128 {
129  char *out8end = out8 + size8;
130  wchar_t u = 0;
131  int err = 0;
132  if (!size8 || !in16 || !out8)
133  return UTF_ERROR_NULL_IN;
134  out8end--;
135 
136  for (; out8 < out8end && (u = *in16); in16++, out8++) {
137  if (u < 0x0080) {
138  *out8 = u;
139  }
140  else if (u < 0x0800) {
141  if (out8 + 1 >= out8end)
142  break;
143  *out8++ = (0x3 << 6) | (0x1F & (u >> 6));
144  *out8 = (0x1 << 7) | (0x3F & (u));
145  }
146  else if (u < 0xD800 || u >= 0xE000) {
147  if (out8 + 2 >= out8end)
148  break;
149  *out8++ = (0x7 << 5) | (0xF & (u >> 12));
150  *out8++ = (0x1 << 7) | (0x3F & (u >> 6));
151  *out8 = (0x1 << 7) | (0x3F & (u));
152  }
153  else if (u < 0xDC00) {
154  wchar_t u2 = *++in16;
155 
156  if (!u2)
157  break;
158  if (u2 >= 0xDC00 && u2 < 0xE000) {
159  if (out8 + 3 >= out8end)
160  break;
161  else {
162  unsigned int uc = 0x10000 + (u2 - 0xDC00) + ((u - 0xD800) << 10);
163 
164  *out8++ = (0xF << 4) | (0x7 & (uc >> 18));
165  *out8++ = (0x1 << 7) | (0x3F & (uc >> 12));
166  *out8++ = (0x1 << 7) | (0x3F & (uc >> 6));
167  *out8 = (0x1 << 7) | (0x3F & (uc));
168  }
169  }
170  else {
171  out8--;
173  }
174  }
175  else if (u < 0xE000) {
176  out8--;
178  }
179  }
180 
181  *out8 = *out8end = 0;
182 
183  if (*in16)
184  err |= UTF_ERROR_SMALL;
185 
186  return err;
187 }
188 
189 int conv_utf_8_to_16(const char *in8, wchar_t *out16, size_t size16)
190 {
191  char u;
192  char type = 0;
193  unsigned int u32 = 0;
194  wchar_t *out16end = out16 + size16;
195  int err = 0;
196  if (!size16 || !in8 || !out16)
197  return UTF_ERROR_NULL_IN;
198  out16end--;
199 
200  for (; out16 < out16end && (u = *in8); in8++) {
201  if (type == 0) {
202  if ((u & 0x01 << 7) == 0) {
203  *out16 = u;
204  out16++;
205  u32 = 0;
206  continue;
207  } // 1 utf-8 char
208  if ((u & 0x07 << 5) == 0xC0) {
209  type = 1;
210  u32 = u & 0x1F;
211  continue;
212  } // 2 utf-8 char
213  if ((u & 0x0F << 4) == 0xE0) {
214  type = 2;
215  u32 = u & 0x0F;
216  continue;
217  } // 3 utf-8 char
218  if ((u & 0x1F << 3) == 0xF0) {
219  type = 3;
220  u32 = u & 0x07;
221  continue;
222  } // 4 utf-8 char
224  continue;
225  }
226  else {
227  if ((u & 0xC0) == 0x80) {
228  u32 = (u32 << 6) | (u & 0x3F);
229  type--;
230  }
231  else {
232  u32 = 0;
233  type = 0;
235  }
236  }
237  if (type == 0) {
238  if ((0 < u32 && u32 < 0xD800) || (0xE000 <= u32 && u32 < 0x10000)) {
239  *out16 = u32;
240  out16++;
241  }
242  else if (0x10000 <= u32 && u32 < 0x110000) {
243  if (out16 + 1 >= out16end)
244  break;
245  u32 -= 0x10000;
246  *out16 = 0xD800 + (u32 >> 10);
247  out16++;
248  *out16 = 0xDC00 + (u32 & 0x3FF);
249  out16++;
250  }
251  u32 = 0;
252  }
253  }
254 
255  *out16 = *out16end = 0;
256 
257  if (*in8)
258  err |= UTF_ERROR_SMALL;
259 
260  return err;
261 }
262 
263 /* UNUSED FUNCTIONS */
264 #if 0
/*
 * Report whether the NUL-terminated string `in8` contains only bytes with
 * the high bit clear.
 *
 * Returns 1 when no byte has bit 0x80 set (including the empty string),
 * 0 as soon as such a byte is found.
 */
static int is_ascii(const char *in8)
{
  const char *cursor = in8;

  while (*cursor != '\0') {
    if ((*cursor & 0x80) != 0) {
      return 0;
    }
    cursor++;
  }

  return 1;
}
273 
/*
 * Reads the byte at `inout8 + maxcutpoint`; the buffer is not modified and
 * the value read is not used.
 *
 * inout8:      buffer to inspect, may be NULL (then nothing happens).
 * maxcutpoint: offset of the byte that is read; the caller must make sure
 *              it lies inside the buffer.
 *
 * NOTE(review): looks like an unfinished stub -- confirm before enabling.
 */
static void utf_8_cut_end(char *inout8, size_t maxcutpoint)
{
  char *cur;
  char cc;

  /* Validate before doing any pointer arithmetic on `inout8`. */
  if (!inout8) {
    return;
  }

  cur = inout8 + maxcutpoint;
  cc = *cur;
  (void)cc;
}
283 #endif
284 
/*
 * Allocate a buffer with malloc() and fill it with the UTF-8 conversion of
 * `in16`.
 *
 * in16: NUL-terminated UTF-16 string.
 * add:  number of extra bytes to allocate after the converted string; they
 *       are left uninitialized.
 *
 * Returns the new buffer, or NULL when count_utf_8_from_16() reports 0 for
 * `in16`, when the requested size would overflow `size_t`, or when the
 * allocation fails.
 */
char *alloc_utf_8_from_16(const wchar_t *in16, size_t add)
{
  const size_t bsize = count_utf_8_from_16(in16);
  char *out8 = NULL;

  if (!bsize) {
    return NULL;
  }

  /* A wrapped `bsize + add` would yield a buffer smaller than the `bsize`
   * bytes the converter is allowed to write. */
  if (add > (size_t)-1 - bsize) {
    return NULL;
  }

  out8 = (char *)malloc(sizeof(char) * (bsize + add));
  if (!out8) {
    return NULL;
  }

  conv_utf_16_to_8(in16, out8, bsize);
  return out8;
}
295 
/*
 * Allocate a buffer with malloc() and fill it with the UTF-16 conversion of
 * `in8`.
 *
 * in8: NUL-terminated UTF-8 string.
 * add: number of extra code units to allocate after the converted string;
 *      they are left uninitialized.
 *
 * Returns the new buffer, or NULL when count_utf_16_from_8() reports 0 for
 * `in8`, when the requested size would overflow `size_t`, or when the
 * allocation fails.
 */
wchar_t *alloc_utf16_from_8(const char *in8, size_t add)
{
  const size_t bsize = count_utf_16_from_8(in8);
  const size_t max_units = (size_t)-1 / sizeof(wchar_t);
  wchar_t *out16 = NULL;

  if (!bsize) {
    return NULL;
  }

  /* A wrapped size computation would yield a buffer smaller than the
   * `bsize` units the converter is allowed to write. */
  if (bsize > max_units || add > max_units - bsize) {
    return NULL;
  }

  out16 = (wchar_t *)malloc(sizeof(wchar_t) * (bsize + add));
  if (!out16) {
    return NULL;
  }

  conv_utf_8_to_16(in8, out16, bsize);
  return out16;
}
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET 
_GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble u2
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum type
static FT_Error err
Definition: freetypefont.c:52
int count
static void add(GHash *messages, MemArena *memarena, const Message *msg)
Definition: msgfmt.c:268
size_t count_utf_8_from_16(const wchar_t *string16)
Definition: utfconv.c:23
wchar_t * alloc_utf16_from_8(const char *in8, size_t add)
Definition: utfconv.c:296
int conv_utf_8_to_16(const char *in8, wchar_t *out16, size_t size16)
Definition: utfconv.c:189
char * alloc_utf_8_from_16(const wchar_t *in16, size_t add)
Definition: utfconv.c:285
int conv_utf_16_to_8(const wchar_t *in16, char *out8, size_t size8)
Definition: utfconv.c:127
size_t count_utf_16_from_8(const char *string8)
Definition: utfconv.c:70
#define UTF_ERROR_ILLSEQ
Definition: utfconv.h:57
#define UTF_ERROR_ILLCHAR
Definition: utfconv.h:53
#define UTF_ERROR_NULL_IN
Definition: utfconv.h:51
#define UTF_ERROR_SMALL
Definition: utfconv.h:55