Blender  V2.93
msgfmt.c
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or
3  * modify it under the terms of the GNU General Public License
4  * as published by the Free Software Foundation; either version 2
5  * of the License, or (at your option) any later version.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software Foundation,
14  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15  *
16  * The Original Code is Copyright (C) 2017 by Blender Foundation.
17  * All rights reserved.
18  */
19 
20 /*
21  * Based on C++ version by Sergey Sharybin <sergey.vfx@gmail.com>.
22  * Based on Python script msgfmt.py from Python source code tree, which was written by
23  * Martin v. Löwis <loewis@informatik.hu-berlin.de>
24  *
25  * Generate binary message catalog from textual translation description.
26  *
27  * This program converts a textual Uniforum-style message catalog (.po file)
28  * into a binary GNU catalog (.mo file).
29  * This is essentially the same function as the GNU msgfmt program,
30  * however, it is a simpler implementation.
31  *
32  * Usage: msgfmt input.po output.mo
33  */
34 
35 #include <stdlib.h>
36 #include <string.h>
37 
38 #include "BLI_dynstr.h"
39 #include "BLI_fileops.h"
40 #include "BLI_ghash.h"
41 #include "BLI_linklist.h"
42 #include "BLI_memarena.h"
43 #include "BLI_utildefines.h"
44 
45 #include "MEM_guardedalloc.h"
46 
47 /* Stub needed because some BLI files include winstuff.h, which makes some use of G:
 * on WIN32 only, provide a dummy Global type and the G instance. */
48 #ifdef WIN32
49 typedef struct Global {
50  void *dummy;
51 } Global;
52 
53 Global G;
54 #endif
55 
56 /* We cannot use the real NUL char as separator until the very last step, it would break all our
57  * C string processing... So use one of the bytes that are invalid in UTF-8 instead (as per our BLI string_utf8.c). */
58 #define NULLSEP_STR "\xff"
59 #define NULLSEP_CHR '\xff'
60 
61 typedef enum {
66 } eSectionType;
67 
68 typedef struct Message {
72 
73  bool is_fuzzy;
75 
/* Strip leading and trailing blanks (space, tab, carriage return, newline) from `str`.
 *
 * The buffer is edited in place: a terminator is written right after the last non-blank char.
 * The returned pointer addresses the first non-blank char inside `str` (or its terminator when
 * nothing else is left), it is not a new allocation. */
static char *trim(char *str)
{
  static const char blanks[] = " \t\r\n";

  /* Skip the leading blanks. */
  str += strspn(str, blanks);
  if (str[0] == '\0') {
    /* Empty or blank-only input: nothing to cut, and no byte before `str` is ever visited. */
    return str;
  }

  /* `str[0]` is known to be non-blank here, so the backward scan always stops inside the string. */
  char *end = str + strlen(str) - 1;
  while (strchr(blanks, end[0]) != NULL) {
    end--;
  }
  end[1] = '\0';

  return str;
}
98 
/* Resolve backslash escapes of `str` in place, then drop one pair of enclosing double quotes.
 *
 * `\\`, `\n` and `\t` are converted to the matching char, any other escaped char is kept without
 * its backslash, and a lone backslash ending the string is discarded.
 * Returns a pointer inside `str` (one char further when the quotes were removed). */
static char *unescape(char *str)
{
  const char *src = str;
  char *dst = str;

  while (src[0] != '\0') {
    if (src[0] != '\\') {
      /* Regular char, just move it down to its final place. */
      *dst++ = *src++;
      continue;
    }

    const char escaped = src[1];
    if (escaped == '\0') {
      /* Get rid of trailing escape char, nothing is written for it. */
      src++;
      continue;
    }

    if (escaped == 'n') {
      *dst = '\n';
    }
    else if (escaped == 't') {
      *dst = '\t';
    }
    else {
      /* Escaped backslash, or useless escape char: keep only the char itself. */
      *dst = escaped;
    }
    dst++;
    src += 2;
  }
  *dst = '\0';

  /* `str[0]` can only be a quote when at least one char was written, so `dst[-1]` is valid. */
  if (str[0] == '"' && dst[-1] == '"') {
    dst[-1] = '\0';
    return str + 1;
  }
  return str;
}
139 
/* qsort() callback: `a` and `b` point to C string pointers, which are compared with strcmp(). */
static int qsort_str_cmp(const void *a, const void *b)
{
  const char *const *lhs = a;
  const char *const *rhs = b;

  return strcmp(*lhs, *rhs);
}
144 
145 static char **get_keys_sorted(GHash *messages, const uint32_t num_keys)
146 {
147  GHashIterator iter;
148 
149  char **keys = MEM_mallocN(sizeof(*keys) * num_keys, __func__);
150  char **k = keys;
151 
152  GHASH_ITER (iter, messages) {
153  *k = BLI_ghashIterator_getKey(&iter);
154  k++;
155  }
156 
157  qsort(keys, num_keys, sizeof(*keys), qsort_str_cmp);
158 
159  return keys;
160 }
161 
162 BLI_INLINE size_t uint32_to_bytes(const int value, char *bytes)
163 {
164  size_t i;
165  for (i = 0; i < sizeof(value); i++) {
166  bytes[i] = (char)((value >> ((int)i * 8)) & 0xff);
167  }
168  return i;
169 }
170 
171 BLI_INLINE size_t msg_to_bytes(char *msg, char *bytes, uint32_t size)
172 {
173  /* Note that we also perform replacing of our NULLSEP placeholder by real NULL char... */
174  size_t i;
175  for (i = 0; i < size; i++, msg++, bytes++) {
176  *bytes = (*msg == NULLSEP_CHR) ? '\0' : *msg;
177  }
178  return i;
179 }
180 
181 typedef struct Offset {
184 
185 /* Return the generated binary output. */
186 static char *generate(GHash *messages, size_t *r_output_size)
187 {
188  const uint32_t num_keys = BLI_ghash_len(messages);
189 
190  /* Get list of sorted keys. */
191  char **keys = get_keys_sorted(messages, num_keys);
192  char **vals = MEM_mallocN(sizeof(*vals) * num_keys, __func__);
193  uint32_t tot_keys_len = 0;
194  uint32_t tot_vals_len = 0;
195 
196  Offset *offsets = MEM_mallocN(sizeof(*offsets) * num_keys, __func__);
197 
198  for (int i = 0; i < num_keys; i++) {
199  Offset *off = &offsets[i];
200 
201  vals[i] = BLI_ghash_lookup(messages, keys[i]);
202 
203  /* For each string, we need size and file offset.
204  * Each string is NULL terminated; the NULL does not count into the size. */
205  off->key_offset = tot_keys_len;
206  off->key_len = (uint32_t)strlen(keys[i]);
207  tot_keys_len += off->key_len + 1;
208 
209  off->val_offset = tot_vals_len;
210  off->val_len = (uint32_t)strlen(vals[i]);
211  tot_vals_len += off->val_len + 1;
212  }
213 
214  /* The header is 7 32-bit unsigned integers.
215  * Then comes the keys index table, then the values index table. */
216  const uint32_t idx_keystart = 7 * 4;
217  const uint32_t idx_valstart = idx_keystart + 8 * num_keys;
218  /* We don't use hash tables, so the keys start right after the index tables. */
219  const uint32_t keystart = idx_valstart + 8 * num_keys;
220  /* and the values start after the keys */
221  const uint32_t valstart = keystart + tot_keys_len;
222 
223  /* Final buffer representing the binary MO file. */
224  *r_output_size = valstart + tot_vals_len;
225  char *output = MEM_mallocN(*r_output_size, __func__);
226  char *h = output;
227  char *ik = output + idx_keystart;
228  char *iv = output + idx_valstart;
229  char *k = output + keystart;
230  char *v = output + valstart;
231 
232  h += uint32_to_bytes(0x950412de, h); /* Magic */
233  h += uint32_to_bytes(0x0, h); /* Version */
234  h += uint32_to_bytes(num_keys, h); /* Number of entries */
235  h += uint32_to_bytes(idx_keystart, h); /* Start of key index */
236  h += uint32_to_bytes(idx_valstart, h); /* Start of value index */
237  h += uint32_to_bytes(0, h); /* Size of hash table */
238  h += uint32_to_bytes(0, h); /* Offset of hash table */
239 
240  BLI_assert(h == ik);
241 
242  for (int i = 0; i < num_keys; i++) {
243  Offset *off = &offsets[i];
244 
245  /* The index table first has the list of keys, then the list of values.
246  * Each entry has first the size of the string, then the file offset. */
247  ik += uint32_to_bytes(off->key_len, ik);
248  ik += uint32_to_bytes(off->key_offset + keystart, ik);
249  iv += uint32_to_bytes(off->val_len, iv);
250  iv += uint32_to_bytes(off->val_offset + valstart, iv);
251 
252  k += msg_to_bytes(keys[i], k, off->key_len + 1);
253  v += msg_to_bytes(vals[i], v, off->val_len + 1);
254  }
255 
256  BLI_assert(ik == output + idx_valstart);
257  BLI_assert(iv == output + keystart);
258  BLI_assert(k == output + valstart);
259 
260  MEM_freeN(keys);
261  MEM_freeN(vals);
262  MEM_freeN(offsets);
263 
264  return output;
265 }
266 
267 /* Add a non-fuzzy translation to the dictionary. */
268 static void add(GHash *messages, MemArena *memarena, const Message *msg)
269 {
270  const size_t msgctxt_len = (size_t)BLI_dynstr_get_len(msg->ctxt);
271  const size_t msgid_len = (size_t)BLI_dynstr_get_len(msg->id);
272  const size_t msgstr_len = (size_t)BLI_dynstr_get_len(msg->str);
273  const size_t msgkey_len = msgid_len + ((msgctxt_len == 0) ? 0 : msgctxt_len + 1);
274 
275  if (!msg->is_fuzzy && msgstr_len != 0) {
276  char *msgkey = BLI_memarena_alloc(memarena, sizeof(*msgkey) * (msgkey_len + 1));
277  char *msgstr = BLI_memarena_alloc(memarena, sizeof(*msgstr) * (msgstr_len + 1));
278 
279  if (msgctxt_len != 0) {
280  BLI_dynstr_get_cstring_ex(msg->ctxt, msgkey);
281  msgkey[msgctxt_len] = '\x04'; /* Context/msgid separator */
282  BLI_dynstr_get_cstring_ex(msg->id, &msgkey[msgctxt_len + 1]);
283  }
284  else {
285  BLI_dynstr_get_cstring_ex(msg->id, msgkey);
286  }
287 
288  BLI_dynstr_get_cstring_ex(msg->str, msgstr);
289 
290  BLI_ghash_insert(messages, msgkey, msgstr);
291  }
292 }
293 
294 static void clear(Message *msg)
295 {
296  BLI_dynstr_clear(msg->ctxt);
297  BLI_dynstr_clear(msg->id);
298  BLI_dynstr_clear(msg->str);
299  msg->is_fuzzy = false;
300 }
301 
302 static int make(const char *input_file_name, const char *output_file_name)
303 {
305  MemArena *msgs_memarena = BLI_memarena_new(BLI_MEMARENA_STD_BUFSIZE, __func__);
306 
307  const char *msgctxt_kw = "msgctxt";
308  const char *msgid_kw = "msgid";
309  const char *msgid_plural_kw = "msgid_plural";
310  const char *msgstr_kw = "msgstr";
311  const size_t msgctxt_len = strlen(msgctxt_kw);
312  const size_t msgid_len = strlen(msgid_kw);
313  const size_t msgid_plural_len = strlen(msgid_plural_kw);
314  const size_t msgstr_len = strlen(msgstr_kw);
315 
316  /* Note: For now, we assume file encoding is always utf-8. */
317 
318  eSectionType section = SECTION_NONE;
319  bool is_plural = false;
320 
321  Message msg = {
323  .id = BLI_dynstr_new_memarena(),
324  .str = BLI_dynstr_new_memarena(),
325  .is_fuzzy = false,
326  };
327 
328  LinkNode *input_file_lines = BLI_file_read_as_lines(input_file_name);
329  LinkNode *ifl = input_file_lines;
330 
331  /* Parse the catalog. */
332  for (int lno = 1; ifl; ifl = ifl->next, lno++) {
333  char *l = ifl->link;
334  const bool is_comment = (l[0] == '#');
335  /* If we get a comment line after a msgstr, this is a new entry. */
336  if (is_comment) {
337  if (section == SECTION_STR) {
338  add(messages, msgs_memarena, &msg);
339  clear(&msg);
340  section = SECTION_NONE;
341  }
342  /* Record a fuzzy mark. */
343  if (l[1] == ',' && strstr(l, "fuzzy") != NULL) {
344  msg.is_fuzzy = true;
345  }
346  /* Skip comments */
347  continue;
348  }
349  if (strstr(l, msgctxt_kw) == l) {
350  if (section == SECTION_STR) {
351  /* New message, output previous section. */
352  add(messages, msgs_memarena, &msg);
353  }
354  if (!ELEM(section, SECTION_NONE, SECTION_STR)) {
355  printf("msgctxt not at start of new message on %s:%d\n", input_file_name, lno);
356  return EXIT_FAILURE;
357  }
358  section = SECTION_CTX;
359  l = l + msgctxt_len;
360  clear(&msg);
361  }
362  else if (strstr(l, msgid_plural_kw) == l) {
363  /* This is a message with plural forms. */
364  if (section != SECTION_ID) {
365  printf("msgid_plural not preceded by msgid on %s:%d\n", input_file_name, lno);
366  return EXIT_FAILURE;
367  }
368  l = l + msgid_plural_len;
369  BLI_dynstr_append(msg.id, NULLSEP_STR); /* separator of singular and plural */
370  is_plural = true;
371  }
372  else if (strstr(l, msgid_kw) == l) {
373  if (section == SECTION_STR) {
374  add(messages, msgs_memarena, &msg);
375  }
376  if (section != SECTION_CTX) {
377  clear(&msg);
378  }
379  section = SECTION_ID;
380  l = l + msgid_len;
381  is_plural = false;
382  }
383  else if (strstr(l, msgstr_kw) == l) {
384  l = l + msgstr_len;
385  // Now we are in a msgstr section
386  section = SECTION_STR;
387  if (l[0] == '[') {
388  if (!is_plural) {
389  printf("plural without msgid_plural on %s:%d\n", input_file_name, lno);
390  return EXIT_FAILURE;
391  }
392  if ((l = strchr(l, ']')) == NULL) {
393  printf("Syntax error on %s:%d\n", input_file_name, lno);
394  return EXIT_FAILURE;
395  }
396  if (BLI_dynstr_get_len(msg.str) != 0) {
397  BLI_dynstr_append(msg.str, NULLSEP_STR); /* Separator of the various plural forms. */
398  }
399  }
400  else {
401  if (is_plural) {
402  printf("indexed msgstr required for plural on %s:%d\n", input_file_name, lno);
403  return EXIT_FAILURE;
404  }
405  }
406  }
407  /* Skip empty lines. */
408  l = trim(l);
409  if (l[0] == '\0') {
410  if (section == SECTION_STR) {
411  add(messages, msgs_memarena, &msg);
412  clear(&msg);
413  }
414  section = SECTION_NONE;
415  continue;
416  }
417  l = unescape(l);
418  if (section == SECTION_CTX) {
419  BLI_dynstr_append(msg.ctxt, l);
420  }
421  else if (section == SECTION_ID) {
422  BLI_dynstr_append(msg.id, l);
423  }
424  else if (section == SECTION_STR) {
425  BLI_dynstr_append(msg.str, l);
426  }
427  else {
428  printf("Syntax error on %s:%d\n", input_file_name, lno);
429  return EXIT_FAILURE;
430  }
431  }
432  /* Add last entry */
433  if (section == SECTION_STR) {
434  add(messages, msgs_memarena, &msg);
435  }
436 
437  BLI_dynstr_free(msg.ctxt);
438  BLI_dynstr_free(msg.id);
439  BLI_dynstr_free(msg.str);
440  BLI_file_free_lines(input_file_lines);
441 
442  /* Compute output */
443  size_t output_size;
444  char *output = generate(messages, &output_size);
445 
446  FILE *fp = BLI_fopen(output_file_name, "wb");
447  fwrite(output, 1, output_size, fp);
448  fclose(fp);
449 
450  MEM_freeN(output);
451  BLI_ghash_free(messages, NULL, NULL);
452  BLI_memarena_free(msgs_memarena);
453 
454  return EXIT_SUCCESS;
455 }
456 
/* Entry point: expects exactly two arguments, the input .po path and the output .mo path.
 * Prints the usage and fails otherwise, else returns the result of make(). */
int main(int argc, char **argv)
{
  if (argc == 3) {
    return make(argv[1], argv[2]);
  }

  printf("Usage: %s <input.po> <output.mo>\n", argv[0]);
  return EXIT_FAILURE;
}
467 }
struct Global Global
#define BLI_assert(a)
Definition: BLI_assert.h:58
#define BLI_INLINE
A dynamically sized string ADT.
int BLI_dynstr_get_len(DynStr *ds) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL()
Definition: BLI_dynstr.c:286
void BLI_dynstr_clear(DynStr *ds) ATTR_NONNULL()
Definition: BLI_dynstr.c:335
void BLI_dynstr_free(DynStr *ds) ATTR_NONNULL()
Definition: BLI_dynstr.c:358
DynStr * BLI_dynstr_new_memarena(void) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT
Definition: BLI_dynstr.c:86
void BLI_dynstr_get_cstring_ex(DynStr *__restrict ds, char *__restrict rets) ATTR_NONNULL()
Definition: BLI_dynstr.c:299
void BLI_dynstr_append(DynStr *__restrict ds, const char *cstr) ATTR_NONNULL()
Definition: BLI_dynstr.c:107
File and directory operations.
void BLI_file_free_lines(struct LinkNode *lines)
Definition: storage.c:639
struct LinkNode * BLI_file_read_as_lines(const char *file) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL()
Definition: storage.c:590
FILE * BLI_fopen(const char *filename, const char *mode) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL()
Definition: fileops.c:1003
bool BLI_ghashutil_strcmp(const void *a, const void *b)
BLI_INLINE void * BLI_ghashIterator_getKey(GHashIterator *ghi) ATTR_WARN_UNUSED_RESULT
Definition: BLI_ghash.h:146
#define GHASH_ITER(gh_iter_, ghash_)
Definition: BLI_ghash.h:169
GHash * BLI_ghash_new(GHashHashFP hashfp, GHashCmpFP cmpfp, const char *info) ATTR_MALLOC ATTR_WARN_UNUSED_RESULT
Definition: BLI_ghash.c:718
unsigned int BLI_ghash_len(GHash *gh) ATTR_WARN_UNUSED_RESULT
Definition: BLI_ghash.c:744
unsigned int BLI_ghashutil_strhash_p_murmur(const void *ptr)
void BLI_ghash_insert(GHash *gh, void *key, void *val)
Definition: BLI_ghash.c:756
void BLI_ghash_free(GHash *gh, GHashKeyFreeFP keyfreefp, GHashValFreeFP valfreefp)
Definition: BLI_ghash.c:1008
void * BLI_ghash_lookup(GHash *gh, const void *key) ATTR_WARN_UNUSED_RESULT
Definition: BLI_ghash.c:803
void BLI_memarena_free(struct MemArena *ma) ATTR_NONNULL(1)
Definition: BLI_memarena.c:109
#define BLI_MEMARENA_STD_BUFSIZE
Definition: BLI_memarena.h:36
void * BLI_memarena_alloc(struct MemArena *ma, size_t size) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1) ATTR_MALLOC ATTR_ALLOC_SIZE(2)
Definition: BLI_memarena.c:131
struct MemArena * BLI_memarena_new(const size_t bufsize, const char *name) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(2) ATTR_MALLOC
Definition: BLI_memarena.c:79
#define ELEM(...)
Read Guarded memory(de)allocation.
ATTR_WARN_UNUSED_RESULT const BMLoop * l
ATTR_WARN_UNUSED_RESULT const BMVert * v
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition: btDbvt.cpp:52
#define output
#define str(s)
void(* MEM_freeN)(void *vmemh)
Definition: mallocn.c:41
void *(* MEM_mallocN)(size_t len, const char *str)
Definition: mallocn.c:47
static ulong * next
static char * generate(GHash *messages, size_t *r_output_size)
Definition: msgfmt.c:186
struct Offset Offset
BLI_INLINE size_t uint32_to_bytes(const int value, char *bytes)
Definition: msgfmt.c:162
#define NULLSEP_STR
Definition: msgfmt.c:58
static int make(const char *input_file_name, const char *output_file_name)
Definition: msgfmt.c:302
int main(int argc, char **argv)
Definition: msgfmt.c:457
static char * unescape(char *str)
Definition: msgfmt.c:99
eSectionType
Definition: msgfmt.c:61
@ SECTION_CTX
Definition: msgfmt.c:63
@ SECTION_STR
Definition: msgfmt.c:65
@ SECTION_NONE
Definition: msgfmt.c:62
@ SECTION_ID
Definition: msgfmt.c:64
static void clear(Message *msg)
Definition: msgfmt.c:294
static char ** get_keys_sorted(GHash *messages, const uint32_t num_keys)
Definition: msgfmt.c:145
struct Message Message
static int qsort_str_cmp(const void *a, const void *b)
Definition: msgfmt.c:140
static char * trim(char *str)
Definition: msgfmt.c:76
static void add(GHash *messages, MemArena *memarena, const Message *msg)
Definition: msgfmt.c:268
#define NULLSEP_CHR
Definition: msgfmt.c:59
BLI_INLINE size_t msg_to_bytes(char *msg, char *bytes, uint32_t size)
Definition: msgfmt.c:171
static unsigned a[3]
Definition: RandGen.cpp:92
unsigned int uint32_t
Definition: stdint.h:83
void * link
Definition: BLI_linklist.h:40
struct LinkNode * next
Definition: BLI_linklist.h:39
Definition: msgfmt.c:68
DynStr * id
Definition: msgfmt.c:70
DynStr * str
Definition: msgfmt.c:71
bool is_fuzzy
Definition: msgfmt.c:73
DynStr * ctxt
Definition: msgfmt.c:69
Definition: msgfmt.c:181
uint32_t key_len
Definition: msgfmt.c:182
uint32_t val_offset
Definition: msgfmt.c:182
uint32_t val_len
Definition: msgfmt.c:182
uint32_t key_offset
Definition: msgfmt.c:182
#define G(x, y, z)
uint len