vul_string.cxx
Go to the documentation of this file.
1 // This is core/vul/vul_string.cxx
2 #include <cstdlib>
3 #include <cstring>
4 #include <cctype>
5 #include <algorithm>
6 #include <sstream>
7 #include <cmath>
8 #include "vul_string.h"
9 //:
10 // \file
11 
12 #include <cassert>
13 #ifdef _MSC_VER
14 # include <vcl_msvc_warnings.h>
15 #endif
16 #include <vul/vul_reg_exp.h>
17 #include <vul/vul_sprintf.h>
18 
19 #ifndef END_OF_STRING // If END_OF_STRING not defined
20 #define END_OF_STRING (0)
21 #endif
22 
// Converts all alphabetical characters to uppercase.
// The conversion happens in place: \a s must be a writable, NUL-terminated
// buffer. Characters that are not lowercase letters are left untouched.
// \return \a s, to allow call chaining.
char* vul_string_c_upcase(char* s)
{
  for (char* p = s; *p; ++p) {
    // The <cctype> functions are only defined for values representable as
    // unsigned char (or EOF); a negative plain char would be undefined.
    const unsigned char ch = static_cast<unsigned char>(*p);
    if (std::islower(ch))
      *p = static_cast<char>(std::toupper(ch));
  }
  return s;
}
34 
// Converts all alphabetical characters to lowercase.
// The conversion happens in place: \a s must be a writable, NUL-terminated
// buffer. Characters that are not uppercase letters are left untouched.
// \return \a s, to allow call chaining.
char* vul_string_c_downcase(char* s)
{
  for (char* p = s; *p; ++p) {
    // The <cctype> functions are only defined for values representable as
    // unsigned char (or EOF); a negative plain char would be undefined.
    const unsigned char ch = static_cast<unsigned char>(*p);
    if (std::isupper(ch))
      *p = static_cast<char>(std::tolower(ch));
  }
  return s;
}
46 
// Capitalizes all words in a string. A word is defined as
// a sequence of alphanumeric characters; words are separated by
// non-alphanumerics. Only the first character of each word is changed.
// The conversion happens in place.
// \return \a s, to allow call chaining.
char* vul_string_c_capitalize(char* s)
{
  char* p = s;
  while (*p) {
    // Skip the separators in front of the next word.
    // (Casts: <cctype> functions are undefined for negative plain char.)
    while (*p && !std::isalnum(static_cast<unsigned char>(*p)))
      ++p;
    if (*p == '\0')
      break;
    // First character of a word.
    *p = static_cast<char>(std::toupper(static_cast<unsigned char>(*p)));
    // Step over the remainder of the word.
    do {
      ++p;
    } while (*p && std::isalnum(static_cast<unsigned char>(*p)));
  }
  return s;
}
60 
// Removes from str every character that appears in rem, and returns
// the modified string str. Note that rem is treated as a set of
// characters, not as a substring. The surviving characters are
// compacted towards the front of the buffer, in place.
char* vul_string_c_trim(char* str, const char* rem)
{
  char* out = str;                         // next slot to write a kept character
  for (const char* in = str; *in != '\0'; ++in) {
    if (std::strchr(rem, *in) == nullptr)  // not one of the characters to drop
      *out++ = *in;
  }
  *out = '\0';                             // terminate the compacted string
  return str;
}
78 
// Removes from the front of str the longest run of characters that all
// appear in rem, and returns the modified string str. rem is treated
// as a set of characters. The remainder of the string is shifted down
// to the start of the buffer, in place.
char* vul_string_c_left_trim(char* str, const char* rem)
{
  // Find the first character that is not in the trim set.
  const char* keep = str;
  while (*keep != '\0' && std::strchr(rem, *keep) != nullptr)
    ++keep;

  // Only move data when something was actually trimmed; the ranges
  // overlap, hence memmove. The +1 carries the terminator along.
  if (keep != str)
    std::memmove(str, keep, std::strlen(keep) + 1);
  return str;
}
97 
// Removes from the end of str the longest run of characters that all
// appear in rem, and returns the modified string str. rem is treated
// as a set of characters. The string is shortened in place by writing
// a new terminator.
char* vul_string_c_right_trim(char* str, const char* rem)
{
  // `end` is one past the last kept character. Working with a
  // one-past-the-end cursor means we never form a pointer before the
  // start of the buffer, even for an empty or fully trimmed string.
  char* end = str + std::strlen(str);
  while (end != str && std::strchr(rem, end[-1]) != nullptr)
    --end;
  *end = '\0';
  return str;
}
114 
// Reverses the order of the characters in the NUL-terminated string c,
// in place, and returns c.
char* vul_string_c_reverse(char* c)
{
  // std::reverse works on the full size_t length, so long strings are not
  // subject to the truncation an int index would introduce.
  std::reverse(c, c + std::strlen(c));
  return c;
}
130 
// Reverses the order of the characters in string s, in place,
// and returns s. The whole string is reversed, including any
// embedded NUL characters.
std::string& vul_string_reverse(std::string& s)
{
  // Use the string's own extent rather than strlen(c_str()), which would
  // stop at the first embedded NUL and reverse only a prefix.
  std::reverse(s.begin(), s.end());
  return s;
}
140 
141 // In some implementations of <cctype>, toupper and tolower are macros
142 // instead of functions. In that case, they cannot be passed as 4th argument
143 // to std::transform. Hence it's easier to "inline" std::transform here,
144 // instead of using it explicitly. - PVr.
145 
// Converts all alphabetical characters in string s to uppercase,
// in place, and returns s.
std::string& vul_string_upcase(std::string& s)
{
  // Go through unsigned char: std::toupper is undefined for negative
  // plain char values.
  for (char& ch : s)
    ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
  return s;
}
153 
// Converts all alphabetical characters in string s to lowercase,
// in place, and returns s.
std::string& vul_string_downcase(std::string& s)
{
  // Go through unsigned char: std::tolower is undefined for negative
  // plain char values.
  for (char& ch : s)
    ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
  return s;
}
161 
// Capitalizes all words in string s, in place, and returns s.
// Word beginnings are defined as the transition from non-alphanumeric
// to alphanumeric, and word endings as the reverse transition. Only the
// first character of each word is changed.
std::string& vul_string_capitalize(std::string& s)
{
  bool in_word = false;
  for (char& ch : s) {
    // Cast first: <cctype> functions are undefined for negative plain char.
    const unsigned char uc = static_cast<unsigned char>(ch);
    const bool alnum = std::isalnum(uc) != 0;
    if (alnum && !in_word)
      ch = static_cast<char>(std::toupper(uc));
    // Being "inside a word" is exactly "the current character is alphanumeric".
    in_word = alnum;
  }
  return s;
}
181 
// Removes any occurrence of the character string rem (matched as a
// whole substring) from the string sr, and returns the modified string sr.
// Occurrences that only come into existence because an earlier removal
// joined two fragments are removed as well.
// An empty rem leaves sr unchanged.
std::string& vul_string_trim(std::string& sr, const char* rem)
{
  const std::string::size_type rem_len = std::strlen(rem);
  // An empty pattern matches at every position without removing anything,
  // so looping on it would never terminate.
  if (rem_len == 0)
    return sr;

  std::string::size_type loc = 0;
  while ((loc = sr.find(rem, loc, rem_len)) != std::string::npos) {
    sr.erase(loc, rem_len);
    // Nothing before loc matched, so a newly formed occurrence has to span
    // the seam at loc and therefore starts at most rem_len-1 characters
    // before it. Resume there instead of rescanning from the start.
    loc = (loc >= rem_len - 1) ? loc - (rem_len - 1) : 0;
  }
  return sr;
}
195 
// If the string sr starts with the character string rem, removes that
// one leading occurrence. Returns the (possibly modified) string sr.
std::string& vul_string_left_trim(std::string& sr, const char* rem)
{
  const std::string::size_type prefix_len = std::strlen(rem);
  // compare() clamps the substring to the end of sr, so a prefix longer
  // than sr simply compares unequal.
  const bool has_prefix = (sr.compare(0, prefix_len, rem) == 0);
  if (has_prefix)
    sr.erase(0, prefix_len);
  return sr;
}
205 
// If the string sr ends with the character string rem, removes that
// one trailing occurrence. Returns the (possibly modified) string sr.
// A rem longer than sr never matches and leaves sr unchanged.
std::string& vul_string_right_trim(std::string& sr, const char* rem)
{
  const std::string::size_type rem_len = std::strlen(rem);
  // Reject over-long suffixes up front: computing size() - rem_len for them
  // would address memory in front of the string's buffer.
  if (rem_len > sr.size())
    return sr;

  const std::string::size_type pos = sr.size() - rem_len;
  if (sr.compare(pos, rem_len, rem) == 0)
    sr.erase(pos);
  return sr;
}
216 
//: Reads an integer from a string.
// Thin wrapper that hands the string's character data to std::atoi and
// returns its result unchanged.
int vul_string_atoi(std::string const& s)
{
  const char* const text = s.c_str();
  return std::atoi(text);
}
221 
//: Reads a double from a string.
// Thin wrapper that hands the string's character data to std::atof and
// returns its result unchanged.
double vul_string_atof(std::string const& s)
{
  const char* const text = s.c_str();
  return std::atof(text);
}
226 
227 
//: Reads a double from a string, with an optional magnitude suffix.
// Decimal suffixes: k=10^3,  M=10^6,   G=10^9,   T=10^12.
// Binary suffixes:  ki=2^10, Mi=2^20,  Gi=2^30,  Ti=2^40.
// Any other text after the number, or after a complete suffix, is treated
// as a parse failure. Formatted stream extraction is used, so whitespace in
// front of the number or of a suffix letter is skipped.
// \return the scaled value, or 0.0 if the parse fails.
double vul_string_atof_withsuffix(std::string const& s)
{
  std::istringstream in(s);

  double mantissa;
  in >> mantissa;
  if (in.fail()) return 0.0;       // no number could be read
  if (in.eof()) return mantissa;   // the number used up the whole string

  char letter = 'A';
  in >> letter;
  if (in.eof()) return mantissa;   // nothing readable after the number

  // Magnitude letter -> number of steps of 10^3 (or 2^10).
  double steps;
  if      (letter == 'k') steps = 1;
  else if (letter == 'M') steps = 2;
  else if (letter == 'G') steps = 3;
  else if (letter == 'T') steps = 4;
  else return 0.0;                 // unknown suffix letter

  const double decimal_value = mantissa * std::pow(10.0, 3.0 * steps);
  if (in.eof()) return decimal_value;

  letter = 'A';
  in >> letter;
  if (in.eof()) return decimal_value;   // plain decimal suffix
  if (in.fail() || letter != 'i') return 0.0;

  // An 'i' was read: this is a binary suffix, and nothing may follow it.
  in >> letter;
  if (!in.eof()) return 0.0;

  return mantissa * std::pow(2.0, 10.0 * steps);
}
265 
// Predicate for std::find_if: true when \a a is not a whitespace character
// according to std::isspace.
static bool NotSpace(char a)
{
  // std::isspace is undefined for negative plain char values, so classify
  // the character's unsigned char value instead.
  return !std::isspace(static_cast<unsigned char>(a));
}
270 
// Case-insensitive comparison of the character range [b1, e1) against the
// range [b2, e2). Each character of the first range is upper-cased before
// it is compared, so the second range is expected to be upper case already.
// \return true only if both ranges have the same length and every
//         character pair matches.
template <class IT>
static bool myequals(IT b1, IT e1,
                     const char * b2, const char * e2)
{
  for (; b1 != e1 && b2 != e2; ++b1, ++b2) {
    // Compare as unsigned char values: std::toupper is undefined for
    // negative plain char, and its result is in unsigned char range.
    if (std::toupper(static_cast<unsigned char>(*b1)) != static_cast<unsigned char>(*b2))
      return false;
  }
  // Equal only when both ranges were exhausted together.
  return b1 == e1
      && b2 == e2;
}
280 
281 bool vul_string_to_bool(const std::string &str)
282 {
283  std::string::const_iterator begin = std::find_if(str.begin(), str.end(), NotSpace);
284  const std::string::const_reverse_iterator rend(begin);
285  std::string::const_iterator end = std::find_if(str.rbegin(), rend, NotSpace).base();
286  const char *syes = "YES";
287  const char *strue = "TRUE";
288  const char *s1 = "1";
289  const char *son = "ON";
290  return myequals(begin, end, syes, syes+3)
291  || myequals(begin, end, strue, strue+4)
292  || myequals(begin, end, s1, s1+1)
293  || myequals(begin, end, son, son+2);
294 }
295 
296 
297 //: Convert a string to a list of ints, using the matlab index format.
298 // e.g. "0,1,10:14,20:-2:10" results in 0,1,10,11,12,13,14,20,18,16,14,12,10
299 // No spaces are allowed.
300 // \return empty on error.
301 std::vector<int> vul_string_to_int_list(std::string str)
302 {
303  std::vector<int> rv;
304 
305 
306 #define REGEXP_INTEGER "\\-?[0123456789]+"
307 
308  vul_reg_exp range_regexp("(" REGEXP_INTEGER ")" // int
309  "([:-]" REGEXP_INTEGER ")?" // :int [optional]
310  "([:-]" REGEXP_INTEGER ")?" // :int [optional]
311  );
312 
313 
314  while (str.length() > 0 && range_regexp.find(str)) {
315  // the start/end positions (ref from 0) of the
316  // current ',' separated token.
317  std::ptrdiff_t start= range_regexp.start(0);
318  std::ptrdiff_t endp = range_regexp.end(0);
319  if (start != 0)
320  {
321  rv.clear();
322  return rv;
323  }
324 
325 
326  std::string match1 = range_regexp.match(1);
327  std::string match2 = range_regexp.match(2);
328  std::string match3 = range_regexp.match(3);
329 
330 
331  // Remove this match from the front of string.
332  str.erase(0, endp);
333  if (str.size() > 1 && str[0] == ',' ) str.erase(0, 1);
334 
335  bool matched2 = range_regexp.match(2).size() > 0;
336  bool matched3 = range_regexp.match(3).size() > 0;
337 
338  int s = vul_string_atoi(match1);
339  int d = 1;
340  int e = s;
341  if (matched3) {
342  // "1:2:10"
343  d = vul_string_atoi(match2.substr(1));
344  e = vul_string_atoi(match3.substr(1));
345  }
346  else if (matched2)
347  e = vul_string_atoi(match2.substr(1));
348 
349  if (d==0)
350  {
351  rv.clear();
352  return rv;
353  }
354 
355  if (e >= s)
356  {
357  if (d < 0) d = -d;
358  for (int i = s; i <= e; i += d)
359  rv.push_back(i);
360  }
361  else
362  {
363  if (d > 0) d = -d;
364  for (int i = s; i >= e; i += d)
365  rv.push_back(i);
366  }
367  }
368 
369  if (!str.empty())
370  rv.clear();
371 
372  return rv;
373 }
374 
375 
376 //Leave verbatim in to avoid $->LaTeX munging.
377 
//: Expand any environment variables in the string.
// Expands "foo$VARfoo" to "foobarfoo" when $VAR=bar. For the unbracketed
// form the name is grown one character at a time, and the first (shortest)
// candidate that names an existing variable is used, so if both $VAR and
// $VARfoo exist, $VAR wins.
// This problem can be avoided by using the syntax "foo${VAR}foo." "$(VAR)"
// and "$[VAR]" can also be used.
// There are no inbuilt variables like in shell scripting, and unbracketed
// variable names cannot contain "$"s.
// "$$" can be used to insert a literal "$" into the output.
// Text inserted from a variable's value is not expanded again.
// \returns false if a matching variable could not be found. This includes
// a string that ends inside an unresolved "$NAME" or unterminated "${NAME".
bool vul_string_expand_var(std::string &str)
{
  std::string::size_type i = 0; // index to current char.
  const std::string::size_type npos = std::string::npos;

  // If there is a problem, carry on trying to convert the rest
  // of the string, but remember the failure.
  bool success = true;

  enum {not_in_var, start_var, in_var, in_bracket_var} state = not_in_var;
  std::string::size_type var_begin = 0; // index of the '$' that opened the current variable.

  std::string::size_type bracket_type = npos; // index into open_brackets.
  const std::string open_brackets("{([");
  const std::string close_brackets("})]");

  while (i < str.size())
  {
    switch (state)
    {
     case not_in_var: // not currently in a variable
      if (str[i] == '$')
      {
        state = start_var;
        var_begin = i;
      }
      break;
     case start_var: // just started a variable
      if (str[i] == '$')
      {
        // "$$": drop this second '$' and keep the first one as a literal.
        str.erase(i, 1);
        state = not_in_var;
        continue; // i now already indexes the character after the literal '$'.
      }
      else if ((bracket_type = open_brackets.find_first_of(str[i])) != npos)
      {
        state = in_bracket_var;
        break;
      }
      else // or this is the first letter of the variable, in which case go through
        state = in_var;
      // fall through
     case in_var: // in a non-bracketed variable
      assert(var_begin+1 < str.size());
      assert(i > var_begin);
      if (str[i] == '$')
      { // no dollars allowed - assume we missed last variable and this is a new one.
        success = false;
        state = start_var;
        var_begin = i;
        break;
      }
      else
      {
        // Candidate name: everything after the '$' up to and including str[i].
        const char * value = std::getenv(str.substr(var_begin+1, i-var_begin).c_str());
        if (value)
        {
          str.replace(var_begin, i+1-var_begin, value);
          i = var_begin + std::strlen(value); // resume just after the inserted value.
          state = not_in_var;
          continue;
        }
      }
      break;
     case in_bracket_var: // in a bracketed variable
      if (str[i] == close_brackets[bracket_type])
      {
        assert(var_begin+2 < str.size());
        assert(i > var_begin+1);
        state = not_in_var;
        if (i == var_begin+2) // empty variable name
        {
          success = false;
          break;
        }
        else
        {
          // Name: the text strictly between the opening and closing bracket.
          const char * value = std::getenv(str.substr(var_begin+2, i-var_begin-2).c_str());
          if (value)
          {
            str.replace(var_begin, i+1-var_begin, value);
            i = var_begin + std::strlen(value); // resume just after the inserted value.
            continue;
          }
          else
            success = false;
        }
      }
      break;
     default: // do nothing (silently ignore invalid state)
      break;
    }
    ++i;
  }

  // Running out of input while still inside a variable reference means that
  // no environment variable matched it (or its bracket was never closed).
  if (state == in_var || state == in_bracket_var)
    success = false;

  return success;
}
481 
//: replaces instances "find_str" in "full_str" with "replace_str" a given "num_times".
// Every replacement searches again from the start of full_str, so text
// produced by an earlier replacement can be matched and replaced again.
// Stops early once find_str no longer occurs.
// \returns true iff at least one replacement took place.
bool vul_string_replace(std::string& full_str,
                        const std::string& find_str,
                        const std::string& replace_str,
                        int num_times)
{
  bool replaced = false;
  for (int i = 0; i < num_times; ++i)
  {
    // Keep the position as size_type and test against npos: narrowing it
    // to int would misreport matches in very long strings.
    const std::string::size_type loc = full_str.find(find_str);
    if (loc == std::string::npos)
      break;
    full_str.replace(loc, find_str.length(), replace_str);
    replaced = true;
  }
  return replaced;
}
505 
506 
507 //: Replace control chars with escaped representations.
508 // Space and "\n" are preserved, but tabs, CR, etc are escaped.
509 // This is not aimed and is not suitable for any particular input-validation
510 // security problem, such as sql-injection.
511 std::string vul_string_escape_ctrl_chars(const std::string &in)
512 {
513  std::string out;
514 
515  const static std::string special("\t\v\b\r\f\a\\");
516  const static std::string special_tr("tvbrfa\\");
517 
518  for (char it : in)
519  {
520  if (!std::iscntrl(it) || it=='\n')
521  out+=it;
522  else
523  {
524  std::string::size_type i=special.find(it);
525  if (i==std::string::npos)
526  out+=vul_sprintf("\\x%02x",static_cast<int>(it));
527  else
528  {
529  out+='\\';
530  out+=special_tr[i];
531  }
532  }
533  }
534  return out;
535 }
std::string & vul_string_upcase(std::string &s)
Converts all alphabetical characters to uppercase.
Definition: vul_string.cxx:147
Utility functions for C strings and std::strings.
char * vul_string_c_reverse(char *c)
Reverses the order of the characters in string.
Definition: vul_string.cxx:116
double vul_string_atof(std::string const &s)
Reads a double from a string.
Definition: vul_string.cxx:222
Pattern matching with regular expressions.
Definition: vul_reg_exp.h:82
char * vul_string_c_right_trim(char *str, const char *rem)
Removes any suffix occurrence of rem from str and returns modified string.
Definition: vul_string.cxx:100
bool find(char const *)
true if regexp in char* arg.
int vul_string_atoi(std::string const &s)
Reads an integer from a string.
Definition: vul_string.cxx:217
std::string & vul_string_trim(std::string &sr, const char *rem)
Removes any occurrences of rem from str and returns modified string.
Definition: vul_string.cxx:184
std::ptrdiff_t end() const
Returns the end index of the last item found.
Definition: vul_reg_exp.h:117
#define END_OF_STRING
Definition: vul_string.cxx:20
char * vul_string_c_downcase(char *s)
Converts all alphabetical characters to lowercase.
Definition: vul_string.cxx:36
std::string vul_string_escape_ctrl_chars(const std::string &in)
Replace control chars with escaped representations.
Definition: vul_string.cxx:511
bool vul_string_to_bool(const std::string &str)
Convert a string to a boolean.
Definition: vul_string.cxx:281
std::string & vul_string_right_trim(std::string &sr, const char *rem)
Removes any suffix occurrence of rem from str and returns modified string.
Definition: vul_string.cxx:208
std::string & vul_string_downcase(std::string &s)
Converts all alphabetical characters to lowercase.
Definition: vul_string.cxx:155
std::string & vul_string_reverse(std::string &s)
Reverses the order of the characters in string.
Definition: vul_string.cxx:132
bool vul_string_replace(std::string &full_str, const std::string &find_str, const std::string &replace_str, int num_times)
replaces instances "find_str" in "full_str" with "replace_str" a given "num_times".
Definition: vul_string.cxx:484
char * vul_string_c_trim(char *str, const char *rem)
Removes any occurrences of rem from str, and returns the modified string.
Definition: vul_string.cxx:63
double vul_string_atof_withsuffix(std::string const &s)
Reads a double from a string, with k, kb, M, etc suffix.
Definition: vul_string.cxx:232
contains class for pattern matching with regular expressions
std::vector< int > vul_string_to_int_list(std::string str)
Convert a string to a list of ints, using the matlab index format.
Definition: vul_string.cxx:301
std::ptrdiff_t start() const
Returns the start index of the last item found.
Definition: vul_reg_exp.h:115
bool vul_string_expand_var(std::string &str)
Expand any environment variables in the string.
Definition: vul_string.cxx:387
char * vul_string_c_capitalize(char *s)
Capitalizes all words in a string.
Definition: vul_string.cxx:49
C++ conforming replacement to the ANSI C functions sprintf and printf.
Definition: vul_sprintf.h:31
std::string match(int n) const
Return nth submatch as a string.
Definition: vul_reg_exp.h:136
char * vul_string_c_upcase(char *s)
Converts all alphabetical characters to uppercase.
Definition: vul_string.cxx:24
std::string & vul_string_left_trim(std::string &sr, const char *rem)
Removes any prefix occurrence of rem from str and returns modified string.
Definition: vul_string.cxx:198
std::string & vul_string_capitalize(std::string &s)
Capitalizes all words in string.
Definition: vul_string.cxx:163
#define REGEXP_INTEGER
creates a formatted ANSI C++ string
char * vul_string_c_left_trim(char *str, const char *rem)
Removes any prefix occurrence of rem from str and returns modified string.
Definition: vul_string.cxx:81