Libcroco
cr-input.c
Go to the documentation of this file.
00001 /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 8-*- */
00002 
00003 /*
00004  * This file is part of The Croco Library
00005  *
00006  * This program is free software; you can redistribute it and/or
00007  * modify it under the terms of version 2.1 of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation.
00009  *
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU Lesser General Public License
00016  * along with this program; if not, write to the Free Software
00017  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00018  * USA
00019  *
00020  * Author: Dodji Seketeli
00021  * See COPYRIGHTS file for copyright information.
00022  */
00023 
00024 #include "stdio.h"
00025 #include <string.h>
00026 #include "cr-input.h"
00027 #include "cr-enc-handler.h"
00028 
00029 /**
00030  *@CRInput:
00031  *
00032  *The definition of the #CRInput class.
00033  */
00034 
00035 /*******************
00036  *Private type defs
00037  *******************/
00038 
/**
 *The private attributes of
 *the #CRInput class.
 *
 *Instances are allocated zero-filled by cr_input_new_real(),
 *so every field starts out as 0/NULL/FALSE except free_in_buf,
 *which that function sets to TRUE.
 */
struct _CRInputPriv {
        /*
         *The input buffer. The character level readers of this
         *file decode it as UTF-8.
         */
        guchar *in_buf;
        /*
         *The size of in_buf, as given by the caller or as reported
         *by the encoding conversion.
         */
        gulong in_buf_size;

        /*
         *The number of bytes of in_buf that can be read.
         *The constructors visible in this file set it to the
         *same value as in_buf_size.
         */
        gulong nb_bytes;

        /*
         *The index of the next byte
         *to be read.
         */
        gulong next_byte_index;

        /*
         *The current line number.
         *Set to 1 by cr_input_new_from_buf().
         */
        gulong line;

        /*
         *The current col number.
         *Set to 0 by cr_input_new_from_buf().
         */
        gulong col;

        /*
         *Set by cr_input_read_char() once a '\n' has been read, so that
         *the next character read starts a new line.
         */
        gboolean end_of_line;
        /*
         *Set by cr_input_read_byte() once the last byte
         *of the buffer has been returned.
         */
        gboolean end_of_input;

        /*
         *the reference count of this
         *instance.
         */
        guint ref_count;
        /*
         *Whether cr_input_destroy() must g_free() in_buf.
         */
        gboolean free_in_buf;
};
00078 
/*
 *Gives access to the private part of a #CRInput instance.
 *The expansion is fully parenthesized so that it composes
 *safely with any surrounding operator; it remains usable as
 *the left hand side of an assignment.
 */
#define PRIVATE(object) ((object)->priv)

/***************************
 *private constants
 **************************/

/*
 *The size, in bytes, of the chunks used to load a file in memory.
 *Parenthesized so that expressions like (x / CR_INPUT_MEM_CHUNK_SIZE)
 *evaluate as expected.
 */
#define CR_INPUT_MEM_CHUNK_SIZE (1024 * 4)
00085 
00086 static CRInput *cr_input_new_real (void);
00087 
00088 static CRInput *
00089 cr_input_new_real (void)
00090 {
00091         CRInput *result = NULL;
00092 
00093         result = g_try_malloc (sizeof (CRInput));
00094         if (!result) {
00095                 cr_utils_trace_info ("Out of memory");
00096                 return NULL;
00097         }
00098         memset (result, 0, sizeof (CRInput));
00099 
00100         PRIVATE (result) = g_try_malloc (sizeof (CRInputPriv));
00101         if (!PRIVATE (result)) {
00102                 cr_utils_trace_info ("Out of memory");
00103                 g_free (result);
00104                 return NULL;
00105         }
00106         memset (PRIVATE (result), 0, sizeof (CRInputPriv));
00107         PRIVATE (result)->free_in_buf = TRUE;
00108         return result;
00109 }
00110 
00111 /****************
00112  *Public methods
00113  ***************/
00114 
00115 /**
00116  * cr_input_new_from_buf:
00117  *@a_buf: the memory buffer to create the input stream from.
00118  *The #CRInput keeps this pointer so user should not free it !.
00119  *@a_len: the size of the input buffer.
00120  *@a_enc: the buffer's encoding.
00121  *@a_free_buf: if set to TRUE, this a_buf will be freed
00122  *at the destruction of this instance. If set to false, it is up
00123  *to the caller to free it.
00124  *
00125  *Creates a new input stream from a memory buffer.
00126  *Returns the newly built instance of #CRInput.
00127  */
00128 CRInput *
00129 cr_input_new_from_buf (guchar * a_buf,
00130                        gulong a_len,
00131                        enum CREncoding a_enc,
00132                        gboolean a_free_buf)
00133 {
00134         CRInput *result = NULL;
00135         enum CRStatus status = CR_OK;
00136         CREncHandler *enc_handler = NULL;
00137         gulong len = a_len;
00138 
00139         g_return_val_if_fail (a_buf, NULL);
00140 
00141         result = cr_input_new_real ();
00142         g_return_val_if_fail (result, NULL);
00143 
00144         /*transform the encoding in utf8 */
00145         if (a_enc != CR_UTF_8) {
00146                 enc_handler = cr_enc_handler_get_instance (a_enc);
00147                 if (!enc_handler) {
00148                         goto error;
00149                 }
00150 
00151                 status = cr_enc_handler_convert_input
00152                         (enc_handler, a_buf, &len,
00153                          &PRIVATE (result)->in_buf,
00154                          &PRIVATE (result)->in_buf_size);
00155                 if (status != CR_OK)
00156                         goto error;
00157                 PRIVATE (result)->free_in_buf = TRUE;
00158                 if (a_free_buf == TRUE && a_buf) {
00159                         g_free (a_buf) ;
00160                         a_buf = NULL ;
00161                 }                
00162                 PRIVATE (result)->nb_bytes = PRIVATE (result)->in_buf_size;
00163         } else {
00164                 PRIVATE (result)->in_buf = (guchar *) a_buf;
00165                 PRIVATE (result)->in_buf_size = a_len;
00166                 PRIVATE (result)->nb_bytes = a_len;
00167                 PRIVATE (result)->free_in_buf = a_free_buf;
00168         }
00169         PRIVATE (result)->line = 1;
00170         PRIVATE (result)->col =  0;
00171         return result;
00172 
00173  error:
00174         if (result) {
00175                 cr_input_destroy (result);
00176                 result = NULL;
00177         }
00178 
00179         return NULL;
00180 }
00181 
00182 /**
00183  * cr_input_new_from_uri:
00184  *@a_file_uri: the file to create *the input stream from.
00185  *@a_enc: the encoding of the file *to create the input from.
00186  *
00187  *Creates a new input stream from
00188  *a file.
00189  *
00190  *Returns the newly created input stream if
00191  *this method could read the file and create it,
00192  *NULL otherwise.
00193  */
00194 
00195 CRInput *
00196 cr_input_new_from_uri (const gchar * a_file_uri, enum CREncoding a_enc)
00197 {
00198         CRInput *result = NULL;
00199         enum CRStatus status = CR_OK;
00200         FILE *file_ptr = NULL;
00201         guchar tmp_buf[CR_INPUT_MEM_CHUNK_SIZE] = { 0 };
00202         gulong nb_read = 0,
00203                 len = 0,
00204                 buf_size = 0;
00205         gboolean loop = TRUE;
00206         guchar *buf = NULL;
00207 
00208         g_return_val_if_fail (a_file_uri, NULL);
00209 
00210         file_ptr = fopen (a_file_uri, "r");
00211 
00212         if (file_ptr == NULL) {
00213 
00214 #ifdef CR_DEBUG
00215                 cr_utils_trace_debug ("could not open file");
00216 #endif
00217                 g_warning ("Could not open file %s\n", a_file_uri);
00218 
00219                 return NULL;
00220         }
00221 
00222         /*load the file */
00223         while (loop) {
00224                 nb_read = fread (tmp_buf, 1 /*read bytes */ ,
00225                                  CR_INPUT_MEM_CHUNK_SIZE /*nb of bytes */ ,
00226                                  file_ptr);
00227 
00228                 if (nb_read != CR_INPUT_MEM_CHUNK_SIZE) {
00229                         /*we read less chars than we wanted */
00230                         if (feof (file_ptr)) {
00231                                 /*we reached eof */
00232                                 loop = FALSE;
00233                         } else {
00234                                 /*a pb occured !! */
00235                                 cr_utils_trace_debug ("an io error occured");
00236                                 status = CR_ERROR;
00237                                 goto cleanup;
00238                         }
00239                 }
00240 
00241                 if (status == CR_OK) {
00242                         /*read went well */
00243                         buf = g_realloc (buf, len + CR_INPUT_MEM_CHUNK_SIZE);
00244                         memcpy (buf + len, tmp_buf, nb_read);
00245                         len += nb_read;
00246                         buf_size += CR_INPUT_MEM_CHUNK_SIZE;
00247                 }
00248         }
00249 
00250         if (status == CR_OK) {
00251                 result = cr_input_new_from_buf (buf, len, a_enc, TRUE);
00252                 if (!result) {
00253                         goto cleanup;
00254                 }
00255                 /*
00256                  *we should  free buf here because it's own by CRInput.
00257                  *(see the last parameter of cr_input_new_from_buf().
00258                  */
00259                 buf = NULL;
00260         }
00261 
00262  cleanup:
00263         if (file_ptr) {
00264                 fclose (file_ptr);
00265                 file_ptr = NULL;
00266         }
00267 
00268         if (buf) {
00269                 g_free (buf);
00270                 buf = NULL;
00271         }
00272 
00273         return result;
00274 }
00275 
00276 /**
00277  * cr_input_destroy:
00278  *@a_this: the current instance of #CRInput.
00279  *
00280  *The destructor of the #CRInput class.
00281  */
00282 void
00283 cr_input_destroy (CRInput * a_this)
00284 {
00285         if (a_this == NULL)
00286                 return;
00287 
00288         if (PRIVATE (a_this)) {
00289                 if (PRIVATE (a_this)->in_buf && PRIVATE (a_this)->free_in_buf) {
00290                         g_free (PRIVATE (a_this)->in_buf);
00291                         PRIVATE (a_this)->in_buf = NULL;
00292                 }
00293 
00294                 g_free (PRIVATE (a_this));
00295                 PRIVATE (a_this) = NULL;
00296         }
00297 
00298         g_free (a_this);
00299 }
00300 
00301 /**
00302  * cr_input_ref:
00303  *@a_this: the current instance of #CRInput.
00304  *
00305  *Increments the reference count of the current
00306  *instance of #CRInput.
00307  */
00308 void
00309 cr_input_ref (CRInput * a_this)
00310 {
00311         g_return_if_fail (a_this && PRIVATE (a_this));
00312 
00313         PRIVATE (a_this)->ref_count++;
00314 }
00315 
00316 /**
00317  * cr_input_unref:
00318  *@a_this: the current instance of #CRInput.
00319  *
00320  *Decrements the reference count of this instance
00321  *of #CRInput. If the reference count goes down to
00322  *zero, this instance is destroyed.
00323  *
00324  * Returns TRUE if the instance of #CRInput got destroyed, false otherwise.
00325  */
00326 gboolean
00327 cr_input_unref (CRInput * a_this)
00328 {
00329         g_return_val_if_fail (a_this && PRIVATE (a_this), FALSE);
00330 
00331         if (PRIVATE (a_this)->ref_count) {
00332                 PRIVATE (a_this)->ref_count--;
00333         }
00334 
00335         if (PRIVATE (a_this)->ref_count == 0) {
00336                 cr_input_destroy (a_this);
00337                 return TRUE;
00338         }
00339         return FALSE;
00340 }
00341 
00342 /**
00343  * cr_input_end_of_input:
00344  *@a_this: the current instance of #CRInput.
00345  *@a_end_of_input: out parameter. Is set to TRUE if
00346  *the current instance has reached the end of its input buffer,
00347  *FALSE otherwise.
00348  *
00349  *Tests wether the current instance of
00350  *#CRInput has reached its input buffer.
00351  *
00352  * Returns CR_OK upon successful completion, an error code otherwise.
00353  * Note that all the out parameters of this method are valid if
00354  * and only if this method returns CR_OK.
00355  */
00356 enum CRStatus
00357 cr_input_end_of_input (CRInput const * a_this, gboolean * a_end_of_input)
00358 {
00359         g_return_val_if_fail (a_this && PRIVATE (a_this)
00360                               && a_end_of_input, CR_BAD_PARAM_ERROR);
00361 
00362         *a_end_of_input = (PRIVATE (a_this)->next_byte_index
00363                            >= PRIVATE (a_this)->in_buf_size) ? TRUE : FALSE;
00364 
00365         return CR_OK;
00366 }
00367 
00368 /**
00369  * cr_input_get_nb_bytes_left:
00370  *@a_this: the current instance of #CRInput.
00371  *
00372  *Returns the number of bytes left in the input stream
00373  *before the end, -1 in case of error.
00374  */
00375 glong
00376 cr_input_get_nb_bytes_left (CRInput const * a_this)
00377 {
00378         g_return_val_if_fail (a_this && PRIVATE (a_this), -1);
00379         g_return_val_if_fail (PRIVATE (a_this)->nb_bytes
00380                               <= PRIVATE (a_this)->in_buf_size, -1);
00381         g_return_val_if_fail (PRIVATE (a_this)->next_byte_index
00382                               <= PRIVATE (a_this)->nb_bytes, -1);
00383 
00384         if (PRIVATE (a_this)->end_of_input)
00385                 return 0;
00386 
00387         return PRIVATE (a_this)->nb_bytes - PRIVATE (a_this)->next_byte_index;
00388 }
00389 
00390 /**
00391  * cr_input_read_byte:
00392  *@a_this: the current instance of #CRInput.
00393  *@a_byte: out parameter the returned byte.
00394  *
00395  *Gets the next byte of the input.
00396  *Updates the state of the input so that
00397  *the next invocation of this method  returns
00398  *the next coming byte.
00399  *
00400  *Returns CR_OK upon successful completion, an error code
00401  *otherwise. All the out parameters of this method are valid if
00402  *and only if this method returns CR_OK.
00403  */
00404 enum CRStatus
00405 cr_input_read_byte (CRInput * a_this, guchar * a_byte)
00406 {
00407         gulong nb_bytes_left = 0;
00408 
00409         g_return_val_if_fail (a_this && PRIVATE (a_this)
00410                               && a_byte, CR_BAD_PARAM_ERROR);
00411 
00412         g_return_val_if_fail (PRIVATE (a_this)->next_byte_index <=
00413                               PRIVATE (a_this)->nb_bytes, CR_BAD_PARAM_ERROR);
00414 
00415         if (PRIVATE (a_this)->end_of_input == TRUE)
00416                 return CR_END_OF_INPUT_ERROR;
00417 
00418         nb_bytes_left = cr_input_get_nb_bytes_left (a_this);
00419 
00420         if (nb_bytes_left < 1) {
00421                 return CR_END_OF_INPUT_ERROR;
00422         }
00423 
00424         *a_byte = PRIVATE (a_this)->in_buf[PRIVATE (a_this)->next_byte_index];
00425 
00426         if (PRIVATE (a_this)->nb_bytes -
00427             PRIVATE (a_this)->next_byte_index < 2) {
00428                 PRIVATE (a_this)->end_of_input = TRUE;
00429         } else {
00430                 PRIVATE (a_this)->next_byte_index++;
00431         }
00432 
00433         return CR_OK;
00434 }
00435 
00436 /**
00437  * cr_input_read_char:
00438  *@a_this: the current instance of CRInput.
00439  *@a_char: out parameter. The read character.
00440  *
00441  *Reads an unicode character from the current instance of
00442  *#CRInput.
00443  *
00444  *Returns CR_OK upon successful completion, an error code
00445  *otherwise.
00446  */
00447 enum CRStatus
00448 cr_input_read_char (CRInput * a_this, guint32 * a_char)
00449 {
00450         enum CRStatus status = CR_OK;
00451         gulong consumed = 0,
00452                 nb_bytes_left = 0;
00453 
00454         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_char,
00455                               CR_BAD_PARAM_ERROR);
00456 
00457         if (PRIVATE (a_this)->end_of_input == TRUE)
00458                 return CR_END_OF_INPUT_ERROR;
00459 
00460         nb_bytes_left = cr_input_get_nb_bytes_left (a_this);
00461 
00462         if (nb_bytes_left < 1) {
00463                 return CR_END_OF_INPUT_ERROR;
00464         }
00465 
00466         status = cr_utils_read_char_from_utf8_buf
00467                 (PRIVATE (a_this)->in_buf
00468                  +
00469                  PRIVATE (a_this)->next_byte_index,
00470                  nb_bytes_left, a_char, &consumed);
00471 
00472         if (status == CR_OK) {
00473                 /*update next byte index */
00474                 PRIVATE (a_this)->next_byte_index += consumed;
00475 
00476                 /*update line and column number */
00477                 if (PRIVATE (a_this)->end_of_line == TRUE) {
00478                         PRIVATE (a_this)->col = 1;
00479                         PRIVATE (a_this)->line++;
00480                         PRIVATE (a_this)->end_of_line = FALSE;
00481                 } else if (*a_char != '\n') {
00482                         PRIVATE (a_this)->col++;
00483                 }
00484 
00485                 if (*a_char == '\n') {
00486                         PRIVATE (a_this)->end_of_line = TRUE;
00487                 }
00488         }
00489 
00490         return status;
00491 }
00492 
00493 /**
00494  * cr_input_set_line_num:
00495  *@a_this: the "this pointer" of the current instance of #CRInput.
00496  *@a_line_num: the new line number.
00497  *
00498  *Setter of the current line number.
00499  *
00500  *Return CR_OK upon successful completion, an error code otherwise.
00501  */
00502 enum CRStatus
00503 cr_input_set_line_num (CRInput * a_this, glong a_line_num)
00504 {
00505         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
00506 
00507         PRIVATE (a_this)->line = a_line_num;
00508 
00509         return CR_OK;
00510 }
00511 
00512 /**
00513  * cr_input_get_line_num:
00514  *@a_this: the "this pointer" of the current instance of #CRInput.
00515  *@a_line_num: the returned line number.
00516  *
00517  *Getter of the current line number.
00518  *
00519  *Returns CR_OK upon successful completion, an error code otherwise.
00520  */
00521 enum CRStatus
00522 cr_input_get_line_num (CRInput const * a_this, glong * a_line_num)
00523 {
00524         g_return_val_if_fail (a_this && PRIVATE (a_this)
00525                               && a_line_num, CR_BAD_PARAM_ERROR);
00526 
00527         *a_line_num = PRIVATE (a_this)->line;
00528 
00529         return CR_OK;
00530 }
00531 
00532 /**
00533  * cr_input_set_column_num:
00534  *@a_this: the "this pointer" of the current instance of #CRInput.
00535  *@a_col: the new column number.
00536  *
00537  *Setter of the current column number.
00538  *
00539  *Returns CR_OK upon successful completion, an error code otherwise.
00540  */
00541 enum CRStatus
00542 cr_input_set_column_num (CRInput * a_this, glong a_col)
00543 {
00544         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
00545 
00546         PRIVATE (a_this)->col = a_col;
00547 
00548         return CR_OK;
00549 }
00550 
00551 /**
00552  * cr_input_get_column_num:
00553  *@a_this: the "this pointer" of the current instance of #CRInput.
00554  *@a_col: out parameter
00555  *
00556  *Getter of the current column number.
00557  *
00558  *Returns CR_OK upon successful completion, an error code otherwise.
00559  */
00560 enum CRStatus
00561 cr_input_get_column_num (CRInput const * a_this, glong * a_col)
00562 {
00563         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_col,
00564                               CR_BAD_PARAM_ERROR);
00565 
00566         *a_col = PRIVATE (a_this)->col;
00567 
00568         return CR_OK;
00569 }
00570 
00571 /**
00572  * cr_input_increment_line_num:
00573  *@a_this: the "this pointer" of the current instance of #CRInput.
00574  *@a_increment: the increment to add to the line number.
00575  *
00576  *Increments the current line number.
00577  *
00578  *Returns CR_OK upon successful completion, an error code otherwise.
00579  */
00580 enum CRStatus
00581 cr_input_increment_line_num (CRInput * a_this, glong a_increment)
00582 {
00583         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
00584 
00585         PRIVATE (a_this)->line += a_increment;
00586 
00587         return CR_OK;
00588 }
00589 
00590 /**
00591  * cr_input_increment_col_num:
00592  *@a_this: the "this pointer" of the current instance of #CRInput.
00593  *@a_increment: the increment to add to the column number.
00594  *
00595  *Increments the current column number.
00596  *
00597  *Returns CR_OK upon successful completion, an error code otherwise.
00598  */
00599 enum CRStatus
00600 cr_input_increment_col_num (CRInput * a_this, glong a_increment)
00601 {
00602         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
00603 
00604         PRIVATE (a_this)->col += a_increment;
00605 
00606         return CR_OK;
00607 }
00608 
00609 /**
00610  * cr_input_consume_char:
00611  *@a_this: the this pointer.
00612  *@a_char: the character to consume. If set to zero,
00613  *consumes any character.
00614  *
00615  *Consumes the next character of the input stream if
00616  *and only if that character equals a_char.
00617  *
00618  *Returns CR_OK upon successful completion, CR_PARSING_ERROR if
00619  *next char is different from a_char, an other error code otherwise
00620  */
00621 enum CRStatus
00622 cr_input_consume_char (CRInput * a_this, guint32 a_char)
00623 {
00624         guint32 c;
00625         enum CRStatus status;
00626 
00627         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
00628 
00629         if ((status = cr_input_peek_char (a_this, &c)) != CR_OK) {
00630                 return status;
00631         }
00632 
00633         if (c == a_char || a_char == 0) {
00634                 status = cr_input_read_char (a_this, &c);
00635         } else {
00636                 return CR_PARSING_ERROR;
00637         }
00638 
00639         return status;
00640 }
00641 
00642 /**
00643  * cr_input_consume_chars:
00644  *@a_this: the this pointer of the current instance of #CRInput.
00645  *@a_char: the character to consume.
00646  *@a_nb_char: in/out parameter. The number of characters to consume.
00647  *If set to a negative value, the function will consume all the occurences
00648  *of a_char found.
00649  *After return, if the return value equals CR_OK, this variable contains 
00650  *the number of characters actually consumed.
00651  *
00652  *Consumes up to a_nb_char occurences of the next contiguous characters 
00653  *which equal a_char. Note that the next character of the input stream
00654  **MUST* equal a_char to trigger the consumption, or else, the error
00655  *code CR_PARSING_ERROR is returned.
00656  *If the number of contiguous characters that equals a_char is less than
00657  *a_nb_char, then this function consumes all the characters it can consume.
00658  * 
00659  *Returns CR_OK if at least one character has been consumed, an error code
00660  *otherwise.
00661  */
00662 enum CRStatus
00663 cr_input_consume_chars (CRInput * a_this, guint32 a_char, gulong * a_nb_char)
00664 {
00665         enum CRStatus status = CR_OK;
00666         gulong nb_consumed = 0;
00667 
00668         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_nb_char,
00669                               CR_BAD_PARAM_ERROR);
00670 
00671         g_return_val_if_fail (a_char != 0 || a_nb_char != NULL,
00672                               CR_BAD_PARAM_ERROR);
00673 
00674         for (nb_consumed = 0; ((status == CR_OK)
00675                                && (*a_nb_char > 0
00676                                    && nb_consumed < *a_nb_char));
00677              nb_consumed++) {
00678                 status = cr_input_consume_char (a_this, a_char);
00679         }
00680 
00681         *a_nb_char = nb_consumed;
00682 
00683         if ((nb_consumed > 0)
00684             && ((status == CR_PARSING_ERROR)
00685                 || (status == CR_END_OF_INPUT_ERROR))) {
00686                 status = CR_OK;
00687         }
00688 
00689         return status;
00690 }
00691 
00692 /**
00693  * cr_input_consume_white_spaces:
00694  *@a_this: the "this pointer" of the current instance of #CRInput.
00695  *@a_nb_chars: in/out parameter. The number of white spaces to
00696  *consume. After return, holds the number of white spaces actually consumed.
00697  *
00698  *Same as cr_input_consume_chars() but this one consumes white
00699  *spaces.
00700  *
00701  *Returns CR_OK upon successful completion, an error code otherwise.
00702  */
00703 enum CRStatus
00704 cr_input_consume_white_spaces (CRInput * a_this, gulong * a_nb_chars)
00705 {
00706         enum CRStatus status = CR_OK;
00707         guint32 cur_char = 0,
00708                 nb_consumed = 0;
00709 
00710         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_nb_chars,
00711                               CR_BAD_PARAM_ERROR);
00712 
00713         for (nb_consumed = 0;
00714              ((*a_nb_chars > 0) && (nb_consumed < *a_nb_chars));
00715              nb_consumed++) {
00716                 status = cr_input_peek_char (a_this, &cur_char);
00717                 if (status != CR_OK)
00718                         break;
00719 
00720                 /*if the next char is a white space, consume it ! */
00721                 if (cr_utils_is_white_space (cur_char) == TRUE) {
00722                         status = cr_input_read_char (a_this, &cur_char);
00723                         if (status != CR_OK)
00724                                 break;
00725                         continue;
00726                 }
00727 
00728                 break;
00729 
00730         }
00731 
00732         *a_nb_chars = (gulong) nb_consumed;
00733 
00734         if (nb_consumed && status == CR_END_OF_INPUT_ERROR) {
00735                 status = CR_OK;
00736         }
00737 
00738         return status;
00739 }
00740 
00741 /**
00742  * cr_input_peek_char:
00743  *@a_this: the current instance of #CRInput.
00744  *@a_char: out parameter. The returned character.
00745  *
00746  *Same as cr_input_read_char() but does not update the
00747  *internal state of the input stream. The next call
00748  *to cr_input_peek_char() or cr_input_read_char() will thus
00749  *return the same character as the current one.
00750  *
00751  *Returns CR_OK upon successful completion, an error code
00752  *otherwise.
00753  */
00754 enum CRStatus
00755 cr_input_peek_char (CRInput const * a_this, guint32 * a_char)
00756 {
00757         enum CRStatus status = CR_OK;
00758         gulong consumed = 0,
00759                 nb_bytes_left = 0;
00760 
00761         g_return_val_if_fail (a_this && PRIVATE (a_this)
00762                               && a_char, CR_BAD_PARAM_ERROR);
00763 
00764         if (PRIVATE (a_this)->next_byte_index >=
00765             PRIVATE (a_this)->in_buf_size) {
00766                 return CR_END_OF_INPUT_ERROR;
00767         }
00768 
00769         nb_bytes_left = cr_input_get_nb_bytes_left (a_this);
00770 
00771         if (nb_bytes_left < 1) {
00772                 return CR_END_OF_INPUT_ERROR;
00773         }
00774 
00775         status = cr_utils_read_char_from_utf8_buf
00776                 (PRIVATE (a_this)->in_buf +
00777                  PRIVATE (a_this)->next_byte_index,
00778                  nb_bytes_left, a_char, &consumed);
00779 
00780         return status;
00781 }
00782 
00783 /**
00784  * cr_input_peek_byte:
00785  *@a_this: the current instance of #CRInput.
00786  *@a_origin: the origin to consider in the calculation
00787  *of the position of the byte to peek.
00788  *@a_offset: the offset of the byte to peek, starting from
00789  *the origin specified by a_origin.
00790  *@a_byte: out parameter the peeked byte.
00791  *
00792  *Gets a byte from the input stream,
00793  *starting from the current position in the input stream.
00794  *Unlike cr_input_peek_next_byte() this method
00795  *does not update the state of the current input stream.
00796  *Subsequent calls to cr_input_peek_byte with the same arguments
00797  *will return the same byte.
00798  *
00799  *Returns CR_OK upon successful completion or,
00800  *CR_BAD_PARAM_ERROR if at least one of the parameters is invalid;
00801  *CR_OUT_OF_BOUNDS_ERROR if the indexed byte is out of bounds.
00802  */
00803 enum CRStatus
00804 cr_input_peek_byte (CRInput const * a_this, enum CRSeekPos a_origin,
00805                     gulong a_offset, guchar * a_byte)
00806 {
00807         gulong abs_offset = 0;
00808 
00809         g_return_val_if_fail (a_this && PRIVATE (a_this)
00810                               && a_byte, CR_BAD_PARAM_ERROR);
00811 
00812         switch (a_origin) {
00813 
00814         case CR_SEEK_CUR:
00815                 abs_offset = PRIVATE (a_this)->next_byte_index - 1 + a_offset;
00816                 break;
00817 
00818         case CR_SEEK_BEGIN:
00819                 abs_offset = a_offset;
00820                 break;
00821 
00822         case CR_SEEK_END:
00823                 abs_offset = PRIVATE (a_this)->in_buf_size - 1 - a_offset;
00824                 break;
00825 
00826         default:
00827                 return CR_BAD_PARAM_ERROR;
00828         }
00829 
00830         if (abs_offset < PRIVATE (a_this)->in_buf_size) {
00831 
00832                 *a_byte = PRIVATE (a_this)->in_buf[abs_offset];
00833 
00834                 return CR_OK;
00835 
00836         } else {
00837                 return CR_END_OF_INPUT_ERROR;
00838         }
00839 }
00840 
00841 /**
00842  * cr_input_peek_byte2:
00843  *@a_this: the current byte input stream.
00844  *@a_offset: the offset of the byte to peek, starting
00845  *from the current input position pointer.
00846  *@a_eof: out parameter. Is set to true is we reach end of
00847  *stream. If set to NULL by the caller, this parameter is not taken
00848  *in account.
00849  *
00850  *Same as cr_input_peek_byte() but with a simplified
00851  *interface.
00852  *
00853  *Returns the read byte or 0 if something bad happened.
00854  */
00855 guchar
00856 cr_input_peek_byte2 (CRInput const * a_this, gulong a_offset, gboolean * a_eof)
00857 {
00858         guchar result = 0;
00859         enum CRStatus status = CR_ERROR;
00860 
00861         g_return_val_if_fail (a_this && PRIVATE (a_this), 0);
00862 
00863         if (a_eof)
00864                 *a_eof = FALSE;
00865 
00866         status = cr_input_peek_byte (a_this, CR_SEEK_CUR, a_offset, &result);
00867 
00868         if ((status == CR_END_OF_INPUT_ERROR)
00869             && a_eof)
00870                 *a_eof = TRUE;
00871 
00872         return result;
00873 }
00874 
00875 /**
00876  * cr_input_get_byte_addr:
00877  *@a_this: the current instance of #CRInput.
00878  *@a_offset: the offset of the byte in the input stream starting
00879  *from the beginning of the stream.
00880  *
00881  *Gets the memory address of the byte located at a given offset
00882  *in the input stream.
00883  *
00884  *Returns the address, otherwise NULL if an error occured.
00885  */
00886 guchar *
00887 cr_input_get_byte_addr (CRInput * a_this, gulong a_offset)
00888 {
00889         g_return_val_if_fail (a_this && PRIVATE (a_this), NULL);
00890 
00891         if (a_offset >= PRIVATE (a_this)->nb_bytes) {
00892                 return NULL;
00893         }
00894 
00895         return &PRIVATE (a_this)->in_buf[a_offset];
00896 }
00897 
00898 /**
00899  * cr_input_get_cur_byte_addr:
00900  *@a_this: the current input stream
00901  *@a_offset: out parameter. The returned address.
00902  *
00903  *Gets the address of the current character pointer.
00904  *
00905  *Returns CR_OK upon successful completion, an error code otherwise.
00906  */
00907 enum CRStatus
00908 cr_input_get_cur_byte_addr (CRInput * a_this, guchar ** a_offset)
00909 {
00910         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_offset,
00911                               CR_BAD_PARAM_ERROR);
00912 
00913         if (!PRIVATE (a_this)->next_byte_index) {
00914                 return CR_START_OF_INPUT_ERROR;
00915         }
00916 
00917         *a_offset = cr_input_get_byte_addr
00918                 (a_this, PRIVATE (a_this)->next_byte_index - 1);
00919 
00920         return CR_OK;
00921 }
00922 
00923 /**
00924  * cr_input_seek_index:
00925  *@a_this: the current instance of #CRInput.
00926  *@a_origin: the origin to consider during the calculation
00927  *of the absolute position of the new "current byte index".
00928  *@a_pos: the relative offset of the new "current byte index."
00929  *This offset is relative to the origin a_origin.
00930  *
00931  *Sets the "current byte index" of the current instance
00932  *of #CRInput. Next call to cr_input_get_byte() will return
00933  *the byte next after the new "current byte index".
00934  *
00935  *Returns CR_OK upon successful completion otherwise returns
00936  *CR_BAD_PARAM_ERROR if at least one of the parameters is not valid
00937  *or CR_OUT_BOUNDS_ERROR in case of error.
00938  */
00939 enum CRStatus
00940 cr_input_seek_index (CRInput * a_this, enum CRSeekPos a_origin, gint a_pos)
00941 {
00942 
00943         glong abs_offset = 0;
00944 
00945         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
00946 
00947         switch (a_origin) {
00948 
00949         case CR_SEEK_CUR:
00950                 abs_offset = PRIVATE (a_this)->next_byte_index - 1 + a_pos;
00951                 break;
00952 
00953         case CR_SEEK_BEGIN:
00954                 abs_offset = a_pos;
00955                 break;
00956 
00957         case CR_SEEK_END:
00958                 abs_offset = PRIVATE (a_this)->in_buf_size - 1 - a_pos;
00959                 break;
00960 
00961         default:
00962                 return CR_BAD_PARAM_ERROR;
00963         }
00964 
00965         if ((abs_offset > 0)
00966             && (gulong) abs_offset < PRIVATE (a_this)->nb_bytes) {
00967 
00968                 /*update the input stream's internal state */
00969                 PRIVATE (a_this)->next_byte_index = abs_offset + 1;
00970 
00971                 return CR_OK;
00972         }
00973 
00974         return CR_OUT_OF_BOUNDS_ERROR;
00975 }
00976 
00977 /**
00978  * cr_input_get_cur_pos:
00979  *@a_this: the current instance of #CRInput.
00980  *@a_pos: out parameter. The returned position.
00981  *
00982  *Gets the position of the "current byte index" which
00983  *is basically the position of the last returned byte in the
00984  *input stream.
00985  *
00986  *Returns CR_OK upon successful completion. Otherwise,
00987  *CR_BAD_PARAMETER_ERROR if at least one of the arguments is invalid.
00988  *CR_START_OF_INPUT if no call to either cr_input_read_byte()
00989  *or cr_input_seek_index() have been issued before calling 
00990  *cr_input_get_cur_pos()
00991  *Note that the out parameters of this function are valid if and only if this
00992  *function returns CR_OK.
00993  */
00994 enum CRStatus
00995 cr_input_get_cur_pos (CRInput const * a_this, CRInputPos * a_pos)
00996 {
00997         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_pos,
00998                               CR_BAD_PARAM_ERROR);
00999 
01000         a_pos->next_byte_index = PRIVATE (a_this)->next_byte_index;
01001         a_pos->line = PRIVATE (a_this)->line;
01002         a_pos->col = PRIVATE (a_this)->col;
01003         a_pos->end_of_line = PRIVATE (a_this)->end_of_line;
01004         a_pos->end_of_file = PRIVATE (a_this)->end_of_input;
01005 
01006         return CR_OK;
01007 }
01008 
01009 /**
01010  * cr_input_get_parsing_location:
01011  *@a_this: the current instance of #CRInput
01012  *@a_loc: the set parsing location.
01013  *
01014  *Gets the current parsing location.
01015  *The Parsing location is a public datastructure that
01016  *represents the current line/column/byte offset/ in the input
01017  *stream.
01018  *
01019  *Returns CR_OK upon successful completion, an error
01020  *code otherwise.
01021  */
01022 enum CRStatus
01023 cr_input_get_parsing_location (CRInput const *a_this,
01024                                CRParsingLocation *a_loc)
01025 {
01026         g_return_val_if_fail (a_this 
01027                               && PRIVATE (a_this)
01028                               && a_loc, 
01029                               CR_BAD_PARAM_ERROR) ;
01030 
01031         a_loc->line = PRIVATE (a_this)->line ;
01032         a_loc->column = PRIVATE (a_this)->col ;
01033         if (PRIVATE (a_this)->next_byte_index) {
01034                 a_loc->byte_offset = PRIVATE (a_this)->next_byte_index - 1 ;
01035         } else {
01036                 a_loc->byte_offset = PRIVATE (a_this)->next_byte_index  ;
01037         }
01038         return CR_OK ;
01039 }
01040 
01041 /**
01042  * cr_input_get_cur_index:
01043  *@a_this: the "this pointer" of the current instance of
01044  *#CRInput
01045  *@a_index: out parameter. The returned index.
01046  *
01047  *Getter of the next byte index. 
01048  *It actually returns the index of the
01049  *next byte to be read.
01050  *
01051  *Returns CR_OK upon successful completion, an error code
01052  *otherwise.
01053  */
01054 enum CRStatus
01055 cr_input_get_cur_index (CRInput const * a_this, glong * a_index)
01056 {
01057         g_return_val_if_fail (a_this && PRIVATE (a_this)
01058                               && a_index, CR_BAD_PARAM_ERROR);
01059 
01060         *a_index = PRIVATE (a_this)->next_byte_index;
01061 
01062         return CR_OK;
01063 }
01064 
01065 /**
01066  * cr_input_set_cur_index:
01067  *@a_this: the "this pointer" of the current instance
01068  *of #CRInput .
01069  *@a_index: the new index to set.
01070  *
01071  *Setter of the next byte index.
01072  *It sets the index of the next byte to be read.
01073  *
01074  *Returns CR_OK upon successful completion, an error code otherwise.
01075  */
01076 enum CRStatus
01077 cr_input_set_cur_index (CRInput * a_this, glong a_index)
01078 {
01079         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
01080 
01081         PRIVATE (a_this)->next_byte_index = a_index;
01082 
01083         return CR_OK;
01084 }
01085 
01086 /**
01087  * cr_input_set_end_of_file:
01088  *@a_this: the current instance of #CRInput.
01089  *@a_eof: the new end of file flag.
01090  *
01091  *Sets the end of file flag.
01092  *
01093  *Returns CR_OK upon successful completion, an error code otherwise.
01094  */
01095 enum CRStatus
01096 cr_input_set_end_of_file (CRInput * a_this, gboolean a_eof)
01097 {
01098         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
01099 
01100         PRIVATE (a_this)->end_of_input = a_eof;
01101 
01102         return CR_OK;
01103 }
01104 
01105 /**
01106  * cr_input_get_end_of_file:
01107  *@a_this: the current instance of #CRInput.
01108  *@a_eof: out parameter the place to put the end of
01109  *file flag.
01110  *
01111  *Gets the end of file flag.
01112  *
01113  *Returns CR_OK upon successful completion, an error code otherwise.
01114  */
01115 enum CRStatus
01116 cr_input_get_end_of_file (CRInput const * a_this, gboolean * a_eof)
01117 {
01118         g_return_val_if_fail (a_this && PRIVATE (a_this)
01119                               && a_eof, CR_BAD_PARAM_ERROR);
01120 
01121         *a_eof = PRIVATE (a_this)->end_of_input;
01122 
01123         return CR_OK;
01124 }
01125 
01126 /**
01127  * cr_input_set_end_of_line:
01128  *@a_this: the current instance of #CRInput.
01129  *@a_eol: the new end of line flag.
01130  *
01131  *Sets the end of line flag.
01132  *
01133  *Returns CR_OK upon successful completion, an error code
01134  *otherwise.
01135  */
01136 enum CRStatus
01137 cr_input_set_end_of_line (CRInput * a_this, gboolean a_eol)
01138 {
01139         g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
01140 
01141         PRIVATE (a_this)->end_of_line = a_eol;
01142 
01143         return CR_OK;
01144 }
01145 
01146 /**
01147  * cr_input_get_end_of_line:
01148  *@a_this: the current instance of #CRInput
01149  *@a_eol: out parameter. The place to put
01150  *the returned flag
01151  *
01152  *Gets the end of line flag of the current input.
01153  *
01154  *Returns CR_OK upon successful completion, an error code
01155  *otherwise.
01156  */
01157 enum CRStatus
01158 cr_input_get_end_of_line (CRInput const * a_this, gboolean * a_eol)
01159 {
01160         g_return_val_if_fail (a_this && PRIVATE (a_this)
01161                               && a_eol, CR_BAD_PARAM_ERROR);
01162 
01163         *a_eol = PRIVATE (a_this)->end_of_line;
01164 
01165         return CR_OK;
01166 }
01167 
01168 /**
01169  * cr_input_set_cur_pos:
01170  *@a_this: the "this pointer" of the current instance of
01171  *#CRInput.
01172  *@a_pos: the new position.
01173  *
01174  *Sets the current position in the input stream.
01175  *
01176  * Returns CR_OK upon successful completion, an error code otherwise.
01177  */
01178 enum CRStatus
01179 cr_input_set_cur_pos (CRInput * a_this, CRInputPos const * a_pos)
01180 {
01181         g_return_val_if_fail (a_this && PRIVATE (a_this) && a_pos,
01182                               CR_BAD_PARAM_ERROR);
01183 
01184         cr_input_set_column_num (a_this, a_pos->col);
01185         cr_input_set_line_num (a_this, a_pos->line);
01186         cr_input_set_cur_index (a_this, a_pos->next_byte_index);
01187         cr_input_set_end_of_line (a_this, a_pos->end_of_line);
01188         cr_input_set_end_of_file (a_this, a_pos->end_of_file);
01189 
01190         return CR_OK;
01191 }