intl/icu/source/tools/toolutil/uparse.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 *******************************************************************************
     3 *
     4 *   Copyright (C) 2000-2010, International Business Machines
     5 *   Corporation and others.  All Rights Reserved.
     6 *
     7 *******************************************************************************
     8 *   file name:  uparse.h
     9 *   encoding:   US-ASCII
    10 *   tab size:   8 (not used)
    11 *   indentation:4
    12 *
    13 *   created on: 2000apr18
    14 *   created by: Markus W. Scherer
    15 *
    16 *   This file provides a parser for files that are delimited by one single
    17 *   character like ';' or TAB. Example: the Unicode Character Properties files
    18 *   like UnicodeData.txt are semicolon-delimited.
    19 */
    21 #ifndef __UPARSE_H__
    22 #define __UPARSE_H__
    24 #include "unicode/utypes.h"
    26 /**
    27  * Is c an invariant-character whitespace?
        * Compares c against space ' ', TAB '\t', CR '\r' and LF '\n'.
        * The macro argument appears four times in the expansion, so c may be
        * evaluated up to four times: do not pass an expression with side
        * effects (such as *p++).
    28  * @param c invariant character
        * @return True (nonzero) if c is one of the four characters listed above.
    29  */
    30 #define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n')
    32 U_CDECL_BEGIN
    34 /**
    35  * Skip space ' ' and TAB '\t' characters.
        * Note: as described here, line endings ('\r', '\n') are not skipped, so
        * this is a narrower set than the one tested by U_IS_INV_WHITESPACE().
    36  *
    37  * @param s Pointer to characters.
    38  * @return Pointer to first character at or after s that is not a space or TAB.
    39  */
    40 U_CAPI const char * U_EXPORT2
    41 u_skipWhitespace(const char *s);
    43 /**
    44  * Trim whitespace (including line endings) from the end of the string.
        * NOTE(review): s is not const, so the string is presumably shortened in
        * place -- confirm against the implementation.
    45  *
    46  * @param s Pointer to the string.
    47  * @return Pointer to the new end of the string.
    48  */
    49 U_CAPI char * U_EXPORT2
    50 u_rtrim(char *s);
    52 /**
        * Function type for u_parseDelimitedFile().
        * This is the "line function" that the parser calls for each line after
        * segmenting it into fields.
        *
        * @param context The same context pointer that was passed to
        *                u_parseDelimitedFile().
        * @param fields Array of field start and limit pointers: fields[i][0]
        *               points to the beginning of field i, fields[i][1] points
        *               behind it (to the delimiter or the line end).
        *               The function may modify the field contents, including
        *               the limit characters.
        * @param fieldCount Presumably the number of entries in fields --
        *                   TODO confirm against the implementation.
        * @param pErrorCode ICU error code. If the function sets an error here,
        *                   the parser returns with *pErrorCode set.
        */
    53 typedef void U_CALLCONV
    54 UParseLineFn(void *context,
    55               char *fields[][2],
    56               int32_t fieldCount,
    57               UErrorCode *pErrorCode);
    59 /**
    60  * Parser for files that are similar to UnicodeData.txt:
    61  * This function opens the file and reads it line by line. It skips empty lines
    62  * and comment lines that start with a '#'.
    63  * All other lines are separated into fields with one delimiter character
    64  * (semicolon for Unicode Properties files) between two fields. The last field in
    65  * a line does not need to be terminated with a delimiter.
    66  *
    67  * For each line, after segmenting it, a line function is called.
    68  * It gets passed the array of field start and limit pointers that is
    69  * passed into this parser and filled by it for each line.
    70  * For each field i of the line, the start pointer in fields[i][0]
    71  * points to the beginning of the field, while the limit pointer in fields[i][1]
    72  * points behind the field, i.e., to the delimiter or the line end.
    73  *
    74  * The context parameter of the line function is
    75  * the same as the one for the parse function.
    76  *
    77  * The line function may modify the contents of the fields including the
    78  * limit characters.
    79  *
    80  * If the file cannot be opened, or there is a parsing error or the line function
    81  * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code.
        *
        * @param filename Name of the file to open and parse.
        * @param delimiter The single character that separates two fields.
        * @param fields Caller-provided array of start/limit pointer pairs; it is
        *               filled by the parser for each line and passed to lineFn.
        * @param fieldCount Presumably the number of entries in fields --
        *                   TODO confirm against the implementation.
        * @param lineFn The line function, called for each line that is neither
        *               empty nor a '#' comment.
        * @param context Passed through unchanged to lineFn.
        * @param pErrorCode ICU error code; set to an error code on failure as
        *                   described above.
    82  */
    83 U_CAPI void U_EXPORT2
    84 u_parseDelimitedFile(const char *filename, char delimiter,
    85                      char *fields[][2], int32_t fieldCount,
    86                      UParseLineFn *lineFn, void *context,
    87                      UErrorCode *pErrorCode);
    89 /**
    90  * Parse a string of code points like 0061 0308 0300.
    91  * s must end with either ';' or NUL.
    92  *
        * @param s Input char * string.
        * @param dest Presumably the output buffer that receives the parsed code
        *             points -- TODO confirm against the implementation.
        * @param destCapacity Presumably the capacity of dest in numbers of code
        *                     points -- TODO confirm against the implementation.
        * @param pErrorCode ICU error code.
    93  * @return Number of code points.
    94  */
    95 U_CAPI int32_t U_EXPORT2
    96 u_parseCodePoints(const char *s,
    97                   uint32_t *dest, int32_t destCapacity,
    98                   UErrorCode *pErrorCode);
   100 /**
   101  * Parse a list of code points like 0061 0308 0300
   102  * into a UChar * string.
   103  * s must end with either ';' or NUL.
   104  *
   105  * Set the first code point in *pFirst.
   106  *
   107  * @param s Input char * string.
   108  * @param dest Output string buffer.
   109  * @param destCapacity Capacity of dest in numbers of UChars.
   110  * @param pFirst If pFirst!=NULL then *pFirst will be set to the first
   111  *               code point in the string.
   112  * @param pErrorCode ICU error code.
   113  * @return The length of the string in numbers of UChars.
   114  */
   115 U_CAPI int32_t U_EXPORT2
   116 u_parseString(const char *s,
   117               UChar *dest, int32_t destCapacity,
   118               uint32_t *pFirst,
   119               UErrorCode *pErrorCode);
   121 /**
   122  * Parse a code point range like
   123  * 0085 or
   124  * 4E00..9FA5.
   125  *
   126  * s must contain such a range and end with either ';' or NUL.
   127  *
        * @param s Input char * string containing the range.
        * @param pStart Presumably receives the first code point of the range
        *               ("start" in the return value formula) -- TODO confirm.
        * @param pEnd Presumably receives the last code point of the range
        *             ("end" in the return value formula) -- TODO confirm.
        * @param pErrorCode ICU error code.
   128  * @return Length of code point range, end-start+1
   129  */
   130 U_CAPI int32_t U_EXPORT2
   131 u_parseCodePointRange(const char *s,
   132                       uint32_t *pStart, uint32_t *pEnd,
   133                       UErrorCode *pErrorCode);
   135 /**
   136  * Same as u_parseCodePointRange() but the range may be terminated by
   137  * any character. The position of the terminating character is returned via
   138  * the *terminator output parameter.
        *
        * @param s Input char * string containing the range.
        * @param pStart See u_parseCodePointRange().
        * @param pEnd See u_parseCodePointRange().
        * @param terminator Output parameter: *terminator is set to the position
        *                   of the character that terminates the range.
        * @param pErrorCode ICU error code.
        * @return Same as u_parseCodePointRange().
   139  */
   140 U_CAPI int32_t U_EXPORT2
   141 u_parseCodePointRangeAnyTerminator(const char *s,
   142                                    uint32_t *pStart, uint32_t *pEnd,
   143                                    const char **terminator,
   144                                    UErrorCode *pErrorCode);
   /**
    * NOTE(review): this function has no documentation in this header.
    * Judging only from the signature, it presumably parses sLen chars of
    * source and writes UTF-8 output into dest (capacity destCapacity),
    * returning a length and reporting failures through *status --
    * confirm against the implementation before relying on any of this.
    */
   146 U_CAPI int32_t U_EXPORT2
   147 u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status);
   149 U_CDECL_END
   151 #endif

mercurial