intl/icu/source/tools/toolutil/uparse.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/icu/source/tools/toolutil/uparse.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,151 @@
     1.4 +/*
     1.5 +*******************************************************************************
     1.6 +*
     1.7 +*   Copyright (C) 2000-2010, International Business Machines
     1.8 +*   Corporation and others.  All Rights Reserved.
     1.9 +*
    1.10 +*******************************************************************************
    1.11 +*   file name:  uparse.h
    1.12 +*   encoding:   US-ASCII
    1.13 +*   tab size:   8 (not used)
    1.14 +*   indentation:4
    1.15 +*
    1.16 +*   created on: 2000apr18
    1.17 +*   created by: Markus W. Scherer
    1.18 +*
    1.19 +*   This file provides a parser for files whose fields are delimited by a
    1.20 +*   single character like ';' or TAB. Example: the Unicode Character Properties
    1.21 +*   files like UnicodeData.txt are semicolon-delimited.
    1.22 +*/
    1.23 +
    1.24 +#ifndef __UPARSE_H__
    1.25 +#define __UPARSE_H__
    1.26 +
    1.27 +#include "unicode/utypes.h"
    1.28 +
    1.29 +/**
    1.30 + * Is c one of the invariant whitespace characters: space, TAB, CR or LF?
    1.31 + * @param c invariant character; expanded up to four times, so avoid side effects
    1.32 + */
    1.33 +#define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n')
    1.34 +
    1.35 +U_CDECL_BEGIN
    1.36 +
    1.37 +/**
    1.38 + * Skip space ' ' and TAB '\t' characters.
    1.39 + * NOTE(review): U_IS_INV_WHITESPACE in this header also matches CR/LF -- confirm the set actually skipped.
    1.40 + * @param s Pointer to the characters to scan (read-only).
    1.41 + * @return Pointer to first character at or after s that is not a space or TAB.
    1.42 + */
    1.43 +U_CAPI const char * U_EXPORT2
    1.44 +u_skipWhitespace(const char *s);
    1.45 +
    1.46 +/**
    1.47 + * Trim whitespace (including line endings) from the end of the string.
    1.48 + *
    1.49 + * @param s Pointer to the string to trim (non-const; presumably modified in place -- confirm).
    1.50 + * @return Pointer to the new end of the string.
    1.51 + */
    1.52 +U_CAPI char * U_EXPORT2
    1.53 +u_rtrim(char *s);
    1.54 +
    1.55 +/** Per-line callback type for u_parseDelimitedFile(); fields[i][0] and fields[i][1] are field i's start and limit pointers. */
    1.56 +typedef void U_CALLCONV
    1.57 +UParseLineFn(void *context,
    1.58 +              char *fields[][2],
    1.59 +              int32_t fieldCount,
    1.60 +              UErrorCode *pErrorCode);
    1.61 +
    1.62 +/**
    1.63 + * Parser for files that are similar to UnicodeData.txt:
    1.64 + * This function opens the file and reads it line by line. It skips empty lines
    1.65 + * and comment lines that start with a '#'.
    1.66 + * All other lines are separated into fields with one delimiter character
    1.67 + * (semicolon for Unicode Properties files) between two fields. The last field in
    1.68 + * a line does not need to be terminated with a delimiter.
    1.69 + *
    1.70 + * For each line, after segmenting it, a line function is called.
    1.71 + * It gets passed the array of field start and limit pointers that is
    1.72 + * passed into this parser and filled by it for each line.
    1.73 + * For each field i of the line, the start pointer in fields[i][0]
    1.74 + * points to the beginning of the field, while the limit pointer in fields[i][1]
    1.75 + * points behind the field, i.e., to the delimiter or the line end.
    1.76 + *
    1.77 + * The context parameter of the line function is
    1.78 + * the same as the one for the parse function.
    1.79 + *
    1.80 + * The line function may modify the contents of the fields including the
    1.81 + * limit characters.
    1.82 + *
    1.83 + * If the file cannot be opened, or there is a parsing error or the line function
    1.84 + * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code.
    1.85 + */
    1.86 +U_CAPI void U_EXPORT2
    1.87 +u_parseDelimitedFile(const char *filename, char delimiter,
    1.88 +                     char *fields[][2], int32_t fieldCount,
    1.89 +                     UParseLineFn *lineFn, void *context,
    1.90 +                     UErrorCode *pErrorCode);
    1.91 +
    1.92 +/**
    1.93 + * Parse a string of code points like 0061 0308 0300.
    1.94 + * s must end with either ';' or NUL.
    1.95 + * NOTE(review): dest/destCapacity look like the output array and its capacity -- confirm.
    1.96 + * @return Number of code points.
    1.97 + */
    1.98 +U_CAPI int32_t U_EXPORT2
    1.99 +u_parseCodePoints(const char *s,
   1.100 +                  uint32_t *dest, int32_t destCapacity,
   1.101 +                  UErrorCode *pErrorCode);
   1.102 +
   1.103 +/**
   1.104 + * Parse a list of code points like 0061 0308 0300
   1.105 + * into a UChar * string.
   1.106 + * s must end with either ';' or NUL.
   1.107 + *
   1.108 + * Optionally store the first code point in *pFirst.
   1.109 + *
   1.110 + * @param s Input char * string.
   1.111 + * @param dest Output string buffer.
   1.112 + * @param destCapacity Capacity of dest in numbers of UChars.
   1.113 + * @param pFirst If pFirst!=NULL then *pFirst will be set to the first
   1.114 + *               code point in the string.
   1.115 + * @param pErrorCode ICU error code.
   1.116 + * @return The length of the string in numbers of UChars.
   1.117 + */
   1.118 +U_CAPI int32_t U_EXPORT2
   1.119 +u_parseString(const char *s,
   1.120 +              UChar *dest, int32_t destCapacity,
   1.121 +              uint32_t *pFirst,
   1.122 +              UErrorCode *pErrorCode);
   1.123 +
   1.124 +/**
   1.125 + * Parse a code point range: either a single code point like
   1.126 + * 0085 or a start..end pair like
   1.127 + * 4E00..9FA5.
   1.128 + *
   1.129 + * s must contain such a range and end with either ';' or NUL.
   1.130 + *
   1.131 + * @return Length of code point range, end-start+1.
   1.132 + */
   1.133 +U_CAPI int32_t U_EXPORT2
   1.134 +u_parseCodePointRange(const char *s,
   1.135 +                      uint32_t *pStart, uint32_t *pEnd,
   1.136 +                      UErrorCode *pErrorCode);
   1.137 +
   1.138 +/**
   1.139 + * Same as u_parseCodePointRange() except that the range may be terminated by
   1.140 + * any character. The position of the terminating character is returned via
   1.141 + * the *terminator output parameter.
   1.142 + */
   1.143 +U_CAPI int32_t U_EXPORT2
   1.144 +u_parseCodePointRangeAnyTerminator(const char *s,
   1.145 +                                   uint32_t *pStart, uint32_t *pEnd,
   1.146 +                                   const char **terminator,
   1.147 +                                   UErrorCode *pErrorCode);
   1.148 +/** NOTE(review): undocumented in this header. The signature suggests source (sLen chars) is parsed into dest (capacity destCapacity), with a length returned and errors reported via *status -- confirm against the implementation. */
   1.149 +U_CAPI int32_t U_EXPORT2
   1.150 +u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status);
   1.151 +
   1.152 +U_CDECL_END
   1.153 +
   1.154 +#endif

mercurial