1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/tools/toolutil/uparse.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,151 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* 1.7 +* Copyright (C) 2000-2010, International Business Machines 1.8 +* Corporation and others. All Rights Reserved. 1.9 +* 1.10 +******************************************************************************* 1.11 +* file name: uparse.h 1.12 +* encoding: US-ASCII 1.13 +* tab size: 8 (not used) 1.14 +* indentation:4 1.15 +* 1.16 +* created on: 2000apr18 1.17 +* created by: Markus W. Scherer 1.18 +* 1.19 +* This file provides a parser for files that are delimited by one single 1.20 +* character like ';' or TAB. Example: the Unicode Character Properties files 1.21 +* like UnicodeData.txt are semicolon-delimited. 1.22 +*/ 1.23 + 1.24 +#ifndef __UPARSE_H__ 1.25 +#define __UPARSE_H__ 1.26 + 1.27 +#include "unicode/utypes.h" 1.28 + 1.29 +/** 1.30 + * Is c an invariant-character whitespace, i.e., one of space ' ', TAB '\t', CR '\r' or LF '\n'? The macro may evaluate c up to four times, so c should not have side effects. 1.31 + * @param c invariant character 1.32 + */ 1.33 +#define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n') 1.34 + 1.35 +U_CDECL_BEGIN 1.36 + 1.37 +/** 1.38 + * Skip space ' ' and TAB '\t' characters. 1.39 + * 1.40 + * @param s Pointer to characters. 1.41 + * @return Pointer to first character at or after s that is not a space or TAB. 1.42 + */ 1.43 +U_CAPI const char * U_EXPORT2 1.44 +u_skipWhitespace(const char *s); 1.45 + 1.46 +/** 1.47 + * Trim whitespace (including line endings) from the end of the string. 1.48 + * 1.49 + * @param s Pointer to the string. 1.50 + * @return Pointer to the new end of the string. 1.51 + */ 1.52 +U_CAPI char * U_EXPORT2 1.53 +u_rtrim(char *s); 1.54 + 1.55 +/** Function type for the line function that u_parseDelimitedFile() calls for each segmented line; it receives the parser's context, fields array, fieldCount and pErrorCode.
*/ 1.56 +typedef void U_CALLCONV 1.57 +UParseLineFn(void *context, 1.58 + char *fields[][2], 1.59 + int32_t fieldCount, 1.60 + UErrorCode *pErrorCode); 1.61 + 1.62 +/** 1.63 + * Parser for files that are similar to UnicodeData.txt: 1.64 + * This function opens the file and reads it line by line. It skips empty lines 1.65 + * and comment lines that start with a '#'. 1.66 + * All other lines are separated into fields with one delimiter character 1.67 + * (semicolon for Unicode Properties files) between two fields. The last field in 1.68 + * a line does not need to be terminated with a delimiter. 1.69 + * 1.70 + * For each line, after segmenting it, a line function is called. 1.71 + * It gets passed the array of field start and limit pointers that is 1.72 + * passed into this parser and filled by it for each line. 1.73 + * For each field i of the line, the start pointer in fields[i][0] 1.74 + * points to the beginning of the field, while the limit pointer in fields[i][1] 1.75 + * points behind the field, i.e., to the delimiter or the line end. 1.76 + * 1.77 + * The context parameter of the line function is 1.78 + * the same as the one for the parse function. 1.79 + * 1.80 + * The line function may modify the contents of the fields including the 1.81 + * limit characters. 1.82 + * 1.83 + * If the file cannot be opened, or there is a parsing error or the line function 1.84 + * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code. * * @param filename Name of the file to open and parse. * @param delimiter The single character that separates two fields. * @param fields Array of start/limit pointer pairs, filled by the parser for each line. * @param fieldCount Presumably the number of entries in fields - TODO confirm against the implementation. * @param lineFn Line function that is called for each segmented line. * @param context Passed on to the line function. * @param pErrorCode ICU error code. 1.85 + */ 1.86 +U_CAPI void U_EXPORT2 1.87 +u_parseDelimitedFile(const char *filename, char delimiter, 1.88 + char *fields[][2], int32_t fieldCount, 1.89 + UParseLineFn *lineFn, void *context, 1.90 + UErrorCode *pErrorCode); 1.91 + 1.92 +/** 1.93 + * Parse a string of code points like 0061 0308 0300. 1.94 + * s must end with either ';' or NUL. 1.95 + * 1.96 + * @return Number of code points.
1.97 + */ 1.98 +U_CAPI int32_t U_EXPORT2 1.99 +u_parseCodePoints(const char *s, 1.100 + uint32_t *dest, int32_t destCapacity, 1.101 + UErrorCode *pErrorCode); 1.102 + 1.103 +/** 1.104 + * Parse a list of code points like 0061 0308 0300 1.105 + * into a UChar * string. 1.106 + * s must end with either ';' or NUL. 1.107 + * 1.108 + * Set the first code point in *pFirst. 1.109 + * 1.110 + * @param s Input char * string. 1.111 + * @param dest Output string buffer. 1.112 + * @param destCapacity Capacity of dest in numbers of UChars. 1.113 + * @param pFirst If pFirst!=NULL then *pFirst will be set to the first 1.114 + * code point in the string. 1.115 + * @param pErrorCode ICU error code. 1.116 + * @return The length of the string in numbers of UChars. 1.117 + */ 1.118 +U_CAPI int32_t U_EXPORT2 1.119 +u_parseString(const char *s, 1.120 + UChar *dest, int32_t destCapacity, 1.121 + uint32_t *pFirst, 1.122 + UErrorCode *pErrorCode); 1.123 + 1.124 +/** 1.125 + * Parse a code point range like 1.126 + * 0085 or 1.127 + * 4E00..9FA5. 1.128 + * 1.129 + * s must contain such a range and end with either ';' or NUL. 1.130 + * 1.131 + * @return Length of code point range, end-start+1 1.132 + */ 1.133 +U_CAPI int32_t U_EXPORT2 1.134 +u_parseCodePointRange(const char *s, 1.135 + uint32_t *pStart, uint32_t *pEnd, 1.136 + UErrorCode *pErrorCode); 1.137 + 1.138 +/** 1.139 + * Same as u_parseCodePointRange() but the range may be terminated by 1.140 + * any character. The position of the terminating character is returned via 1.141 + * the *terminator output parameter. 1.142 + */ 1.143 +U_CAPI int32_t U_EXPORT2 1.144 +u_parseCodePointRangeAnyTerminator(const char *s, 1.145 + uint32_t *pStart, uint32_t *pEnd, 1.146 + const char **terminator, 1.147 + UErrorCode *pErrorCode); 1.148 + /* NOTE(review): u_parseUTF8() is the only function in this header without documentation; the expected format of source, the meaning of sLen and the return value are not stated here - confirm against the implementation before relying on them. */ 1.149 +U_CAPI int32_t U_EXPORT2 1.150 +u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status); 1.151 + 1.152 +U_CDECL_END 1.153 + 1.154 +#endif