|
/*
*******************************************************************************
*
*   Copyright (C) 2000-2010, International Business Machines
*   Corporation and others. All Rights Reserved.
*
*******************************************************************************
*   file name:  uparse.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2000apr18
*   created by: Markus W. Scherer
*
*   This file provides a parser for files that are delimited by one single
*   character like ';' or TAB. Example: the Unicode Character Properties files
*   like UnicodeData.txt are semicolon-delimited.
*/
|
20 |
|
21 #ifndef __UPARSE_H__ |
|
22 #define __UPARSE_H__ |
|
23 |
|
24 #include "unicode/utypes.h" |
|
25 |
|
/**
 * Is c an invariant-character whitespace?
 *
 * Evaluates to nonzero if c is a space ' ', a TAB '\t', a carriage return '\r'
 * or a line feed '\n', and to 0 for every other value.
 *
 * Note: this is a function-like macro and c may be evaluated up to four times,
 * so do not pass an expression with side effects (for example s[i++]).
 *
 * @param c invariant character
 */
#define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n')
|
31 |
|
32 U_CDECL_BEGIN |
|
33 |
|
34 /** |
|
35 * Skip space ' ' and TAB '\t' characters. |
|
36 * |
|
37 * @param s Pointer to characters. |
|
38 * @return Pointer to first character at or after s that is not a space or TAB. |
|
39 */ |
|
40 U_CAPI const char * U_EXPORT2 |
|
41 u_skipWhitespace(const char *s); |
|
42 |
|
43 /** |
|
44 * Trim whitespace (including line endings) from the end of the string. |
|
45 * |
|
46 * @param s Pointer to the string. |
|
47 * @return Pointer to the new end of the string. |
|
48 */ |
|
49 U_CAPI char * U_EXPORT2 |
|
50 u_rtrim(char *s); |
|
51 |
|
52 /** Function type for u_parseDelimitedFile(). */ |
|
53 typedef void U_CALLCONV |
|
54 UParseLineFn(void *context, |
|
55 char *fields[][2], |
|
56 int32_t fieldCount, |
|
57 UErrorCode *pErrorCode); |
|
58 |
|
59 /** |
|
60 * Parser for files that are similar to UnicodeData.txt: |
|
61 * This function opens the file and reads it line by line. It skips empty lines |
|
62 * and comment lines that start with a '#'. |
|
63 * All other lines are separated into fields with one delimiter character |
|
64 * (semicolon for Unicode Properties files) between two fields. The last field in |
|
65 * a line does not need to be terminated with a delimiter. |
|
66 * |
|
67 * For each line, after segmenting it, a line function is called. |
|
68 * It gets passed the array of field start and limit pointers that is |
|
69 * passed into this parser and filled by it for each line. |
|
70 * For each field i of the line, the start pointer in fields[i][0] |
|
71 * points to the beginning of the field, while the limit pointer in fields[i][1] |
|
72 * points behind the field, i.e., to the delimiter or the line end. |
|
73 * |
|
74 * The context parameter of the line function is |
|
75 * the same as the one for the parse function. |
|
76 * |
|
77 * The line function may modify the contents of the fields including the |
|
78 * limit characters. |
|
79 * |
|
80 * If the file cannot be opened, or there is a parsing error or a field function |
|
81 * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code. |
|
82 */ |
|
83 U_CAPI void U_EXPORT2 |
|
84 u_parseDelimitedFile(const char *filename, char delimiter, |
|
85 char *fields[][2], int32_t fieldCount, |
|
86 UParseLineFn *lineFn, void *context, |
|
87 UErrorCode *pErrorCode); |
|
88 |
|
89 /** |
|
90 * Parse a string of code points like 0061 0308 0300. |
|
91 * s must end with either ';' or NUL. |
|
92 * |
|
93 * @return Number of code points. |
|
94 */ |
|
95 U_CAPI int32_t U_EXPORT2 |
|
96 u_parseCodePoints(const char *s, |
|
97 uint32_t *dest, int32_t destCapacity, |
|
98 UErrorCode *pErrorCode); |
|
99 |
|
100 /** |
|
101 * Parse a list of code points like 0061 0308 0300 |
|
102 * into a UChar * string. |
|
103 * s must end with either ';' or NUL. |
|
104 * |
|
105 * Set the first code point in *pFirst. |
|
106 * |
|
107 * @param s Input char * string. |
|
108 * @param dest Output string buffer. |
|
109 * @param destCapacity Capacity of dest in numbers of UChars. |
|
110 * @param pFirst If pFirst!=NULL the *pFirst will be set to the first |
|
111 * code point in the string. |
|
112 * @param pErrorCode ICU error code. |
|
113 * @return The length of the string in numbers of UChars. |
|
114 */ |
|
115 U_CAPI int32_t U_EXPORT2 |
|
116 u_parseString(const char *s, |
|
117 UChar *dest, int32_t destCapacity, |
|
118 uint32_t *pFirst, |
|
119 UErrorCode *pErrorCode); |
|
120 |
|
121 /** |
|
122 * Parse a code point range like |
|
123 * 0085 or |
|
124 * 4E00..9FA5. |
|
125 * |
|
126 * s must contain such a range and end with either ';' or NUL. |
|
127 * |
|
128 * @return Length of code point range, end-start+1 |
|
129 */ |
|
130 U_CAPI int32_t U_EXPORT2 |
|
131 u_parseCodePointRange(const char *s, |
|
132 uint32_t *pStart, uint32_t *pEnd, |
|
133 UErrorCode *pErrorCode); |
|
134 |
|
135 /** |
|
136 * Same as u_parseCodePointRange() but the range may be terminated by |
|
137 * any character. The position of the terminating character is returned via |
|
138 * the *terminator output parameter. |
|
139 */ |
|
140 U_CAPI int32_t U_EXPORT2 |
|
141 u_parseCodePointRangeAnyTerminator(const char *s, |
|
142 uint32_t *pStart, uint32_t *pEnd, |
|
143 const char **terminator, |
|
144 UErrorCode *pErrorCode); |
|
145 |
|
146 U_CAPI int32_t U_EXPORT2 |
|
147 u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status); |
|
148 |
|
149 U_CDECL_END |
|
150 |
|
151 #endif |