|
/*
*******************************************************************************
*
*   Copyright (C) 2001-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucol_tok.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created 02/22/2001
*   created by: Vladimir Weinstein
*
* This module reads a tailoring rule string and produces a list of
* tokens that will be turned into collation elements
*
*/
|
20 |
|
21 #ifndef UCOL_TOKENS_H |
|
22 #define UCOL_TOKENS_H |
|
23 |
|
24 #include "unicode/utypes.h" |
|
25 #include "unicode/uset.h" |
|
26 |
|
27 #if !UCONFIG_NO_COLLATION |
|
28 |
|
29 #include "ucol_imp.h" |
|
30 #include "uhash.h" |
|
31 #include "unicode/parseerr.h" |
|
32 |
|
/* Sentinel values; both are 32-bit unsigned hexadecimal constants. */
#define UCOL_TOK_UNSET 0xFFFFFFFF
#define UCOL_TOK_RESET 0xDEADBEEF

/*
 * Polarity values.
 * NOTE(review): presumably stored in UColToken.polarity - confirm against
 * the parser implementation.
 */
#define UCOL_TOK_POLARITY_NEGATIVE 0
#define UCOL_TOK_POLARITY_POSITIVE 1

/*
 * Flag bits.
 * NOTE(review): presumably combined in the 16-bit `flags` members of
 * UColToken / UColParsedToken.  UCOL_TOK_BEFORE (0x03) covers the two low
 * bits, so it looks like a mask rather than a single flag - confirm.
 */
#define UCOL_TOK_TOP          0x04
#define UCOL_TOK_VARIABLE_TOP 0x08
#define UCOL_TOK_BEFORE       0x03
#define UCOL_TOK_SUCCESS      0x10

/* this is space for the extra strings that need to be unquoted */
/* during the parsing of the rules */
#define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096

/* Forward declaration: UColTokListHeader and UColToken refer to each other. */
typedef struct UColToken UColToken;
|
48 |
|
49 typedef struct { |
|
50 UColToken* first; |
|
51 UColToken* last; |
|
52 UColToken* reset; |
|
53 UBool indirect; |
|
54 uint32_t baseCE; |
|
55 uint32_t baseContCE; |
|
56 uint32_t nextCE; |
|
57 uint32_t nextContCE; |
|
58 uint32_t previousCE; |
|
59 uint32_t previousContCE; |
|
60 int32_t pos[UCOL_STRENGTH_LIMIT]; |
|
61 uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT]; |
|
62 uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT]; |
|
63 uint32_t numStr[UCOL_CE_STRENGTH_LIMIT]; |
|
64 UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT]; |
|
65 UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT]; |
|
66 } UColTokListHeader; |
|
67 |
|
68 struct UColToken { |
|
69 UChar debugSource; |
|
70 UChar debugExpansion; |
|
71 UChar debugPrefix; |
|
72 uint32_t CEs[128]; |
|
73 uint32_t noOfCEs; |
|
74 uint32_t expCEs[128]; |
|
75 uint32_t noOfExpCEs; |
|
76 uint32_t source; |
|
77 uint32_t expansion; |
|
78 uint32_t prefix; |
|
79 uint32_t strength; |
|
80 uint32_t toInsert; |
|
81 uint32_t polarity; /* 1 for <, <<, <<<, , ; and -1 for >, >>, >>> */ |
|
82 UColTokListHeader *listHeader; |
|
83 UColToken* previous; |
|
84 UColToken* next; |
|
85 UChar **rulesToParseHdl; |
|
86 uint16_t flags; |
|
87 }; |
|
88 |
|
89 /* |
|
90 * This is a token that has been parsed |
|
91 * but not yet processed. Used to reduce |
|
92 * the number of arguments in the parser |
|
93 */ |
|
94 typedef struct { |
|
95 uint32_t strength; |
|
96 uint32_t charsOffset; |
|
97 uint32_t charsLen; |
|
98 uint32_t extensionOffset; |
|
99 uint32_t extensionLen; |
|
100 uint32_t prefixOffset; |
|
101 uint32_t prefixLen; |
|
102 uint16_t flags; |
|
103 uint16_t indirectIndex; |
|
104 } UColParsedToken; |
|
105 |
|
106 |
|
107 typedef struct { |
|
108 UColParsedToken parsedToken; |
|
109 UChar *source; |
|
110 UChar *end; |
|
111 const UChar *current; |
|
112 UChar *sourceCurrent; |
|
113 UChar *extraCurrent; |
|
114 UChar *extraEnd; |
|
115 const InverseUCATableHeader *invUCA; |
|
116 const UCollator *UCA; |
|
117 UHashtable *tailored; |
|
118 UColOptionSet *opts; |
|
119 uint32_t resultLen; |
|
120 uint32_t listCapacity; |
|
121 UColTokListHeader *lh; |
|
122 UColToken *varTop; |
|
123 USet *copySet; |
|
124 USet *removeSet; |
|
125 UBool buildCCTabFlag; /* Tailoring rule requirs building combining class table. */ |
|
126 |
|
127 UChar32 previousCp; /* Previous code point. */ |
|
128 /* For processing starred lists. */ |
|
129 UBool isStarred; /* Are we processing a starred token? */ |
|
130 UBool savedIsStarred; |
|
131 uint32_t currentStarredCharIndex; /* Index of the current charrecter in the starred expression. */ |
|
132 uint32_t lastStarredCharIndex; /* Index to the last character in the starred expression. */ |
|
133 |
|
134 /* For processing ranges. */ |
|
135 UBool inRange; /* Are we in a range? */ |
|
136 UChar32 currentRangeCp; /* Current code point in the range. */ |
|
137 UChar32 lastRangeCp; /* The last code point in the range. */ |
|
138 |
|
139 /* reorder codes for collation reordering */ |
|
140 int32_t* reorderCodes; |
|
141 int32_t reorderCodesLength; |
|
142 |
|
143 } UColTokenParser; |
|
144 |
|
145 typedef struct { |
|
146 const UChar *subName; |
|
147 int32_t subLen; |
|
148 UColAttributeValue attrVal; |
|
149 } ucolTokSuboption; |
|
150 |
|
151 typedef struct { |
|
152 const UChar *optionName; |
|
153 int32_t optionLen; |
|
154 const ucolTokSuboption *subopts; |
|
155 int32_t subSize; |
|
156 UColAttribute attr; |
|
157 } ucolTokOption; |
|
158 |
|
/*
 * Evaluates to nonzero when ch is one of the ASCII code points
 *   U+0020..U+002F, U+003A..U+003F, U+005B..U+0060, U+007B, U+007D..U+007E
 * and to 0 otherwise.  Note that U+007C is not in the set.
 *
 * This is a function-like macro: ch is evaluated more than once, so do not
 * pass an expression with side effects (for example *p++).
 */
#define ucol_tok_isSpecialChar(ch)                \
    (((((ch) >= 0x0020) && ((ch) <= 0x002F)) ||   \
      (((ch) >= 0x003A) && ((ch) <= 0x003F)) ||   \
      (((ch) >= 0x005B) && ((ch) <= 0x0060)) ||   \
      (((ch) >= 0x007D) && ((ch) <= 0x007E)) ||   \
      ((ch) == 0x007B)))
|
165 |
|
166 |
|
167 U_CFUNC |
|
168 uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, |
|
169 UParseError *parseError, |
|
170 UErrorCode *status); |
|
171 |
|
172 U_CFUNC |
|
173 void ucol_tok_initTokenList(UColTokenParser *src, |
|
174 const UChar *rules, |
|
175 const uint32_t rulesLength, |
|
176 const UCollator *UCA, |
|
177 GetCollationRulesFunction importFunc, |
|
178 void* context, |
|
179 UErrorCode *status); |
|
180 |
|
181 U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src); |
|
182 |
|
183 U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src, |
|
184 UBool startOfRules, |
|
185 UParseError *parseError, |
|
186 UErrorCode *status); |
|
187 |
|
188 |
|
189 U_CAPI const UChar * U_EXPORT2 |
|
190 ucol_tok_getNextArgument(const UChar *start, const UChar *end, |
|
191 UColAttribute *attrib, UColAttributeValue *value, |
|
192 UErrorCode *status); |
|
193 U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src, |
|
194 uint32_t CE, uint32_t contCE, |
|
195 uint32_t *nextCE, uint32_t *nextContCE, |
|
196 uint32_t strength); |
|
197 U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src, |
|
198 uint32_t CE, uint32_t contCE, |
|
199 uint32_t *prevCE, uint32_t *prevContCE, |
|
200 uint32_t strength); |
|
201 |
|
202 const UChar* U_CALLCONV ucol_tok_getRulesFromBundle( |
|
203 void* context, |
|
204 const char* locale, |
|
205 const char* type, |
|
206 int32_t* pLength, |
|
207 UErrorCode* status); |
|
208 |
|
209 #endif /* #if !UCONFIG_NO_COLLATION */ |
|
210 |
|
211 #endif |