|
1 // |
|
2 // file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class |
|
3 // |
|
4 /* |
|
5 *************************************************************************** |
|
6 * Copyright (C) 2002-2011 International Business Machines Corporation |
|
7 * and others. All rights reserved. |
|
8 *************************************************************************** |
|
9 */ |
|
10 |
|
11 #include "unicode/utypes.h" |
|
12 |
|
13 #if !UCONFIG_NO_BREAK_ITERATION |
|
14 |
|
15 #include "unicode/unistr.h" |
|
16 #include "unicode/uniset.h" |
|
17 #include "unicode/uchar.h" |
|
18 #include "unicode/parsepos.h" |
|
19 |
|
20 #include "umutex.h" |
|
21 |
|
22 #include "rbbirb.h" |
|
23 #include "rbbinode.h" |
|
24 |
|
25 |
|
26 // |
|
27 // RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents |
|
28 // when the hash table is deleted. |
|
29 // |
|
30 U_CDECL_BEGIN |
|
31 static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) { |
|
32 icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p; |
|
33 delete px; |
|
34 } |
|
35 U_CDECL_END |
|
36 |
|
37 |
|
38 |
|
39 U_NAMESPACE_BEGIN |
|
40 |
|
41 RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status) |
|
42 :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff)) |
|
43 { |
|
44 fHashTable = NULL; |
|
45 fCachedSetLookup = NULL; |
|
46 |
|
47 fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status); |
|
48 // uhash_open checks status |
|
49 if (U_FAILURE(status)) { |
|
50 return; |
|
51 } |
|
52 uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter); |
|
53 } |
|
54 |
|
55 |
|
56 |
|
57 RBBISymbolTable::~RBBISymbolTable() |
|
58 { |
|
59 uhash_close(fHashTable); |
|
60 } |
|
61 |
|
62 |
|
63 // |
|
64 // RBBISymbolTable::lookup This function from the abstract symbol table inteface |
|
65 // looks up a variable name and returns a UnicodeString |
|
66 // containing the substitution text. |
|
67 // |
|
68 // The variable name does NOT include the leading $. |
|
69 // |
|
70 const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const |
|
71 { |
|
72 RBBISymbolTableEntry *el; |
|
73 RBBINode *varRefNode; |
|
74 RBBINode *exprNode; |
|
75 RBBINode *usetNode; |
|
76 const UnicodeString *retString; |
|
77 RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const |
|
78 |
|
79 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s); |
|
80 if (el == NULL) { |
|
81 return NULL; |
|
82 } |
|
83 |
|
84 varRefNode = el->val; |
|
85 exprNode = varRefNode->fLeftChild; // Root node of expression for variable |
|
86 if (exprNode->fType == RBBINode::setRef) { |
|
87 // The $variable refers to a single UnicodeSet |
|
88 // return the ffffString, which will subsequently be interpreted as a |
|
89 // stand-in character for the set by RBBISymbolTable::lookupMatcher() |
|
90 usetNode = exprNode->fLeftChild; |
|
91 This->fCachedSetLookup = usetNode->fInputSet; |
|
92 retString = &ffffString; |
|
93 } |
|
94 else |
|
95 { |
|
96 // The variable refers to something other than just a set. |
|
97 // return the original source string for the expression |
|
98 retString = &exprNode->fText; |
|
99 This->fCachedSetLookup = NULL; |
|
100 } |
|
101 return retString; |
|
102 } |
|
103 |
|
104 |
|
105 |
|
106 // |
|
107 // RBBISymbolTable::lookupMatcher This function from the abstract symbol table |
|
108 // interface maps a single stand-in character to a |
|
109 // pointer to a Unicode Set. The Unicode Set code uses this |
|
110 // mechanism to get all references to the same $variable |
|
111 // name to refer to a single common Unicode Set instance. |
|
112 // |
|
113 // This implementation cheats a little, and does not maintain a map of stand-in chars |
|
114 // to sets. Instead, it takes advantage of the fact that the UnicodeSet |
|
115 // constructor will always call this function right after calling lookup(), |
|
116 // and we just need to remember what set to return between these two calls. |
|
117 const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const |
|
118 { |
|
119 UnicodeSet *retVal = NULL; |
|
120 RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const |
|
121 if (ch == 0xffff) { |
|
122 retVal = fCachedSetLookup; |
|
123 This->fCachedSetLookup = 0; |
|
124 } |
|
125 return retVal; |
|
126 } |
|
127 |
|
128 // |
|
129 // RBBISymbolTable::parseReference This function from the abstract symbol table interface |
|
130 // looks for a $variable name in the source text. |
|
131 // It does not look it up, only scans for it. |
|
132 // It is used by the UnicodeSet parser. |
|
133 // |
|
134 // This implementation is lifted pretty much verbatim |
|
135 // from the rules based transliterator implementation. |
|
136 // I didn't see an obvious way of sharing it. |
|
137 // |
|
138 UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text, |
|
139 ParsePosition& pos, int32_t limit) const |
|
140 { |
|
141 int32_t start = pos.getIndex(); |
|
142 int32_t i = start; |
|
143 UnicodeString result; |
|
144 while (i < limit) { |
|
145 UChar c = text.charAt(i); |
|
146 if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) { |
|
147 break; |
|
148 } |
|
149 ++i; |
|
150 } |
|
151 if (i == start) { // No valid name chars |
|
152 return result; // Indicate failure with empty string |
|
153 } |
|
154 pos.setIndex(i); |
|
155 text.extractBetween(start, i, result); |
|
156 return result; |
|
157 } |
|
158 |
|
159 |
|
160 |
|
161 // |
|
162 // RBBISymbolTable::lookupNode Given a key (a variable name), return the |
|
163 // corresponding RBBI Node. If there is no entry |
|
164 // in the table for this name, return NULL. |
|
165 // |
|
166 RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{ |
|
167 |
|
168 RBBINode *retNode = NULL; |
|
169 RBBISymbolTableEntry *el; |
|
170 |
|
171 el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); |
|
172 if (el != NULL) { |
|
173 retNode = el->val; |
|
174 } |
|
175 return retNode; |
|
176 } |
|
177 |
|
178 |
|
179 // |
|
180 // RBBISymbolTable::addEntry Add a new entry to the symbol table. |
|
181 // Indicate an error if the name already exists - |
|
182 // this will only occur in the case of duplicate |
|
183 // variable assignments. |
|
184 // |
|
185 void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) { |
|
186 RBBISymbolTableEntry *e; |
|
187 /* test for buffer overflows */ |
|
188 if (U_FAILURE(err)) { |
|
189 return; |
|
190 } |
|
191 e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); |
|
192 if (e != NULL) { |
|
193 err = U_BRK_VARIABLE_REDFINITION; |
|
194 return; |
|
195 } |
|
196 |
|
197 e = new RBBISymbolTableEntry; |
|
198 if (e == NULL) { |
|
199 err = U_MEMORY_ALLOCATION_ERROR; |
|
200 return; |
|
201 } |
|
202 e->key = key; |
|
203 e->val = val; |
|
204 uhash_put( fHashTable, &e->key, e, &err); |
|
205 } |
|
206 |
|
207 |
|
208 RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {} |
|
209 |
|
210 RBBISymbolTableEntry::~RBBISymbolTableEntry() { |
|
211 // The "val" of a symbol table entry is a variable reference node. |
|
212 // The l. child of the val is the rhs expression from the assignment. |
|
213 // Unlike other node types, children of variable reference nodes are not |
|
214 // automatically recursively deleted. We do it manually here. |
|
215 delete val->fLeftChild; |
|
216 val->fLeftChild = NULL; |
|
217 |
|
218 delete val; |
|
219 |
|
220 // Note: the key UnicodeString is destructed by virtue of being in the object by value. |
|
221 } |
|
222 |
|
223 |
|
//
//  RBBISymbolTable::rbbiSymtablePrint    Debugging function, dump out the symbol table contents.
//                                        Only compiled when RBBI_DEBUG is defined.
//
//      Two passes are made over the hash table: the first prints one line per
//      variable (name, node pointer, source text of the definition); the
//      second prints each variable name followed by the parse tree of its
//      definition.
//
#ifdef RBBI_DEBUG
void RBBISymbolTable::rbbiSymtablePrint() const {
    RBBIDebugPrintf("Variable Definitions\n"
           "Name Node Val String Val\n"
           "----------------------------------------------------------------------\n");

    const UHashElement *elem = NULL;

    // Pass 1: summary table.  The iteration position starts at -1.
    int32_t pos = -1;
    while ((elem = uhash_nextElement(fHashTable, &pos)) != NULL) {
        RBBISymbolTableEntry *entry = static_cast<RBBISymbolTableEntry *>(elem->value.pointer);

        RBBI_DEBUG_printUnicodeString(entry->key, 15);
        RBBIDebugPrintf(" %8p ", (void *)entry->val);
        RBBI_DEBUG_printUnicodeString(entry->val->fLeftChild->fText);
        RBBIDebugPrintf("\n");
    }

    // Pass 2: the parsed form of every definition.
    RBBIDebugPrintf("\nParsed Variable Definitions\n");
    pos = -1;
    while ((elem = uhash_nextElement(fHashTable, &pos)) != NULL) {
        RBBISymbolTableEntry *entry = static_cast<RBBISymbolTableEntry *>(elem->value.pointer);

        RBBI_DEBUG_printUnicodeString(entry->key);
        entry->val->fLeftChild->printTree(TRUE);
        RBBIDebugPrintf("\n");
    }
}
#endif
|
262 |
|
263 |
|
264 |
|
265 |
|
266 |
|
267 U_NAMESPACE_END |
|
268 |
|
269 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |