|
1 /* |
|
2 *************************************************************************** |
|
3 * Copyright (C) 2002-2008 International Business Machines Corporation * |
|
4 * and others. All rights reserved. * |
|
5 *************************************************************************** |
|
6 */ |
|
7 |
|
8 // |
|
9 // File: rbbinode.cpp |
|
10 // |
|
11 // Implementation of class RBBINode, which represents a node in the |
|
12 // tree generated when parsing the Rules Based Break Iterator rules. |
|
13 // |
|
14 // This "Class" is actually closer to a struct. |
|
15 // Code using it is expected to directly access fields much of the time. |
|
16 // |
|
17 |
|
18 #include "unicode/utypes.h" |
|
19 |
|
20 #if !UCONFIG_NO_BREAK_ITERATION |
|
21 |
|
22 #include "unicode/unistr.h" |
|
23 #include "unicode/uniset.h" |
|
24 #include "unicode/uchar.h" |
|
25 #include "unicode/parsepos.h" |
|
26 #include "uvector.h" |
|
27 |
|
28 #include "rbbirb.h" |
|
29 #include "rbbinode.h" |
|
30 |
|
31 #include "uassert.h" |
|
32 |
|
33 |
|
34 U_NAMESPACE_BEGIN |
|
35 |
|
36 #ifdef RBBI_DEBUG |
|
37 static int gLastSerial = 0; |
|
38 #endif |
|
39 |
|
40 |
|
41 //------------------------------------------------------------------------- |
|
42 // |
|
43 // Constructor. Just set the fields to reasonable default values. |
|
44 // |
|
45 //------------------------------------------------------------------------- |
|
46 RBBINode::RBBINode(NodeType t) : UMemory() { |
|
47 #ifdef RBBI_DEBUG |
|
48 fSerialNum = ++gLastSerial; |
|
49 #endif |
|
50 fType = t; |
|
51 fParent = NULL; |
|
52 fLeftChild = NULL; |
|
53 fRightChild = NULL; |
|
54 fInputSet = NULL; |
|
55 fFirstPos = 0; |
|
56 fLastPos = 0; |
|
57 fNullable = FALSE; |
|
58 fLookAheadEnd = FALSE; |
|
59 fVal = 0; |
|
60 fPrecedence = precZero; |
|
61 |
|
62 UErrorCode status = U_ZERO_ERROR; |
|
63 fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere |
|
64 fLastPosSet = new UVector(status); |
|
65 fFollowPos = new UVector(status); |
|
66 if (t==opCat) {fPrecedence = precOpCat;} |
|
67 else if (t==opOr) {fPrecedence = precOpOr;} |
|
68 else if (t==opStart) {fPrecedence = precStart;} |
|
69 else if (t==opLParen) {fPrecedence = precLParen;} |
|
70 |
|
71 } |
|
72 |
|
73 |
|
74 RBBINode::RBBINode(const RBBINode &other) : UMemory(other) { |
|
75 #ifdef RBBI_DEBUG |
|
76 fSerialNum = ++gLastSerial; |
|
77 #endif |
|
78 fType = other.fType; |
|
79 fParent = NULL; |
|
80 fLeftChild = NULL; |
|
81 fRightChild = NULL; |
|
82 fInputSet = other.fInputSet; |
|
83 fPrecedence = other.fPrecedence; |
|
84 fText = other.fText; |
|
85 fFirstPos = other.fFirstPos; |
|
86 fLastPos = other.fLastPos; |
|
87 fNullable = other.fNullable; |
|
88 fVal = other.fVal; |
|
89 UErrorCode status = U_ZERO_ERROR; |
|
90 fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere |
|
91 fLastPosSet = new UVector(status); |
|
92 fFollowPos = new UVector(status); |
|
93 } |
|
94 |
|
95 |
|
96 //------------------------------------------------------------------------- |
|
97 // |
|
98 // Destructor. Deletes both this node AND any child nodes, |
|
99 // except in the case of variable reference nodes. For |
|
100 // these, the l. child points back to the definition, which |
|
101 // is common for all references to the variable, meaning |
|
102 // it can't be deleted here. |
|
103 // |
|
104 //------------------------------------------------------------------------- |
|
105 RBBINode::~RBBINode() { |
|
106 // printf("deleting node %8x serial %4d\n", this, this->fSerialNum); |
|
107 delete fInputSet; |
|
108 fInputSet = NULL; |
|
109 |
|
110 switch (this->fType) { |
|
111 case varRef: |
|
112 case setRef: |
|
113 // for these node types, multiple instances point to the same "children" |
|
114 // Storage ownership of children handled elsewhere. Don't delete here. |
|
115 break; |
|
116 |
|
117 default: |
|
118 delete fLeftChild; |
|
119 fLeftChild = NULL; |
|
120 delete fRightChild; |
|
121 fRightChild = NULL; |
|
122 } |
|
123 |
|
124 |
|
125 delete fFirstPosSet; |
|
126 delete fLastPosSet; |
|
127 delete fFollowPos; |
|
128 |
|
129 } |
|
130 |
|
131 |
|
132 //------------------------------------------------------------------------- |
|
133 // |
|
134 // cloneTree Make a copy of the subtree rooted at this node. |
|
135 // Discard any variable references encountered along the way, |
|
136 // and replace with copies of the variable's definitions. |
|
137 // Used to replicate the expression underneath variable |
|
138 // references in preparation for generating the DFA tables. |
|
139 // |
|
140 //------------------------------------------------------------------------- |
|
141 RBBINode *RBBINode::cloneTree() { |
|
142 RBBINode *n; |
|
143 |
|
144 if (fType == RBBINode::varRef) { |
|
145 // If the current node is a variable reference, skip over it |
|
146 // and clone the definition of the variable instead. |
|
147 n = fLeftChild->cloneTree(); |
|
148 } else if (fType == RBBINode::uset) { |
|
149 n = this; |
|
150 } else { |
|
151 n = new RBBINode(*this); |
|
152 // Check for null pointer. |
|
153 if (n != NULL) { |
|
154 if (fLeftChild != NULL) { |
|
155 n->fLeftChild = fLeftChild->cloneTree(); |
|
156 n->fLeftChild->fParent = n; |
|
157 } |
|
158 if (fRightChild != NULL) { |
|
159 n->fRightChild = fRightChild->cloneTree(); |
|
160 n->fRightChild->fParent = n; |
|
161 } |
|
162 } |
|
163 } |
|
164 return n; |
|
165 } |
|
166 |
|
167 |
|
168 |
|
169 //------------------------------------------------------------------------- |
|
170 // |
|
171 // flattenVariables Walk a parse tree, replacing any variable |
|
172 // references with a copy of the variable's definition. |
|
173 // Aside from variables, the tree is not changed. |
|
174 // |
|
175 // Return the root of the tree. If the root was not a variable |
|
176 // reference, it remains unchanged - the root we started with |
|
177 // is the root we return. If, however, the root was a variable |
|
178 // reference, the root of the newly cloned replacement tree will |
|
179 // be returned, and the original tree deleted. |
|
180 // |
|
181 // This function works by recursively walking the tree |
|
182 // without doing anything until a variable reference is |
|
183 // found, then calling cloneTree() at that point. Any |
|
184 // nested references are handled by cloneTree(), not here. |
|
185 // |
|
186 //------------------------------------------------------------------------- |
|
187 RBBINode *RBBINode::flattenVariables() { |
|
188 if (fType == varRef) { |
|
189 RBBINode *retNode = fLeftChild->cloneTree(); |
|
190 delete this; |
|
191 return retNode; |
|
192 } |
|
193 |
|
194 if (fLeftChild != NULL) { |
|
195 fLeftChild = fLeftChild->flattenVariables(); |
|
196 fLeftChild->fParent = this; |
|
197 } |
|
198 if (fRightChild != NULL) { |
|
199 fRightChild = fRightChild->flattenVariables(); |
|
200 fRightChild->fParent = this; |
|
201 } |
|
202 return this; |
|
203 } |
|
204 |
|
205 |
|
206 //------------------------------------------------------------------------- |
|
207 // |
|
208 // flattenSets Walk the parse tree, replacing any nodes of type setRef |
|
209 // with a copy of the expression tree for the set. A set's |
|
210 // equivalent expression tree is precomputed and saved as |
|
211 // the left child of the uset node. |
|
212 // |
|
213 //------------------------------------------------------------------------- |
|
214 void RBBINode::flattenSets() { |
|
215 U_ASSERT(fType != setRef); |
|
216 |
|
217 if (fLeftChild != NULL) { |
|
218 if (fLeftChild->fType==setRef) { |
|
219 RBBINode *setRefNode = fLeftChild; |
|
220 RBBINode *usetNode = setRefNode->fLeftChild; |
|
221 RBBINode *replTree = usetNode->fLeftChild; |
|
222 fLeftChild = replTree->cloneTree(); |
|
223 fLeftChild->fParent = this; |
|
224 delete setRefNode; |
|
225 } else { |
|
226 fLeftChild->flattenSets(); |
|
227 } |
|
228 } |
|
229 |
|
230 if (fRightChild != NULL) { |
|
231 if (fRightChild->fType==setRef) { |
|
232 RBBINode *setRefNode = fRightChild; |
|
233 RBBINode *usetNode = setRefNode->fLeftChild; |
|
234 RBBINode *replTree = usetNode->fLeftChild; |
|
235 fRightChild = replTree->cloneTree(); |
|
236 fRightChild->fParent = this; |
|
237 delete setRefNode; |
|
238 } else { |
|
239 fRightChild->flattenSets(); |
|
240 } |
|
241 } |
|
242 } |
|
243 |
|
244 |
|
245 |
|
246 //------------------------------------------------------------------------- |
|
247 // |
|
248 // findNodes() Locate all the nodes of the specified type, starting |
|
249 // at the specified root. |
|
250 // |
|
251 //------------------------------------------------------------------------- |
|
252 void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) { |
|
253 /* test for buffer overflows */ |
|
254 if (U_FAILURE(status)) { |
|
255 return; |
|
256 } |
|
257 if (fType == kind) { |
|
258 dest->addElement(this, status); |
|
259 } |
|
260 if (fLeftChild != NULL) { |
|
261 fLeftChild->findNodes(dest, kind, status); |
|
262 } |
|
263 if (fRightChild != NULL) { |
|
264 fRightChild->findNodes(dest, kind, status); |
|
265 } |
|
266 } |
|
267 |
|
268 |
|
269 //------------------------------------------------------------------------- |
|
270 // |
|
271 // print. Print out a single node, for debugging. |
|
272 // |
|
273 //------------------------------------------------------------------------- |
|
274 #ifdef RBBI_DEBUG |
|
275 void RBBINode::printNode() { |
|
276 static const char * const nodeTypeNames[] = { |
|
277 "setRef", |
|
278 "uset", |
|
279 "varRef", |
|
280 "leafChar", |
|
281 "lookAhead", |
|
282 "tag", |
|
283 "endMark", |
|
284 "opStart", |
|
285 "opCat", |
|
286 "opOr", |
|
287 "opStar", |
|
288 "opPlus", |
|
289 "opQuestion", |
|
290 "opBreak", |
|
291 "opReverse", |
|
292 "opLParen" |
|
293 }; |
|
294 |
|
295 if (this==NULL) { |
|
296 RBBIDebugPrintf("%10p", (void *)this); |
|
297 } else { |
|
298 RBBIDebugPrintf("%10p %12s %10p %10p %10p %4d %6d %d ", |
|
299 (void *)this, nodeTypeNames[fType], (void *)fParent, (void *)fLeftChild, (void *)fRightChild, |
|
300 fSerialNum, fFirstPos, fVal); |
|
301 if (fType == varRef) { |
|
302 RBBI_DEBUG_printUnicodeString(fText); |
|
303 } |
|
304 } |
|
305 RBBIDebugPrintf("\n"); |
|
306 } |
|
307 #endif |
|
308 |
|
309 |
|
310 #ifdef RBBI_DEBUG |
|
311 U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) |
|
312 { |
|
313 int i; |
|
314 for (i=0; i<s.length(); i++) { |
|
315 RBBIDebugPrintf("%c", s.charAt(i)); |
|
316 // putc(s.charAt(i), stdout); |
|
317 } |
|
318 for (i=s.length(); i<minWidth; i++) { |
|
319 RBBIDebugPrintf(" "); |
|
320 } |
|
321 } |
|
322 #endif |
|
323 |
|
324 |
|
325 //------------------------------------------------------------------------- |
|
326 // |
|
327 // print. Print out the tree of nodes rooted at "this" |
|
328 // |
|
329 //------------------------------------------------------------------------- |
|
330 #ifdef RBBI_DEBUG |
|
331 void RBBINode::printTree(UBool printHeading) { |
|
332 if (printHeading) { |
|
333 RBBIDebugPrintf( "-------------------------------------------------------------------\n" |
|
334 " Address type Parent LeftChild RightChild serial position value\n" |
|
335 ); |
|
336 } |
|
337 this->printNode(); |
|
338 if (this != NULL) { |
|
339 // Only dump the definition under a variable reference if asked to. |
|
340 // Unconditinally dump children of all other node types. |
|
341 if (fType != varRef) { |
|
342 if (fLeftChild != NULL) { |
|
343 fLeftChild->printTree(FALSE); |
|
344 } |
|
345 |
|
346 if (fRightChild != NULL) { |
|
347 fRightChild->printTree(FALSE); |
|
348 } |
|
349 } |
|
350 } |
|
351 } |
|
352 #endif |
|
353 |
|
354 |
|
355 |
|
356 U_NAMESPACE_END |
|
357 |
|
358 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |