|
1 /* |
|
2 ********************************************************************** |
|
3 * Copyright (c) 2002-2009, International Business Machines |
|
4 * Corporation and others. All Rights Reserved. |
|
5 ********************************************************************** |
|
6 */ |
|
7 // |
|
8 // rbbitblb.cpp |
|
9 // |
|
10 |
|
11 |
|
12 #include "unicode/utypes.h" |
|
13 |
|
14 #if !UCONFIG_NO_BREAK_ITERATION |
|
15 |
|
16 #include "unicode/unistr.h" |
|
17 #include "rbbitblb.h" |
|
18 #include "rbbirb.h" |
|
19 #include "rbbisetb.h" |
|
20 #include "rbbidata.h" |
|
21 #include "cstring.h" |
|
22 #include "uassert.h" |
|
23 #include "cmemory.h" |
|
24 |
|
25 U_NAMESPACE_BEGIN |
|
26 |
|
27 RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode) : |
|
28 fTree(*rootNode) { |
|
29 fRB = rb; |
|
30 fStatus = fRB->fStatus; |
|
31 UErrorCode status = U_ZERO_ERROR; |
|
32 fDStates = new UVector(status); |
|
33 if (U_FAILURE(*fStatus)) { |
|
34 return; |
|
35 } |
|
36 if (U_FAILURE(status)) { |
|
37 *fStatus = status; |
|
38 return; |
|
39 } |
|
40 if (fDStates == NULL) { |
|
41 *fStatus = U_MEMORY_ALLOCATION_ERROR;; |
|
42 } |
|
43 } |
|
44 |
|
45 |
|
46 |
|
47 RBBITableBuilder::~RBBITableBuilder() { |
|
48 int i; |
|
49 for (i=0; i<fDStates->size(); i++) { |
|
50 delete (RBBIStateDescriptor *)fDStates->elementAt(i); |
|
51 } |
|
52 delete fDStates; |
|
53 } |
|
54 |
|
55 |
|
56 //----------------------------------------------------------------------------- |
|
57 // |
|
58 // RBBITableBuilder::build - This is the main function for building the DFA state transtion |
|
59 // table from the RBBI rules parse tree. |
|
60 // |
|
61 //----------------------------------------------------------------------------- |
|
62 void RBBITableBuilder::build() { |
|
63 |
|
64 if (U_FAILURE(*fStatus)) { |
|
65 return; |
|
66 } |
|
67 |
|
68 // If there were no rules, just return. This situation can easily arise |
|
69 // for the reverse rules. |
|
70 if (fTree==NULL) { |
|
71 return; |
|
72 } |
|
73 |
|
74 // |
|
75 // Walk through the tree, replacing any references to $variables with a copy of the |
|
76 // parse tree for the substition expression. |
|
77 // |
|
78 fTree = fTree->flattenVariables(); |
|
79 #ifdef RBBI_DEBUG |
|
80 if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) { |
|
81 RBBIDebugPuts("Parse tree after flattening variable references."); |
|
82 fTree->printTree(TRUE); |
|
83 } |
|
84 #endif |
|
85 |
|
86 // |
|
87 // If the rules contained any references to {bof} |
|
88 // add a {bof} <cat> <former root of tree> to the |
|
89 // tree. Means that all matches must start out with the |
|
90 // {bof} fake character. |
|
91 // |
|
92 if (fRB->fSetBuilder->sawBOF()) { |
|
93 RBBINode *bofTop = new RBBINode(RBBINode::opCat); |
|
94 RBBINode *bofLeaf = new RBBINode(RBBINode::leafChar); |
|
95 // Delete and exit if memory allocation failed. |
|
96 if (bofTop == NULL || bofLeaf == NULL) { |
|
97 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
|
98 delete bofTop; |
|
99 delete bofLeaf; |
|
100 return; |
|
101 } |
|
102 bofTop->fLeftChild = bofLeaf; |
|
103 bofTop->fRightChild = fTree; |
|
104 bofLeaf->fParent = bofTop; |
|
105 bofLeaf->fVal = 2; // Reserved value for {bof}. |
|
106 fTree = bofTop; |
|
107 } |
|
108 |
|
109 // |
|
110 // Add a unique right-end marker to the expression. |
|
111 // Appears as a cat-node, left child being the original tree, |
|
112 // right child being the end marker. |
|
113 // |
|
114 RBBINode *cn = new RBBINode(RBBINode::opCat); |
|
115 // Exit if memory allocation failed. |
|
116 if (cn == NULL) { |
|
117 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
|
118 return; |
|
119 } |
|
120 cn->fLeftChild = fTree; |
|
121 fTree->fParent = cn; |
|
122 cn->fRightChild = new RBBINode(RBBINode::endMark); |
|
123 // Delete and exit if memory allocation failed. |
|
124 if (cn->fRightChild == NULL) { |
|
125 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
|
126 delete cn; |
|
127 return; |
|
128 } |
|
129 cn->fRightChild->fParent = cn; |
|
130 fTree = cn; |
|
131 |
|
132 // |
|
133 // Replace all references to UnicodeSets with the tree for the equivalent |
|
134 // expression. |
|
135 // |
|
136 fTree->flattenSets(); |
|
137 #ifdef RBBI_DEBUG |
|
138 if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) { |
|
139 RBBIDebugPuts("Parse tree after flattening Unicode Set references."); |
|
140 fTree->printTree(TRUE); |
|
141 } |
|
142 #endif |
|
143 |
|
144 |
|
145 // |
|
146 // calculate the functions nullable, firstpos, lastpos and followpos on |
|
147 // nodes in the parse tree. |
|
148 // See the alogrithm description in Aho. |
|
149 // Understanding how this works by looking at the code alone will be |
|
150 // nearly impossible. |
|
151 // |
|
152 calcNullable(fTree); |
|
153 calcFirstPos(fTree); |
|
154 calcLastPos(fTree); |
|
155 calcFollowPos(fTree); |
|
156 if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) { |
|
157 RBBIDebugPuts("\n"); |
|
158 printPosSets(fTree); |
|
159 } |
|
160 |
|
161 // |
|
162 // For "chained" rules, modify the followPos sets |
|
163 // |
|
164 if (fRB->fChainRules) { |
|
165 calcChainedFollowPos(fTree); |
|
166 } |
|
167 |
|
168 // |
|
169 // BOF (start of input) test fixup. |
|
170 // |
|
171 if (fRB->fSetBuilder->sawBOF()) { |
|
172 bofFixup(); |
|
173 } |
|
174 |
|
175 // |
|
176 // Build the DFA state transition tables. |
|
177 // |
|
178 buildStateTable(); |
|
179 flagAcceptingStates(); |
|
180 flagLookAheadStates(); |
|
181 flagTaggedStates(); |
|
182 |
|
183 // |
|
184 // Update the global table of rule status {tag} values |
|
185 // The rule builder has a global vector of status values that are common |
|
186 // for all tables. Merge the ones from this table into the global set. |
|
187 // |
|
188 mergeRuleStatusVals(); |
|
189 |
|
190 if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "states")) {printStates();}; |
|
191 } |
|
192 |
|
193 |
|
194 |
|
195 //----------------------------------------------------------------------------- |
|
196 // |
|
197 // calcNullable. Impossible to explain succinctly. See Aho, section 3.9 |
|
198 // |
|
199 //----------------------------------------------------------------------------- |
|
200 void RBBITableBuilder::calcNullable(RBBINode *n) { |
|
201 if (n == NULL) { |
|
202 return; |
|
203 } |
|
204 if (n->fType == RBBINode::setRef || |
|
205 n->fType == RBBINode::endMark ) { |
|
206 // These are non-empty leaf node types. |
|
207 n->fNullable = FALSE; |
|
208 return; |
|
209 } |
|
210 |
|
211 if (n->fType == RBBINode::lookAhead || n->fType == RBBINode::tag) { |
|
212 // Lookahead marker node. It's a leaf, so no recursion on children. |
|
213 // It's nullable because it does not match any literal text from the input stream. |
|
214 n->fNullable = TRUE; |
|
215 return; |
|
216 } |
|
217 |
|
218 |
|
219 // The node is not a leaf. |
|
220 // Calculate nullable on its children. |
|
221 calcNullable(n->fLeftChild); |
|
222 calcNullable(n->fRightChild); |
|
223 |
|
224 // Apply functions from table 3.40 in Aho |
|
225 if (n->fType == RBBINode::opOr) { |
|
226 n->fNullable = n->fLeftChild->fNullable || n->fRightChild->fNullable; |
|
227 } |
|
228 else if (n->fType == RBBINode::opCat) { |
|
229 n->fNullable = n->fLeftChild->fNullable && n->fRightChild->fNullable; |
|
230 } |
|
231 else if (n->fType == RBBINode::opStar || n->fType == RBBINode::opQuestion) { |
|
232 n->fNullable = TRUE; |
|
233 } |
|
234 else { |
|
235 n->fNullable = FALSE; |
|
236 } |
|
237 } |
|
238 |
|
239 |
|
240 |
|
241 |
|
242 //----------------------------------------------------------------------------- |
|
243 // |
|
244 // calcFirstPos. Impossible to explain succinctly. See Aho, section 3.9 |
|
245 // |
|
246 //----------------------------------------------------------------------------- |
|
247 void RBBITableBuilder::calcFirstPos(RBBINode *n) { |
|
248 if (n == NULL) { |
|
249 return; |
|
250 } |
|
251 if (n->fType == RBBINode::leafChar || |
|
252 n->fType == RBBINode::endMark || |
|
253 n->fType == RBBINode::lookAhead || |
|
254 n->fType == RBBINode::tag) { |
|
255 // These are non-empty leaf node types. |
|
256 // Note: In order to maintain the sort invariant on the set, |
|
257 // this function should only be called on a node whose set is |
|
258 // empty to start with. |
|
259 n->fFirstPosSet->addElement(n, *fStatus); |
|
260 return; |
|
261 } |
|
262 |
|
263 // The node is not a leaf. |
|
264 // Calculate firstPos on its children. |
|
265 calcFirstPos(n->fLeftChild); |
|
266 calcFirstPos(n->fRightChild); |
|
267 |
|
268 // Apply functions from table 3.40 in Aho |
|
269 if (n->fType == RBBINode::opOr) { |
|
270 setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet); |
|
271 setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet); |
|
272 } |
|
273 else if (n->fType == RBBINode::opCat) { |
|
274 setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet); |
|
275 if (n->fLeftChild->fNullable) { |
|
276 setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet); |
|
277 } |
|
278 } |
|
279 else if (n->fType == RBBINode::opStar || |
|
280 n->fType == RBBINode::opQuestion || |
|
281 n->fType == RBBINode::opPlus) { |
|
282 setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet); |
|
283 } |
|
284 } |
|
285 |
|
286 |
|
287 |
|
288 //----------------------------------------------------------------------------- |
|
289 // |
|
290 // calcLastPos. Impossible to explain succinctly. See Aho, section 3.9 |
|
291 // |
|
292 //----------------------------------------------------------------------------- |
|
293 void RBBITableBuilder::calcLastPos(RBBINode *n) { |
|
294 if (n == NULL) { |
|
295 return; |
|
296 } |
|
297 if (n->fType == RBBINode::leafChar || |
|
298 n->fType == RBBINode::endMark || |
|
299 n->fType == RBBINode::lookAhead || |
|
300 n->fType == RBBINode::tag) { |
|
301 // These are non-empty leaf node types. |
|
302 // Note: In order to maintain the sort invariant on the set, |
|
303 // this function should only be called on a node whose set is |
|
304 // empty to start with. |
|
305 n->fLastPosSet->addElement(n, *fStatus); |
|
306 return; |
|
307 } |
|
308 |
|
309 // The node is not a leaf. |
|
310 // Calculate lastPos on its children. |
|
311 calcLastPos(n->fLeftChild); |
|
312 calcLastPos(n->fRightChild); |
|
313 |
|
314 // Apply functions from table 3.40 in Aho |
|
315 if (n->fType == RBBINode::opOr) { |
|
316 setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); |
|
317 setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet); |
|
318 } |
|
319 else if (n->fType == RBBINode::opCat) { |
|
320 setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet); |
|
321 if (n->fRightChild->fNullable) { |
|
322 setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); |
|
323 } |
|
324 } |
|
325 else if (n->fType == RBBINode::opStar || |
|
326 n->fType == RBBINode::opQuestion || |
|
327 n->fType == RBBINode::opPlus) { |
|
328 setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); |
|
329 } |
|
330 } |
|
331 |
|
332 |
|
333 |
|
334 //----------------------------------------------------------------------------- |
|
335 // |
|
336 // calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9 |
|
337 // |
|
338 //----------------------------------------------------------------------------- |
|
339 void RBBITableBuilder::calcFollowPos(RBBINode *n) { |
|
340 if (n == NULL || |
|
341 n->fType == RBBINode::leafChar || |
|
342 n->fType == RBBINode::endMark) { |
|
343 return; |
|
344 } |
|
345 |
|
346 calcFollowPos(n->fLeftChild); |
|
347 calcFollowPos(n->fRightChild); |
|
348 |
|
349 // Aho rule #1 |
|
350 if (n->fType == RBBINode::opCat) { |
|
351 RBBINode *i; // is 'i' in Aho's description |
|
352 uint32_t ix; |
|
353 |
|
354 UVector *LastPosOfLeftChild = n->fLeftChild->fLastPosSet; |
|
355 |
|
356 for (ix=0; ix<(uint32_t)LastPosOfLeftChild->size(); ix++) { |
|
357 i = (RBBINode *)LastPosOfLeftChild->elementAt(ix); |
|
358 setAdd(i->fFollowPos, n->fRightChild->fFirstPosSet); |
|
359 } |
|
360 } |
|
361 |
|
362 // Aho rule #2 |
|
363 if (n->fType == RBBINode::opStar || |
|
364 n->fType == RBBINode::opPlus) { |
|
365 RBBINode *i; // again, n and i are the names from Aho's description. |
|
366 uint32_t ix; |
|
367 |
|
368 for (ix=0; ix<(uint32_t)n->fLastPosSet->size(); ix++) { |
|
369 i = (RBBINode *)n->fLastPosSet->elementAt(ix); |
|
370 setAdd(i->fFollowPos, n->fFirstPosSet); |
|
371 } |
|
372 } |
|
373 |
|
374 |
|
375 |
|
376 } |
|
377 |
|
378 |
|
379 //----------------------------------------------------------------------------- |
|
380 // |
|
381 // calcChainedFollowPos. Modify the previously calculated followPos sets |
|
382 // to implement rule chaining. NOT described by Aho |
|
383 // |
|
384 //----------------------------------------------------------------------------- |
|
385 void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree) { |
|
386 |
|
387 UVector endMarkerNodes(*fStatus); |
|
388 UVector leafNodes(*fStatus); |
|
389 int32_t i; |
|
390 |
|
391 if (U_FAILURE(*fStatus)) { |
|
392 return; |
|
393 } |
|
394 |
|
395 // get a list of all endmarker nodes. |
|
396 tree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus); |
|
397 |
|
398 // get a list all leaf nodes |
|
399 tree->findNodes(&leafNodes, RBBINode::leafChar, *fStatus); |
|
400 if (U_FAILURE(*fStatus)) { |
|
401 return; |
|
402 } |
|
403 |
|
404 // Get all nodes that can be the start a match, which is FirstPosition() |
|
405 // of the portion of the tree corresponding to user-written rules. |
|
406 // See the tree description in bofFixup(). |
|
407 RBBINode *userRuleRoot = tree; |
|
408 if (fRB->fSetBuilder->sawBOF()) { |
|
409 userRuleRoot = tree->fLeftChild->fRightChild; |
|
410 } |
|
411 U_ASSERT(userRuleRoot != NULL); |
|
412 UVector *matchStartNodes = userRuleRoot->fFirstPosSet; |
|
413 |
|
414 |
|
415 // Iteratate over all leaf nodes, |
|
416 // |
|
417 int32_t endNodeIx; |
|
418 int32_t startNodeIx; |
|
419 |
|
420 for (endNodeIx=0; endNodeIx<leafNodes.size(); endNodeIx++) { |
|
421 RBBINode *tNode = (RBBINode *)leafNodes.elementAt(endNodeIx); |
|
422 RBBINode *endNode = NULL; |
|
423 |
|
424 // Identify leaf nodes that correspond to overall rule match positions. |
|
425 // These include an endMarkerNode in their followPos sets. |
|
426 for (i=0; i<endMarkerNodes.size(); i++) { |
|
427 if (tNode->fFollowPos->contains(endMarkerNodes.elementAt(i))) { |
|
428 endNode = tNode; |
|
429 break; |
|
430 } |
|
431 } |
|
432 if (endNode == NULL) { |
|
433 // node wasn't an end node. Try again with the next. |
|
434 continue; |
|
435 } |
|
436 |
|
437 // We've got a node that can end a match. |
|
438 |
|
439 // Line Break Specific hack: If this node's val correspond to the $CM char class, |
|
440 // don't chain from it. |
|
441 // TODO: Add rule syntax for this behavior, get specifics out of here and |
|
442 // into the rule file. |
|
443 if (fRB->fLBCMNoChain) { |
|
444 UChar32 c = this->fRB->fSetBuilder->getFirstChar(endNode->fVal); |
|
445 if (c != -1) { |
|
446 // c == -1 occurs with sets containing only the {eof} marker string. |
|
447 ULineBreak cLBProp = (ULineBreak)u_getIntPropertyValue(c, UCHAR_LINE_BREAK); |
|
448 if (cLBProp == U_LB_COMBINING_MARK) { |
|
449 continue; |
|
450 } |
|
451 } |
|
452 } |
|
453 |
|
454 |
|
455 // Now iterate over the nodes that can start a match, looking for ones |
|
456 // with the same char class as our ending node. |
|
457 RBBINode *startNode; |
|
458 for (startNodeIx = 0; startNodeIx<matchStartNodes->size(); startNodeIx++) { |
|
459 startNode = (RBBINode *)matchStartNodes->elementAt(startNodeIx); |
|
460 if (startNode->fType != RBBINode::leafChar) { |
|
461 continue; |
|
462 } |
|
463 |
|
464 if (endNode->fVal == startNode->fVal) { |
|
465 // The end val (character class) of one possible match is the |
|
466 // same as the start of another. |
|
467 |
|
468 // Add all nodes from the followPos of the start node to the |
|
469 // followPos set of the end node, which will have the effect of |
|
470 // letting matches transition from a match state at endNode |
|
471 // to the second char of a match starting with startNode. |
|
472 setAdd(endNode->fFollowPos, startNode->fFollowPos); |
|
473 } |
|
474 } |
|
475 } |
|
476 } |
|
477 |
|
478 |
|
479 //----------------------------------------------------------------------------- |
|
480 // |
|
481 // bofFixup. Fixup for state tables that include {bof} beginning of input testing. |
|
482 // Do an swizzle similar to chaining, modifying the followPos set of |
|
483 // the bofNode to include the followPos nodes from other {bot} nodes |
|
484 // scattered through the tree. |
|
485 // |
|
486 // This function has much in common with calcChainedFollowPos(). |
|
487 // |
|
488 //----------------------------------------------------------------------------- |
|
489 void RBBITableBuilder::bofFixup() { |
|
490 |
|
491 if (U_FAILURE(*fStatus)) { |
|
492 return; |
|
493 } |
|
494 |
|
495 // The parse tree looks like this ... |
|
496 // fTree root ---> <cat> |
|
497 // / \ . |
|
498 // <cat> <#end node> |
|
499 // / \ . |
|
500 // <bofNode> rest |
|
501 // of tree |
|
502 // |
|
503 // We will be adding things to the followPos set of the <bofNode> |
|
504 // |
|
505 RBBINode *bofNode = fTree->fLeftChild->fLeftChild; |
|
506 U_ASSERT(bofNode->fType == RBBINode::leafChar); |
|
507 U_ASSERT(bofNode->fVal == 2); |
|
508 |
|
509 // Get all nodes that can be the start a match of the user-written rules |
|
510 // (excluding the fake bofNode) |
|
511 // We want the nodes that can start a match in the |
|
512 // part labeled "rest of tree" |
|
513 // |
|
514 UVector *matchStartNodes = fTree->fLeftChild->fRightChild->fFirstPosSet; |
|
515 |
|
516 RBBINode *startNode; |
|
517 int startNodeIx; |
|
518 for (startNodeIx = 0; startNodeIx<matchStartNodes->size(); startNodeIx++) { |
|
519 startNode = (RBBINode *)matchStartNodes->elementAt(startNodeIx); |
|
520 if (startNode->fType != RBBINode::leafChar) { |
|
521 continue; |
|
522 } |
|
523 |
|
524 if (startNode->fVal == bofNode->fVal) { |
|
525 // We found a leaf node corresponding to a {bof} that was |
|
526 // explicitly written into a rule. |
|
527 // Add everything from the followPos set of this node to the |
|
528 // followPos set of the fake bofNode at the start of the tree. |
|
529 // |
|
530 setAdd(bofNode->fFollowPos, startNode->fFollowPos); |
|
531 } |
|
532 } |
|
533 } |
|
534 |
|
535 //----------------------------------------------------------------------------- |
|
536 // |
|
537 // buildStateTable() Determine the set of runtime DFA states and the |
|
538 // transition tables for these states, by the algorithm |
|
539 // of fig. 3.44 in Aho. |
|
540 // |
|
541 // Most of the comments are quotes of Aho's psuedo-code. |
|
542 // |
|
543 //----------------------------------------------------------------------------- |
|
544 void RBBITableBuilder::buildStateTable() { |
|
545 if (U_FAILURE(*fStatus)) { |
|
546 return; |
|
547 } |
|
548 RBBIStateDescriptor *failState; |
|
549 // Set it to NULL to avoid uninitialized warning |
|
550 RBBIStateDescriptor *initialState = NULL; |
|
551 // |
|
552 // Add a dummy state 0 - the stop state. Not from Aho. |
|
553 int lastInputSymbol = fRB->fSetBuilder->getNumCharCategories() - 1; |
|
554 failState = new RBBIStateDescriptor(lastInputSymbol, fStatus); |
|
555 if (failState == NULL) { |
|
556 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
|
557 goto ExitBuildSTdeleteall; |
|
558 } |
|
559 failState->fPositions = new UVector(*fStatus); |
|
560 if (failState->fPositions == NULL) { |
|
561 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
|
562 } |
|
563 if (failState->fPositions == NULL || U_FAILURE(*fStatus)) { |
|
564 goto ExitBuildSTdeleteall; |
|
565 } |
|
566 fDStates->addElement(failState, *fStatus); |
|
567 if (U_FAILURE(*fStatus)) { |
|
568 goto ExitBuildSTdeleteall; |
|
569 } |
|
570 |
|
571 // initially, the only unmarked state in Dstates is firstpos(root), |
|
572 // where toot is the root of the syntax tree for (r)#; |
|
573 initialState = new RBBIStateDescriptor(lastInputSymbol, fStatus); |
|
574 if (initialState == NULL) { |
|
575 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
|
576 } |
|
577 if (U_FAILURE(*fStatus)) { |
|
578 goto ExitBuildSTdeleteall; |
|
579 } |
|
580 initialState->fPositions = new UVector(*fStatus); |
|
581 if (initialState->fPositions == NULL) { |
|
582 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
|
583 } |
|
584 if (U_FAILURE(*fStatus)) { |
|
585 goto ExitBuildSTdeleteall; |
|
586 } |
|
587 setAdd(initialState->fPositions, fTree->fFirstPosSet); |
|
588 fDStates->addElement(initialState, *fStatus); |
|
589 if (U_FAILURE(*fStatus)) { |
|
590 goto ExitBuildSTdeleteall; |
|
591 } |
|
592 |
|
593 // while there is an unmarked state T in Dstates do begin |
|
594 for (;;) { |
|
595 RBBIStateDescriptor *T = NULL; |
|
596 int32_t tx; |
|
597 for (tx=1; tx<fDStates->size(); tx++) { |
|
598 RBBIStateDescriptor *temp; |
|
599 temp = (RBBIStateDescriptor *)fDStates->elementAt(tx); |
|
600 if (temp->fMarked == FALSE) { |
|
601 T = temp; |
|
602 break; |
|
603 } |
|
604 } |
|
605 if (T == NULL) { |
|
606 break; |
|
607 } |
|
608 |
|
609 // mark T; |
|
610 T->fMarked = TRUE; |
|
611 |
|
612 // for each input symbol a do begin |
|
613 int32_t a; |
|
614 for (a = 1; a<=lastInputSymbol; a++) { |
|
615 // let U be the set of positions that are in followpos(p) |
|
616 // for some position p in T |
|
617 // such that the symbol at position p is a; |
|
618 UVector *U = NULL; |
|
619 RBBINode *p; |
|
620 int32_t px; |
|
621 for (px=0; px<T->fPositions->size(); px++) { |
|
622 p = (RBBINode *)T->fPositions->elementAt(px); |
|
623 if ((p->fType == RBBINode::leafChar) && (p->fVal == a)) { |
|
624 if (U == NULL) { |
|
625 U = new UVector(*fStatus); |
|
626 if (U == NULL) { |
|
627 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
|
628 goto ExitBuildSTdeleteall; |
|
629 } |
|
630 } |
|
631 setAdd(U, p->fFollowPos); |
|
632 } |
|
633 } |
|
634 |
|
635 // if U is not empty and not in DStates then |
|
636 int32_t ux = 0; |
|
637 UBool UinDstates = FALSE; |
|
638 if (U != NULL) { |
|
639 U_ASSERT(U->size() > 0); |
|
640 int ix; |
|
641 for (ix=0; ix<fDStates->size(); ix++) { |
|
642 RBBIStateDescriptor *temp2; |
|
643 temp2 = (RBBIStateDescriptor *)fDStates->elementAt(ix); |
|
644 if (setEquals(U, temp2->fPositions)) { |
|
645 delete U; |
|
646 U = temp2->fPositions; |
|
647 ux = ix; |
|
648 UinDstates = TRUE; |
|
649 break; |
|
650 } |
|
651 } |
|
652 |
|
653 // Add U as an unmarked state to Dstates |
|
654 if (!UinDstates) |
|
655 { |
|
656 RBBIStateDescriptor *newState = new RBBIStateDescriptor(lastInputSymbol, fStatus); |
|
657 if (newState == NULL) { |
|
658 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
|
659 } |
|
660 if (U_FAILURE(*fStatus)) { |
|
661 goto ExitBuildSTdeleteall; |
|
662 } |
|
663 newState->fPositions = U; |
|
664 fDStates->addElement(newState, *fStatus); |
|
665 if (U_FAILURE(*fStatus)) { |
|
666 return; |
|
667 } |
|
668 ux = fDStates->size()-1; |
|
669 } |
|
670 |
|
671 // Dtran[T, a] := U; |
|
672 T->fDtran->setElementAt(ux, a); |
|
673 } |
|
674 } |
|
675 } |
|
676 return; |
|
677 // delete local pointers only if error occured. |
|
678 ExitBuildSTdeleteall: |
|
679 delete initialState; |
|
680 delete failState; |
|
681 } |
|
682 |
|
683 |
|
684 |
|
685 //----------------------------------------------------------------------------- |
|
686 // |
|
687 // flagAcceptingStates Identify accepting states. |
|
688 // First get a list of all of the end marker nodes. |
|
689 // Then, for each state s, |
|
690 // if s contains one of the end marker nodes in its list of tree positions then |
|
691 // s is an accepting state. |
|
692 // |
|
693 //----------------------------------------------------------------------------- |
|
694 void RBBITableBuilder::flagAcceptingStates() { |
|
695 if (U_FAILURE(*fStatus)) { |
|
696 return; |
|
697 } |
|
698 UVector endMarkerNodes(*fStatus); |
|
699 RBBINode *endMarker; |
|
700 int32_t i; |
|
701 int32_t n; |
|
702 |
|
703 if (U_FAILURE(*fStatus)) { |
|
704 return; |
|
705 } |
|
706 |
|
707 fTree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus); |
|
708 if (U_FAILURE(*fStatus)) { |
|
709 return; |
|
710 } |
|
711 |
|
712 for (i=0; i<endMarkerNodes.size(); i++) { |
|
713 endMarker = (RBBINode *)endMarkerNodes.elementAt(i); |
|
714 for (n=0; n<fDStates->size(); n++) { |
|
715 RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); |
|
716 if (sd->fPositions->indexOf(endMarker) >= 0) { |
|
717 // Any non-zero value for fAccepting means this is an accepting node. |
|
718 // The value is what will be returned to the user as the break status. |
|
719 // If no other value was specified, force it to -1. |
|
720 |
|
721 if (sd->fAccepting==0) { |
|
722 // State hasn't been marked as accepting yet. Do it now. |
|
723 sd->fAccepting = endMarker->fVal; |
|
724 if (sd->fAccepting == 0) { |
|
725 sd->fAccepting = -1; |
|
726 } |
|
727 } |
|
728 if (sd->fAccepting==-1 && endMarker->fVal != 0) { |
|
729 // Both lookahead and non-lookahead accepting for this state. |
|
730 // Favor the look-ahead. Expedient for line break. |
|
731 // TODO: need a more elegant resolution for conflicting rules. |
|
732 sd->fAccepting = endMarker->fVal; |
|
733 } |
|
734 // implicit else: |
|
735 // if sd->fAccepting already had a value other than 0 or -1, leave it be. |
|
736 |
|
737 // If the end marker node is from a look-ahead rule, set |
|
738 // the fLookAhead field or this state also. |
|
739 if (endMarker->fLookAheadEnd) { |
|
740 // TODO: don't change value if already set? |
|
741 // TODO: allow for more than one active look-ahead rule in engine. |
|
742 // Make value here an index to a side array in engine? |
|
743 sd->fLookAhead = sd->fAccepting; |
|
744 } |
|
745 } |
|
746 } |
|
747 } |
|
748 } |
|
749 |
|
750 |
|
751 //----------------------------------------------------------------------------- |
|
752 // |
|
753 // flagLookAheadStates Very similar to flagAcceptingStates, above. |
|
754 // |
|
755 //----------------------------------------------------------------------------- |
|
756 void RBBITableBuilder::flagLookAheadStates() { |
|
757 if (U_FAILURE(*fStatus)) { |
|
758 return; |
|
759 } |
|
760 UVector lookAheadNodes(*fStatus); |
|
761 RBBINode *lookAheadNode; |
|
762 int32_t i; |
|
763 int32_t n; |
|
764 |
|
765 fTree->findNodes(&lookAheadNodes, RBBINode::lookAhead, *fStatus); |
|
766 if (U_FAILURE(*fStatus)) { |
|
767 return; |
|
768 } |
|
769 for (i=0; i<lookAheadNodes.size(); i++) { |
|
770 lookAheadNode = (RBBINode *)lookAheadNodes.elementAt(i); |
|
771 |
|
772 for (n=0; n<fDStates->size(); n++) { |
|
773 RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); |
|
774 if (sd->fPositions->indexOf(lookAheadNode) >= 0) { |
|
775 sd->fLookAhead = lookAheadNode->fVal; |
|
776 } |
|
777 } |
|
778 } |
|
779 } |
|
780 |
|
781 |
|
782 |
|
783 |
|
784 //----------------------------------------------------------------------------- |
|
785 // |
|
786 // flagTaggedStates |
|
787 // |
|
788 //----------------------------------------------------------------------------- |
|
789 void RBBITableBuilder::flagTaggedStates() { |
|
790 if (U_FAILURE(*fStatus)) { |
|
791 return; |
|
792 } |
|
793 UVector tagNodes(*fStatus); |
|
794 RBBINode *tagNode; |
|
795 int32_t i; |
|
796 int32_t n; |
|
797 |
|
798 if (U_FAILURE(*fStatus)) { |
|
799 return; |
|
800 } |
|
801 fTree->findNodes(&tagNodes, RBBINode::tag, *fStatus); |
|
802 if (U_FAILURE(*fStatus)) { |
|
803 return; |
|
804 } |
|
805 for (i=0; i<tagNodes.size(); i++) { // For each tag node t (all of 'em) |
|
806 tagNode = (RBBINode *)tagNodes.elementAt(i); |
|
807 |
|
808 for (n=0; n<fDStates->size(); n++) { // For each state s (row in the state table) |
|
809 RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); |
|
810 if (sd->fPositions->indexOf(tagNode) >= 0) { // if s include the tag node t |
|
811 sortedAdd(&sd->fTagVals, tagNode->fVal); |
|
812 } |
|
813 } |
|
814 } |
|
815 } |
|
816 |
|
817 |
|
818 |
|
819 |
|
820 //----------------------------------------------------------------------------- |
|
821 // |
|
822 // mergeRuleStatusVals |
|
823 // |
|
824 // Update the global table of rule status {tag} values |
|
825 // The rule builder has a global vector of status values that are common |
|
826 // for all tables. Merge the ones from this table into the global set. |
|
827 // |
|
828 //----------------------------------------------------------------------------- |
|
829 void RBBITableBuilder::mergeRuleStatusVals() { |
|
830 // |
|
831 // The basic outline of what happens here is this... |
|
832 // |
|
833 // for each state in this state table |
|
834 // if the status tag list for this state is in the global statuses list |
|
835 // record where and |
|
836 // continue with the next state |
|
837 // else |
|
838 // add the tag list for this state to the global list. |
|
839 // |
|
840 int i; |
|
841 int n; |
|
842 |
|
843 // Pre-set a single tag of {0} into the table. |
|
844 // We will need this as a default, for rule sets with no explicit tagging. |
|
845 if (fRB->fRuleStatusVals->size() == 0) { |
|
846 fRB->fRuleStatusVals->addElement(1, *fStatus); // Num of statuses in group |
|
847 fRB->fRuleStatusVals->addElement((int32_t)0, *fStatus); // and our single status of zero |
|
848 } |
|
849 |
|
850 // For each state |
|
851 for (n=0; n<fDStates->size(); n++) { |
|
852 RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); |
|
853 UVector *thisStatesTagValues = sd->fTagVals; |
|
854 if (thisStatesTagValues == NULL) { |
|
855 // No tag values are explicitly associated with this state. |
|
856 // Set the default tag value. |
|
857 sd->fTagsIdx = 0; |
|
858 continue; |
|
859 } |
|
860 |
|
861 // There are tag(s) associated with this state. |
|
862 // fTagsIdx will be the index into the global tag list for this state's tag values. |
|
863 // Initial value of -1 flags that we haven't got it set yet. |
|
864 sd->fTagsIdx = -1; |
|
865 int32_t thisTagGroupStart = 0; // indexes into the global rule status vals list |
|
866 int32_t nextTagGroupStart = 0; |
|
867 |
|
868 // Loop runs once per group of tags in the global list |
|
869 while (nextTagGroupStart < fRB->fRuleStatusVals->size()) { |
|
870 thisTagGroupStart = nextTagGroupStart; |
|
871 nextTagGroupStart += fRB->fRuleStatusVals->elementAti(thisTagGroupStart) + 1; |
|
872 if (thisStatesTagValues->size() != fRB->fRuleStatusVals->elementAti(thisTagGroupStart)) { |
|
873 // The number of tags for this state is different from |
|
874 // the number of tags in this group from the global list. |
|
875 // Continue with the next group from the global list. |
|
876 continue; |
|
877 } |
|
878 // The lengths match, go ahead and compare the actual tag values |
|
879 // between this state and the group from the global list. |
|
880 for (i=0; i<thisStatesTagValues->size(); i++) { |
|
881 if (thisStatesTagValues->elementAti(i) != |
|
882 fRB->fRuleStatusVals->elementAti(thisTagGroupStart + 1 + i) ) { |
|
883 // Mismatch. |
|
884 break; |
|
885 } |
|
886 } |
|
887 |
|
888 if (i == thisStatesTagValues->size()) { |
|
889 // We found a set of tag values in the global list that match |
|
890 // those for this state. Use them. |
|
891 sd->fTagsIdx = thisTagGroupStart; |
|
892 break; |
|
893 } |
|
894 } |
|
895 |
|
896 if (sd->fTagsIdx == -1) { |
|
897 // No suitable entry in the global tag list already. Add one |
|
898 sd->fTagsIdx = fRB->fRuleStatusVals->size(); |
|
899 fRB->fRuleStatusVals->addElement(thisStatesTagValues->size(), *fStatus); |
|
900 for (i=0; i<thisStatesTagValues->size(); i++) { |
|
901 fRB->fRuleStatusVals->addElement(thisStatesTagValues->elementAti(i), *fStatus); |
|
902 } |
|
903 } |
|
904 } |
|
905 } |
|
906 |
|
907 |
|
908 |
|
909 |
|
910 |
|
911 |
|
912 |
|
913 //----------------------------------------------------------------------------- |
|
914 // |
|
915 // sortedAdd Add a value to a vector of sorted values (ints). |
|
916 // Do not replicate entries; if the value is already there, do not |
|
917 // add a second one. |
|
918 // Lazily create the vector if it does not already exist. |
|
919 // |
|
920 //----------------------------------------------------------------------------- |
|
921 void RBBITableBuilder::sortedAdd(UVector **vector, int32_t val) { |
|
922 int32_t i; |
|
923 |
|
924 if (*vector == NULL) { |
|
925 *vector = new UVector(*fStatus); |
|
926 } |
|
927 if (*vector == NULL || U_FAILURE(*fStatus)) { |
|
928 return; |
|
929 } |
|
930 UVector *vec = *vector; |
|
931 int32_t vSize = vec->size(); |
|
932 for (i=0; i<vSize; i++) { |
|
933 int32_t valAtI = vec->elementAti(i); |
|
934 if (valAtI == val) { |
|
935 // The value is already in the vector. Don't add it again. |
|
936 return; |
|
937 } |
|
938 if (valAtI > val) { |
|
939 break; |
|
940 } |
|
941 } |
|
942 vec->insertElementAt(val, i, *fStatus); |
|
943 } |
|
944 |
|
945 |
|
946 |
|
947 //----------------------------------------------------------------------------- |
|
948 // |
|
949 // setAdd Set operation on UVector |
|
950 // dest = dest union source |
|
951 // Elements may only appear once and must be sorted. |
|
952 // |
|
953 //----------------------------------------------------------------------------- |
|
954 void RBBITableBuilder::setAdd(UVector *dest, UVector *source) { |
|
955 int32_t destOriginalSize = dest->size(); |
|
956 int32_t sourceSize = source->size(); |
|
957 int32_t di = 0; |
|
958 MaybeStackArray<void *, 16> destArray, sourceArray; // Handle small cases without malloc |
|
959 void **destPtr, **sourcePtr; |
|
960 void **destLim, **sourceLim; |
|
961 |
|
962 if (destOriginalSize > destArray.getCapacity()) { |
|
963 if (destArray.resize(destOriginalSize) == NULL) { |
|
964 return; |
|
965 } |
|
966 } |
|
967 destPtr = destArray.getAlias(); |
|
968 destLim = destPtr + destOriginalSize; // destArray.getArrayLimit()? |
|
969 |
|
970 if (sourceSize > sourceArray.getCapacity()) { |
|
971 if (sourceArray.resize(sourceSize) == NULL) { |
|
972 return; |
|
973 } |
|
974 } |
|
975 sourcePtr = sourceArray.getAlias(); |
|
976 sourceLim = sourcePtr + sourceSize; // sourceArray.getArrayLimit()? |
|
977 |
|
978 // Avoid multiple "get element" calls by getting the contents into arrays |
|
979 (void) dest->toArray(destPtr); |
|
980 (void) source->toArray(sourcePtr); |
|
981 |
|
982 dest->setSize(sourceSize+destOriginalSize, *fStatus); |
|
983 |
|
984 while (sourcePtr < sourceLim && destPtr < destLim) { |
|
985 if (*destPtr == *sourcePtr) { |
|
986 dest->setElementAt(*sourcePtr++, di++); |
|
987 destPtr++; |
|
988 } |
|
989 // This check is required for machines with segmented memory, like i5/OS. |
|
990 // Direct pointer comparison is not recommended. |
|
991 else if (uprv_memcmp(destPtr, sourcePtr, sizeof(void *)) < 0) { |
|
992 dest->setElementAt(*destPtr++, di++); |
|
993 } |
|
994 else { /* *sourcePtr < *destPtr */ |
|
995 dest->setElementAt(*sourcePtr++, di++); |
|
996 } |
|
997 } |
|
998 |
|
999 // At most one of these two cleanup loops will execute |
|
1000 while (destPtr < destLim) { |
|
1001 dest->setElementAt(*destPtr++, di++); |
|
1002 } |
|
1003 while (sourcePtr < sourceLim) { |
|
1004 dest->setElementAt(*sourcePtr++, di++); |
|
1005 } |
|
1006 |
|
1007 dest->setSize(di, *fStatus); |
|
1008 } |
|
1009 |
|
1010 |
|
1011 |
|
1012 //----------------------------------------------------------------------------- |
|
1013 // |
|
1014 // setEqual Set operation on UVector. |
|
1015 // Compare for equality. |
|
1016 // Elements must be sorted. |
|
1017 // |
|
1018 //----------------------------------------------------------------------------- |
|
1019 UBool RBBITableBuilder::setEquals(UVector *a, UVector *b) { |
|
1020 return a->equals(*b); |
|
1021 } |
|
1022 |
|
1023 |
|
1024 //----------------------------------------------------------------------------- |
|
1025 // |
|
1026 // printPosSets Debug function. Dump Nullable, firstpos, lastpos and followpos |
|
1027 // for each node in the tree. |
|
1028 // |
|
1029 //----------------------------------------------------------------------------- |
|
1030 #ifdef RBBI_DEBUG |
|
1031 void RBBITableBuilder::printPosSets(RBBINode *n) { |
|
1032 if (n==NULL) { |
|
1033 return; |
|
1034 } |
|
1035 n->printNode(); |
|
1036 RBBIDebugPrintf(" Nullable: %s\n", n->fNullable?"TRUE":"FALSE"); |
|
1037 |
|
1038 RBBIDebugPrintf(" firstpos: "); |
|
1039 printSet(n->fFirstPosSet); |
|
1040 |
|
1041 RBBIDebugPrintf(" lastpos: "); |
|
1042 printSet(n->fLastPosSet); |
|
1043 |
|
1044 RBBIDebugPrintf(" followpos: "); |
|
1045 printSet(n->fFollowPos); |
|
1046 |
|
1047 printPosSets(n->fLeftChild); |
|
1048 printPosSets(n->fRightChild); |
|
1049 } |
|
1050 #endif |
|
1051 |
|
1052 |
|
1053 |
|
1054 //----------------------------------------------------------------------------- |
|
1055 // |
|
1056 // getTableSize() Calculate the size of the runtime form of this |
|
1057 // state transition table. |
|
1058 // |
|
1059 //----------------------------------------------------------------------------- |
|
1060 int32_t RBBITableBuilder::getTableSize() const { |
|
1061 int32_t size = 0; |
|
1062 int32_t numRows; |
|
1063 int32_t numCols; |
|
1064 int32_t rowSize; |
|
1065 |
|
1066 if (fTree == NULL) { |
|
1067 return 0; |
|
1068 } |
|
1069 |
|
1070 size = sizeof(RBBIStateTable) - 4; // The header, with no rows to the table. |
|
1071 |
|
1072 numRows = fDStates->size(); |
|
1073 numCols = fRB->fSetBuilder->getNumCharCategories(); |
|
1074 |
|
1075 // Note The declaration of RBBIStateTableRow is for a table of two columns. |
|
1076 // Therefore we subtract two from numCols when determining |
|
1077 // how much storage to add to a row for the total columns. |
|
1078 rowSize = sizeof(RBBIStateTableRow) + sizeof(uint16_t)*(numCols-2); |
|
1079 size += numRows * rowSize; |
|
1080 return size; |
|
1081 } |
|
1082 |
|
1083 |
|
1084 |
|
1085 //----------------------------------------------------------------------------- |
|
1086 // |
|
1087 // exportTable() export the state transition table in the format required |
|
1088 // by the runtime engine. getTableSize() bytes of memory |
|
1089 // must be available at the output address "where". |
|
1090 // |
|
1091 //----------------------------------------------------------------------------- |
|
1092 void RBBITableBuilder::exportTable(void *where) { |
|
1093 RBBIStateTable *table = (RBBIStateTable *)where; |
|
1094 uint32_t state; |
|
1095 int col; |
|
1096 |
|
1097 if (U_FAILURE(*fStatus) || fTree == NULL) { |
|
1098 return; |
|
1099 } |
|
1100 |
|
1101 if (fRB->fSetBuilder->getNumCharCategories() > 0x7fff || |
|
1102 fDStates->size() > 0x7fff) { |
|
1103 *fStatus = U_BRK_INTERNAL_ERROR; |
|
1104 return; |
|
1105 } |
|
1106 |
|
1107 table->fRowLen = sizeof(RBBIStateTableRow) + |
|
1108 sizeof(uint16_t) * (fRB->fSetBuilder->getNumCharCategories() - 2); |
|
1109 table->fNumStates = fDStates->size(); |
|
1110 table->fFlags = 0; |
|
1111 if (fRB->fLookAheadHardBreak) { |
|
1112 table->fFlags |= RBBI_LOOKAHEAD_HARD_BREAK; |
|
1113 } |
|
1114 if (fRB->fSetBuilder->sawBOF()) { |
|
1115 table->fFlags |= RBBI_BOF_REQUIRED; |
|
1116 } |
|
1117 table->fReserved = 0; |
|
1118 |
|
1119 for (state=0; state<table->fNumStates; state++) { |
|
1120 RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state); |
|
1121 RBBIStateTableRow *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen); |
|
1122 U_ASSERT (-32768 < sd->fAccepting && sd->fAccepting <= 32767); |
|
1123 U_ASSERT (-32768 < sd->fLookAhead && sd->fLookAhead <= 32767); |
|
1124 row->fAccepting = (int16_t)sd->fAccepting; |
|
1125 row->fLookAhead = (int16_t)sd->fLookAhead; |
|
1126 row->fTagIdx = (int16_t)sd->fTagsIdx; |
|
1127 for (col=0; col<fRB->fSetBuilder->getNumCharCategories(); col++) { |
|
1128 row->fNextState[col] = (uint16_t)sd->fDtran->elementAti(col); |
|
1129 } |
|
1130 } |
|
1131 } |
|
1132 |
|
1133 |
|
1134 |
|
1135 //----------------------------------------------------------------------------- |
|
1136 // |
|
1137 // printSet Debug function. Print the contents of a UVector |
|
1138 // |
|
1139 //----------------------------------------------------------------------------- |
|
1140 #ifdef RBBI_DEBUG |
|
1141 void RBBITableBuilder::printSet(UVector *s) { |
|
1142 int32_t i; |
|
1143 for (i=0; i<s->size(); i++) { |
|
1144 void *v = s->elementAt(i); |
|
1145 RBBIDebugPrintf("%10p", v); |
|
1146 } |
|
1147 RBBIDebugPrintf("\n"); |
|
1148 } |
|
1149 #endif |
|
1150 |
|
1151 |
|
1152 //----------------------------------------------------------------------------- |
|
1153 // |
|
1154 // printStates Debug Function. Dump the fully constructed state transition table. |
|
1155 // |
|
1156 //----------------------------------------------------------------------------- |
|
1157 #ifdef RBBI_DEBUG |
|
1158 void RBBITableBuilder::printStates() { |
|
1159 int c; // input "character" |
|
1160 int n; // state number |
|
1161 |
|
1162 RBBIDebugPrintf("state | i n p u t s y m b o l s \n"); |
|
1163 RBBIDebugPrintf(" | Acc LA Tag"); |
|
1164 for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { |
|
1165 RBBIDebugPrintf(" %2d", c); |
|
1166 } |
|
1167 RBBIDebugPrintf("\n"); |
|
1168 RBBIDebugPrintf(" |---------------"); |
|
1169 for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { |
|
1170 RBBIDebugPrintf("---"); |
|
1171 } |
|
1172 RBBIDebugPrintf("\n"); |
|
1173 |
|
1174 for (n=0; n<fDStates->size(); n++) { |
|
1175 RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); |
|
1176 RBBIDebugPrintf(" %3d | " , n); |
|
1177 RBBIDebugPrintf("%3d %3d %5d ", sd->fAccepting, sd->fLookAhead, sd->fTagsIdx); |
|
1178 for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { |
|
1179 RBBIDebugPrintf(" %2d", sd->fDtran->elementAti(c)); |
|
1180 } |
|
1181 RBBIDebugPrintf("\n"); |
|
1182 } |
|
1183 RBBIDebugPrintf("\n\n"); |
|
1184 } |
|
1185 #endif |
|
1186 |
|
1187 |
|
1188 |
|
1189 //----------------------------------------------------------------------------- |
|
1190 // |
|
1191 // printRuleStatusTable Debug Function. Dump the common rule status table |
|
1192 // |
|
1193 //----------------------------------------------------------------------------- |
|
1194 #ifdef RBBI_DEBUG |
|
1195 void RBBITableBuilder::printRuleStatusTable() { |
|
1196 int32_t thisRecord = 0; |
|
1197 int32_t nextRecord = 0; |
|
1198 int i; |
|
1199 UVector *tbl = fRB->fRuleStatusVals; |
|
1200 |
|
1201 RBBIDebugPrintf("index | tags \n"); |
|
1202 RBBIDebugPrintf("-------------------\n"); |
|
1203 |
|
1204 while (nextRecord < tbl->size()) { |
|
1205 thisRecord = nextRecord; |
|
1206 nextRecord = thisRecord + tbl->elementAti(thisRecord) + 1; |
|
1207 RBBIDebugPrintf("%4d ", thisRecord); |
|
1208 for (i=thisRecord+1; i<nextRecord; i++) { |
|
1209 RBBIDebugPrintf(" %5d", tbl->elementAti(i)); |
|
1210 } |
|
1211 RBBIDebugPrintf("\n"); |
|
1212 } |
|
1213 RBBIDebugPrintf("\n\n"); |
|
1214 } |
|
1215 #endif |
|
1216 |
|
1217 |
|
1218 //----------------------------------------------------------------------------- |
|
1219 // |
|
1220 // RBBIStateDescriptor Methods. This is a very struct-like class |
|
1221 // Most access is directly to the fields. |
|
1222 // |
|
1223 //----------------------------------------------------------------------------- |
|
1224 |
|
1225 RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatus) { |
|
1226 fMarked = FALSE; |
|
1227 fAccepting = 0; |
|
1228 fLookAhead = 0; |
|
1229 fTagsIdx = 0; |
|
1230 fTagVals = NULL; |
|
1231 fPositions = NULL; |
|
1232 fDtran = NULL; |
|
1233 |
|
1234 fDtran = new UVector(lastInputSymbol+1, *fStatus); |
|
1235 if (U_FAILURE(*fStatus)) { |
|
1236 return; |
|
1237 } |
|
1238 if (fDtran == NULL) { |
|
1239 *fStatus = U_MEMORY_ALLOCATION_ERROR; |
|
1240 return; |
|
1241 } |
|
1242 fDtran->setSize(lastInputSymbol+1, *fStatus); // fDtran needs to be pre-sized. |
|
1243 // It is indexed by input symbols, and will |
|
1244 // hold the next state number for each |
|
1245 // symbol. |
|
1246 } |
|
1247 |
|
1248 |
|
1249 RBBIStateDescriptor::~RBBIStateDescriptor() { |
|
1250 delete fPositions; |
|
1251 delete fDtran; |
|
1252 delete fTagVals; |
|
1253 fPositions = NULL; |
|
1254 fDtran = NULL; |
|
1255 fTagVals = NULL; |
|
1256 } |
|
1257 |
|
1258 U_NAMESPACE_END |
|
1259 |
|
1260 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |