1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/rbbitblb.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1260 @@ 1.4 +/* 1.5 +********************************************************************** 1.6 +* Copyright (c) 2002-2009, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +********************************************************************** 1.9 +*/ 1.10 +// 1.11 +// rbbitblb.cpp 1.12 +// 1.13 + 1.14 + 1.15 +#include "unicode/utypes.h" 1.16 + 1.17 +#if !UCONFIG_NO_BREAK_ITERATION 1.18 + 1.19 +#include "unicode/unistr.h" 1.20 +#include "rbbitblb.h" 1.21 +#include "rbbirb.h" 1.22 +#include "rbbisetb.h" 1.23 +#include "rbbidata.h" 1.24 +#include "cstring.h" 1.25 +#include "uassert.h" 1.26 +#include "cmemory.h" 1.27 + 1.28 +U_NAMESPACE_BEGIN 1.29 + 1.30 +RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode) : 1.31 + fTree(*rootNode) { 1.32 + fRB = rb; 1.33 + fStatus = fRB->fStatus; 1.34 + UErrorCode status = U_ZERO_ERROR; 1.35 + fDStates = new UVector(status); 1.36 + if (U_FAILURE(*fStatus)) { 1.37 + return; 1.38 + } 1.39 + if (U_FAILURE(status)) { 1.40 + *fStatus = status; 1.41 + return; 1.42 + } 1.43 + if (fDStates == NULL) { 1.44 + *fStatus = U_MEMORY_ALLOCATION_ERROR;; 1.45 + } 1.46 +} 1.47 + 1.48 + 1.49 + 1.50 +RBBITableBuilder::~RBBITableBuilder() { 1.51 + int i; 1.52 + for (i=0; i<fDStates->size(); i++) { 1.53 + delete (RBBIStateDescriptor *)fDStates->elementAt(i); 1.54 + } 1.55 + delete fDStates; 1.56 +} 1.57 + 1.58 + 1.59 +//----------------------------------------------------------------------------- 1.60 +// 1.61 +// RBBITableBuilder::build - This is the main function for building the DFA state transtion 1.62 +// table from the RBBI rules parse tree. 1.63 +// 1.64 +//----------------------------------------------------------------------------- 1.65 +void RBBITableBuilder::build() { 1.66 + 1.67 + if (U_FAILURE(*fStatus)) { 1.68 + return; 1.69 + } 1.70 + 1.71 + // If there were no rules, just return. This situation can easily arise 1.72 + // for the reverse rules. 1.73 + if (fTree==NULL) { 1.74 + return; 1.75 + } 1.76 + 1.77 + // 1.78 + // Walk through the tree, replacing any references to $variables with a copy of the 1.79 + // parse tree for the substition expression. 1.80 + // 1.81 + fTree = fTree->flattenVariables(); 1.82 +#ifdef RBBI_DEBUG 1.83 + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) { 1.84 + RBBIDebugPuts("Parse tree after flattening variable references."); 1.85 + fTree->printTree(TRUE); 1.86 + } 1.87 +#endif 1.88 + 1.89 + // 1.90 + // If the rules contained any references to {bof} 1.91 + // add a {bof} <cat> <former root of tree> to the 1.92 + // tree. Means that all matches must start out with the 1.93 + // {bof} fake character. 1.94 + // 1.95 + if (fRB->fSetBuilder->sawBOF()) { 1.96 + RBBINode *bofTop = new RBBINode(RBBINode::opCat); 1.97 + RBBINode *bofLeaf = new RBBINode(RBBINode::leafChar); 1.98 + // Delete and exit if memory allocation failed. 1.99 + if (bofTop == NULL || bofLeaf == NULL) { 1.100 + *fStatus = U_MEMORY_ALLOCATION_ERROR; 1.101 + delete bofTop; 1.102 + delete bofLeaf; 1.103 + return; 1.104 + } 1.105 + bofTop->fLeftChild = bofLeaf; 1.106 + bofTop->fRightChild = fTree; 1.107 + bofLeaf->fParent = bofTop; 1.108 + bofLeaf->fVal = 2; // Reserved value for {bof}. 1.109 + fTree = bofTop; 1.110 + } 1.111 + 1.112 + // 1.113 + // Add a unique right-end marker to the expression. 1.114 + // Appears as a cat-node, left child being the original tree, 1.115 + // right child being the end marker. 1.116 + // 1.117 + RBBINode *cn = new RBBINode(RBBINode::opCat); 1.118 + // Exit if memory allocation failed. 1.119 + if (cn == NULL) { 1.120 + *fStatus = U_MEMORY_ALLOCATION_ERROR; 1.121 + return; 1.122 + } 1.123 + cn->fLeftChild = fTree; 1.124 + fTree->fParent = cn; 1.125 + cn->fRightChild = new RBBINode(RBBINode::endMark); 1.126 + // Delete and exit if memory allocation failed. 1.127 + if (cn->fRightChild == NULL) { 1.128 + *fStatus = U_MEMORY_ALLOCATION_ERROR; 1.129 + delete cn; 1.130 + return; 1.131 + } 1.132 + cn->fRightChild->fParent = cn; 1.133 + fTree = cn; 1.134 + 1.135 + // 1.136 + // Replace all references to UnicodeSets with the tree for the equivalent 1.137 + // expression. 1.138 + // 1.139 + fTree->flattenSets(); 1.140 +#ifdef RBBI_DEBUG 1.141 + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) { 1.142 + RBBIDebugPuts("Parse tree after flattening Unicode Set references."); 1.143 + fTree->printTree(TRUE); 1.144 + } 1.145 +#endif 1.146 + 1.147 + 1.148 + // 1.149 + // calculate the functions nullable, firstpos, lastpos and followpos on 1.150 + // nodes in the parse tree. 1.151 + // See the alogrithm description in Aho. 1.152 + // Understanding how this works by looking at the code alone will be 1.153 + // nearly impossible. 1.154 + // 1.155 + calcNullable(fTree); 1.156 + calcFirstPos(fTree); 1.157 + calcLastPos(fTree); 1.158 + calcFollowPos(fTree); 1.159 + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) { 1.160 + RBBIDebugPuts("\n"); 1.161 + printPosSets(fTree); 1.162 + } 1.163 + 1.164 + // 1.165 + // For "chained" rules, modify the followPos sets 1.166 + // 1.167 + if (fRB->fChainRules) { 1.168 + calcChainedFollowPos(fTree); 1.169 + } 1.170 + 1.171 + // 1.172 + // BOF (start of input) test fixup. 1.173 + // 1.174 + if (fRB->fSetBuilder->sawBOF()) { 1.175 + bofFixup(); 1.176 + } 1.177 + 1.178 + // 1.179 + // Build the DFA state transition tables. 1.180 + // 1.181 + buildStateTable(); 1.182 + flagAcceptingStates(); 1.183 + flagLookAheadStates(); 1.184 + flagTaggedStates(); 1.185 + 1.186 + // 1.187 + // Update the global table of rule status {tag} values 1.188 + // The rule builder has a global vector of status values that are common 1.189 + // for all tables. Merge the ones from this table into the global set. 1.190 + // 1.191 + mergeRuleStatusVals(); 1.192 + 1.193 + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "states")) {printStates();}; 1.194 +} 1.195 + 1.196 + 1.197 + 1.198 +//----------------------------------------------------------------------------- 1.199 +// 1.200 +// calcNullable. Impossible to explain succinctly. See Aho, section 3.9 1.201 +// 1.202 +//----------------------------------------------------------------------------- 1.203 +void RBBITableBuilder::calcNullable(RBBINode *n) { 1.204 + if (n == NULL) { 1.205 + return; 1.206 + } 1.207 + if (n->fType == RBBINode::setRef || 1.208 + n->fType == RBBINode::endMark ) { 1.209 + // These are non-empty leaf node types. 1.210 + n->fNullable = FALSE; 1.211 + return; 1.212 + } 1.213 + 1.214 + if (n->fType == RBBINode::lookAhead || n->fType == RBBINode::tag) { 1.215 + // Lookahead marker node. It's a leaf, so no recursion on children. 1.216 + // It's nullable because it does not match any literal text from the input stream. 1.217 + n->fNullable = TRUE; 1.218 + return; 1.219 + } 1.220 + 1.221 + 1.222 + // The node is not a leaf. 1.223 + // Calculate nullable on its children. 1.224 + calcNullable(n->fLeftChild); 1.225 + calcNullable(n->fRightChild); 1.226 + 1.227 + // Apply functions from table 3.40 in Aho 1.228 + if (n->fType == RBBINode::opOr) { 1.229 + n->fNullable = n->fLeftChild->fNullable || n->fRightChild->fNullable; 1.230 + } 1.231 + else if (n->fType == RBBINode::opCat) { 1.232 + n->fNullable = n->fLeftChild->fNullable && n->fRightChild->fNullable; 1.233 + } 1.234 + else if (n->fType == RBBINode::opStar || n->fType == RBBINode::opQuestion) { 1.235 + n->fNullable = TRUE; 1.236 + } 1.237 + else { 1.238 + n->fNullable = FALSE; 1.239 + } 1.240 +} 1.241 + 1.242 + 1.243 + 1.244 + 1.245 +//----------------------------------------------------------------------------- 1.246 +// 1.247 +// calcFirstPos. Impossible to explain succinctly. See Aho, section 3.9 1.248 +// 1.249 +//----------------------------------------------------------------------------- 1.250 +void RBBITableBuilder::calcFirstPos(RBBINode *n) { 1.251 + if (n == NULL) { 1.252 + return; 1.253 + } 1.254 + if (n->fType == RBBINode::leafChar || 1.255 + n->fType == RBBINode::endMark || 1.256 + n->fType == RBBINode::lookAhead || 1.257 + n->fType == RBBINode::tag) { 1.258 + // These are non-empty leaf node types. 1.259 + // Note: In order to maintain the sort invariant on the set, 1.260 + // this function should only be called on a node whose set is 1.261 + // empty to start with. 1.262 + n->fFirstPosSet->addElement(n, *fStatus); 1.263 + return; 1.264 + } 1.265 + 1.266 + // The node is not a leaf. 1.267 + // Calculate firstPos on its children. 1.268 + calcFirstPos(n->fLeftChild); 1.269 + calcFirstPos(n->fRightChild); 1.270 + 1.271 + // Apply functions from table 3.40 in Aho 1.272 + if (n->fType == RBBINode::opOr) { 1.273 + setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet); 1.274 + setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet); 1.275 + } 1.276 + else if (n->fType == RBBINode::opCat) { 1.277 + setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet); 1.278 + if (n->fLeftChild->fNullable) { 1.279 + setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet); 1.280 + } 1.281 + } 1.282 + else if (n->fType == RBBINode::opStar || 1.283 + n->fType == RBBINode::opQuestion || 1.284 + n->fType == RBBINode::opPlus) { 1.285 + setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet); 1.286 + } 1.287 +} 1.288 + 1.289 + 1.290 + 1.291 +//----------------------------------------------------------------------------- 1.292 +// 1.293 +// calcLastPos. Impossible to explain succinctly. See Aho, section 3.9 1.294 +// 1.295 +//----------------------------------------------------------------------------- 1.296 +void RBBITableBuilder::calcLastPos(RBBINode *n) { 1.297 + if (n == NULL) { 1.298 + return; 1.299 + } 1.300 + if (n->fType == RBBINode::leafChar || 1.301 + n->fType == RBBINode::endMark || 1.302 + n->fType == RBBINode::lookAhead || 1.303 + n->fType == RBBINode::tag) { 1.304 + // These are non-empty leaf node types. 1.305 + // Note: In order to maintain the sort invariant on the set, 1.306 + // this function should only be called on a node whose set is 1.307 + // empty to start with. 1.308 + n->fLastPosSet->addElement(n, *fStatus); 1.309 + return; 1.310 + } 1.311 + 1.312 + // The node is not a leaf. 1.313 + // Calculate lastPos on its children. 1.314 + calcLastPos(n->fLeftChild); 1.315 + calcLastPos(n->fRightChild); 1.316 + 1.317 + // Apply functions from table 3.40 in Aho 1.318 + if (n->fType == RBBINode::opOr) { 1.319 + setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); 1.320 + setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet); 1.321 + } 1.322 + else if (n->fType == RBBINode::opCat) { 1.323 + setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet); 1.324 + if (n->fRightChild->fNullable) { 1.325 + setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); 1.326 + } 1.327 + } 1.328 + else if (n->fType == RBBINode::opStar || 1.329 + n->fType == RBBINode::opQuestion || 1.330 + n->fType == RBBINode::opPlus) { 1.331 + setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); 1.332 + } 1.333 +} 1.334 + 1.335 + 1.336 + 1.337 +//----------------------------------------------------------------------------- 1.338 +// 1.339 +// calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9 1.340 +// 1.341 +//----------------------------------------------------------------------------- 1.342 +void RBBITableBuilder::calcFollowPos(RBBINode *n) { 1.343 + if (n == NULL || 1.344 + n->fType == RBBINode::leafChar || 1.345 + n->fType == RBBINode::endMark) { 1.346 + return; 1.347 + } 1.348 + 1.349 + calcFollowPos(n->fLeftChild); 1.350 + calcFollowPos(n->fRightChild); 1.351 + 1.352 + // Aho rule #1 1.353 + if (n->fType == RBBINode::opCat) { 1.354 + RBBINode *i; // is 'i' in Aho's description 1.355 + uint32_t ix; 1.356 + 1.357 + UVector *LastPosOfLeftChild = n->fLeftChild->fLastPosSet; 1.358 + 1.359 + for (ix=0; ix<(uint32_t)LastPosOfLeftChild->size(); ix++) { 1.360 + i = (RBBINode *)LastPosOfLeftChild->elementAt(ix); 1.361 + setAdd(i->fFollowPos, n->fRightChild->fFirstPosSet); 1.362 + } 1.363 + } 1.364 + 1.365 + // Aho rule #2 1.366 + if (n->fType == RBBINode::opStar || 1.367 + n->fType == RBBINode::opPlus) { 1.368 + RBBINode *i; // again, n and i are the names from Aho's description. 1.369 + uint32_t ix; 1.370 + 1.371 + for (ix=0; ix<(uint32_t)n->fLastPosSet->size(); ix++) { 1.372 + i = (RBBINode *)n->fLastPosSet->elementAt(ix); 1.373 + setAdd(i->fFollowPos, n->fFirstPosSet); 1.374 + } 1.375 + } 1.376 + 1.377 + 1.378 + 1.379 +} 1.380 + 1.381 + 1.382 +//----------------------------------------------------------------------------- 1.383 +// 1.384 +// calcChainedFollowPos. Modify the previously calculated followPos sets 1.385 +// to implement rule chaining. NOT described by Aho 1.386 +// 1.387 +//----------------------------------------------------------------------------- 1.388 +void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree) { 1.389 + 1.390 + UVector endMarkerNodes(*fStatus); 1.391 + UVector leafNodes(*fStatus); 1.392 + int32_t i; 1.393 + 1.394 + if (U_FAILURE(*fStatus)) { 1.395 + return; 1.396 + } 1.397 + 1.398 + // get a list of all endmarker nodes. 1.399 + tree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus); 1.400 + 1.401 + // get a list all leaf nodes 1.402 + tree->findNodes(&leafNodes, RBBINode::leafChar, *fStatus); 1.403 + if (U_FAILURE(*fStatus)) { 1.404 + return; 1.405 + } 1.406 + 1.407 + // Get all nodes that can be the start a match, which is FirstPosition() 1.408 + // of the portion of the tree corresponding to user-written rules. 1.409 + // See the tree description in bofFixup(). 1.410 + RBBINode *userRuleRoot = tree; 1.411 + if (fRB->fSetBuilder->sawBOF()) { 1.412 + userRuleRoot = tree->fLeftChild->fRightChild; 1.413 + } 1.414 + U_ASSERT(userRuleRoot != NULL); 1.415 + UVector *matchStartNodes = userRuleRoot->fFirstPosSet; 1.416 + 1.417 + 1.418 + // Iteratate over all leaf nodes, 1.419 + // 1.420 + int32_t endNodeIx; 1.421 + int32_t startNodeIx; 1.422 + 1.423 + for (endNodeIx=0; endNodeIx<leafNodes.size(); endNodeIx++) { 1.424 + RBBINode *tNode = (RBBINode *)leafNodes.elementAt(endNodeIx); 1.425 + RBBINode *endNode = NULL; 1.426 + 1.427 + // Identify leaf nodes that correspond to overall rule match positions. 1.428 + // These include an endMarkerNode in their followPos sets. 1.429 + for (i=0; i<endMarkerNodes.size(); i++) { 1.430 + if (tNode->fFollowPos->contains(endMarkerNodes.elementAt(i))) { 1.431 + endNode = tNode; 1.432 + break; 1.433 + } 1.434 + } 1.435 + if (endNode == NULL) { 1.436 + // node wasn't an end node. Try again with the next. 1.437 + continue; 1.438 + } 1.439 + 1.440 + // We've got a node that can end a match. 1.441 + 1.442 + // Line Break Specific hack: If this node's val correspond to the $CM char class, 1.443 + // don't chain from it. 1.444 + // TODO: Add rule syntax for this behavior, get specifics out of here and 1.445 + // into the rule file. 1.446 + if (fRB->fLBCMNoChain) { 1.447 + UChar32 c = this->fRB->fSetBuilder->getFirstChar(endNode->fVal); 1.448 + if (c != -1) { 1.449 + // c == -1 occurs with sets containing only the {eof} marker string. 1.450 + ULineBreak cLBProp = (ULineBreak)u_getIntPropertyValue(c, UCHAR_LINE_BREAK); 1.451 + if (cLBProp == U_LB_COMBINING_MARK) { 1.452 + continue; 1.453 + } 1.454 + } 1.455 + } 1.456 + 1.457 + 1.458 + // Now iterate over the nodes that can start a match, looking for ones 1.459 + // with the same char class as our ending node. 1.460 + RBBINode *startNode; 1.461 + for (startNodeIx = 0; startNodeIx<matchStartNodes->size(); startNodeIx++) { 1.462 + startNode = (RBBINode *)matchStartNodes->elementAt(startNodeIx); 1.463 + if (startNode->fType != RBBINode::leafChar) { 1.464 + continue; 1.465 + } 1.466 + 1.467 + if (endNode->fVal == startNode->fVal) { 1.468 + // The end val (character class) of one possible match is the 1.469 + // same as the start of another. 1.470 + 1.471 + // Add all nodes from the followPos of the start node to the 1.472 + // followPos set of the end node, which will have the effect of 1.473 + // letting matches transition from a match state at endNode 1.474 + // to the second char of a match starting with startNode. 1.475 + setAdd(endNode->fFollowPos, startNode->fFollowPos); 1.476 + } 1.477 + } 1.478 + } 1.479 +} 1.480 + 1.481 + 1.482 +//----------------------------------------------------------------------------- 1.483 +// 1.484 +// bofFixup. Fixup for state tables that include {bof} beginning of input testing. 1.485 +// Do an swizzle similar to chaining, modifying the followPos set of 1.486 +// the bofNode to include the followPos nodes from other {bot} nodes 1.487 +// scattered through the tree. 1.488 +// 1.489 +// This function has much in common with calcChainedFollowPos(). 1.490 +// 1.491 +//----------------------------------------------------------------------------- 1.492 +void RBBITableBuilder::bofFixup() { 1.493 + 1.494 + if (U_FAILURE(*fStatus)) { 1.495 + return; 1.496 + } 1.497 + 1.498 + // The parse tree looks like this ... 1.499 + // fTree root ---> <cat> 1.500 + // / \ . 1.501 + // <cat> <#end node> 1.502 + // / \ . 1.503 + // <bofNode> rest 1.504 + // of tree 1.505 + // 1.506 + // We will be adding things to the followPos set of the <bofNode> 1.507 + // 1.508 + RBBINode *bofNode = fTree->fLeftChild->fLeftChild; 1.509 + U_ASSERT(bofNode->fType == RBBINode::leafChar); 1.510 + U_ASSERT(bofNode->fVal == 2); 1.511 + 1.512 + // Get all nodes that can be the start a match of the user-written rules 1.513 + // (excluding the fake bofNode) 1.514 + // We want the nodes that can start a match in the 1.515 + // part labeled "rest of tree" 1.516 + // 1.517 + UVector *matchStartNodes = fTree->fLeftChild->fRightChild->fFirstPosSet; 1.518 + 1.519 + RBBINode *startNode; 1.520 + int startNodeIx; 1.521 + for (startNodeIx = 0; startNodeIx<matchStartNodes->size(); startNodeIx++) { 1.522 + startNode = (RBBINode *)matchStartNodes->elementAt(startNodeIx); 1.523 + if (startNode->fType != RBBINode::leafChar) { 1.524 + continue; 1.525 + } 1.526 + 1.527 + if (startNode->fVal == bofNode->fVal) { 1.528 + // We found a leaf node corresponding to a {bof} that was 1.529 + // explicitly written into a rule. 1.530 + // Add everything from the followPos set of this node to the 1.531 + // followPos set of the fake bofNode at the start of the tree. 1.532 + // 1.533 + setAdd(bofNode->fFollowPos, startNode->fFollowPos); 1.534 + } 1.535 + } 1.536 +} 1.537 + 1.538 +//----------------------------------------------------------------------------- 1.539 +// 1.540 +// buildStateTable() Determine the set of runtime DFA states and the 1.541 +// transition tables for these states, by the algorithm 1.542 +// of fig. 3.44 in Aho. 1.543 +// 1.544 +// Most of the comments are quotes of Aho's psuedo-code. 1.545 +// 1.546 +//----------------------------------------------------------------------------- 1.547 +void RBBITableBuilder::buildStateTable() { 1.548 + if (U_FAILURE(*fStatus)) { 1.549 + return; 1.550 + } 1.551 + RBBIStateDescriptor *failState; 1.552 + // Set it to NULL to avoid uninitialized warning 1.553 + RBBIStateDescriptor *initialState = NULL; 1.554 + // 1.555 + // Add a dummy state 0 - the stop state. Not from Aho. 1.556 + int lastInputSymbol = fRB->fSetBuilder->getNumCharCategories() - 1; 1.557 + failState = new RBBIStateDescriptor(lastInputSymbol, fStatus); 1.558 + if (failState == NULL) { 1.559 + *fStatus = U_MEMORY_ALLOCATION_ERROR; 1.560 + goto ExitBuildSTdeleteall; 1.561 + } 1.562 + failState->fPositions = new UVector(*fStatus); 1.563 + if (failState->fPositions == NULL) { 1.564 + *fStatus = U_MEMORY_ALLOCATION_ERROR; 1.565 + } 1.566 + if (failState->fPositions == NULL || U_FAILURE(*fStatus)) { 1.567 + goto ExitBuildSTdeleteall; 1.568 + } 1.569 + fDStates->addElement(failState, *fStatus); 1.570 + if (U_FAILURE(*fStatus)) { 1.571 + goto ExitBuildSTdeleteall; 1.572 + } 1.573 + 1.574 + // initially, the only unmarked state in Dstates is firstpos(root), 1.575 + // where toot is the root of the syntax tree for (r)#; 1.576 + initialState = new RBBIStateDescriptor(lastInputSymbol, fStatus); 1.577 + if (initialState == NULL) { 1.578 + *fStatus = U_MEMORY_ALLOCATION_ERROR; 1.579 + } 1.580 + if (U_FAILURE(*fStatus)) { 1.581 + goto ExitBuildSTdeleteall; 1.582 + } 1.583 + initialState->fPositions = new UVector(*fStatus); 1.584 + if (initialState->fPositions == NULL) { 1.585 + *fStatus = U_MEMORY_ALLOCATION_ERROR; 1.586 + } 1.587 + if (U_FAILURE(*fStatus)) { 1.588 + goto ExitBuildSTdeleteall; 1.589 + } 1.590 + setAdd(initialState->fPositions, fTree->fFirstPosSet); 1.591 + fDStates->addElement(initialState, *fStatus); 1.592 + if (U_FAILURE(*fStatus)) { 1.593 + goto ExitBuildSTdeleteall; 1.594 + } 1.595 + 1.596 + // while there is an unmarked state T in Dstates do begin 1.597 + for (;;) { 1.598 + RBBIStateDescriptor *T = NULL; 1.599 + int32_t tx; 1.600 + for (tx=1; tx<fDStates->size(); tx++) { 1.601 + RBBIStateDescriptor *temp; 1.602 + temp = (RBBIStateDescriptor *)fDStates->elementAt(tx); 1.603 + if (temp->fMarked == FALSE) { 1.604 + T = temp; 1.605 + break; 1.606 + } 1.607 + } 1.608 + if (T == NULL) { 1.609 + break; 1.610 + } 1.611 + 1.612 + // mark T; 1.613 + T->fMarked = TRUE; 1.614 + 1.615 + // for each input symbol a do begin 1.616 + int32_t a; 1.617 + for (a = 1; a<=lastInputSymbol; a++) { 1.618 + // let U be the set of positions that are in followpos(p) 1.619 + // for some position p in T 1.620 + // such that the symbol at position p is a; 1.621 + UVector *U = NULL; 1.622 + RBBINode *p; 1.623 + int32_t px; 1.624 + for (px=0; px<T->fPositions->size(); px++) { 1.625 + p = (RBBINode *)T->fPositions->elementAt(px); 1.626 + if ((p->fType == RBBINode::leafChar) && (p->fVal == a)) { 1.627 + if (U == NULL) { 1.628 + U = new UVector(*fStatus); 1.629 + if (U == NULL) { 1.630 + *fStatus = U_MEMORY_ALLOCATION_ERROR; 1.631 + goto ExitBuildSTdeleteall; 1.632 + } 1.633 + } 1.634 + setAdd(U, p->fFollowPos); 1.635 + } 1.636 + } 1.637 + 1.638 + // if U is not empty and not in DStates then 1.639 + int32_t ux = 0; 1.640 + UBool UinDstates = FALSE; 1.641 + if (U != NULL) { 1.642 + U_ASSERT(U->size() > 0); 1.643 + int ix; 1.644 + for (ix=0; ix<fDStates->size(); ix++) { 1.645 + RBBIStateDescriptor *temp2; 1.646 + temp2 = (RBBIStateDescriptor *)fDStates->elementAt(ix); 1.647 + if (setEquals(U, temp2->fPositions)) { 1.648 + delete U; 1.649 + U = temp2->fPositions; 1.650 + ux = ix; 1.651 + UinDstates = TRUE; 1.652 + break; 1.653 + } 1.654 + } 1.655 + 1.656 + // Add U as an unmarked state to Dstates 1.657 + if (!UinDstates) 1.658 + { 1.659 + RBBIStateDescriptor *newState = new RBBIStateDescriptor(lastInputSymbol, fStatus); 1.660 + if (newState == NULL) { 1.661 + *fStatus = U_MEMORY_ALLOCATION_ERROR; 1.662 + } 1.663 + if (U_FAILURE(*fStatus)) { 1.664 + goto ExitBuildSTdeleteall; 1.665 + } 1.666 + newState->fPositions = U; 1.667 + fDStates->addElement(newState, *fStatus); 1.668 + if (U_FAILURE(*fStatus)) { 1.669 + return; 1.670 + } 1.671 + ux = fDStates->size()-1; 1.672 + } 1.673 + 1.674 + // Dtran[T, a] := U; 1.675 + T->fDtran->setElementAt(ux, a); 1.676 + } 1.677 + } 1.678 + } 1.679 + return; 1.680 + // delete local pointers only if error occured. 1.681 +ExitBuildSTdeleteall: 1.682 + delete initialState; 1.683 + delete failState; 1.684 +} 1.685 + 1.686 + 1.687 + 1.688 +//----------------------------------------------------------------------------- 1.689 +// 1.690 +// flagAcceptingStates Identify accepting states. 1.691 +// First get a list of all of the end marker nodes. 1.692 +// Then, for each state s, 1.693 +// if s contains one of the end marker nodes in its list of tree positions then 1.694 +// s is an accepting state. 1.695 +// 1.696 +//----------------------------------------------------------------------------- 1.697 +void RBBITableBuilder::flagAcceptingStates() { 1.698 + if (U_FAILURE(*fStatus)) { 1.699 + return; 1.700 + } 1.701 + UVector endMarkerNodes(*fStatus); 1.702 + RBBINode *endMarker; 1.703 + int32_t i; 1.704 + int32_t n; 1.705 + 1.706 + if (U_FAILURE(*fStatus)) { 1.707 + return; 1.708 + } 1.709 + 1.710 + fTree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus); 1.711 + if (U_FAILURE(*fStatus)) { 1.712 + return; 1.713 + } 1.714 + 1.715 + for (i=0; i<endMarkerNodes.size(); i++) { 1.716 + endMarker = (RBBINode *)endMarkerNodes.elementAt(i); 1.717 + for (n=0; n<fDStates->size(); n++) { 1.718 + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); 1.719 + if (sd->fPositions->indexOf(endMarker) >= 0) { 1.720 + // Any non-zero value for fAccepting means this is an accepting node. 1.721 + // The value is what will be returned to the user as the break status. 1.722 + // If no other value was specified, force it to -1. 1.723 + 1.724 + if (sd->fAccepting==0) { 1.725 + // State hasn't been marked as accepting yet. Do it now. 1.726 + sd->fAccepting = endMarker->fVal; 1.727 + if (sd->fAccepting == 0) { 1.728 + sd->fAccepting = -1; 1.729 + } 1.730 + } 1.731 + if (sd->fAccepting==-1 && endMarker->fVal != 0) { 1.732 + // Both lookahead and non-lookahead accepting for this state. 1.733 + // Favor the look-ahead. Expedient for line break. 1.734 + // TODO: need a more elegant resolution for conflicting rules. 1.735 + sd->fAccepting = endMarker->fVal; 1.736 + } 1.737 + // implicit else: 1.738 + // if sd->fAccepting already had a value other than 0 or -1, leave it be. 1.739 + 1.740 + // If the end marker node is from a look-ahead rule, set 1.741 + // the fLookAhead field or this state also. 1.742 + if (endMarker->fLookAheadEnd) { 1.743 + // TODO: don't change value if already set? 1.744 + // TODO: allow for more than one active look-ahead rule in engine. 1.745 + // Make value here an index to a side array in engine? 1.746 + sd->fLookAhead = sd->fAccepting; 1.747 + } 1.748 + } 1.749 + } 1.750 + } 1.751 +} 1.752 + 1.753 + 1.754 +//----------------------------------------------------------------------------- 1.755 +// 1.756 +// flagLookAheadStates Very similar to flagAcceptingStates, above. 1.757 +// 1.758 +//----------------------------------------------------------------------------- 1.759 +void RBBITableBuilder::flagLookAheadStates() { 1.760 + if (U_FAILURE(*fStatus)) { 1.761 + return; 1.762 + } 1.763 + UVector lookAheadNodes(*fStatus); 1.764 + RBBINode *lookAheadNode; 1.765 + int32_t i; 1.766 + int32_t n; 1.767 + 1.768 + fTree->findNodes(&lookAheadNodes, RBBINode::lookAhead, *fStatus); 1.769 + if (U_FAILURE(*fStatus)) { 1.770 + return; 1.771 + } 1.772 + for (i=0; i<lookAheadNodes.size(); i++) { 1.773 + lookAheadNode = (RBBINode *)lookAheadNodes.elementAt(i); 1.774 + 1.775 + for (n=0; n<fDStates->size(); n++) { 1.776 + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); 1.777 + if (sd->fPositions->indexOf(lookAheadNode) >= 0) { 1.778 + sd->fLookAhead = lookAheadNode->fVal; 1.779 + } 1.780 + } 1.781 + } 1.782 +} 1.783 + 1.784 + 1.785 + 1.786 + 1.787 +//----------------------------------------------------------------------------- 1.788 +// 1.789 +// flagTaggedStates 1.790 +// 1.791 +//----------------------------------------------------------------------------- 1.792 +void RBBITableBuilder::flagTaggedStates() { 1.793 + if (U_FAILURE(*fStatus)) { 1.794 + return; 1.795 + } 1.796 + UVector tagNodes(*fStatus); 1.797 + RBBINode *tagNode; 1.798 + int32_t i; 1.799 + int32_t n; 1.800 + 1.801 + if (U_FAILURE(*fStatus)) { 1.802 + return; 1.803 + } 1.804 + fTree->findNodes(&tagNodes, RBBINode::tag, *fStatus); 1.805 + if (U_FAILURE(*fStatus)) { 1.806 + return; 1.807 + } 1.808 + for (i=0; i<tagNodes.size(); i++) { // For each tag node t (all of 'em) 1.809 + tagNode = (RBBINode *)tagNodes.elementAt(i); 1.810 + 1.811 + for (n=0; n<fDStates->size(); n++) { // For each state s (row in the state table) 1.812 + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); 1.813 + if (sd->fPositions->indexOf(tagNode) >= 0) { // if s include the tag node t 1.814 + sortedAdd(&sd->fTagVals, tagNode->fVal); 1.815 + } 1.816 + } 1.817 + } 1.818 +} 1.819 + 1.820 + 1.821 + 1.822 + 1.823 +//----------------------------------------------------------------------------- 1.824 +// 1.825 +// mergeRuleStatusVals 1.826 +// 1.827 +// Update the global table of rule status {tag} values 1.828 +// The rule builder has a global vector of status values that are common 1.829 +// for all tables. Merge the ones from this table into the global set. 1.830 +// 1.831 +//----------------------------------------------------------------------------- 1.832 +void RBBITableBuilder::mergeRuleStatusVals() { 1.833 + // 1.834 + // The basic outline of what happens here is this... 1.835 + // 1.836 + // for each state in this state table 1.837 + // if the status tag list for this state is in the global statuses list 1.838 + // record where and 1.839 + // continue with the next state 1.840 + // else 1.841 + // add the tag list for this state to the global list. 1.842 + // 1.843 + int i; 1.844 + int n; 1.845 + 1.846 + // Pre-set a single tag of {0} into the table. 1.847 + // We will need this as a default, for rule sets with no explicit tagging. 1.848 + if (fRB->fRuleStatusVals->size() == 0) { 1.849 + fRB->fRuleStatusVals->addElement(1, *fStatus); // Num of statuses in group 1.850 + fRB->fRuleStatusVals->addElement((int32_t)0, *fStatus); // and our single status of zero 1.851 + } 1.852 + 1.853 + // For each state 1.854 + for (n=0; n<fDStates->size(); n++) { 1.855 + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); 1.856 + UVector *thisStatesTagValues = sd->fTagVals; 1.857 + if (thisStatesTagValues == NULL) { 1.858 + // No tag values are explicitly associated with this state. 1.859 + // Set the default tag value. 1.860 + sd->fTagsIdx = 0; 1.861 + continue; 1.862 + } 1.863 + 1.864 + // There are tag(s) associated with this state. 1.865 + // fTagsIdx will be the index into the global tag list for this state's tag values. 1.866 + // Initial value of -1 flags that we haven't got it set yet. 1.867 + sd->fTagsIdx = -1; 1.868 + int32_t thisTagGroupStart = 0; // indexes into the global rule status vals list 1.869 + int32_t nextTagGroupStart = 0; 1.870 + 1.871 + // Loop runs once per group of tags in the global list 1.872 + while (nextTagGroupStart < fRB->fRuleStatusVals->size()) { 1.873 + thisTagGroupStart = nextTagGroupStart; 1.874 + nextTagGroupStart += fRB->fRuleStatusVals->elementAti(thisTagGroupStart) + 1; 1.875 + if (thisStatesTagValues->size() != fRB->fRuleStatusVals->elementAti(thisTagGroupStart)) { 1.876 + // The number of tags for this state is different from 1.877 + // the number of tags in this group from the global list. 1.878 + // Continue with the next group from the global list. 1.879 + continue; 1.880 + } 1.881 + // The lengths match, go ahead and compare the actual tag values 1.882 + // between this state and the group from the global list. 1.883 + for (i=0; i<thisStatesTagValues->size(); i++) { 1.884 + if (thisStatesTagValues->elementAti(i) != 1.885 + fRB->fRuleStatusVals->elementAti(thisTagGroupStart + 1 + i) ) { 1.886 + // Mismatch. 1.887 + break; 1.888 + } 1.889 + } 1.890 + 1.891 + if (i == thisStatesTagValues->size()) { 1.892 + // We found a set of tag values in the global list that match 1.893 + // those for this state. Use them. 1.894 + sd->fTagsIdx = thisTagGroupStart; 1.895 + break; 1.896 + } 1.897 + } 1.898 + 1.899 + if (sd->fTagsIdx == -1) { 1.900 + // No suitable entry in the global tag list already. Add one 1.901 + sd->fTagsIdx = fRB->fRuleStatusVals->size(); 1.902 + fRB->fRuleStatusVals->addElement(thisStatesTagValues->size(), *fStatus); 1.903 + for (i=0; i<thisStatesTagValues->size(); i++) { 1.904 + fRB->fRuleStatusVals->addElement(thisStatesTagValues->elementAti(i), *fStatus); 1.905 + } 1.906 + } 1.907 + } 1.908 +} 1.909 + 1.910 + 1.911 + 1.912 + 1.913 + 1.914 + 1.915 + 1.916 +//----------------------------------------------------------------------------- 1.917 +// 1.918 +// sortedAdd Add a value to a vector of sorted values (ints). 1.919 +// Do not replicate entries; if the value is already there, do not 1.920 +// add a second one. 1.921 +// Lazily create the vector if it does not already exist. 1.922 +// 1.923 +//----------------------------------------------------------------------------- 1.924 +void RBBITableBuilder::sortedAdd(UVector **vector, int32_t val) { 1.925 + int32_t i; 1.926 + 1.927 + if (*vector == NULL) { 1.928 + *vector = new UVector(*fStatus); 1.929 + } 1.930 + if (*vector == NULL || U_FAILURE(*fStatus)) { 1.931 + return; 1.932 + } 1.933 + UVector *vec = *vector; 1.934 + int32_t vSize = vec->size(); 1.935 + for (i=0; i<vSize; i++) { 1.936 + int32_t valAtI = vec->elementAti(i); 1.937 + if (valAtI == val) { 1.938 + // The value is already in the vector. Don't add it again. 1.939 + return; 1.940 + } 1.941 + if (valAtI > val) { 1.942 + break; 1.943 + } 1.944 + } 1.945 + vec->insertElementAt(val, i, *fStatus); 1.946 +} 1.947 + 1.948 + 1.949 + 1.950 +//----------------------------------------------------------------------------- 1.951 +// 1.952 +// setAdd Set operation on UVector 1.953 +// dest = dest union source 1.954 +// Elements may only appear once and must be sorted. 1.955 +// 1.956 +//----------------------------------------------------------------------------- 1.957 +void RBBITableBuilder::setAdd(UVector *dest, UVector *source) { 1.958 + int32_t destOriginalSize = dest->size(); 1.959 + int32_t sourceSize = source->size(); 1.960 + int32_t di = 0; 1.961 + MaybeStackArray<void *, 16> destArray, sourceArray; // Handle small cases without malloc 1.962 + void **destPtr, **sourcePtr; 1.963 + void **destLim, **sourceLim; 1.964 + 1.965 + if (destOriginalSize > destArray.getCapacity()) { 1.966 + if (destArray.resize(destOriginalSize) == NULL) { 1.967 + return; 1.968 + } 1.969 + } 1.970 + destPtr = destArray.getAlias(); 1.971 + destLim = destPtr + destOriginalSize; // destArray.getArrayLimit()? 1.972 + 1.973 + if (sourceSize > sourceArray.getCapacity()) { 1.974 + if (sourceArray.resize(sourceSize) == NULL) { 1.975 + return; 1.976 + } 1.977 + } 1.978 + sourcePtr = sourceArray.getAlias(); 1.979 + sourceLim = sourcePtr + sourceSize; // sourceArray.getArrayLimit()? 1.980 + 1.981 + // Avoid multiple "get element" calls by getting the contents into arrays 1.982 + (void) dest->toArray(destPtr); 1.983 + (void) source->toArray(sourcePtr); 1.984 + 1.985 + dest->setSize(sourceSize+destOriginalSize, *fStatus); 1.986 + 1.987 + while (sourcePtr < sourceLim && destPtr < destLim) { 1.988 + if (*destPtr == *sourcePtr) { 1.989 + dest->setElementAt(*sourcePtr++, di++); 1.990 + destPtr++; 1.991 + } 1.992 + // This check is required for machines with segmented memory, like i5/OS. 1.993 + // Direct pointer comparison is not recommended. 1.994 + else if (uprv_memcmp(destPtr, sourcePtr, sizeof(void *)) < 0) { 1.995 + dest->setElementAt(*destPtr++, di++); 1.996 + } 1.997 + else { /* *sourcePtr < *destPtr */ 1.998 + dest->setElementAt(*sourcePtr++, di++); 1.999 + } 1.1000 + } 1.1001 + 1.1002 + // At most one of these two cleanup loops will execute 1.1003 + while (destPtr < destLim) { 1.1004 + dest->setElementAt(*destPtr++, di++); 1.1005 + } 1.1006 + while (sourcePtr < sourceLim) { 1.1007 + dest->setElementAt(*sourcePtr++, di++); 1.1008 + } 1.1009 + 1.1010 + dest->setSize(di, *fStatus); 1.1011 +} 1.1012 + 1.1013 + 1.1014 + 1.1015 +//----------------------------------------------------------------------------- 1.1016 +// 1.1017 +// setEqual Set operation on UVector. 1.1018 +// Compare for equality. 1.1019 +// Elements must be sorted. 1.1020 +// 1.1021 +//----------------------------------------------------------------------------- 1.1022 +UBool RBBITableBuilder::setEquals(UVector *a, UVector *b) { 1.1023 + return a->equals(*b); 1.1024 +} 1.1025 + 1.1026 + 1.1027 +//----------------------------------------------------------------------------- 1.1028 +// 1.1029 +// printPosSets Debug function. Dump Nullable, firstpos, lastpos and followpos 1.1030 +// for each node in the tree. 1.1031 +// 1.1032 +//----------------------------------------------------------------------------- 1.1033 +#ifdef RBBI_DEBUG 1.1034 +void RBBITableBuilder::printPosSets(RBBINode *n) { 1.1035 + if (n==NULL) { 1.1036 + return; 1.1037 + } 1.1038 + n->printNode(); 1.1039 + RBBIDebugPrintf(" Nullable: %s\n", n->fNullable?"TRUE":"FALSE"); 1.1040 + 1.1041 + RBBIDebugPrintf(" firstpos: "); 1.1042 + printSet(n->fFirstPosSet); 1.1043 + 1.1044 + RBBIDebugPrintf(" lastpos: "); 1.1045 + printSet(n->fLastPosSet); 1.1046 + 1.1047 + RBBIDebugPrintf(" followpos: "); 1.1048 + printSet(n->fFollowPos); 1.1049 + 1.1050 + printPosSets(n->fLeftChild); 1.1051 + printPosSets(n->fRightChild); 1.1052 +} 1.1053 +#endif 1.1054 + 1.1055 + 1.1056 + 1.1057 +//----------------------------------------------------------------------------- 1.1058 +// 1.1059 +// getTableSize() Calculate the size of the runtime form of this 1.1060 +// state transition table. 1.1061 +// 1.1062 +//----------------------------------------------------------------------------- 1.1063 +int32_t RBBITableBuilder::getTableSize() const { 1.1064 + int32_t size = 0; 1.1065 + int32_t numRows; 1.1066 + int32_t numCols; 1.1067 + int32_t rowSize; 1.1068 + 1.1069 + if (fTree == NULL) { 1.1070 + return 0; 1.1071 + } 1.1072 + 1.1073 + size = sizeof(RBBIStateTable) - 4; // The header, with no rows to the table. 1.1074 + 1.1075 + numRows = fDStates->size(); 1.1076 + numCols = fRB->fSetBuilder->getNumCharCategories(); 1.1077 + 1.1078 + // Note The declaration of RBBIStateTableRow is for a table of two columns. 1.1079 + // Therefore we subtract two from numCols when determining 1.1080 + // how much storage to add to a row for the total columns. 1.1081 + rowSize = sizeof(RBBIStateTableRow) + sizeof(uint16_t)*(numCols-2); 1.1082 + size += numRows * rowSize; 1.1083 + return size; 1.1084 +} 1.1085 + 1.1086 + 1.1087 + 1.1088 +//----------------------------------------------------------------------------- 1.1089 +// 1.1090 +// exportTable() export the state transition table in the format required 1.1091 +// by the runtime engine. getTableSize() bytes of memory 1.1092 +// must be available at the output address "where". 1.1093 +// 1.1094 +//----------------------------------------------------------------------------- 1.1095 +void RBBITableBuilder::exportTable(void *where) { 1.1096 + RBBIStateTable *table = (RBBIStateTable *)where; 1.1097 + uint32_t state; 1.1098 + int col; 1.1099 + 1.1100 + if (U_FAILURE(*fStatus) || fTree == NULL) { 1.1101 + return; 1.1102 + } 1.1103 + 1.1104 + if (fRB->fSetBuilder->getNumCharCategories() > 0x7fff || 1.1105 + fDStates->size() > 0x7fff) { 1.1106 + *fStatus = U_BRK_INTERNAL_ERROR; 1.1107 + return; 1.1108 + } 1.1109 + 1.1110 + table->fRowLen = sizeof(RBBIStateTableRow) + 1.1111 + sizeof(uint16_t) * (fRB->fSetBuilder->getNumCharCategories() - 2); 1.1112 + table->fNumStates = fDStates->size(); 1.1113 + table->fFlags = 0; 1.1114 + if (fRB->fLookAheadHardBreak) { 1.1115 + table->fFlags |= RBBI_LOOKAHEAD_HARD_BREAK; 1.1116 + } 1.1117 + if (fRB->fSetBuilder->sawBOF()) { 1.1118 + table->fFlags |= RBBI_BOF_REQUIRED; 1.1119 + } 1.1120 + table->fReserved = 0; 1.1121 + 1.1122 + for (state=0; state<table->fNumStates; state++) { 1.1123 + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state); 1.1124 + RBBIStateTableRow *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen); 1.1125 + U_ASSERT (-32768 < sd->fAccepting && sd->fAccepting <= 32767); 1.1126 + U_ASSERT (-32768 < sd->fLookAhead && sd->fLookAhead <= 32767); 1.1127 + row->fAccepting = (int16_t)sd->fAccepting; 1.1128 + row->fLookAhead = (int16_t)sd->fLookAhead; 1.1129 + row->fTagIdx = (int16_t)sd->fTagsIdx; 1.1130 + for (col=0; col<fRB->fSetBuilder->getNumCharCategories(); col++) { 1.1131 + row->fNextState[col] = (uint16_t)sd->fDtran->elementAti(col); 1.1132 + } 1.1133 + } 1.1134 +} 1.1135 + 1.1136 + 1.1137 + 1.1138 +//----------------------------------------------------------------------------- 1.1139 +// 1.1140 +// printSet Debug function. Print the contents of a UVector 1.1141 +// 1.1142 +//----------------------------------------------------------------------------- 1.1143 +#ifdef RBBI_DEBUG 1.1144 +void RBBITableBuilder::printSet(UVector *s) { 1.1145 + int32_t i; 1.1146 + for (i=0; i<s->size(); i++) { 1.1147 + void *v = s->elementAt(i); 1.1148 + RBBIDebugPrintf("%10p", v); 1.1149 + } 1.1150 + RBBIDebugPrintf("\n"); 1.1151 +} 1.1152 +#endif 1.1153 + 1.1154 + 1.1155 +//----------------------------------------------------------------------------- 1.1156 +// 1.1157 +// printStates Debug Function. Dump the fully constructed state transition table. 1.1158 +// 1.1159 +//----------------------------------------------------------------------------- 1.1160 +#ifdef RBBI_DEBUG 1.1161 +void RBBITableBuilder::printStates() { 1.1162 + int c; // input "character" 1.1163 + int n; // state number 1.1164 + 1.1165 + RBBIDebugPrintf("state | i n p u t s y m b o l s \n"); 1.1166 + RBBIDebugPrintf(" | Acc LA Tag"); 1.1167 + for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { 1.1168 + RBBIDebugPrintf(" %2d", c); 1.1169 + } 1.1170 + RBBIDebugPrintf("\n"); 1.1171 + RBBIDebugPrintf(" |---------------"); 1.1172 + for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { 1.1173 + RBBIDebugPrintf("---"); 1.1174 + } 1.1175 + RBBIDebugPrintf("\n"); 1.1176 + 1.1177 + for (n=0; n<fDStates->size(); n++) { 1.1178 + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); 1.1179 + RBBIDebugPrintf(" %3d | " , n); 1.1180 + RBBIDebugPrintf("%3d %3d %5d ", sd->fAccepting, sd->fLookAhead, sd->fTagsIdx); 1.1181 + for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { 1.1182 + RBBIDebugPrintf(" %2d", sd->fDtran->elementAti(c)); 1.1183 + } 1.1184 + RBBIDebugPrintf("\n"); 1.1185 + } 1.1186 + RBBIDebugPrintf("\n\n"); 1.1187 +} 1.1188 +#endif 1.1189 + 1.1190 + 1.1191 + 1.1192 +//----------------------------------------------------------------------------- 1.1193 +// 1.1194 +// printRuleStatusTable Debug Function. Dump the common rule status table 1.1195 +// 1.1196 +//----------------------------------------------------------------------------- 1.1197 +#ifdef RBBI_DEBUG 1.1198 +void RBBITableBuilder::printRuleStatusTable() { 1.1199 + int32_t thisRecord = 0; 1.1200 + int32_t nextRecord = 0; 1.1201 + int i; 1.1202 + UVector *tbl = fRB->fRuleStatusVals; 1.1203 + 1.1204 + RBBIDebugPrintf("index | tags \n"); 1.1205 + RBBIDebugPrintf("-------------------\n"); 1.1206 + 1.1207 + while (nextRecord < tbl->size()) { 1.1208 + thisRecord = nextRecord; 1.1209 + nextRecord = thisRecord + tbl->elementAti(thisRecord) + 1; 1.1210 + RBBIDebugPrintf("%4d ", thisRecord); 1.1211 + for (i=thisRecord+1; i<nextRecord; i++) { 1.1212 + RBBIDebugPrintf(" %5d", tbl->elementAti(i)); 1.1213 + } 1.1214 + RBBIDebugPrintf("\n"); 1.1215 + } 1.1216 + RBBIDebugPrintf("\n\n"); 1.1217 +} 1.1218 +#endif 1.1219 + 1.1220 + 1.1221 +//----------------------------------------------------------------------------- 1.1222 +// 1.1223 +// RBBIStateDescriptor Methods. This is a very struct-like class 1.1224 +// Most access is directly to the fields. 1.1225 +// 1.1226 +//----------------------------------------------------------------------------- 1.1227 + 1.1228 +RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatus) { 1.1229 + fMarked = FALSE; 1.1230 + fAccepting = 0; 1.1231 + fLookAhead = 0; 1.1232 + fTagsIdx = 0; 1.1233 + fTagVals = NULL; 1.1234 + fPositions = NULL; 1.1235 + fDtran = NULL; 1.1236 + 1.1237 + fDtran = new UVector(lastInputSymbol+1, *fStatus); 1.1238 + if (U_FAILURE(*fStatus)) { 1.1239 + return; 1.1240 + } 1.1241 + if (fDtran == NULL) { 1.1242 + *fStatus = U_MEMORY_ALLOCATION_ERROR; 1.1243 + return; 1.1244 + } 1.1245 + fDtran->setSize(lastInputSymbol+1, *fStatus); // fDtran needs to be pre-sized. 1.1246 + // It is indexed by input symbols, and will 1.1247 + // hold the next state number for each 1.1248 + // symbol. 1.1249 +} 1.1250 + 1.1251 + 1.1252 +RBBIStateDescriptor::~RBBIStateDescriptor() { 1.1253 + delete fPositions; 1.1254 + delete fDtran; 1.1255 + delete fTagVals; 1.1256 + fPositions = NULL; 1.1257 + fDtran = NULL; 1.1258 + fTagVals = NULL; 1.1259 +} 1.1260 + 1.1261 +U_NAMESPACE_END 1.1262 + 1.1263 +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */