1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/rbbinode.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,358 @@ 1.4 +/* 1.5 +*************************************************************************** 1.6 +* Copyright (C) 2002-2008 International Business Machines Corporation * 1.7 +* and others. All rights reserved. * 1.8 +*************************************************************************** 1.9 +*/ 1.10 + 1.11 +// 1.12 +// File: rbbinode.cpp 1.13 +// 1.14 +// Implementation of class RBBINode, which represents a node in the 1.15 +// tree generated when parsing the Rules Based Break Iterator rules. 1.16 +// 1.17 +// This "Class" is actually closer to a struct. 1.18 +// Code using it is expected to directly access fields much of the time. 1.19 +// 1.20 + 1.21 +#include "unicode/utypes.h" 1.22 + 1.23 +#if !UCONFIG_NO_BREAK_ITERATION 1.24 + 1.25 +#include "unicode/unistr.h" 1.26 +#include "unicode/uniset.h" 1.27 +#include "unicode/uchar.h" 1.28 +#include "unicode/parsepos.h" 1.29 +#include "uvector.h" 1.30 + 1.31 +#include "rbbirb.h" 1.32 +#include "rbbinode.h" 1.33 + 1.34 +#include "uassert.h" 1.35 + 1.36 + 1.37 +U_NAMESPACE_BEGIN 1.38 + 1.39 +#ifdef RBBI_DEBUG 1.40 +static int gLastSerial = 0; 1.41 +#endif 1.42 + 1.43 + 1.44 +//------------------------------------------------------------------------- 1.45 +// 1.46 +// Constructor. Just set the fields to reasonable default values. 1.47 +// 1.48 +//------------------------------------------------------------------------- 1.49 +RBBINode::RBBINode(NodeType t) : UMemory() { 1.50 +#ifdef RBBI_DEBUG 1.51 + fSerialNum = ++gLastSerial; 1.52 +#endif 1.53 + fType = t; 1.54 + fParent = NULL; 1.55 + fLeftChild = NULL; 1.56 + fRightChild = NULL; 1.57 + fInputSet = NULL; 1.58 + fFirstPos = 0; 1.59 + fLastPos = 0; 1.60 + fNullable = FALSE; 1.61 + fLookAheadEnd = FALSE; 1.62 + fVal = 0; 1.63 + fPrecedence = precZero; 1.64 + 1.65 + UErrorCode status = U_ZERO_ERROR; 1.66 + fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere 1.67 + fLastPosSet = new UVector(status); 1.68 + fFollowPos = new UVector(status); 1.69 + if (t==opCat) {fPrecedence = precOpCat;} 1.70 + else if (t==opOr) {fPrecedence = precOpOr;} 1.71 + else if (t==opStart) {fPrecedence = precStart;} 1.72 + else if (t==opLParen) {fPrecedence = precLParen;} 1.73 + 1.74 +} 1.75 + 1.76 + 1.77 +RBBINode::RBBINode(const RBBINode &other) : UMemory(other) { 1.78 +#ifdef RBBI_DEBUG 1.79 + fSerialNum = ++gLastSerial; 1.80 +#endif 1.81 + fType = other.fType; 1.82 + fParent = NULL; 1.83 + fLeftChild = NULL; 1.84 + fRightChild = NULL; 1.85 + fInputSet = other.fInputSet; 1.86 + fPrecedence = other.fPrecedence; 1.87 + fText = other.fText; 1.88 + fFirstPos = other.fFirstPos; 1.89 + fLastPos = other.fLastPos; 1.90 + fNullable = other.fNullable; 1.91 + fVal = other.fVal; 1.92 + UErrorCode status = U_ZERO_ERROR; 1.93 + fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere 1.94 + fLastPosSet = new UVector(status); 1.95 + fFollowPos = new UVector(status); 1.96 +} 1.97 + 1.98 + 1.99 +//------------------------------------------------------------------------- 1.100 +// 1.101 +// Destructor. Deletes both this node AND any child nodes, 1.102 +// except in the case of variable reference nodes. For 1.103 +// these, the l. child points back to the definition, which 1.104 +// is common for all references to the variable, meaning 1.105 +// it can't be deleted here. 1.106 +// 1.107 +//------------------------------------------------------------------------- 1.108 +RBBINode::~RBBINode() { 1.109 + // printf("deleting node %8x serial %4d\n", this, this->fSerialNum); 1.110 + delete fInputSet; 1.111 + fInputSet = NULL; 1.112 + 1.113 + switch (this->fType) { 1.114 + case varRef: 1.115 + case setRef: 1.116 + // for these node types, multiple instances point to the same "children" 1.117 + // Storage ownership of children handled elsewhere. Don't delete here. 1.118 + break; 1.119 + 1.120 + default: 1.121 + delete fLeftChild; 1.122 + fLeftChild = NULL; 1.123 + delete fRightChild; 1.124 + fRightChild = NULL; 1.125 + } 1.126 + 1.127 + 1.128 + delete fFirstPosSet; 1.129 + delete fLastPosSet; 1.130 + delete fFollowPos; 1.131 + 1.132 +} 1.133 + 1.134 + 1.135 +//------------------------------------------------------------------------- 1.136 +// 1.137 +// cloneTree Make a copy of the subtree rooted at this node. 1.138 +// Discard any variable references encountered along the way, 1.139 +// and replace with copies of the variable's definitions. 1.140 +// Used to replicate the expression underneath variable 1.141 +// references in preparation for generating the DFA tables. 1.142 +// 1.143 +//------------------------------------------------------------------------- 1.144 +RBBINode *RBBINode::cloneTree() { 1.145 + RBBINode *n; 1.146 + 1.147 + if (fType == RBBINode::varRef) { 1.148 + // If the current node is a variable reference, skip over it 1.149 + // and clone the definition of the variable instead. 1.150 + n = fLeftChild->cloneTree(); 1.151 + } else if (fType == RBBINode::uset) { 1.152 + n = this; 1.153 + } else { 1.154 + n = new RBBINode(*this); 1.155 + // Check for null pointer. 1.156 + if (n != NULL) { 1.157 + if (fLeftChild != NULL) { 1.158 + n->fLeftChild = fLeftChild->cloneTree(); 1.159 + n->fLeftChild->fParent = n; 1.160 + } 1.161 + if (fRightChild != NULL) { 1.162 + n->fRightChild = fRightChild->cloneTree(); 1.163 + n->fRightChild->fParent = n; 1.164 + } 1.165 + } 1.166 + } 1.167 + return n; 1.168 +} 1.169 + 1.170 + 1.171 + 1.172 +//------------------------------------------------------------------------- 1.173 +// 1.174 +// flattenVariables Walk a parse tree, replacing any variable 1.175 +// references with a copy of the variable's definition. 1.176 +// Aside from variables, the tree is not changed. 1.177 +// 1.178 +// Return the root of the tree. If the root was not a variable 1.179 +// reference, it remains unchanged - the root we started with 1.180 +// is the root we return. If, however, the root was a variable 1.181 +// reference, the root of the newly cloned replacement tree will 1.182 +// be returned, and the original tree deleted. 1.183 +// 1.184 +// This function works by recursively walking the tree 1.185 +// without doing anything until a variable reference is 1.186 +// found, then calling cloneTree() at that point. Any 1.187 +// nested references are handled by cloneTree(), not here. 1.188 +// 1.189 +//------------------------------------------------------------------------- 1.190 +RBBINode *RBBINode::flattenVariables() { 1.191 + if (fType == varRef) { 1.192 + RBBINode *retNode = fLeftChild->cloneTree(); 1.193 + delete this; 1.194 + return retNode; 1.195 + } 1.196 + 1.197 + if (fLeftChild != NULL) { 1.198 + fLeftChild = fLeftChild->flattenVariables(); 1.199 + fLeftChild->fParent = this; 1.200 + } 1.201 + if (fRightChild != NULL) { 1.202 + fRightChild = fRightChild->flattenVariables(); 1.203 + fRightChild->fParent = this; 1.204 + } 1.205 + return this; 1.206 +} 1.207 + 1.208 + 1.209 +//------------------------------------------------------------------------- 1.210 +// 1.211 +// flattenSets Walk the parse tree, replacing any nodes of type setRef 1.212 +// with a copy of the expression tree for the set. A set's 1.213 +// equivalent expression tree is precomputed and saved as 1.214 +// the left child of the uset node. 1.215 +// 1.216 +//------------------------------------------------------------------------- 1.217 +void RBBINode::flattenSets() { 1.218 + U_ASSERT(fType != setRef); 1.219 + 1.220 + if (fLeftChild != NULL) { 1.221 + if (fLeftChild->fType==setRef) { 1.222 + RBBINode *setRefNode = fLeftChild; 1.223 + RBBINode *usetNode = setRefNode->fLeftChild; 1.224 + RBBINode *replTree = usetNode->fLeftChild; 1.225 + fLeftChild = replTree->cloneTree(); 1.226 + fLeftChild->fParent = this; 1.227 + delete setRefNode; 1.228 + } else { 1.229 + fLeftChild->flattenSets(); 1.230 + } 1.231 + } 1.232 + 1.233 + if (fRightChild != NULL) { 1.234 + if (fRightChild->fType==setRef) { 1.235 + RBBINode *setRefNode = fRightChild; 1.236 + RBBINode *usetNode = setRefNode->fLeftChild; 1.237 + RBBINode *replTree = usetNode->fLeftChild; 1.238 + fRightChild = replTree->cloneTree(); 1.239 + fRightChild->fParent = this; 1.240 + delete setRefNode; 1.241 + } else { 1.242 + fRightChild->flattenSets(); 1.243 + } 1.244 + } 1.245 +} 1.246 + 1.247 + 1.248 + 1.249 +//------------------------------------------------------------------------- 1.250 +// 1.251 +// findNodes() Locate all the nodes of the specified type, starting 1.252 +// at the specified root. 1.253 +// 1.254 +//------------------------------------------------------------------------- 1.255 +void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) { 1.256 + /* test for buffer overflows */ 1.257 + if (U_FAILURE(status)) { 1.258 + return; 1.259 + } 1.260 + if (fType == kind) { 1.261 + dest->addElement(this, status); 1.262 + } 1.263 + if (fLeftChild != NULL) { 1.264 + fLeftChild->findNodes(dest, kind, status); 1.265 + } 1.266 + if (fRightChild != NULL) { 1.267 + fRightChild->findNodes(dest, kind, status); 1.268 + } 1.269 +} 1.270 + 1.271 + 1.272 +//------------------------------------------------------------------------- 1.273 +// 1.274 +// print. Print out a single node, for debugging. 1.275 +// 1.276 +//------------------------------------------------------------------------- 1.277 +#ifdef RBBI_DEBUG 1.278 +void RBBINode::printNode() { 1.279 + static const char * const nodeTypeNames[] = { 1.280 + "setRef", 1.281 + "uset", 1.282 + "varRef", 1.283 + "leafChar", 1.284 + "lookAhead", 1.285 + "tag", 1.286 + "endMark", 1.287 + "opStart", 1.288 + "opCat", 1.289 + "opOr", 1.290 + "opStar", 1.291 + "opPlus", 1.292 + "opQuestion", 1.293 + "opBreak", 1.294 + "opReverse", 1.295 + "opLParen" 1.296 + }; 1.297 + 1.298 + if (this==NULL) { 1.299 + RBBIDebugPrintf("%10p", (void *)this); 1.300 + } else { 1.301 + RBBIDebugPrintf("%10p %12s %10p %10p %10p %4d %6d %d ", 1.302 + (void *)this, nodeTypeNames[fType], (void *)fParent, (void *)fLeftChild, (void *)fRightChild, 1.303 + fSerialNum, fFirstPos, fVal); 1.304 + if (fType == varRef) { 1.305 + RBBI_DEBUG_printUnicodeString(fText); 1.306 + } 1.307 + } 1.308 + RBBIDebugPrintf("\n"); 1.309 +} 1.310 +#endif 1.311 + 1.312 + 1.313 +#ifdef RBBI_DEBUG 1.314 +U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) 1.315 +{ 1.316 + int i; 1.317 + for (i=0; i<s.length(); i++) { 1.318 + RBBIDebugPrintf("%c", s.charAt(i)); 1.319 + // putc(s.charAt(i), stdout); 1.320 + } 1.321 + for (i=s.length(); i<minWidth; i++) { 1.322 + RBBIDebugPrintf(" "); 1.323 + } 1.324 +} 1.325 +#endif 1.326 + 1.327 + 1.328 +//------------------------------------------------------------------------- 1.329 +// 1.330 +// print. Print out the tree of nodes rooted at "this" 1.331 +// 1.332 +//------------------------------------------------------------------------- 1.333 +#ifdef RBBI_DEBUG 1.334 +void RBBINode::printTree(UBool printHeading) { 1.335 + if (printHeading) { 1.336 + RBBIDebugPrintf( "-------------------------------------------------------------------\n" 1.337 + " Address type Parent LeftChild RightChild serial position value\n" 1.338 + ); 1.339 + } 1.340 + this->printNode(); 1.341 + if (this != NULL) { 1.342 + // Only dump the definition under a variable reference if asked to. 1.343 + // Unconditinally dump children of all other node types. 1.344 + if (fType != varRef) { 1.345 + if (fLeftChild != NULL) { 1.346 + fLeftChild->printTree(FALSE); 1.347 + } 1.348 + 1.349 + if (fRightChild != NULL) { 1.350 + fRightChild->printTree(FALSE); 1.351 + } 1.352 + } 1.353 + } 1.354 +} 1.355 +#endif 1.356 + 1.357 + 1.358 + 1.359 +U_NAMESPACE_END 1.360 + 1.361 +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */