michael@0: /* michael@0: *************************************************************************** michael@0: * Copyright (C) 2002-2008 International Business Machines Corporation * michael@0: * and others. All rights reserved. * michael@0: *************************************************************************** michael@0: */ michael@0: michael@0: // michael@0: // File: rbbinode.cpp michael@0: // michael@0: // Implementation of class RBBINode, which represents a node in the michael@0: // tree generated when parsing the Rules Based Break Iterator rules. michael@0: // michael@0: // This "Class" is actually closer to a struct. michael@0: // Code using it is expected to directly access fields much of the time. michael@0: // michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_BREAK_ITERATION michael@0: michael@0: #include "unicode/unistr.h" michael@0: #include "unicode/uniset.h" michael@0: #include "unicode/uchar.h" michael@0: #include "unicode/parsepos.h" michael@0: #include "uvector.h" michael@0: michael@0: #include "rbbirb.h" michael@0: #include "rbbinode.h" michael@0: michael@0: #include "uassert.h" michael@0: michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: #ifdef RBBI_DEBUG michael@0: static int gLastSerial = 0; michael@0: #endif michael@0: michael@0: michael@0: //------------------------------------------------------------------------- michael@0: // michael@0: // Constructor. Just set the fields to reasonable default values. michael@0: // michael@0: //------------------------------------------------------------------------- michael@0: RBBINode::RBBINode(NodeType t) : UMemory() { michael@0: #ifdef RBBI_DEBUG michael@0: fSerialNum = ++gLastSerial; michael@0: #endif michael@0: fType = t; michael@0: fParent = NULL; michael@0: fLeftChild = NULL; michael@0: fRightChild = NULL; michael@0: fInputSet = NULL; michael@0: fFirstPos = 0; michael@0: fLastPos = 0; michael@0: fNullable = FALSE; michael@0: fLookAheadEnd = FALSE; michael@0: fVal = 0; michael@0: fPrecedence = precZero; michael@0: michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere michael@0: fLastPosSet = new UVector(status); michael@0: fFollowPos = new UVector(status); michael@0: if (t==opCat) {fPrecedence = precOpCat;} michael@0: else if (t==opOr) {fPrecedence = precOpOr;} michael@0: else if (t==opStart) {fPrecedence = precStart;} michael@0: else if (t==opLParen) {fPrecedence = precLParen;} michael@0: michael@0: } michael@0: michael@0: michael@0: RBBINode::RBBINode(const RBBINode &other) : UMemory(other) { michael@0: #ifdef RBBI_DEBUG michael@0: fSerialNum = ++gLastSerial; michael@0: #endif michael@0: fType = other.fType; michael@0: fParent = NULL; michael@0: fLeftChild = NULL; michael@0: fRightChild = NULL; michael@0: fInputSet = other.fInputSet; michael@0: fPrecedence = other.fPrecedence; michael@0: fText = other.fText; michael@0: fFirstPos = other.fFirstPos; michael@0: fLastPos = other.fLastPos; michael@0: fNullable = other.fNullable; michael@0: fVal = other.fVal; michael@0: UErrorCode status = U_ZERO_ERROR; michael@0: fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere michael@0: fLastPosSet = new UVector(status); michael@0: fFollowPos = new UVector(status); michael@0: } michael@0: michael@0: michael@0: //------------------------------------------------------------------------- michael@0: // michael@0: // Destructor. Deletes both this node AND any child nodes, michael@0: // except in the case of variable reference nodes. For michael@0: // these, the l. child points back to the definition, which michael@0: // is common for all references to the variable, meaning michael@0: // it can't be deleted here. michael@0: // michael@0: //------------------------------------------------------------------------- michael@0: RBBINode::~RBBINode() { michael@0: // printf("deleting node %8x serial %4d\n", this, this->fSerialNum); michael@0: delete fInputSet; michael@0: fInputSet = NULL; michael@0: michael@0: switch (this->fType) { michael@0: case varRef: michael@0: case setRef: michael@0: // for these node types, multiple instances point to the same "children" michael@0: // Storage ownership of children handled elsewhere. Don't delete here. michael@0: break; michael@0: michael@0: default: michael@0: delete fLeftChild; michael@0: fLeftChild = NULL; michael@0: delete fRightChild; michael@0: fRightChild = NULL; michael@0: } michael@0: michael@0: michael@0: delete fFirstPosSet; michael@0: delete fLastPosSet; michael@0: delete fFollowPos; michael@0: michael@0: } michael@0: michael@0: michael@0: //------------------------------------------------------------------------- michael@0: // michael@0: // cloneTree Make a copy of the subtree rooted at this node. michael@0: // Discard any variable references encountered along the way, michael@0: // and replace with copies of the variable's definitions. michael@0: // Used to replicate the expression underneath variable michael@0: // references in preparation for generating the DFA tables. michael@0: // michael@0: //------------------------------------------------------------------------- michael@0: RBBINode *RBBINode::cloneTree() { michael@0: RBBINode *n; michael@0: michael@0: if (fType == RBBINode::varRef) { michael@0: // If the current node is a variable reference, skip over it michael@0: // and clone the definition of the variable instead. michael@0: n = fLeftChild->cloneTree(); michael@0: } else if (fType == RBBINode::uset) { michael@0: n = this; michael@0: } else { michael@0: n = new RBBINode(*this); michael@0: // Check for null pointer. michael@0: if (n != NULL) { michael@0: if (fLeftChild != NULL) { michael@0: n->fLeftChild = fLeftChild->cloneTree(); michael@0: n->fLeftChild->fParent = n; michael@0: } michael@0: if (fRightChild != NULL) { michael@0: n->fRightChild = fRightChild->cloneTree(); michael@0: n->fRightChild->fParent = n; michael@0: } michael@0: } michael@0: } michael@0: return n; michael@0: } michael@0: michael@0: michael@0: michael@0: //------------------------------------------------------------------------- michael@0: // michael@0: // flattenVariables Walk a parse tree, replacing any variable michael@0: // references with a copy of the variable's definition. michael@0: // Aside from variables, the tree is not changed. michael@0: // michael@0: // Return the root of the tree. If the root was not a variable michael@0: // reference, it remains unchanged - the root we started with michael@0: // is the root we return. If, however, the root was a variable michael@0: // reference, the root of the newly cloned replacement tree will michael@0: // be returned, and the original tree deleted. michael@0: // michael@0: // This function works by recursively walking the tree michael@0: // without doing anything until a variable reference is michael@0: // found, then calling cloneTree() at that point. Any michael@0: // nested references are handled by cloneTree(), not here. michael@0: // michael@0: //------------------------------------------------------------------------- michael@0: RBBINode *RBBINode::flattenVariables() { michael@0: if (fType == varRef) { michael@0: RBBINode *retNode = fLeftChild->cloneTree(); michael@0: delete this; michael@0: return retNode; michael@0: } michael@0: michael@0: if (fLeftChild != NULL) { michael@0: fLeftChild = fLeftChild->flattenVariables(); michael@0: fLeftChild->fParent = this; michael@0: } michael@0: if (fRightChild != NULL) { michael@0: fRightChild = fRightChild->flattenVariables(); michael@0: fRightChild->fParent = this; michael@0: } michael@0: return this; michael@0: } michael@0: michael@0: michael@0: //------------------------------------------------------------------------- michael@0: // michael@0: // flattenSets Walk the parse tree, replacing any nodes of type setRef michael@0: // with a copy of the expression tree for the set. A set's michael@0: // equivalent expression tree is precomputed and saved as michael@0: // the left child of the uset node. michael@0: // michael@0: //------------------------------------------------------------------------- michael@0: void RBBINode::flattenSets() { michael@0: U_ASSERT(fType != setRef); michael@0: michael@0: if (fLeftChild != NULL) { michael@0: if (fLeftChild->fType==setRef) { michael@0: RBBINode *setRefNode = fLeftChild; michael@0: RBBINode *usetNode = setRefNode->fLeftChild; michael@0: RBBINode *replTree = usetNode->fLeftChild; michael@0: fLeftChild = replTree->cloneTree(); michael@0: fLeftChild->fParent = this; michael@0: delete setRefNode; michael@0: } else { michael@0: fLeftChild->flattenSets(); michael@0: } michael@0: } michael@0: michael@0: if (fRightChild != NULL) { michael@0: if (fRightChild->fType==setRef) { michael@0: RBBINode *setRefNode = fRightChild; michael@0: RBBINode *usetNode = setRefNode->fLeftChild; michael@0: RBBINode *replTree = usetNode->fLeftChild; michael@0: fRightChild = replTree->cloneTree(); michael@0: fRightChild->fParent = this; michael@0: delete setRefNode; michael@0: } else { michael@0: fRightChild->flattenSets(); michael@0: } michael@0: } michael@0: } michael@0: michael@0: michael@0: michael@0: //------------------------------------------------------------------------- michael@0: // michael@0: // findNodes() Locate all the nodes of the specified type, starting michael@0: // at the specified root. michael@0: // michael@0: //------------------------------------------------------------------------- michael@0: void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) { michael@0: /* test for buffer overflows */ michael@0: if (U_FAILURE(status)) { michael@0: return; michael@0: } michael@0: if (fType == kind) { michael@0: dest->addElement(this, status); michael@0: } michael@0: if (fLeftChild != NULL) { michael@0: fLeftChild->findNodes(dest, kind, status); michael@0: } michael@0: if (fRightChild != NULL) { michael@0: fRightChild->findNodes(dest, kind, status); michael@0: } michael@0: } michael@0: michael@0: michael@0: //------------------------------------------------------------------------- michael@0: // michael@0: // print. Print out a single node, for debugging. michael@0: // michael@0: //------------------------------------------------------------------------- michael@0: #ifdef RBBI_DEBUG michael@0: void RBBINode::printNode() { michael@0: static const char * const nodeTypeNames[] = { michael@0: "setRef", michael@0: "uset", michael@0: "varRef", michael@0: "leafChar", michael@0: "lookAhead", michael@0: "tag", michael@0: "endMark", michael@0: "opStart", michael@0: "opCat", michael@0: "opOr", michael@0: "opStar", michael@0: "opPlus", michael@0: "opQuestion", michael@0: "opBreak", michael@0: "opReverse", michael@0: "opLParen" michael@0: }; michael@0: michael@0: if (this==NULL) { michael@0: RBBIDebugPrintf("%10p", (void *)this); michael@0: } else { michael@0: RBBIDebugPrintf("%10p %12s %10p %10p %10p %4d %6d %d ", michael@0: (void *)this, nodeTypeNames[fType], (void *)fParent, (void *)fLeftChild, (void *)fRightChild, michael@0: fSerialNum, fFirstPos, fVal); michael@0: if (fType == varRef) { michael@0: RBBI_DEBUG_printUnicodeString(fText); michael@0: } michael@0: } michael@0: RBBIDebugPrintf("\n"); michael@0: } michael@0: #endif michael@0: michael@0: michael@0: #ifdef RBBI_DEBUG michael@0: U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) michael@0: { michael@0: int i; michael@0: for (i=0; iprintNode(); michael@0: if (this != NULL) { michael@0: // Only dump the definition under a variable reference if asked to. michael@0: // Unconditinally dump children of all other node types. michael@0: if (fType != varRef) { michael@0: if (fLeftChild != NULL) { michael@0: fLeftChild->printTree(FALSE); michael@0: } michael@0: michael@0: if (fRightChild != NULL) { michael@0: fRightChild->printTree(FALSE); michael@0: } michael@0: } michael@0: } michael@0: } michael@0: #endif michael@0: michael@0: michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif /* #if !UCONFIG_NO_BREAK_ITERATION */