michael@0: // michael@0: // rbbisetb.cpp michael@0: // michael@0: /* michael@0: *************************************************************************** michael@0: * Copyright (C) 2002-2008 International Business Machines Corporation * michael@0: * and others. All rights reserved. * michael@0: *************************************************************************** michael@0: */ michael@0: // michael@0: // RBBISetBuilder Handles processing of Unicode Sets from RBBI rules michael@0: // (part of the rule building process.) michael@0: // michael@0: // Starting with the rules parse tree from the scanner, michael@0: // michael@0: // - Enumerate the set of UnicodeSets that are referenced michael@0: // by the RBBI rules. michael@0: // - compute a set of non-overlapping character ranges michael@0: // with all characters within a range belonging to the same michael@0: // set of input unicode sets. michael@0: // - Derive a set of non-overlapping UnicodeSet (like things) michael@0: // that will correspond to columns in the state table for michael@0: // the RBBI execution engine. All characters within one michael@0: // of these sets belong to the same set of the original michael@0: // UnicodeSets from the user's rules. michael@0: // - construct the trie table that maps input characters michael@0: // to the index of the matching non-overlapping set of sets from michael@0: // the previous step. 
michael@0: // michael@0: michael@0: #include "unicode/utypes.h" michael@0: michael@0: #if !UCONFIG_NO_BREAK_ITERATION michael@0: michael@0: #include "unicode/uniset.h" michael@0: #include "utrie.h" michael@0: #include "uvector.h" michael@0: #include "uassert.h" michael@0: #include "cmemory.h" michael@0: #include "cstring.h" michael@0: michael@0: #include "rbbisetb.h" michael@0: #include "rbbinode.h" michael@0: michael@0: michael@0: //------------------------------------------------------------------------ michael@0: // michael@0: // getFoldedRBBIValue Call-back function used during building of Trie table. michael@0: // Folding value: just store the offset (16 bits) michael@0: // if there is any non-0 entry. michael@0: // (It'd really be nice if the Trie builder would provide a michael@0: // simple default, so this function could go away from here.) michael@0: // michael@0: //------------------------------------------------------------------------ michael@0: /* folding value: just store the offset (16 bits) if there is any non-0 entry */ michael@0: U_CDECL_BEGIN michael@0: static uint32_t U_CALLCONV michael@0: getFoldedRBBIValue(UNewTrie *trie, UChar32 start, int32_t offset) { michael@0: uint32_t value; michael@0: UChar32 limit; michael@0: UBool inBlockZero; michael@0: michael@0: limit=start+0x400; michael@0: while(startfStatus; michael@0: fRangeList = 0; michael@0: fTrie = 0; michael@0: fTrieSize = 0; michael@0: fGroupCount = 0; michael@0: fSawBOF = FALSE; michael@0: } michael@0: michael@0: michael@0: //------------------------------------------------------------------------ michael@0: // michael@0: // Destructor michael@0: // michael@0: //------------------------------------------------------------------------ michael@0: RBBISetBuilder::~RBBISetBuilder() michael@0: { michael@0: RangeDescriptor *nextRangeDesc; michael@0: michael@0: // Walk through & delete the linked list of RangeDescriptors michael@0: for (nextRangeDesc = fRangeList; nextRangeDesc!=NULL;) { 
michael@0: RangeDescriptor *r = nextRangeDesc; michael@0: nextRangeDesc = r->fNext; michael@0: delete r; michael@0: } michael@0: michael@0: utrie_close(fTrie); michael@0: } michael@0: michael@0: michael@0: michael@0: michael@0: //------------------------------------------------------------------------ michael@0: // michael@0: // build Build the list of non-overlapping character ranges michael@0: // from the Unicode Sets. michael@0: // michael@0: //------------------------------------------------------------------------ michael@0: void RBBISetBuilder::build() { michael@0: RBBINode *usetNode; michael@0: RangeDescriptor *rlRange; michael@0: michael@0: if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "usets")) {printSets();} michael@0: michael@0: // michael@0: // Initialize the process by creating a single range encompassing all characters michael@0: // that is in no sets. michael@0: // michael@0: fRangeList = new RangeDescriptor(*fStatus); // will check for status here michael@0: if (fRangeList == NULL) { michael@0: *fStatus = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: fRangeList->fStartChar = 0; michael@0: fRangeList->fEndChar = 0x10ffff; michael@0: michael@0: if (U_FAILURE(*fStatus)) { michael@0: return; michael@0: } michael@0: michael@0: // michael@0: // Find the set of non-overlapping ranges of characters michael@0: // michael@0: int ni; michael@0: for (ni=0; ; ni++) { // Loop over each of the UnicodeSets encountered in the input rules michael@0: usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni); michael@0: if (usetNode==NULL) { michael@0: break; michael@0: } michael@0: michael@0: UnicodeSet *inputSet = usetNode->fInputSet; michael@0: int32_t inputSetRangeCount = inputSet->getRangeCount(); michael@0: int inputSetRangeIndex = 0; michael@0: rlRange = fRangeList; michael@0: michael@0: for (;;) { michael@0: if (inputSetRangeIndex >= inputSetRangeCount) { michael@0: break; michael@0: } michael@0: UChar32 inputSetRangeBegin = 
inputSet->getRangeStart(inputSetRangeIndex); michael@0: UChar32 inputSetRangeEnd = inputSet->getRangeEnd(inputSetRangeIndex); michael@0: michael@0: // skip over ranges from the range list that are completely michael@0: // below the current range from the input unicode set. michael@0: while (rlRange->fEndChar < inputSetRangeBegin) { michael@0: rlRange = rlRange->fNext; michael@0: } michael@0: michael@0: // If the start of the range from the range list is before with michael@0: // the start of the range from the unicode set, split the range list range michael@0: // in two, with one part being before (wholly outside of) the unicode set michael@0: // and the other containing the rest. michael@0: // Then continue the loop; the post-split current range will then be skipped michael@0: // over michael@0: if (rlRange->fStartChar < inputSetRangeBegin) { michael@0: rlRange->split(inputSetRangeBegin, *fStatus); michael@0: if (U_FAILURE(*fStatus)) { michael@0: return; michael@0: } michael@0: continue; michael@0: } michael@0: michael@0: // Same thing at the end of the ranges... michael@0: // If the end of the range from the range list doesn't coincide with michael@0: // the end of the range from the unicode set, split the range list michael@0: // range in two. The first part of the split range will be michael@0: // wholly inside the Unicode set. michael@0: if (rlRange->fEndChar > inputSetRangeEnd) { michael@0: rlRange->split(inputSetRangeEnd+1, *fStatus); michael@0: if (U_FAILURE(*fStatus)) { michael@0: return; michael@0: } michael@0: } michael@0: michael@0: // The current rlRange is now entirely within the UnicodeSet range. michael@0: // Add this unicode set to the list of sets for this rlRange michael@0: if (rlRange->fIncludesSets->indexOf(usetNode) == -1) { michael@0: rlRange->fIncludesSets->addElement(usetNode, *fStatus); michael@0: if (U_FAILURE(*fStatus)) { michael@0: return; michael@0: } michael@0: } michael@0: michael@0: // Advance over ranges that we are finished with. 
michael@0: if (inputSetRangeEnd == rlRange->fEndChar) { michael@0: inputSetRangeIndex++; michael@0: } michael@0: rlRange = rlRange->fNext; michael@0: } michael@0: } michael@0: michael@0: if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "range")) { printRanges();} michael@0: michael@0: // michael@0: // Group the above ranges, with each group consisting of one or more michael@0: // ranges that are in exactly the same set of original UnicodeSets. michael@0: // The groups are numbered, and these group numbers are the set of michael@0: // input symbols recognized by the run-time state machine. michael@0: // michael@0: // Numbering: # 0 (state table column 0) is unused. michael@0: // # 1 is reserved - table column 1 is for end-of-input michael@0: // # 2 is reserved - table column 2 is for beginning-in-input michael@0: // # 3 is the first range list. michael@0: // michael@0: RangeDescriptor *rlSearchRange; michael@0: for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) { michael@0: for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange->fNext) { michael@0: if (rlRange->fIncludesSets->equals(*rlSearchRange->fIncludesSets)) { michael@0: rlRange->fNum = rlSearchRange->fNum; michael@0: break; michael@0: } michael@0: } michael@0: if (rlRange->fNum == 0) { michael@0: fGroupCount ++; michael@0: rlRange->fNum = fGroupCount+2; michael@0: rlRange->setDictionaryFlag(); michael@0: addValToSets(rlRange->fIncludesSets, fGroupCount+2); michael@0: } michael@0: } michael@0: michael@0: // Handle input sets that contain the special string {eof}. michael@0: // Column 1 of the state table is reserved for EOF on input. michael@0: // Column 2 is reserved for before-the-start-input. michael@0: // (This column can be optimized away later if there are no rule michael@0: // references to {bof}.) 
michael@0: // Add this column value (1 or 2) to the equivalent expression michael@0: // subtree for each UnicodeSet that contains the string {eof} michael@0: // Because {bof} and {eof} are not a characters in the normal sense, michael@0: // they doesn't affect the computation of ranges or TRIE. michael@0: static const UChar eofUString[] = {0x65, 0x6f, 0x66, 0}; michael@0: static const UChar bofUString[] = {0x62, 0x6f, 0x66, 0}; michael@0: michael@0: UnicodeString eofString(eofUString); michael@0: UnicodeString bofString(bofUString); michael@0: for (ni=0; ; ni++) { // Loop over each of the UnicodeSets encountered in the input rules michael@0: usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni); michael@0: if (usetNode==NULL) { michael@0: break; michael@0: } michael@0: UnicodeSet *inputSet = usetNode->fInputSet; michael@0: if (inputSet->contains(eofString)) { michael@0: addValToSet(usetNode, 1); michael@0: } michael@0: if (inputSet->contains(bofString)) { michael@0: addValToSet(usetNode, 2); michael@0: fSawBOF = TRUE; michael@0: } michael@0: } michael@0: michael@0: michael@0: if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();} michael@0: if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();} michael@0: michael@0: // michael@0: // Build the Trie table for mapping UChar32 values to the corresponding michael@0: // range group number michael@0: // michael@0: fTrie = utrie_open(NULL, // Pre-existing trie to be filled in michael@0: NULL, // Data array (utrie will allocate one) michael@0: 100000, // Max Data Length michael@0: 0, // Initial value for all code points michael@0: 0, // Lead surrogate unit value michael@0: TRUE); // Keep Latin 1 in separately michael@0: michael@0: michael@0: for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) { michael@0: utrie_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar+1, rlRange->fNum, TRUE); michael@0: } michael@0: } michael@0: michael@0: michael@0: michael@0: 
//----------------------------------------------------------------------------------- michael@0: // michael@0: // getTrieSize() Return the size that will be required to serialize the Trie. michael@0: // michael@0: //----------------------------------------------------------------------------------- michael@0: int32_t RBBISetBuilder::getTrieSize() /*const*/ { michael@0: fTrieSize = utrie_serialize(fTrie, michael@0: NULL, // Buffer michael@0: 0, // Capacity michael@0: getFoldedRBBIValue, michael@0: TRUE, // Reduce to 16 bits michael@0: fStatus); michael@0: // RBBIDebugPrintf("Trie table size is %d\n", trieSize); michael@0: return fTrieSize; michael@0: } michael@0: michael@0: michael@0: //----------------------------------------------------------------------------------- michael@0: // michael@0: // serializeTrie() Put the serialized trie at the specified address. michael@0: // Trust the caller to have given us enough memory. michael@0: // getTrieSize() MUST be called first. michael@0: // michael@0: //----------------------------------------------------------------------------------- michael@0: void RBBISetBuilder::serializeTrie(uint8_t *where) { michael@0: utrie_serialize(fTrie, michael@0: where, // Buffer michael@0: fTrieSize, // Capacity michael@0: getFoldedRBBIValue, michael@0: TRUE, // Reduce to 16 bits michael@0: fStatus); michael@0: } michael@0: michael@0: //------------------------------------------------------------------------ michael@0: // michael@0: // addValToSets Add a runtime-mapped input value to each uset from a michael@0: // list of uset nodes. (val corresponds to a state table column.) michael@0: // For each of the original Unicode sets - which correspond michael@0: // directly to uset nodes - a logically equivalent expression michael@0: // is constructed in terms of the remapped runtime input michael@0: // symbol set. This function adds one runtime input symbol to michael@0: // a list of sets. 
michael@0: // michael@0: // The "logically equivalent expression" is the tree for an michael@0: // or-ing together of all of the symbols that go into the set. michael@0: // michael@0: //------------------------------------------------------------------------ michael@0: void RBBISetBuilder::addValToSets(UVector *sets, uint32_t val) { michael@0: int32_t ix; michael@0: michael@0: for (ix=0; ixsize(); ix++) { michael@0: RBBINode *usetNode = (RBBINode *)sets->elementAt(ix); michael@0: addValToSet(usetNode, val); michael@0: } michael@0: } michael@0: michael@0: void RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) { michael@0: RBBINode *leafNode = new RBBINode(RBBINode::leafChar); michael@0: if (leafNode == NULL) { michael@0: *fStatus = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: leafNode->fVal = (unsigned short)val; michael@0: if (usetNode->fLeftChild == NULL) { michael@0: usetNode->fLeftChild = leafNode; michael@0: leafNode->fParent = usetNode; michael@0: } else { michael@0: // There are already input symbols present for this set. michael@0: // Set up an OR node, with the previous stuff as the left child michael@0: // and the new value as the right child. 
michael@0: RBBINode *orNode = new RBBINode(RBBINode::opOr); michael@0: if (orNode == NULL) { michael@0: *fStatus = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: orNode->fLeftChild = usetNode->fLeftChild; michael@0: orNode->fRightChild = leafNode; michael@0: orNode->fLeftChild->fParent = orNode; michael@0: orNode->fRightChild->fParent = orNode; michael@0: usetNode->fLeftChild = orNode; michael@0: orNode->fParent = usetNode; michael@0: } michael@0: } michael@0: michael@0: michael@0: //------------------------------------------------------------------------ michael@0: // michael@0: // getNumCharCategories michael@0: // michael@0: //------------------------------------------------------------------------ michael@0: int32_t RBBISetBuilder::getNumCharCategories() const { michael@0: return fGroupCount + 3; michael@0: } michael@0: michael@0: michael@0: //------------------------------------------------------------------------ michael@0: // michael@0: // sawBOF michael@0: // michael@0: //------------------------------------------------------------------------ michael@0: UBool RBBISetBuilder::sawBOF() const { michael@0: return fSawBOF; michael@0: } michael@0: michael@0: michael@0: //------------------------------------------------------------------------ michael@0: // michael@0: // getFirstChar Given a runtime RBBI character category, find michael@0: // the first UChar32 that is in the set of chars michael@0: // in the category. 
michael@0: //------------------------------------------------------------------------ michael@0: UChar32 RBBISetBuilder::getFirstChar(int32_t category) const { michael@0: RangeDescriptor *rlRange; michael@0: UChar32 retVal = (UChar32)-1; michael@0: for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) { michael@0: if (rlRange->fNum == category) { michael@0: retVal = rlRange->fStartChar; michael@0: break; michael@0: } michael@0: } michael@0: return retVal; michael@0: } michael@0: michael@0: michael@0: michael@0: //------------------------------------------------------------------------ michael@0: // michael@0: // printRanges A debugging function. michael@0: // dump out all of the range definitions. michael@0: // michael@0: //------------------------------------------------------------------------ michael@0: #ifdef RBBI_DEBUG michael@0: void RBBISetBuilder::printRanges() { michael@0: RangeDescriptor *rlRange; michael@0: int i; michael@0: michael@0: RBBIDebugPrintf("\n\n Nonoverlapping Ranges ...\n"); michael@0: for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) { michael@0: RBBIDebugPrintf("%2i %4x-%4x ", rlRange->fNum, rlRange->fStartChar, rlRange->fEndChar); michael@0: michael@0: for (i=0; ifIncludesSets->size(); i++) { michael@0: RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i); michael@0: UnicodeString setName = UNICODE_STRING("anon", 4); michael@0: RBBINode *setRef = usetNode->fParent; michael@0: if (setRef != NULL) { michael@0: RBBINode *varRef = setRef->fParent; michael@0: if (varRef != NULL && varRef->fType == RBBINode::varRef) { michael@0: setName = varRef->fText; michael@0: } michael@0: } michael@0: RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" "); michael@0: } michael@0: RBBIDebugPrintf("\n"); michael@0: } michael@0: } michael@0: #endif michael@0: michael@0: michael@0: //------------------------------------------------------------------------ michael@0: // michael@0: // printRangeGroups A debugging 
//                        function.
//                        dump out all of the range groups.
//
//------------------------------------------------------------------------
#ifdef RBBI_DEBUG
void RBBISetBuilder::printRangeGroups() {
    RangeDescriptor       *rlRange;
    RangeDescriptor       *tRange;
    int                    i;
    int                    lastPrintedGroupNum = 0;

    RBBIDebugPrintf("\nRanges grouped by Unicode Set Membership...\n");
    for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
        // 0xbfff masks off bit 14 (0x4000), the flag that
        //   RangeDescriptor::setDictionaryFlag() may have set in fNum.
        int groupNum = rlRange->fNum & 0xbfff;
        // Each group is printed once, at the first range that carries a
        //   group number higher than any printed so far.
        if (groupNum > lastPrintedGroupNum) {
            lastPrintedGroupNum = groupNum;
            RBBIDebugPrintf("%2i ", groupNum);

            // NOTE(review): this literal may once have held a "<...>" marker
            //   that was lost together with the other '<' ... '>' text missing
            //   from the reviewed copy of this file - confirm against upstream.
            if (rlRange->fNum & 0x4000) { RBBIDebugPrintf(" ");}

            // Names of the sets that make up this group ("anon" when the
            //   set's grandparent node is not a varRef).
            for (i=0; i<rlRange->fIncludesSets->size(); i++) {
                RBBINode       *usetNode    = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
                UnicodeString   setName = UNICODE_STRING("anon", 4);
                RBBINode       *setRef = usetNode->fParent;
                if (setRef != NULL) {
                    RBBINode *varRef = setRef->fParent;
                    if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
                        setName = varRef->fText;
                    }
                }
                RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" ");
            }

            // Every range from here on with the same number, five per output line.
            i = 0;
            for (tRange = rlRange; tRange != 0; tRange = tRange->fNext) {
                if (tRange->fNum == rlRange->fNum) {
                    if (i++ % 5 == 0) {
                        RBBIDebugPrintf("\n ");
                    }
                    RBBIDebugPrintf(" %05x-%05x", tRange->fStartChar, tRange->fEndChar);
                }
            }
            RBBIDebugPrintf("\n");
        }
    }
    RBBIDebugPrintf("\n");
}
#endif


//------------------------------------------------------------------------
//
//   printSets          A debugging function.
michael@0: // dump out all of the set definitions. michael@0: // michael@0: //------------------------------------------------------------------------ michael@0: #ifdef RBBI_DEBUG michael@0: void RBBISetBuilder::printSets() { michael@0: int i; michael@0: michael@0: RBBIDebugPrintf("\n\nUnicode Sets List\n------------------\n"); michael@0: for (i=0; ; i++) { michael@0: RBBINode *usetNode; michael@0: RBBINode *setRef; michael@0: RBBINode *varRef; michael@0: UnicodeString setName; michael@0: michael@0: usetNode = (RBBINode *)fRB->fUSetNodes->elementAt(i); michael@0: if (usetNode == NULL) { michael@0: break; michael@0: } michael@0: michael@0: RBBIDebugPrintf("%3d ", i); michael@0: setName = UNICODE_STRING("anonymous", 9); michael@0: setRef = usetNode->fParent; michael@0: if (setRef != NULL) { michael@0: varRef = setRef->fParent; michael@0: if (varRef != NULL && varRef->fType == RBBINode::varRef) { michael@0: setName = varRef->fText; michael@0: } michael@0: } michael@0: RBBI_DEBUG_printUnicodeString(setName); michael@0: RBBIDebugPrintf(" "); michael@0: RBBI_DEBUG_printUnicodeString(usetNode->fText); michael@0: RBBIDebugPrintf("\n"); michael@0: if (usetNode->fLeftChild != NULL) { michael@0: usetNode->fLeftChild->printTree(TRUE); michael@0: } michael@0: } michael@0: RBBIDebugPrintf("\n"); michael@0: } michael@0: #endif michael@0: michael@0: michael@0: michael@0: //------------------------------------------------------------------------------------- michael@0: // michael@0: // RangeDescriptor copy constructor michael@0: // michael@0: //------------------------------------------------------------------------------------- michael@0: michael@0: RangeDescriptor::RangeDescriptor(const RangeDescriptor &other, UErrorCode &status) { michael@0: int i; michael@0: michael@0: this->fStartChar = other.fStartChar; michael@0: this->fEndChar = other.fEndChar; michael@0: this->fNum = other.fNum; michael@0: this->fNext = NULL; michael@0: UErrorCode oldstatus = status; michael@0: 
this->fIncludesSets = new UVector(status); michael@0: if (U_FAILURE(oldstatus)) { michael@0: status = oldstatus; michael@0: } michael@0: if (U_FAILURE(status)) { michael@0: return; michael@0: } michael@0: /* test for NULL */ michael@0: if (this->fIncludesSets == 0) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: michael@0: for (i=0; isize(); i++) { michael@0: this->fIncludesSets->addElement(other.fIncludesSets->elementAt(i), status); michael@0: } michael@0: } michael@0: michael@0: michael@0: //------------------------------------------------------------------------------------- michael@0: // michael@0: // RangeDesriptor default constructor michael@0: // michael@0: //------------------------------------------------------------------------------------- michael@0: RangeDescriptor::RangeDescriptor(UErrorCode &status) { michael@0: this->fStartChar = 0; michael@0: this->fEndChar = 0; michael@0: this->fNum = 0; michael@0: this->fNext = NULL; michael@0: UErrorCode oldstatus = status; michael@0: this->fIncludesSets = new UVector(status); michael@0: if (U_FAILURE(oldstatus)) { michael@0: status = oldstatus; michael@0: } michael@0: if (U_FAILURE(status)) { michael@0: return; michael@0: } michael@0: /* test for NULL */ michael@0: if(this->fIncludesSets == 0) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: michael@0: } michael@0: michael@0: michael@0: //------------------------------------------------------------------------------------- michael@0: // michael@0: // RangeDesriptor Destructor michael@0: // michael@0: //------------------------------------------------------------------------------------- michael@0: RangeDescriptor::~RangeDescriptor() { michael@0: delete fIncludesSets; michael@0: fIncludesSets = NULL; michael@0: } michael@0: michael@0: //------------------------------------------------------------------------------------- michael@0: // michael@0: // RangeDesriptor::split() 
michael@0: // michael@0: //------------------------------------------------------------------------------------- michael@0: void RangeDescriptor::split(UChar32 where, UErrorCode &status) { michael@0: U_ASSERT(where>fStartChar && where<=fEndChar); michael@0: RangeDescriptor *nr = new RangeDescriptor(*this, status); michael@0: if(nr == 0) { michael@0: status = U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: if (U_FAILURE(status)) { michael@0: delete nr; michael@0: return; michael@0: } michael@0: // RangeDescriptor copy constructor copies all fields. michael@0: // Only need to update those that are different after the split. michael@0: nr->fStartChar = where; michael@0: this->fEndChar = where-1; michael@0: nr->fNext = this->fNext; michael@0: this->fNext = nr; michael@0: } michael@0: michael@0: michael@0: //------------------------------------------------------------------------------------- michael@0: // michael@0: // RangeDescriptor::setDictionaryFlag michael@0: // michael@0: // Character Category Numbers that include characters from michael@0: // the original Unicode Set named "dictionary" have bit 14 michael@0: // set to 1. The RBBI runtime engine uses this to trigger michael@0: // use of the word dictionary. michael@0: // michael@0: // This function looks through the Unicode Sets that it michael@0: // (the range) includes, and sets the bit in fNum when michael@0: // "dictionary" is among them. michael@0: // michael@0: // TODO: a faster way would be to find the set node for michael@0: // "dictionary" just once, rather than looking it michael@0: // up by name every time. 
michael@0: // michael@0: //------------------------------------------------------------------------------------- michael@0: void RangeDescriptor::setDictionaryFlag() { michael@0: int i; michael@0: michael@0: for (i=0; ifIncludesSets->size(); i++) { michael@0: RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i); michael@0: UnicodeString setName; michael@0: RBBINode *setRef = usetNode->fParent; michael@0: if (setRef != NULL) { michael@0: RBBINode *varRef = setRef->fParent; michael@0: if (varRef != NULL && varRef->fType == RBBINode::varRef) { michael@0: setName = varRef->fText; michael@0: } michael@0: } michael@0: if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) { // TODO: no string literals. michael@0: this->fNum |= 0x4000; michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: michael@0: michael@0: U_NAMESPACE_END michael@0: michael@0: #endif /* #if !UCONFIG_NO_BREAK_ITERATION */