intl/icu/source/common/rbbisetb.cpp

Wed, 31 Dec 2014 07:22:50 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:22:50 +0100
branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
permissions
-rw-r--r--

Correct previous dual key logic pending first delivery installment.

     1 //
     2 //  rbbisetb.cpp
     3 //
     4 /*
     5 ***************************************************************************
     6 *   Copyright (C) 2002-2008 International Business Machines Corporation   *
     7 *   and others. All rights reserved.                                      *
     8 ***************************************************************************
     9 */
    10 //
    11 //  RBBISetBuilder   Handles processing of Unicode Sets from RBBI rules
    12 //                   (part of the rule building process.)
    13 //
    14 //      Starting with the rules parse tree from the scanner,
    15 //
    16 //                   -  Enumerate the set of UnicodeSets that are referenced
    17 //                      by the RBBI rules.
    18 //                   -  compute a set of non-overlapping character ranges
    19 //                      with all characters within a range belonging to the same
    20 //                      set of input uniocde sets.
    21 //                   -  Derive a set of non-overlapping UnicodeSet (like things)
    22 //                      that will correspond to columns in the state table for
    23 //                      the RBBI execution engine.  All characters within one
    24 //                      of these sets belong to the same set of the original
    25 //                      UnicodeSets from the user's rules.
    26 //                   -  construct the trie table that maps input characters
    27 //                      to the index of the matching non-overlapping set of set from
    28 //                      the previous step.
    29 //
    31 #include "unicode/utypes.h"
    33 #if !UCONFIG_NO_BREAK_ITERATION
    35 #include "unicode/uniset.h"
    36 #include "utrie.h"
    37 #include "uvector.h"
    38 #include "uassert.h"
    39 #include "cmemory.h"
    40 #include "cstring.h"
    42 #include "rbbisetb.h"
    43 #include "rbbinode.h"
    46 //------------------------------------------------------------------------
    47 //
    48 //   getFoldedRBBIValue        Call-back function used during building of Trie table.
    49 //                             Folding value: just store the offset (16 bits)
    50 //                             if there is any non-0 entry.
    51 //                             (It'd really be nice if the Trie builder would provide a
    52 //                             simple default, so this function could go away from here.)
    53 //
    54 //------------------------------------------------------------------------
    55 /* folding value: just store the offset (16 bits) if there is any non-0 entry */
    56 U_CDECL_BEGIN
    57 static uint32_t U_CALLCONV
    58 getFoldedRBBIValue(UNewTrie *trie, UChar32 start, int32_t offset) {
    59     uint32_t value;
    60     UChar32 limit;
    61     UBool inBlockZero;
    63     limit=start+0x400;
    64     while(start<limit) {
    65         value=utrie_get32(trie, start, &inBlockZero);
    66         if(inBlockZero) {
    67             start+=UTRIE_DATA_BLOCK_LENGTH;
    68         } else if(value!=0) {
    69             return (uint32_t)(offset|0x8000);
    70         } else {
    71             ++start;
    72         }
    73     }
    74     return 0;
    75 }
    78 U_CDECL_END
    82 U_NAMESPACE_BEGIN
    84 //------------------------------------------------------------------------
    85 //
    86 //   Constructor
    87 //
    88 //------------------------------------------------------------------------
    89 RBBISetBuilder::RBBISetBuilder(RBBIRuleBuilder *rb)
    90 {
    91     fRB             = rb;
    92     fStatus         = rb->fStatus;
    93     fRangeList      = 0;
    94     fTrie           = 0;
    95     fTrieSize       = 0;
    96     fGroupCount     = 0;
    97     fSawBOF         = FALSE;
    98 }
   101 //------------------------------------------------------------------------
   102 //
   103 //   Destructor
   104 //
   105 //------------------------------------------------------------------------
   106 RBBISetBuilder::~RBBISetBuilder()
   107 {
   108     RangeDescriptor   *nextRangeDesc;
   110     // Walk through & delete the linked list of RangeDescriptors
   111     for (nextRangeDesc = fRangeList; nextRangeDesc!=NULL;) {
   112         RangeDescriptor *r = nextRangeDesc;
   113         nextRangeDesc      = r->fNext;
   114         delete r;
   115     }
   117     utrie_close(fTrie);
   118 }
   123 //------------------------------------------------------------------------
   124 //
   125 //   build          Build the list of non-overlapping character ranges
   126 //                  from the Unicode Sets.
   127 //
   128 //------------------------------------------------------------------------
   129 void RBBISetBuilder::build() {
   130     RBBINode        *usetNode;
   131     RangeDescriptor *rlRange;
   133     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "usets")) {printSets();}
   135     //
   136     //  Initialize the process by creating a single range encompassing all characters
   137     //  that is in no sets.
   138     //
   139     fRangeList                = new RangeDescriptor(*fStatus); // will check for status here
   140     if (fRangeList == NULL) {
   141         *fStatus = U_MEMORY_ALLOCATION_ERROR;
   142         return;
   143     }
   144     fRangeList->fStartChar    = 0;
   145     fRangeList->fEndChar      = 0x10ffff;
   147     if (U_FAILURE(*fStatus)) {
   148         return;
   149     }
   151     //
   152     //  Find the set of non-overlapping ranges of characters
   153     //
   154     int  ni;
   155     for (ni=0; ; ni++) {        // Loop over each of the UnicodeSets encountered in the input rules
   156         usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
   157         if (usetNode==NULL) {
   158             break;
   159         }
   161         UnicodeSet      *inputSet             = usetNode->fInputSet;
   162         int32_t          inputSetRangeCount   = inputSet->getRangeCount();
   163         int              inputSetRangeIndex   = 0;
   164                          rlRange              = fRangeList;
   166         for (;;) {
   167             if (inputSetRangeIndex >= inputSetRangeCount) {
   168                 break;
   169             }
   170             UChar32      inputSetRangeBegin  = inputSet->getRangeStart(inputSetRangeIndex);
   171             UChar32      inputSetRangeEnd    = inputSet->getRangeEnd(inputSetRangeIndex);
   173             // skip over ranges from the range list that are completely
   174             //   below the current range from the input unicode set.
   175             while (rlRange->fEndChar < inputSetRangeBegin) {
   176                 rlRange = rlRange->fNext;
   177             }
   179             // If the start of the range from the range list is before with
   180             //   the start of the range from the unicode set, split the range list range
   181             //   in two, with one part being before (wholly outside of) the unicode set
   182             //   and the other containing the rest.
   183             //   Then continue the loop; the post-split current range will then be skipped
   184             //     over
   185             if (rlRange->fStartChar < inputSetRangeBegin) {
   186                 rlRange->split(inputSetRangeBegin, *fStatus);
   187                 if (U_FAILURE(*fStatus)) {
   188                     return;
   189                 }
   190                 continue;
   191             }
   193             // Same thing at the end of the ranges...
   194             // If the end of the range from the range list doesn't coincide with
   195             //   the end of the range from the unicode set, split the range list
   196             //   range in two.  The first part of the split range will be
   197             //   wholly inside the Unicode set.
   198             if (rlRange->fEndChar > inputSetRangeEnd) {
   199                 rlRange->split(inputSetRangeEnd+1, *fStatus);
   200                 if (U_FAILURE(*fStatus)) {
   201                     return;
   202                 }
   203             }
   205             // The current rlRange is now entirely within the UnicodeSet range.
   206             // Add this unicode set to the list of sets for this rlRange
   207             if (rlRange->fIncludesSets->indexOf(usetNode) == -1) {
   208                 rlRange->fIncludesSets->addElement(usetNode, *fStatus);
   209                 if (U_FAILURE(*fStatus)) {
   210                     return;
   211                 }
   212             }
   214             // Advance over ranges that we are finished with.
   215             if (inputSetRangeEnd == rlRange->fEndChar) {
   216                 inputSetRangeIndex++;
   217             }
   218             rlRange = rlRange->fNext;
   219         }
   220     }
   222     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "range")) { printRanges();}
   224     //
   225     //  Group the above ranges, with each group consisting of one or more
   226     //    ranges that are in exactly the same set of original UnicodeSets.
   227     //    The groups are numbered, and these group numbers are the set of
   228     //    input symbols recognized by the run-time state machine.
   229     //
   230     //    Numbering: # 0  (state table column 0) is unused.
   231     //               # 1  is reserved - table column 1 is for end-of-input
   232     //               # 2  is reserved - table column 2 is for beginning-in-input
   233     //               # 3  is the first range list.
   234     //
   235     RangeDescriptor *rlSearchRange;
   236     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
   237         for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange->fNext) {
   238             if (rlRange->fIncludesSets->equals(*rlSearchRange->fIncludesSets)) {
   239                 rlRange->fNum = rlSearchRange->fNum;
   240                 break;
   241             }
   242         }
   243         if (rlRange->fNum == 0) {
   244             fGroupCount ++;
   245             rlRange->fNum = fGroupCount+2; 
   246             rlRange->setDictionaryFlag();
   247             addValToSets(rlRange->fIncludesSets, fGroupCount+2);
   248         }
   249     }
   251     // Handle input sets that contain the special string {eof}.
   252     //   Column 1 of the state table is reserved for EOF on input.
   253     //   Column 2 is reserved for before-the-start-input.
   254     //            (This column can be optimized away later if there are no rule
   255     //             references to {bof}.)
   256     //   Add this column value (1 or 2) to the equivalent expression
   257     //     subtree for each UnicodeSet that contains the string {eof}
   258     //   Because {bof} and {eof} are not a characters in the normal sense,
   259     //   they doesn't affect the computation of ranges or TRIE.
   260     static const UChar eofUString[] = {0x65, 0x6f, 0x66, 0};
   261     static const UChar bofUString[] = {0x62, 0x6f, 0x66, 0};
   263     UnicodeString eofString(eofUString);
   264     UnicodeString bofString(bofUString);
   265     for (ni=0; ; ni++) {        // Loop over each of the UnicodeSets encountered in the input rules
   266         usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
   267         if (usetNode==NULL) {
   268             break;
   269         }
   270         UnicodeSet      *inputSet = usetNode->fInputSet;
   271         if (inputSet->contains(eofString)) {
   272             addValToSet(usetNode, 1);
   273         }
   274         if (inputSet->contains(bofString)) {
   275             addValToSet(usetNode, 2);
   276             fSawBOF = TRUE;
   277         }
   278     }
   281     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();}
   282     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();}
   284     //
   285     // Build the Trie table for mapping UChar32 values to the corresponding
   286     //   range group number
   287     //
   288     fTrie = utrie_open(NULL,    //  Pre-existing trie to be filled in
   289                       NULL,    //  Data array  (utrie will allocate one)
   290                       100000,  //  Max Data Length
   291                       0,       //  Initial value for all code points
   292                       0,       //  Lead surrogate unit value
   293                       TRUE);   //  Keep Latin 1 in separately
   296     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
   297         utrie_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar+1, rlRange->fNum, TRUE);
   298     }
   299 }
   303 //-----------------------------------------------------------------------------------
   304 //
   305 //  getTrieSize()    Return the size that will be required to serialize the Trie.
   306 //
   307 //-----------------------------------------------------------------------------------
   308 int32_t RBBISetBuilder::getTrieSize() /*const*/ {
   309     fTrieSize  = utrie_serialize(fTrie,
   310                                     NULL,                // Buffer
   311                                     0,                   // Capacity
   312                                     getFoldedRBBIValue,
   313                                     TRUE,                // Reduce to 16 bits
   314                                     fStatus);
   315     // RBBIDebugPrintf("Trie table size is %d\n", trieSize);
   316     return fTrieSize;
   317 }
   320 //-----------------------------------------------------------------------------------
   321 //
   322 //  serializeTrie()   Put the serialized trie at the specified address.
   323 //                    Trust the caller to have given us enough memory.
   324 //                    getTrieSize() MUST be called first.
   325 //
   326 //-----------------------------------------------------------------------------------
   327 void RBBISetBuilder::serializeTrie(uint8_t *where) {
   328     utrie_serialize(fTrie,
   329                     where,                   // Buffer
   330                     fTrieSize,               // Capacity
   331                     getFoldedRBBIValue,
   332                     TRUE,                    // Reduce to 16 bits
   333                     fStatus);
   334 }
   336 //------------------------------------------------------------------------
   337 //
   338 //  addValToSets     Add a runtime-mapped input value to each uset from a
   339 //                   list of uset nodes. (val corresponds to a state table column.)
   340 //                   For each of the original Unicode sets - which correspond
   341 //                   directly to uset nodes - a logically equivalent expression
   342 //                   is constructed in terms of the remapped runtime input
   343 //                   symbol set.  This function adds one runtime input symbol to
   344 //                   a list of sets.
   345 //
   346 //                   The "logically equivalent expression" is the tree for an
   347 //                   or-ing together of all of the symbols that go into the set.
   348 //
   349 //------------------------------------------------------------------------
   350 void  RBBISetBuilder::addValToSets(UVector *sets, uint32_t val) {
   351     int32_t       ix;
   353     for (ix=0; ix<sets->size(); ix++) {
   354         RBBINode *usetNode = (RBBINode *)sets->elementAt(ix);
   355         addValToSet(usetNode, val);
   356     }
   357 }
   359 void  RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) {
   360     RBBINode *leafNode = new RBBINode(RBBINode::leafChar);
   361     if (leafNode == NULL) {
   362         *fStatus = U_MEMORY_ALLOCATION_ERROR;
   363         return;
   364     }
   365     leafNode->fVal = (unsigned short)val;
   366     if (usetNode->fLeftChild == NULL) {
   367         usetNode->fLeftChild = leafNode;
   368         leafNode->fParent    = usetNode;
   369     } else {
   370         // There are already input symbols present for this set.
   371         // Set up an OR node, with the previous stuff as the left child
   372         //   and the new value as the right child.
   373         RBBINode *orNode = new RBBINode(RBBINode::opOr);
   374         if (orNode == NULL) {
   375             *fStatus = U_MEMORY_ALLOCATION_ERROR;
   376             return;
   377         }
   378         orNode->fLeftChild  = usetNode->fLeftChild;
   379         orNode->fRightChild = leafNode;
   380         orNode->fLeftChild->fParent  = orNode;
   381         orNode->fRightChild->fParent = orNode;
   382         usetNode->fLeftChild = orNode;
   383         orNode->fParent = usetNode;
   384     }
   385 }
   388 //------------------------------------------------------------------------
   389 //
   390 //   getNumCharCategories
   391 //
   392 //------------------------------------------------------------------------
   393 int32_t  RBBISetBuilder::getNumCharCategories() const {
   394     return fGroupCount + 3;
   395 }
   398 //------------------------------------------------------------------------
   399 //
   400 //   sawBOF
   401 //
   402 //------------------------------------------------------------------------
   403 UBool  RBBISetBuilder::sawBOF() const {
   404     return fSawBOF;
   405 }
   408 //------------------------------------------------------------------------
   409 //
   410 //   getFirstChar      Given a runtime RBBI character category, find
   411 //                     the first UChar32 that is in the set of chars 
   412 //                     in the category.
   413 //------------------------------------------------------------------------
   414 UChar32  RBBISetBuilder::getFirstChar(int32_t category) const {
   415     RangeDescriptor   *rlRange;
   416     UChar32            retVal = (UChar32)-1;
   417     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
   418         if (rlRange->fNum == category) {
   419             retVal = rlRange->fStartChar;
   420             break;
   421         }
   422     }
   423     return retVal;
   424 }
   428 //------------------------------------------------------------------------
   429 //
   430 //   printRanges        A debugging function.
   431 //                      dump out all of the range definitions.
   432 //
   433 //------------------------------------------------------------------------
   434 #ifdef RBBI_DEBUG
   435 void RBBISetBuilder::printRanges() {
   436     RangeDescriptor       *rlRange;
   437     int                    i;
   439     RBBIDebugPrintf("\n\n Nonoverlapping Ranges ...\n");
   440     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
   441         RBBIDebugPrintf("%2i  %4x-%4x  ", rlRange->fNum, rlRange->fStartChar, rlRange->fEndChar);
   443         for (i=0; i<rlRange->fIncludesSets->size(); i++) {
   444             RBBINode       *usetNode    = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
   445             UnicodeString   setName = UNICODE_STRING("anon", 4);
   446             RBBINode       *setRef = usetNode->fParent;
   447             if (setRef != NULL) {
   448                 RBBINode *varRef = setRef->fParent;
   449                 if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
   450                     setName = varRef->fText;
   451                 }
   452             }
   453             RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf("  ");
   454         }
   455         RBBIDebugPrintf("\n");
   456     }
   457 }
   458 #endif
   461 //------------------------------------------------------------------------
   462 //
   463 //   printRangeGroups     A debugging function.
   464 //                        dump out all of the range groups.
   465 //
   466 //------------------------------------------------------------------------
   467 #ifdef RBBI_DEBUG
   468 void RBBISetBuilder::printRangeGroups() {
   469     RangeDescriptor       *rlRange;
   470     RangeDescriptor       *tRange;
   471     int                    i;
   472     int                    lastPrintedGroupNum = 0;
   474     RBBIDebugPrintf("\nRanges grouped by Unicode Set Membership...\n");
   475     for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
   476         int groupNum = rlRange->fNum & 0xbfff;
   477         if (groupNum > lastPrintedGroupNum) {
   478             lastPrintedGroupNum = groupNum;
   479             RBBIDebugPrintf("%2i  ", groupNum);
   481             if (rlRange->fNum & 0x4000) { RBBIDebugPrintf(" <DICT> ");}
   483             for (i=0; i<rlRange->fIncludesSets->size(); i++) {
   484                 RBBINode       *usetNode    = (RBBINode *)rlRange->fIncludesSets->elementAt(i);
   485                 UnicodeString   setName = UNICODE_STRING("anon", 4);
   486                 RBBINode       *setRef = usetNode->fParent;
   487                 if (setRef != NULL) {
   488                     RBBINode *varRef = setRef->fParent;
   489                     if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
   490                         setName = varRef->fText;
   491                     }
   492                 }
   493                 RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" ");
   494             }
   496             i = 0;
   497             for (tRange = rlRange; tRange != 0; tRange = tRange->fNext) {
   498                 if (tRange->fNum == rlRange->fNum) {
   499                     if (i++ % 5 == 0) {
   500                         RBBIDebugPrintf("\n    ");
   501                     }
   502                     RBBIDebugPrintf("  %05x-%05x", tRange->fStartChar, tRange->fEndChar);
   503                 }
   504             }
   505             RBBIDebugPrintf("\n");
   506         }
   507     }
   508     RBBIDebugPrintf("\n");
   509 }
   510 #endif
   513 //------------------------------------------------------------------------
   514 //
   515 //   printSets          A debugging function.
   516 //                      dump out all of the set definitions.
   517 //
   518 //------------------------------------------------------------------------
   519 #ifdef RBBI_DEBUG
   520 void RBBISetBuilder::printSets() {
   521     int                   i;
   523     RBBIDebugPrintf("\n\nUnicode Sets List\n------------------\n");
   524     for (i=0; ; i++) {
   525         RBBINode        *usetNode;
   526         RBBINode        *setRef;
   527         RBBINode        *varRef;
   528         UnicodeString    setName;
   530         usetNode = (RBBINode *)fRB->fUSetNodes->elementAt(i);
   531         if (usetNode == NULL) {
   532             break;
   533         }
   535         RBBIDebugPrintf("%3d    ", i);
   536         setName = UNICODE_STRING("anonymous", 9);
   537         setRef = usetNode->fParent;
   538         if (setRef != NULL) {
   539             varRef = setRef->fParent;
   540             if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
   541                 setName = varRef->fText;
   542             }
   543         }
   544         RBBI_DEBUG_printUnicodeString(setName);
   545         RBBIDebugPrintf("   ");
   546         RBBI_DEBUG_printUnicodeString(usetNode->fText);
   547         RBBIDebugPrintf("\n");
   548         if (usetNode->fLeftChild != NULL) {
   549             usetNode->fLeftChild->printTree(TRUE);
   550         }
   551     }
   552     RBBIDebugPrintf("\n");
   553 }
   554 #endif
   558 //-------------------------------------------------------------------------------------
   559 //
   560 //  RangeDescriptor copy constructor
   561 //
   562 //-------------------------------------------------------------------------------------
   564 RangeDescriptor::RangeDescriptor(const RangeDescriptor &other, UErrorCode &status) {
   565     int  i;
   567     this->fStartChar    = other.fStartChar;
   568     this->fEndChar      = other.fEndChar;
   569     this->fNum          = other.fNum;
   570     this->fNext         = NULL;
   571     UErrorCode oldstatus = status;
   572     this->fIncludesSets = new UVector(status);
   573     if (U_FAILURE(oldstatus)) {
   574         status = oldstatus;
   575     }
   576     if (U_FAILURE(status)) {
   577         return;
   578     }
   579     /* test for NULL */
   580     if (this->fIncludesSets == 0) {
   581         status = U_MEMORY_ALLOCATION_ERROR;
   582         return;
   583     }
   585     for (i=0; i<other.fIncludesSets->size(); i++) {
   586         this->fIncludesSets->addElement(other.fIncludesSets->elementAt(i), status);
   587     }
   588 }
   591 //-------------------------------------------------------------------------------------
   592 //
   593 //  RangeDesriptor default constructor
   594 //
   595 //-------------------------------------------------------------------------------------
   596 RangeDescriptor::RangeDescriptor(UErrorCode &status) {
   597     this->fStartChar    = 0;
   598     this->fEndChar      = 0;
   599     this->fNum          = 0;
   600     this->fNext         = NULL;
   601     UErrorCode oldstatus = status;
   602     this->fIncludesSets = new UVector(status);
   603     if (U_FAILURE(oldstatus)) {
   604         status = oldstatus;
   605     }
   606     if (U_FAILURE(status)) {
   607         return;
   608     }
   609     /* test for NULL */
   610     if(this->fIncludesSets == 0) {
   611         status = U_MEMORY_ALLOCATION_ERROR;
   612         return;
   613     }
   615 }
   618 //-------------------------------------------------------------------------------------
   619 //
   620 //  RangeDesriptor Destructor
   621 //
   622 //-------------------------------------------------------------------------------------
   623 RangeDescriptor::~RangeDescriptor() {
   624     delete  fIncludesSets;
   625     fIncludesSets = NULL;
   626 }
   628 //-------------------------------------------------------------------------------------
   629 //
   630 //  RangeDesriptor::split()
   631 //
   632 //-------------------------------------------------------------------------------------
   633 void RangeDescriptor::split(UChar32 where, UErrorCode &status) {
   634     U_ASSERT(where>fStartChar && where<=fEndChar);
   635     RangeDescriptor *nr = new RangeDescriptor(*this, status);
   636     if(nr == 0) {
   637         status = U_MEMORY_ALLOCATION_ERROR;
   638         return;
   639     }
   640     if (U_FAILURE(status)) {
   641         delete nr;
   642         return;
   643     }
   644     //  RangeDescriptor copy constructor copies all fields.
   645     //  Only need to update those that are different after the split.
   646     nr->fStartChar = where;
   647     this->fEndChar = where-1;
   648     nr->fNext      = this->fNext;
   649     this->fNext    = nr;
   650 }
   653 //-------------------------------------------------------------------------------------
   654 //
   655 //   RangeDescriptor::setDictionaryFlag
   656 //
   657 //            Character Category Numbers that include characters from
   658 //            the original Unicode Set named "dictionary" have bit 14
   659 //            set to 1.  The RBBI runtime engine uses this to trigger
   660 //            use of the word dictionary.
   661 //
   662 //            This function looks through the Unicode Sets that it
   663 //            (the range) includes, and sets the bit in fNum when
   664 //            "dictionary" is among them.
   665 //
   666 //            TODO:  a faster way would be to find the set node for
   667 //                   "dictionary" just once, rather than looking it
   668 //                   up by name every time.
   669 //
   670 //-------------------------------------------------------------------------------------
   671 void RangeDescriptor::setDictionaryFlag() {
   672     int i;
   674     for (i=0; i<this->fIncludesSets->size(); i++) {
   675         RBBINode       *usetNode    = (RBBINode *)fIncludesSets->elementAt(i);
   676         UnicodeString   setName;
   677         RBBINode       *setRef = usetNode->fParent;
   678         if (setRef != NULL) {
   679             RBBINode *varRef = setRef->fParent;
   680             if (varRef != NULL  &&  varRef->fType == RBBINode::varRef) {
   681                 setName = varRef->fText;
   682             }
   683         }
   684         if (setName.compare(UNICODE_STRING("dictionary", 10)) == 0) {   // TODO:  no string literals.
   685             this->fNum |= 0x4000;
   686             break;
   687         }
   688     }
   689 }
   693 U_NAMESPACE_END
   695 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */

mercurial