intl/icu/source/common/ucharstrieiterator.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 *******************************************************************************
     3 *   Copyright (C) 2010-2011, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 *******************************************************************************
     6 *   file name:  ucharstrieiterator.h
     7 *   encoding:   US-ASCII
     8 *   tab size:   8 (not used)
     9 *   indentation:4
    10 *
    11 *   created on: 2010nov15
    12 *   created by: Markus W. Scherer
    13 */
    15 #include "unicode/utypes.h"
    16 #include "unicode/ucharstrie.h"
    17 #include "unicode/unistr.h"
    18 #include "uvectr32.h"
    20 U_NAMESPACE_BEGIN
    22 UCharsTrie::Iterator::Iterator(const UChar *trieUChars, int32_t maxStringLength,
    23                                UErrorCode &errorCode)
    24         : uchars_(trieUChars),
    25           pos_(uchars_), initialPos_(uchars_),
    26           remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
    27           skipValue_(FALSE),
    28           maxLength_(maxStringLength), value_(0), stack_(NULL) {
    29     if(U_FAILURE(errorCode)) {
    30         return;
    31     }
    32     // stack_ is a pointer so that it's easy to turn ucharstrie.h into
    33     // a public API header for which we would want it to depend only on
    34     // other public headers.
    35     // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
    36     // via the UnicodeString and UVector32 implementations, so this additional
    37     // cost is minimal.
    38     stack_=new UVector32(errorCode);
    39     if(stack_==NULL) {
    40         errorCode=U_MEMORY_ALLOCATION_ERROR;
    41     }
    42 }
    44 UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength,
    45                                UErrorCode &errorCode)
    46         : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
    47           remainingMatchLength_(trie.remainingMatchLength_),
    48           initialRemainingMatchLength_(trie.remainingMatchLength_),
    49           skipValue_(FALSE),
    50           maxLength_(maxStringLength), value_(0), stack_(NULL) {
    51     if(U_FAILURE(errorCode)) {
    52         return;
    53     }
    54     stack_=new UVector32(errorCode);
    55     if(U_FAILURE(errorCode)) {
    56         return;
    57     }
    58     if(stack_==NULL) {
    59         errorCode=U_MEMORY_ALLOCATION_ERROR;
    60         return;
    61     }
    62     int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
    63     if(length>=0) {
    64         // Pending linear-match node, append remaining UChars to str_.
    65         ++length;
    66         if(maxLength_>0 && length>maxLength_) {
    67             length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
    68         }
    69         str_.append(pos_, length);
    70         pos_+=length;
    71         remainingMatchLength_-=length;
    72     }
    73 }
    75 UCharsTrie::Iterator::~Iterator() {
    76     delete stack_;
    77 }
    79 UCharsTrie::Iterator &
    80 UCharsTrie::Iterator::reset() {
    81     pos_=initialPos_;
    82     remainingMatchLength_=initialRemainingMatchLength_;
    83     skipValue_=FALSE;
    84     int32_t length=remainingMatchLength_+1;  // Remaining match length.
    85     if(maxLength_>0 && length>maxLength_) {
    86         length=maxLength_;
    87     }
    88     str_.truncate(length);
    89     pos_+=length;
    90     remainingMatchLength_-=length;
    91     stack_->setSize(0);
    92     return *this;
    93 }
    95 UBool
    96 UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
    98 UBool
    99 UCharsTrie::Iterator::next(UErrorCode &errorCode) {
   100     if(U_FAILURE(errorCode)) {
   101         return FALSE;
   102     }
   103     const UChar *pos=pos_;
   104     if(pos==NULL) {
   105         if(stack_->isEmpty()) {
   106             return FALSE;
   107         }
   108         // Pop the state off the stack and continue with the next outbound edge of
   109         // the branch node.
   110         int32_t stackSize=stack_->size();
   111         int32_t length=stack_->elementAti(stackSize-1);
   112         pos=uchars_+stack_->elementAti(stackSize-2);
   113         stack_->setSize(stackSize-2);
   114         str_.truncate(length&0xffff);
   115         length=(int32_t)((uint32_t)length>>16);
   116         if(length>1) {
   117             pos=branchNext(pos, length, errorCode);
   118             if(pos==NULL) {
   119                 return TRUE;  // Reached a final value.
   120             }
   121         } else {
   122             str_.append(*pos++);
   123         }
   124     }
   125     if(remainingMatchLength_>=0) {
   126         // We only get here if we started in a pending linear-match node
   127         // with more than maxLength remaining units.
   128         return truncateAndStop();
   129     }
   130     for(;;) {
   131         int32_t node=*pos++;
   132         if(node>=kMinValueLead) {
   133             if(skipValue_) {
   134                 pos=skipNodeValue(pos, node);
   135                 node&=kNodeTypeMask;
   136                 skipValue_=FALSE;
   137             } else {
   138                 // Deliver value for the string so far.
   139                 UBool isFinal=(UBool)(node>>15);
   140                 if(isFinal) {
   141                     value_=readValue(pos, node&0x7fff);
   142                 } else {
   143                     value_=readNodeValue(pos, node);
   144                 }
   145                 if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
   146                     pos_=NULL;
   147                 } else {
   148                     // We cannot skip the value right here because it shares its
   149                     // lead unit with a match node which we have to evaluate
   150                     // next time.
   151                     // Instead, keep pos_ on the node lead unit itself.
   152                     pos_=pos-1;
   153                     skipValue_=TRUE;
   154                 }
   155                 return TRUE;
   156             }
   157         }
   158         if(maxLength_>0 && str_.length()==maxLength_) {
   159             return truncateAndStop();
   160         }
   161         if(node<kMinLinearMatch) {
   162             if(node==0) {
   163                 node=*pos++;
   164             }
   165             pos=branchNext(pos, node+1, errorCode);
   166             if(pos==NULL) {
   167                 return TRUE;  // Reached a final value.
   168             }
   169         } else {
   170             // Linear-match node, append length units to str_.
   171             int32_t length=node-kMinLinearMatch+1;
   172             if(maxLength_>0 && str_.length()+length>maxLength_) {
   173                 str_.append(pos, maxLength_-str_.length());
   174                 return truncateAndStop();
   175             }
   176             str_.append(pos, length);
   177             pos+=length;
   178         }
   179     }
   180 }
   182 // Branch node, needs to take the first outbound edge and push state for the rest.
   183 const UChar *
   184 UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) {
   185     while(length>kMaxBranchLinearSubNodeLength) {
   186         ++pos;  // ignore the comparison unit
   187         // Push state for the greater-or-equal edge.
   188         stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode);
   189         stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
   190         // Follow the less-than edge.
   191         length>>=1;
   192         pos=jumpByDelta(pos);
   193     }
   194     // List of key-value pairs where values are either final values or jump deltas.
   195     // Read the first (key, value) pair.
   196     UChar trieUnit=*pos++;
   197     int32_t node=*pos++;
   198     UBool isFinal=(UBool)(node>>15);
   199     int32_t value=readValue(pos, node&=0x7fff);
   200     pos=skipValue(pos, node);
   201     stack_->addElement((int32_t)(pos-uchars_), errorCode);
   202     stack_->addElement(((length-1)<<16)|str_.length(), errorCode);
   203     str_.append(trieUnit);
   204     if(isFinal) {
   205         pos_=NULL;
   206         value_=value;
   207         return NULL;
   208     } else {
   209         return pos+value;
   210     }
   211 }
   213 U_NAMESPACE_END

mercurial