intl/icu/source/common/bytestrieiterator.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /*
     2 *******************************************************************************
     3 *   Copyright (C) 2010-2012, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 *******************************************************************************
     6 *   file name:  bytestrieiterator.cpp
     7 *   encoding:   US-ASCII
     8 *   tab size:   8 (not used)
     9 *   indentation:4
    10 *
    11 *   created on: 2010nov03
    12 *   created by: Markus W. Scherer
    13 */
    15 #include "unicode/utypes.h"
    16 #include "unicode/bytestrie.h"
    17 #include "unicode/stringpiece.h"
    18 #include "charstr.h"
    19 #include "uvectr32.h"
    21 U_NAMESPACE_BEGIN
    23 BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
    24                               UErrorCode &errorCode)
    25         : bytes_(static_cast<const uint8_t *>(trieBytes)),
    26           pos_(bytes_), initialPos_(bytes_),
    27           remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
    28           str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
    29     if(U_FAILURE(errorCode)) {
    30         return;
    31     }
    32     // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
    33     // a public API header for which we would want it to depend only on
    34     // other public headers.
    35     // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
    36     // via the CharString and UVector32 implementations, so this additional
    37     // cost is minimal.
    38     str_=new CharString();
    39     stack_=new UVector32(errorCode);
    40     if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) {
    41         errorCode=U_MEMORY_ALLOCATION_ERROR;
    42     }
    43 }
    45 BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
    46                               UErrorCode &errorCode)
    47         : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
    48           remainingMatchLength_(trie.remainingMatchLength_),
    49           initialRemainingMatchLength_(trie.remainingMatchLength_),
    50           str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
    51     if(U_FAILURE(errorCode)) {
    52         return;
    53     }
    54     str_=new CharString();
    55     stack_=new UVector32(errorCode);
    56     if(U_FAILURE(errorCode)) {
    57         return;
    58     }
    59     if(str_==NULL || stack_==NULL) {
    60         errorCode=U_MEMORY_ALLOCATION_ERROR;
    61         return;
    62     }
    63     int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
    64     if(length>=0) {
    65         // Pending linear-match node, append remaining bytes to str_.
    66         ++length;
    67         if(maxLength_>0 && length>maxLength_) {
    68             length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
    69         }
    70         str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
    71         pos_+=length;
    72         remainingMatchLength_-=length;
    73     }
    74 }
    76 BytesTrie::Iterator::~Iterator() {
    77     delete str_;
    78     delete stack_;
    79 }
    81 BytesTrie::Iterator &
    82 BytesTrie::Iterator::reset() {
    83     pos_=initialPos_;
    84     remainingMatchLength_=initialRemainingMatchLength_;
    85     int32_t length=remainingMatchLength_+1;  // Remaining match length.
    86     if(maxLength_>0 && length>maxLength_) {
    87         length=maxLength_;
    88     }
    89     str_->truncate(length);
    90     pos_+=length;
    91     remainingMatchLength_-=length;
    92     stack_->setSize(0);
    93     return *this;
    94 }
    96 UBool
    97 BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
    99 UBool
   100 BytesTrie::Iterator::next(UErrorCode &errorCode) {
   101     if(U_FAILURE(errorCode)) {
   102         return FALSE;
   103     }
   104     const uint8_t *pos=pos_;
   105     if(pos==NULL) {
   106         if(stack_->isEmpty()) {
   107             return FALSE;
   108         }
   109         // Pop the state off the stack and continue with the next outbound edge of
   110         // the branch node.
   111         int32_t stackSize=stack_->size();
   112         int32_t length=stack_->elementAti(stackSize-1);
   113         pos=bytes_+stack_->elementAti(stackSize-2);
   114         stack_->setSize(stackSize-2);
   115         str_->truncate(length&0xffff);
   116         length=(int32_t)((uint32_t)length>>16);
   117         if(length>1) {
   118             pos=branchNext(pos, length, errorCode);
   119             if(pos==NULL) {
   120                 return TRUE;  // Reached a final value.
   121             }
   122         } else {
   123             str_->append((char)*pos++, errorCode);
   124         }
   125     }
   126     if(remainingMatchLength_>=0) {
   127         // We only get here if we started in a pending linear-match node
   128         // with more than maxLength remaining bytes.
   129         return truncateAndStop();
   130     }
   131     for(;;) {
   132         int32_t node=*pos++;
   133         if(node>=kMinValueLead) {
   134             // Deliver value for the byte sequence so far.
   135             UBool isFinal=(UBool)(node&kValueIsFinal);
   136             value_=readValue(pos, node>>1);
   137             if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
   138                 pos_=NULL;
   139             } else {
   140                 pos_=skipValue(pos, node);
   141             }
   142             sp_.set(str_->data(), str_->length());
   143             return TRUE;
   144         }
   145         if(maxLength_>0 && str_->length()==maxLength_) {
   146             return truncateAndStop();
   147         }
   148         if(node<kMinLinearMatch) {
   149             if(node==0) {
   150                 node=*pos++;
   151             }
   152             pos=branchNext(pos, node+1, errorCode);
   153             if(pos==NULL) {
   154                 return TRUE;  // Reached a final value.
   155             }
   156         } else {
   157             // Linear-match node, append length bytes to str_.
   158             int32_t length=node-kMinLinearMatch+1;
   159             if(maxLength_>0 && str_->length()+length>maxLength_) {
   160                 str_->append(reinterpret_cast<const char *>(pos),
   161                             maxLength_-str_->length(), errorCode);
   162                 return truncateAndStop();
   163             }
   164             str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
   165             pos+=length;
   166         }
   167     }
   168 }
   170 UBool
   171 BytesTrie::Iterator::truncateAndStop() {
   172     pos_=NULL;
   173     sp_.set(str_->data(), str_->length());
   174     value_=-1;  // no real value for str
   175     return TRUE;
   176 }
   178 // Branch node, needs to take the first outbound edge and push state for the rest.
   179 const uint8_t *
   180 BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
   181     while(length>kMaxBranchLinearSubNodeLength) {
   182         ++pos;  // ignore the comparison byte
   183         // Push state for the greater-or-equal edge.
   184         stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
   185         stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
   186         // Follow the less-than edge.
   187         length>>=1;
   188         pos=jumpByDelta(pos);
   189     }
   190     // List of key-value pairs where values are either final values or jump deltas.
   191     // Read the first (key, value) pair.
   192     uint8_t trieByte=*pos++;
   193     int32_t node=*pos++;
   194     UBool isFinal=(UBool)(node&kValueIsFinal);
   195     int32_t value=readValue(pos, node>>1);
   196     pos=skipValue(pos, node);
   197     stack_->addElement((int32_t)(pos-bytes_), errorCode);
   198     stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
   199     str_->append((char)trieByte, errorCode);
   200     if(isFinal) {
   201         pos_=NULL;
   202         sp_.set(str_->data(), str_->length());
   203         value_=value;
   204         return NULL;
   205     } else {
   206         return pos+value;
   207     }
   208 }
   210 U_NAMESPACE_END

mercurial