1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/ucharstrieiterator.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,213 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* Copyright (C) 2010-2011, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +******************************************************************************* 1.9 +* file name: ucharstrieiterator.h 1.10 +* encoding: US-ASCII 1.11 +* tab size: 8 (not used) 1.12 +* indentation:4 1.13 +* 1.14 +* created on: 2010nov15 1.15 +* created by: Markus W. Scherer 1.16 +*/ 1.17 + 1.18 +#include "unicode/utypes.h" 1.19 +#include "unicode/ucharstrie.h" 1.20 +#include "unicode/unistr.h" 1.21 +#include "uvectr32.h" 1.22 + 1.23 +U_NAMESPACE_BEGIN 1.24 + 1.25 +UCharsTrie::Iterator::Iterator(const UChar *trieUChars, int32_t maxStringLength, 1.26 + UErrorCode &errorCode) 1.27 + : uchars_(trieUChars), 1.28 + pos_(uchars_), initialPos_(uchars_), 1.29 + remainingMatchLength_(-1), initialRemainingMatchLength_(-1), 1.30 + skipValue_(FALSE), 1.31 + maxLength_(maxStringLength), value_(0), stack_(NULL) { 1.32 + if(U_FAILURE(errorCode)) { 1.33 + return; 1.34 + } 1.35 + // stack_ is a pointer so that it's easy to turn ucharstrie.h into 1.36 + // a public API header for which we would want it to depend only on 1.37 + // other public headers. 1.38 + // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway 1.39 + // via the UnicodeString and UVector32 implementations, so this additional 1.40 + // cost is minimal. 1.41 + stack_=new UVector32(errorCode); 1.42 + if(stack_==NULL) { 1.43 + errorCode=U_MEMORY_ALLOCATION_ERROR; 1.44 + } 1.45 +} 1.46 + 1.47 +UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength, 1.48 + UErrorCode &errorCode) 1.49 + : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_), 1.50 + remainingMatchLength_(trie.remainingMatchLength_), 1.51 + initialRemainingMatchLength_(trie.remainingMatchLength_), 1.52 + skipValue_(FALSE), 1.53 + maxLength_(maxStringLength), value_(0), stack_(NULL) { 1.54 + if(U_FAILURE(errorCode)) { 1.55 + return; 1.56 + } 1.57 + stack_=new UVector32(errorCode); 1.58 + if(U_FAILURE(errorCode)) { 1.59 + return; 1.60 + } 1.61 + if(stack_==NULL) { 1.62 + errorCode=U_MEMORY_ALLOCATION_ERROR; 1.63 + return; 1.64 + } 1.65 + int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. 1.66 + if(length>=0) { 1.67 + // Pending linear-match node, append remaining UChars to str_. 1.68 + ++length; 1.69 + if(maxLength_>0 && length>maxLength_) { 1.70 + length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. 1.71 + } 1.72 + str_.append(pos_, length); 1.73 + pos_+=length; 1.74 + remainingMatchLength_-=length; 1.75 + } 1.76 +} 1.77 + 1.78 +UCharsTrie::Iterator::~Iterator() { 1.79 + delete stack_; 1.80 +} 1.81 + 1.82 +UCharsTrie::Iterator & 1.83 +UCharsTrie::Iterator::reset() { 1.84 + pos_=initialPos_; 1.85 + remainingMatchLength_=initialRemainingMatchLength_; 1.86 + skipValue_=FALSE; 1.87 + int32_t length=remainingMatchLength_+1; // Remaining match length. 1.88 + if(maxLength_>0 && length>maxLength_) { 1.89 + length=maxLength_; 1.90 + } 1.91 + str_.truncate(length); 1.92 + pos_+=length; 1.93 + remainingMatchLength_-=length; 1.94 + stack_->setSize(0); 1.95 + return *this; 1.96 +} 1.97 + 1.98 +UBool 1.99 +UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } 1.100 + 1.101 +UBool 1.102 +UCharsTrie::Iterator::next(UErrorCode &errorCode) { 1.103 + if(U_FAILURE(errorCode)) { 1.104 + return FALSE; 1.105 + } 1.106 + const UChar *pos=pos_; 1.107 + if(pos==NULL) { 1.108 + if(stack_->isEmpty()) { 1.109 + return FALSE; 1.110 + } 1.111 + // Pop the state off the stack and continue with the next outbound edge of 1.112 + // the branch node. 1.113 + int32_t stackSize=stack_->size(); 1.114 + int32_t length=stack_->elementAti(stackSize-1); 1.115 + pos=uchars_+stack_->elementAti(stackSize-2); 1.116 + stack_->setSize(stackSize-2); 1.117 + str_.truncate(length&0xffff); 1.118 + length=(int32_t)((uint32_t)length>>16); 1.119 + if(length>1) { 1.120 + pos=branchNext(pos, length, errorCode); 1.121 + if(pos==NULL) { 1.122 + return TRUE; // Reached a final value. 1.123 + } 1.124 + } else { 1.125 + str_.append(*pos++); 1.126 + } 1.127 + } 1.128 + if(remainingMatchLength_>=0) { 1.129 + // We only get here if we started in a pending linear-match node 1.130 + // with more than maxLength remaining units. 1.131 + return truncateAndStop(); 1.132 + } 1.133 + for(;;) { 1.134 + int32_t node=*pos++; 1.135 + if(node>=kMinValueLead) { 1.136 + if(skipValue_) { 1.137 + pos=skipNodeValue(pos, node); 1.138 + node&=kNodeTypeMask; 1.139 + skipValue_=FALSE; 1.140 + } else { 1.141 + // Deliver value for the string so far. 1.142 + UBool isFinal=(UBool)(node>>15); 1.143 + if(isFinal) { 1.144 + value_=readValue(pos, node&0x7fff); 1.145 + } else { 1.146 + value_=readNodeValue(pos, node); 1.147 + } 1.148 + if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) { 1.149 + pos_=NULL; 1.150 + } else { 1.151 + // We cannot skip the value right here because it shares its 1.152 + // lead unit with a match node which we have to evaluate 1.153 + // next time. 1.154 + // Instead, keep pos_ on the node lead unit itself. 1.155 + pos_=pos-1; 1.156 + skipValue_=TRUE; 1.157 + } 1.158 + return TRUE; 1.159 + } 1.160 + } 1.161 + if(maxLength_>0 && str_.length()==maxLength_) { 1.162 + return truncateAndStop(); 1.163 + } 1.164 + if(node<kMinLinearMatch) { 1.165 + if(node==0) { 1.166 + node=*pos++; 1.167 + } 1.168 + pos=branchNext(pos, node+1, errorCode); 1.169 + if(pos==NULL) { 1.170 + return TRUE; // Reached a final value. 1.171 + } 1.172 + } else { 1.173 + // Linear-match node, append length units to str_. 1.174 + int32_t length=node-kMinLinearMatch+1; 1.175 + if(maxLength_>0 && str_.length()+length>maxLength_) { 1.176 + str_.append(pos, maxLength_-str_.length()); 1.177 + return truncateAndStop(); 1.178 + } 1.179 + str_.append(pos, length); 1.180 + pos+=length; 1.181 + } 1.182 + } 1.183 +} 1.184 + 1.185 +// Branch node, needs to take the first outbound edge and push state for the rest. 1.186 +const UChar * 1.187 +UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) { 1.188 + while(length>kMaxBranchLinearSubNodeLength) { 1.189 + ++pos; // ignore the comparison unit 1.190 + // Push state for the greater-or-equal edge. 1.191 + stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode); 1.192 + stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode); 1.193 + // Follow the less-than edge. 1.194 + length>>=1; 1.195 + pos=jumpByDelta(pos); 1.196 + } 1.197 + // List of key-value pairs where values are either final values or jump deltas. 1.198 + // Read the first (key, value) pair. 1.199 + UChar trieUnit=*pos++; 1.200 + int32_t node=*pos++; 1.201 + UBool isFinal=(UBool)(node>>15); 1.202 + int32_t value=readValue(pos, node&=0x7fff); 1.203 + pos=skipValue(pos, node); 1.204 + stack_->addElement((int32_t)(pos-uchars_), errorCode); 1.205 + stack_->addElement(((length-1)<<16)|str_.length(), errorCode); 1.206 + str_.append(trieUnit); 1.207 + if(isFinal) { 1.208 + pos_=NULL; 1.209 + value_=value; 1.210 + return NULL; 1.211 + } else { 1.212 + return pos+value; 1.213 + } 1.214 +} 1.215 + 1.216 +U_NAMESPACE_END