1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/icu/source/common/bytestrieiterator.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,210 @@ 1.4 +/* 1.5 +******************************************************************************* 1.6 +* Copyright (C) 2010-2012, International Business Machines 1.7 +* Corporation and others. All Rights Reserved. 1.8 +******************************************************************************* 1.9 +* file name: bytestrieiterator.cpp 1.10 +* encoding: US-ASCII 1.11 +* tab size: 8 (not used) 1.12 +* indentation:4 1.13 +* 1.14 +* created on: 2010nov03 1.15 +* created by: Markus W. Scherer 1.16 +*/ 1.17 + 1.18 +#include "unicode/utypes.h" 1.19 +#include "unicode/bytestrie.h" 1.20 +#include "unicode/stringpiece.h" 1.21 +#include "charstr.h" 1.22 +#include "uvectr32.h" 1.23 + 1.24 +U_NAMESPACE_BEGIN 1.25 + 1.26 +BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength, 1.27 + UErrorCode &errorCode) 1.28 + : bytes_(static_cast<const uint8_t *>(trieBytes)), 1.29 + pos_(bytes_), initialPos_(bytes_), 1.30 + remainingMatchLength_(-1), initialRemainingMatchLength_(-1), 1.31 + str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { 1.32 + if(U_FAILURE(errorCode)) { 1.33 + return; 1.34 + } 1.35 + // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into 1.36 + // a public API header for which we would want it to depend only on 1.37 + // other public headers. 1.38 + // Unlike BytesTrie itself, its Iterator performs memory allocations anyway 1.39 + // via the CharString and UVector32 implementations, so this additional 1.40 + // cost is minimal. 1.41 + str_=new CharString(); 1.42 + stack_=new UVector32(errorCode); 1.43 + if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) { 1.44 + errorCode=U_MEMORY_ALLOCATION_ERROR; 1.45 + } 1.46 +} 1.47 + 1.48 +BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength, 1.49 + UErrorCode &errorCode) 1.50 + : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_), 1.51 + remainingMatchLength_(trie.remainingMatchLength_), 1.52 + initialRemainingMatchLength_(trie.remainingMatchLength_), 1.53 + str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { 1.54 + if(U_FAILURE(errorCode)) { 1.55 + return; 1.56 + } 1.57 + str_=new CharString(); 1.58 + stack_=new UVector32(errorCode); 1.59 + if(U_FAILURE(errorCode)) { 1.60 + return; 1.61 + } 1.62 + if(str_==NULL || stack_==NULL) { 1.63 + errorCode=U_MEMORY_ALLOCATION_ERROR; 1.64 + return; 1.65 + } 1.66 + int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. 1.67 + if(length>=0) { 1.68 + // Pending linear-match node, append remaining bytes to str_. 1.69 + ++length; 1.70 + if(maxLength_>0 && length>maxLength_) { 1.71 + length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. 1.72 + } 1.73 + str_->append(reinterpret_cast<const char *>(pos_), length, errorCode); 1.74 + pos_+=length; 1.75 + remainingMatchLength_-=length; 1.76 + } 1.77 +} 1.78 + 1.79 +BytesTrie::Iterator::~Iterator() { 1.80 + delete str_; 1.81 + delete stack_; 1.82 +} 1.83 + 1.84 +BytesTrie::Iterator & 1.85 +BytesTrie::Iterator::reset() { 1.86 + pos_=initialPos_; 1.87 + remainingMatchLength_=initialRemainingMatchLength_; 1.88 + int32_t length=remainingMatchLength_+1; // Remaining match length. 1.89 + if(maxLength_>0 && length>maxLength_) { 1.90 + length=maxLength_; 1.91 + } 1.92 + str_->truncate(length); 1.93 + pos_+=length; 1.94 + remainingMatchLength_-=length; 1.95 + stack_->setSize(0); 1.96 + return *this; 1.97 +} 1.98 + 1.99 +UBool 1.100 +BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } 1.101 + 1.102 +UBool 1.103 +BytesTrie::Iterator::next(UErrorCode &errorCode) { 1.104 + if(U_FAILURE(errorCode)) { 1.105 + return FALSE; 1.106 + } 1.107 + const uint8_t *pos=pos_; 1.108 + if(pos==NULL) { 1.109 + if(stack_->isEmpty()) { 1.110 + return FALSE; 1.111 + } 1.112 + // Pop the state off the stack and continue with the next outbound edge of 1.113 + // the branch node. 1.114 + int32_t stackSize=stack_->size(); 1.115 + int32_t length=stack_->elementAti(stackSize-1); 1.116 + pos=bytes_+stack_->elementAti(stackSize-2); 1.117 + stack_->setSize(stackSize-2); 1.118 + str_->truncate(length&0xffff); 1.119 + length=(int32_t)((uint32_t)length>>16); 1.120 + if(length>1) { 1.121 + pos=branchNext(pos, length, errorCode); 1.122 + if(pos==NULL) { 1.123 + return TRUE; // Reached a final value. 1.124 + } 1.125 + } else { 1.126 + str_->append((char)*pos++, errorCode); 1.127 + } 1.128 + } 1.129 + if(remainingMatchLength_>=0) { 1.130 + // We only get here if we started in a pending linear-match node 1.131 + // with more than maxLength remaining bytes. 1.132 + return truncateAndStop(); 1.133 + } 1.134 + for(;;) { 1.135 + int32_t node=*pos++; 1.136 + if(node>=kMinValueLead) { 1.137 + // Deliver value for the byte sequence so far. 1.138 + UBool isFinal=(UBool)(node&kValueIsFinal); 1.139 + value_=readValue(pos, node>>1); 1.140 + if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) { 1.141 + pos_=NULL; 1.142 + } else { 1.143 + pos_=skipValue(pos, node); 1.144 + } 1.145 + sp_.set(str_->data(), str_->length()); 1.146 + return TRUE; 1.147 + } 1.148 + if(maxLength_>0 && str_->length()==maxLength_) { 1.149 + return truncateAndStop(); 1.150 + } 1.151 + if(node<kMinLinearMatch) { 1.152 + if(node==0) { 1.153 + node=*pos++; 1.154 + } 1.155 + pos=branchNext(pos, node+1, errorCode); 1.156 + if(pos==NULL) { 1.157 + return TRUE; // Reached a final value. 1.158 + } 1.159 + } else { 1.160 + // Linear-match node, append length bytes to str_. 1.161 + int32_t length=node-kMinLinearMatch+1; 1.162 + if(maxLength_>0 && str_->length()+length>maxLength_) { 1.163 + str_->append(reinterpret_cast<const char *>(pos), 1.164 + maxLength_-str_->length(), errorCode); 1.165 + return truncateAndStop(); 1.166 + } 1.167 + str_->append(reinterpret_cast<const char *>(pos), length, errorCode); 1.168 + pos+=length; 1.169 + } 1.170 + } 1.171 +} 1.172 + 1.173 +UBool 1.174 +BytesTrie::Iterator::truncateAndStop() { 1.175 + pos_=NULL; 1.176 + sp_.set(str_->data(), str_->length()); 1.177 + value_=-1; // no real value for str 1.178 + return TRUE; 1.179 +} 1.180 + 1.181 +// Branch node, needs to take the first outbound edge and push state for the rest. 1.182 +const uint8_t * 1.183 +BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) { 1.184 + while(length>kMaxBranchLinearSubNodeLength) { 1.185 + ++pos; // ignore the comparison byte 1.186 + // Push state for the greater-or-equal edge. 1.187 + stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode); 1.188 + stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode); 1.189 + // Follow the less-than edge. 1.190 + length>>=1; 1.191 + pos=jumpByDelta(pos); 1.192 + } 1.193 + // List of key-value pairs where values are either final values or jump deltas. 1.194 + // Read the first (key, value) pair. 1.195 + uint8_t trieByte=*pos++; 1.196 + int32_t node=*pos++; 1.197 + UBool isFinal=(UBool)(node&kValueIsFinal); 1.198 + int32_t value=readValue(pos, node>>1); 1.199 + pos=skipValue(pos, node); 1.200 + stack_->addElement((int32_t)(pos-bytes_), errorCode); 1.201 + stack_->addElement(((length-1)<<16)|str_->length(), errorCode); 1.202 + str_->append((char)trieByte, errorCode); 1.203 + if(isFinal) { 1.204 + pos_=NULL; 1.205 + sp_.set(str_->data(), str_->length()); 1.206 + value_=value; 1.207 + return NULL; 1.208 + } else { 1.209 + return pos+value; 1.210 + } 1.211 +} 1.212 + 1.213 +U_NAMESPACE_END