michael@0: /* michael@0: ******************************************************************************* michael@0: * Copyright (C) 2010-2012, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ******************************************************************************* michael@0: * file name: bytestrieiterator.cpp michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2010nov03 michael@0: * created by: Markus W. Scherer michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/bytestrie.h" michael@0: #include "unicode/stringpiece.h" michael@0: #include "charstr.h" michael@0: #include "uvectr32.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength, michael@0: UErrorCode &errorCode) michael@0: : bytes_(static_cast(trieBytes)), michael@0: pos_(bytes_), initialPos_(bytes_), michael@0: remainingMatchLength_(-1), initialRemainingMatchLength_(-1), michael@0: str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return; michael@0: } michael@0: // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into michael@0: // a public API header for which we would want it to depend only on michael@0: // other public headers. michael@0: // Unlike BytesTrie itself, its Iterator performs memory allocations anyway michael@0: // via the CharString and UVector32 implementations, so this additional michael@0: // cost is minimal. michael@0: str_=new CharString(); michael@0: stack_=new UVector32(errorCode); michael@0: if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) { michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: } michael@0: } michael@0: michael@0: BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength, michael@0: UErrorCode &errorCode) michael@0: : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_), michael@0: remainingMatchLength_(trie.remainingMatchLength_), michael@0: initialRemainingMatchLength_(trie.remainingMatchLength_), michael@0: str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return; michael@0: } michael@0: str_=new CharString(); michael@0: stack_=new UVector32(errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: return; michael@0: } michael@0: if(str_==NULL || stack_==NULL) { michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. michael@0: if(length>=0) { michael@0: // Pending linear-match node, append remaining bytes to str_. michael@0: ++length; michael@0: if(maxLength_>0 && length>maxLength_) { michael@0: length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. michael@0: } michael@0: str_->append(reinterpret_cast(pos_), length, errorCode); michael@0: pos_+=length; michael@0: remainingMatchLength_-=length; michael@0: } michael@0: } michael@0: michael@0: BytesTrie::Iterator::~Iterator() { michael@0: delete str_; michael@0: delete stack_; michael@0: } michael@0: michael@0: BytesTrie::Iterator & michael@0: BytesTrie::Iterator::reset() { michael@0: pos_=initialPos_; michael@0: remainingMatchLength_=initialRemainingMatchLength_; michael@0: int32_t length=remainingMatchLength_+1; // Remaining match length. michael@0: if(maxLength_>0 && length>maxLength_) { michael@0: length=maxLength_; michael@0: } michael@0: str_->truncate(length); michael@0: pos_+=length; michael@0: remainingMatchLength_-=length; michael@0: stack_->setSize(0); michael@0: return *this; michael@0: } michael@0: michael@0: UBool michael@0: BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } michael@0: michael@0: UBool michael@0: BytesTrie::Iterator::next(UErrorCode &errorCode) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return FALSE; michael@0: } michael@0: const uint8_t *pos=pos_; michael@0: if(pos==NULL) { michael@0: if(stack_->isEmpty()) { michael@0: return FALSE; michael@0: } michael@0: // Pop the state off the stack and continue with the next outbound edge of michael@0: // the branch node. michael@0: int32_t stackSize=stack_->size(); michael@0: int32_t length=stack_->elementAti(stackSize-1); michael@0: pos=bytes_+stack_->elementAti(stackSize-2); michael@0: stack_->setSize(stackSize-2); michael@0: str_->truncate(length&0xffff); michael@0: length=(int32_t)((uint32_t)length>>16); michael@0: if(length>1) { michael@0: pos=branchNext(pos, length, errorCode); michael@0: if(pos==NULL) { michael@0: return TRUE; // Reached a final value. michael@0: } michael@0: } else { michael@0: str_->append((char)*pos++, errorCode); michael@0: } michael@0: } michael@0: if(remainingMatchLength_>=0) { michael@0: // We only get here if we started in a pending linear-match node michael@0: // with more than maxLength remaining bytes. michael@0: return truncateAndStop(); michael@0: } michael@0: for(;;) { michael@0: int32_t node=*pos++; michael@0: if(node>=kMinValueLead) { michael@0: // Deliver value for the byte sequence so far. michael@0: UBool isFinal=(UBool)(node&kValueIsFinal); michael@0: value_=readValue(pos, node>>1); michael@0: if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) { michael@0: pos_=NULL; michael@0: } else { michael@0: pos_=skipValue(pos, node); michael@0: } michael@0: sp_.set(str_->data(), str_->length()); michael@0: return TRUE; michael@0: } michael@0: if(maxLength_>0 && str_->length()==maxLength_) { michael@0: return truncateAndStop(); michael@0: } michael@0: if(node0 && str_->length()+length>maxLength_) { michael@0: str_->append(reinterpret_cast(pos), michael@0: maxLength_-str_->length(), errorCode); michael@0: return truncateAndStop(); michael@0: } michael@0: str_->append(reinterpret_cast(pos), length, errorCode); michael@0: pos+=length; michael@0: } michael@0: } michael@0: } michael@0: michael@0: UBool michael@0: BytesTrie::Iterator::truncateAndStop() { michael@0: pos_=NULL; michael@0: sp_.set(str_->data(), str_->length()); michael@0: value_=-1; // no real value for str michael@0: return TRUE; michael@0: } michael@0: michael@0: // Branch node, needs to take the first outbound edge and push state for the rest. michael@0: const uint8_t * michael@0: BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) { michael@0: while(length>kMaxBranchLinearSubNodeLength) { michael@0: ++pos; // ignore the comparison byte michael@0: // Push state for the greater-or-equal edge. michael@0: stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode); michael@0: stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode); michael@0: // Follow the less-than edge. michael@0: length>>=1; michael@0: pos=jumpByDelta(pos); michael@0: } michael@0: // List of key-value pairs where values are either final values or jump deltas. michael@0: // Read the first (key, value) pair. michael@0: uint8_t trieByte=*pos++; michael@0: int32_t node=*pos++; michael@0: UBool isFinal=(UBool)(node&kValueIsFinal); michael@0: int32_t value=readValue(pos, node>>1); michael@0: pos=skipValue(pos, node); michael@0: stack_->addElement((int32_t)(pos-bytes_), errorCode); michael@0: stack_->addElement(((length-1)<<16)|str_->length(), errorCode); michael@0: str_->append((char)trieByte, errorCode); michael@0: if(isFinal) { michael@0: pos_=NULL; michael@0: sp_.set(str_->data(), str_->length()); michael@0: value_=value; michael@0: return NULL; michael@0: } else { michael@0: return pos+value; michael@0: } michael@0: } michael@0: michael@0: U_NAMESPACE_END