michael@0: /* michael@0: ******************************************************************************* michael@0: * Copyright (C) 2010-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ******************************************************************************* michael@0: * file name: ucharstrieiterator.h michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2010nov15 michael@0: * created by: Markus W. Scherer michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/ucharstrie.h" michael@0: #include "unicode/unistr.h" michael@0: #include "uvectr32.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: UCharsTrie::Iterator::Iterator(const UChar *trieUChars, int32_t maxStringLength, michael@0: UErrorCode &errorCode) michael@0: : uchars_(trieUChars), michael@0: pos_(uchars_), initialPos_(uchars_), michael@0: remainingMatchLength_(-1), initialRemainingMatchLength_(-1), michael@0: skipValue_(FALSE), michael@0: maxLength_(maxStringLength), value_(0), stack_(NULL) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return; michael@0: } michael@0: // stack_ is a pointer so that it's easy to turn ucharstrie.h into michael@0: // a public API header for which we would want it to depend only on michael@0: // other public headers. michael@0: // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway michael@0: // via the UnicodeString and UVector32 implementations, so this additional michael@0: // cost is minimal. michael@0: stack_=new UVector32(errorCode); michael@0: if(stack_==NULL) { michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: } michael@0: } michael@0: michael@0: UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength, michael@0: UErrorCode &errorCode) michael@0: : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_), michael@0: remainingMatchLength_(trie.remainingMatchLength_), michael@0: initialRemainingMatchLength_(trie.remainingMatchLength_), michael@0: skipValue_(FALSE), michael@0: maxLength_(maxStringLength), value_(0), stack_(NULL) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return; michael@0: } michael@0: stack_=new UVector32(errorCode); michael@0: if(U_FAILURE(errorCode)) { michael@0: return; michael@0: } michael@0: if(stack_==NULL) { michael@0: errorCode=U_MEMORY_ALLOCATION_ERROR; michael@0: return; michael@0: } michael@0: int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. michael@0: if(length>=0) { michael@0: // Pending linear-match node, append remaining UChars to str_. michael@0: ++length; michael@0: if(maxLength_>0 && length>maxLength_) { michael@0: length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. michael@0: } michael@0: str_.append(pos_, length); michael@0: pos_+=length; michael@0: remainingMatchLength_-=length; michael@0: } michael@0: } michael@0: michael@0: UCharsTrie::Iterator::~Iterator() { michael@0: delete stack_; michael@0: } michael@0: michael@0: UCharsTrie::Iterator & michael@0: UCharsTrie::Iterator::reset() { michael@0: pos_=initialPos_; michael@0: remainingMatchLength_=initialRemainingMatchLength_; michael@0: skipValue_=FALSE; michael@0: int32_t length=remainingMatchLength_+1; // Remaining match length. michael@0: if(maxLength_>0 && length>maxLength_) { michael@0: length=maxLength_; michael@0: } michael@0: str_.truncate(length); michael@0: pos_+=length; michael@0: remainingMatchLength_-=length; michael@0: stack_->setSize(0); michael@0: return *this; michael@0: } michael@0: michael@0: UBool michael@0: UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } michael@0: michael@0: UBool michael@0: UCharsTrie::Iterator::next(UErrorCode &errorCode) { michael@0: if(U_FAILURE(errorCode)) { michael@0: return FALSE; michael@0: } michael@0: const UChar *pos=pos_; michael@0: if(pos==NULL) { michael@0: if(stack_->isEmpty()) { michael@0: return FALSE; michael@0: } michael@0: // Pop the state off the stack and continue with the next outbound edge of michael@0: // the branch node. michael@0: int32_t stackSize=stack_->size(); michael@0: int32_t length=stack_->elementAti(stackSize-1); michael@0: pos=uchars_+stack_->elementAti(stackSize-2); michael@0: stack_->setSize(stackSize-2); michael@0: str_.truncate(length&0xffff); michael@0: length=(int32_t)((uint32_t)length>>16); michael@0: if(length>1) { michael@0: pos=branchNext(pos, length, errorCode); michael@0: if(pos==NULL) { michael@0: return TRUE; // Reached a final value. michael@0: } michael@0: } else { michael@0: str_.append(*pos++); michael@0: } michael@0: } michael@0: if(remainingMatchLength_>=0) { michael@0: // We only get here if we started in a pending linear-match node michael@0: // with more than maxLength remaining units. michael@0: return truncateAndStop(); michael@0: } michael@0: for(;;) { michael@0: int32_t node=*pos++; michael@0: if(node>=kMinValueLead) { michael@0: if(skipValue_) { michael@0: pos=skipNodeValue(pos, node); michael@0: node&=kNodeTypeMask; michael@0: skipValue_=FALSE; michael@0: } else { michael@0: // Deliver value for the string so far. michael@0: UBool isFinal=(UBool)(node>>15); michael@0: if(isFinal) { michael@0: value_=readValue(pos, node&0x7fff); michael@0: } else { michael@0: value_=readNodeValue(pos, node); michael@0: } michael@0: if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) { michael@0: pos_=NULL; michael@0: } else { michael@0: // We cannot skip the value right here because it shares its michael@0: // lead unit with a match node which we have to evaluate michael@0: // next time. michael@0: // Instead, keep pos_ on the node lead unit itself. michael@0: pos_=pos-1; michael@0: skipValue_=TRUE; michael@0: } michael@0: return TRUE; michael@0: } michael@0: } michael@0: if(maxLength_>0 && str_.length()==maxLength_) { michael@0: return truncateAndStop(); michael@0: } michael@0: if(node0 && str_.length()+length>maxLength_) { michael@0: str_.append(pos, maxLength_-str_.length()); michael@0: return truncateAndStop(); michael@0: } michael@0: str_.append(pos, length); michael@0: pos+=length; michael@0: } michael@0: } michael@0: } michael@0: michael@0: // Branch node, needs to take the first outbound edge and push state for the rest. michael@0: const UChar * michael@0: UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) { michael@0: while(length>kMaxBranchLinearSubNodeLength) { michael@0: ++pos; // ignore the comparison unit michael@0: // Push state for the greater-or-equal edge. michael@0: stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode); michael@0: stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode); michael@0: // Follow the less-than edge. michael@0: length>>=1; michael@0: pos=jumpByDelta(pos); michael@0: } michael@0: // List of key-value pairs where values are either final values or jump deltas. michael@0: // Read the first (key, value) pair. michael@0: UChar trieUnit=*pos++; michael@0: int32_t node=*pos++; michael@0: UBool isFinal=(UBool)(node>>15); michael@0: int32_t value=readValue(pos, node&=0x7fff); michael@0: pos=skipValue(pos, node); michael@0: stack_->addElement((int32_t)(pos-uchars_), errorCode); michael@0: stack_->addElement(((length-1)<<16)|str_.length(), errorCode); michael@0: str_.append(trieUnit); michael@0: if(isFinal) { michael@0: pos_=NULL; michael@0: value_=value; michael@0: return NULL; michael@0: } else { michael@0: return pos+value; michael@0: } michael@0: } michael@0: michael@0: U_NAMESPACE_END