michael@0: /* michael@0: ******************************************************************************* michael@0: * Copyright (C) 2010-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ******************************************************************************* michael@0: * file name: ucharstrie.h michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2010nov14 michael@0: * created by: Markus W. Scherer michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/appendable.h" michael@0: #include "unicode/ucharstrie.h" michael@0: #include "unicode/uobject.h" michael@0: #include "unicode/utf16.h" michael@0: #include "cmemory.h" michael@0: #include "uassert.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: UCharsTrie::~UCharsTrie() { michael@0: uprv_free(ownedArray_); michael@0: } michael@0: michael@0: UStringTrieResult michael@0: UCharsTrie::current() const { michael@0: const UChar *pos=pos_; michael@0: if(pos==NULL) { michael@0: return USTRINGTRIE_NO_MATCH; michael@0: } else { michael@0: int32_t node; michael@0: return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ? michael@0: valueResult(node) : USTRINGTRIE_NO_VALUE; michael@0: } michael@0: } michael@0: michael@0: UStringTrieResult michael@0: UCharsTrie::firstForCodePoint(UChar32 cp) { michael@0: return cp<=0xffff ? michael@0: first(cp) : michael@0: (USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ? michael@0: next(U16_TRAIL(cp)) : michael@0: USTRINGTRIE_NO_MATCH); michael@0: } michael@0: michael@0: UStringTrieResult michael@0: UCharsTrie::nextForCodePoint(UChar32 cp) { michael@0: return cp<=0xffff ? michael@0: next(cp) : michael@0: (USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ? michael@0: next(U16_TRAIL(cp)) : michael@0: USTRINGTRIE_NO_MATCH); michael@0: } michael@0: michael@0: UStringTrieResult michael@0: UCharsTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) { michael@0: // Branch according to the current unit. michael@0: if(length==0) { michael@0: length=*pos++; michael@0: } michael@0: ++length; michael@0: // The length of the branch is the number of units to select from. michael@0: // The data structure encodes a binary search. michael@0: while(length>kMaxBranchLinearSubNodeLength) { michael@0: if(uchar<*pos++) { michael@0: length>>=1; michael@0: pos=jumpByDelta(pos); michael@0: } else { michael@0: length=length-(length>>1); michael@0: pos=skipDelta(pos); michael@0: } michael@0: } michael@0: // Drop down to linear search for the last few units. michael@0: // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3 michael@0: // and divides length by 2. michael@0: do { michael@0: if(uchar==*pos++) { michael@0: UStringTrieResult result; michael@0: int32_t node=*pos; michael@0: if(node&kValueIsFinal) { michael@0: // Leave the final value for getValue() to read. michael@0: result=USTRINGTRIE_FINAL_VALUE; michael@0: } else { michael@0: // Use the non-final value as the jump delta. michael@0: ++pos; michael@0: // int32_t delta=readValue(pos, node); michael@0: int32_t delta; michael@0: if(node=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; michael@0: } michael@0: pos_=pos; michael@0: return result; michael@0: } michael@0: --length; michael@0: pos=skipValue(pos); michael@0: } while(length>1); michael@0: if(uchar==*pos++) { michael@0: pos_=pos; michael@0: int32_t node=*pos; michael@0: return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; michael@0: } else { michael@0: stop(); michael@0: return USTRINGTRIE_NO_MATCH; michael@0: } michael@0: } michael@0: michael@0: UStringTrieResult michael@0: UCharsTrie::nextImpl(const UChar *pos, int32_t uchar) { michael@0: int32_t node=*pos++; michael@0: for(;;) { michael@0: if(node=kMinValueLead) ? michael@0: valueResult(node) : USTRINGTRIE_NO_VALUE; michael@0: } else { michael@0: // No match. michael@0: break; michael@0: } michael@0: } else if(node&kValueIsFinal) { michael@0: // No further matching units. michael@0: break; michael@0: } else { michael@0: // Skip intermediate value. michael@0: pos=skipNodeValue(pos, node); michael@0: node&=kNodeTypeMask; michael@0: } michael@0: } michael@0: stop(); michael@0: return USTRINGTRIE_NO_MATCH; michael@0: } michael@0: michael@0: UStringTrieResult michael@0: UCharsTrie::next(int32_t uchar) { michael@0: const UChar *pos=pos_; michael@0: if(pos==NULL) { michael@0: return USTRINGTRIE_NO_MATCH; michael@0: } michael@0: int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. michael@0: if(length>=0) { michael@0: // Remaining part of a linear-match node. michael@0: if(uchar==*pos++) { michael@0: remainingMatchLength_=--length; michael@0: pos_=pos; michael@0: int32_t node; michael@0: return (length<0 && (node=*pos)>=kMinValueLead) ? michael@0: valueResult(node) : USTRINGTRIE_NO_VALUE; michael@0: } else { michael@0: stop(); michael@0: return USTRINGTRIE_NO_MATCH; michael@0: } michael@0: } michael@0: return nextImpl(pos, uchar); michael@0: } michael@0: michael@0: UStringTrieResult michael@0: UCharsTrie::next(const UChar *s, int32_t sLength) { michael@0: if(sLength<0 ? *s==0 : sLength==0) { michael@0: // Empty input. michael@0: return current(); michael@0: } michael@0: const UChar *pos=pos_; michael@0: if(pos==NULL) { michael@0: return USTRINGTRIE_NO_MATCH; michael@0: } michael@0: int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. michael@0: for(;;) { michael@0: // Fetch the next input unit, if there is one. michael@0: // Continue a linear-match node without rechecking sLength<0. michael@0: int32_t uchar; michael@0: if(sLength<0) { michael@0: for(;;) { michael@0: if((uchar=*s++)==0) { michael@0: remainingMatchLength_=length; michael@0: pos_=pos; michael@0: int32_t node; michael@0: return (length<0 && (node=*pos)>=kMinValueLead) ? michael@0: valueResult(node) : USTRINGTRIE_NO_VALUE; michael@0: } michael@0: if(length<0) { michael@0: remainingMatchLength_=length; michael@0: break; michael@0: } michael@0: if(uchar!=*pos) { michael@0: stop(); michael@0: return USTRINGTRIE_NO_MATCH; michael@0: } michael@0: ++pos; michael@0: --length; michael@0: } michael@0: } else { michael@0: for(;;) { michael@0: if(sLength==0) { michael@0: remainingMatchLength_=length; michael@0: pos_=pos; michael@0: int32_t node; michael@0: return (length<0 && (node=*pos)>=kMinValueLead) ? michael@0: valueResult(node) : USTRINGTRIE_NO_VALUE; michael@0: } michael@0: uchar=*s++; michael@0: --sLength; michael@0: if(length<0) { michael@0: remainingMatchLength_=length; michael@0: break; michael@0: } michael@0: if(uchar!=*pos) { michael@0: stop(); michael@0: return USTRINGTRIE_NO_MATCH; michael@0: } michael@0: ++pos; michael@0: --length; michael@0: } michael@0: } michael@0: int32_t node=*pos++; michael@0: for(;;) { michael@0: if(nodekMaxBranchLinearSubNodeLength) { michael@0: ++pos; // ignore the comparison unit michael@0: if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) { michael@0: return NULL; michael@0: } michael@0: length=length-(length>>1); michael@0: pos=skipDelta(pos); michael@0: } michael@0: do { michael@0: ++pos; // ignore a comparison unit michael@0: // handle its value michael@0: int32_t node=*pos++; michael@0: UBool isFinal=(UBool)(node>>15); michael@0: node&=0x7fff; michael@0: int32_t value=readValue(pos, node); michael@0: pos=skipValue(pos, node); michael@0: if(isFinal) { michael@0: if(haveUniqueValue) { michael@0: if(value!=uniqueValue) { michael@0: return NULL; michael@0: } michael@0: } else { michael@0: uniqueValue=value; michael@0: haveUniqueValue=TRUE; michael@0: } michael@0: } else { michael@0: if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) { michael@0: return NULL; michael@0: } michael@0: haveUniqueValue=TRUE; michael@0: } michael@0: } while(--length>1); michael@0: return pos+1; // ignore the last comparison unit michael@0: } michael@0: michael@0: UBool michael@0: UCharsTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue) { michael@0: int32_t node=*pos++; michael@0: for(;;) { michael@0: if(node>15); michael@0: int32_t value; michael@0: if(isFinal) { michael@0: value=readValue(pos, node&0x7fff); michael@0: } else { michael@0: value=readNodeValue(pos, node); michael@0: } michael@0: if(haveUniqueValue) { michael@0: if(value!=uniqueValue) { michael@0: return FALSE; michael@0: } michael@0: } else { michael@0: uniqueValue=value; michael@0: haveUniqueValue=TRUE; michael@0: } michael@0: if(isFinal) { michael@0: return TRUE; michael@0: } michael@0: pos=skipNodeValue(pos, node); michael@0: node&=kNodeTypeMask; michael@0: } michael@0: } michael@0: } michael@0: michael@0: int32_t michael@0: UCharsTrie::getNextUChars(Appendable &out) const { michael@0: const UChar *pos=pos_; michael@0: if(pos==NULL) { michael@0: return 0; michael@0: } michael@0: if(remainingMatchLength_>=0) { michael@0: out.appendCodeUnit(*pos); // Next unit of a pending linear-match node. michael@0: return 1; michael@0: } michael@0: int32_t node=*pos++; michael@0: if(node>=kMinValueLead) { michael@0: if(node&kValueIsFinal) { michael@0: return 0; michael@0: } else { michael@0: pos=skipNodeValue(pos, node); michael@0: node&=kNodeTypeMask; michael@0: } michael@0: } michael@0: if(nodekMaxBranchLinearSubNodeLength) { michael@0: ++pos; // ignore the comparison unit michael@0: getNextBranchUChars(jumpByDelta(pos), length>>1, out); michael@0: length=length-(length>>1); michael@0: pos=skipDelta(pos); michael@0: } michael@0: do { michael@0: out.appendCodeUnit(*pos++); michael@0: pos=skipValue(pos); michael@0: } while(--length>1); michael@0: out.appendCodeUnit(*pos); michael@0: } michael@0: michael@0: U_NAMESPACE_END