michael@0: /* michael@0: ******************************************************************************* michael@0: * Copyright (C) 2010-2011, International Business Machines michael@0: * Corporation and others. All Rights Reserved. michael@0: ******************************************************************************* michael@0: * file name: bytestrie.cpp michael@0: * encoding: US-ASCII michael@0: * tab size: 8 (not used) michael@0: * indentation:4 michael@0: * michael@0: * created on: 2010sep25 michael@0: * created by: Markus W. Scherer michael@0: */ michael@0: michael@0: #include "unicode/utypes.h" michael@0: #include "unicode/bytestream.h" michael@0: #include "unicode/bytestrie.h" michael@0: #include "unicode/uobject.h" michael@0: #include "cmemory.h" michael@0: #include "uassert.h" michael@0: michael@0: U_NAMESPACE_BEGIN michael@0: michael@0: BytesTrie::~BytesTrie() { michael@0: uprv_free(ownedArray_); michael@0: } michael@0: michael@0: // lead byte already shifted right by 1. michael@0: int32_t michael@0: BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) { michael@0: int32_t value; michael@0: if(leadByte=kMinValueLead) ? michael@0: valueResult(node) : USTRINGTRIE_NO_VALUE; michael@0: } michael@0: } michael@0: michael@0: UStringTrieResult michael@0: BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) { michael@0: // Branch according to the current byte. michael@0: if(length==0) { michael@0: length=*pos++; michael@0: } michael@0: ++length; michael@0: // The length of the branch is the number of bytes to select from. michael@0: // The data structure encodes a binary search. michael@0: while(length>kMaxBranchLinearSubNodeLength) { michael@0: if(inByte<*pos++) { michael@0: length>>=1; michael@0: pos=jumpByDelta(pos); michael@0: } else { michael@0: length=length-(length>>1); michael@0: pos=skipDelta(pos); michael@0: } michael@0: } michael@0: // Drop down to linear search for the last few bytes. michael@0: // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3 michael@0: // and divides length by 2. michael@0: do { michael@0: if(inByte==*pos++) { michael@0: UStringTrieResult result; michael@0: int32_t node=*pos; michael@0: U_ASSERT(node>=kMinValueLead); michael@0: if(node&kValueIsFinal) { michael@0: // Leave the final value for getValue() to read. michael@0: result=USTRINGTRIE_FINAL_VALUE; michael@0: } else { michael@0: // Use the non-final value as the jump delta. michael@0: ++pos; michael@0: // int32_t delta=readValue(pos, node>>1); michael@0: node>>=1; michael@0: int32_t delta; michael@0: if(node=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; michael@0: } michael@0: pos_=pos; michael@0: return result; michael@0: } michael@0: --length; michael@0: pos=skipValue(pos); michael@0: } while(length>1); michael@0: if(inByte==*pos++) { michael@0: pos_=pos; michael@0: int32_t node=*pos; michael@0: return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; michael@0: } else { michael@0: stop(); michael@0: return USTRINGTRIE_NO_MATCH; michael@0: } michael@0: } michael@0: michael@0: UStringTrieResult michael@0: BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) { michael@0: for(;;) { michael@0: int32_t node=*pos++; michael@0: if(node=kMinValueLead) ? michael@0: valueResult(node) : USTRINGTRIE_NO_VALUE; michael@0: } else { michael@0: // No match. michael@0: break; michael@0: } michael@0: } else if(node&kValueIsFinal) { michael@0: // No further matching bytes. michael@0: break; michael@0: } else { michael@0: // Skip intermediate value. michael@0: pos=skipValue(pos, node); michael@0: // The next node must not also be a value node. michael@0: U_ASSERT(*pos=0) { michael@0: // Remaining part of a linear-match node. michael@0: if(inByte==*pos++) { michael@0: remainingMatchLength_=--length; michael@0: pos_=pos; michael@0: int32_t node; michael@0: return (length<0 && (node=*pos)>=kMinValueLead) ? michael@0: valueResult(node) : USTRINGTRIE_NO_VALUE; michael@0: } else { michael@0: stop(); michael@0: return USTRINGTRIE_NO_MATCH; michael@0: } michael@0: } michael@0: return nextImpl(pos, inByte); michael@0: } michael@0: michael@0: UStringTrieResult michael@0: BytesTrie::next(const char *s, int32_t sLength) { michael@0: if(sLength<0 ? *s==0 : sLength==0) { michael@0: // Empty input. michael@0: return current(); michael@0: } michael@0: const uint8_t *pos=pos_; michael@0: if(pos==NULL) { michael@0: return USTRINGTRIE_NO_MATCH; michael@0: } michael@0: int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. michael@0: for(;;) { michael@0: // Fetch the next input byte, if there is one. michael@0: // Continue a linear-match node without rechecking sLength<0. michael@0: int32_t inByte; michael@0: if(sLength<0) { michael@0: for(;;) { michael@0: if((inByte=*s++)==0) { michael@0: remainingMatchLength_=length; michael@0: pos_=pos; michael@0: int32_t node; michael@0: return (length<0 && (node=*pos)>=kMinValueLead) ? michael@0: valueResult(node) : USTRINGTRIE_NO_VALUE; michael@0: } michael@0: if(length<0) { michael@0: remainingMatchLength_=length; michael@0: break; michael@0: } michael@0: if(inByte!=*pos) { michael@0: stop(); michael@0: return USTRINGTRIE_NO_MATCH; michael@0: } michael@0: ++pos; michael@0: --length; michael@0: } michael@0: } else { michael@0: for(;;) { michael@0: if(sLength==0) { michael@0: remainingMatchLength_=length; michael@0: pos_=pos; michael@0: int32_t node; michael@0: return (length<0 && (node=*pos)>=kMinValueLead) ? michael@0: valueResult(node) : USTRINGTRIE_NO_VALUE; michael@0: } michael@0: inByte=*s++; michael@0: --sLength; michael@0: if(length<0) { michael@0: remainingMatchLength_=length; michael@0: break; michael@0: } michael@0: if(inByte!=*pos) { michael@0: stop(); michael@0: return USTRINGTRIE_NO_MATCH; michael@0: } michael@0: ++pos; michael@0: --length; michael@0: } michael@0: } michael@0: for(;;) { michael@0: int32_t node=*pos++; michael@0: if(nodekMaxBranchLinearSubNodeLength) { michael@0: ++pos; // ignore the comparison byte michael@0: if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) { michael@0: return NULL; michael@0: } michael@0: length=length-(length>>1); michael@0: pos=skipDelta(pos); michael@0: } michael@0: do { michael@0: ++pos; // ignore a comparison byte michael@0: // handle its value michael@0: int32_t node=*pos++; michael@0: UBool isFinal=(UBool)(node&kValueIsFinal); michael@0: int32_t value=readValue(pos, node>>1); michael@0: pos=skipValue(pos, node); michael@0: if(isFinal) { michael@0: if(haveUniqueValue) { michael@0: if(value!=uniqueValue) { michael@0: return NULL; michael@0: } michael@0: } else { michael@0: uniqueValue=value; michael@0: haveUniqueValue=TRUE; michael@0: } michael@0: } else { michael@0: if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) { michael@0: return NULL; michael@0: } michael@0: haveUniqueValue=TRUE; michael@0: } michael@0: } while(--length>1); michael@0: return pos+1; // ignore the last comparison byte michael@0: } michael@0: michael@0: UBool michael@0: BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) { michael@0: for(;;) { michael@0: int32_t node=*pos++; michael@0: if(node>1); michael@0: if(haveUniqueValue) { michael@0: if(value!=uniqueValue) { michael@0: return FALSE; michael@0: } michael@0: } else { michael@0: uniqueValue=value; michael@0: haveUniqueValue=TRUE; michael@0: } michael@0: if(isFinal) { michael@0: return TRUE; michael@0: } michael@0: pos=skipValue(pos, node); michael@0: } michael@0: } michael@0: } michael@0: michael@0: int32_t michael@0: BytesTrie::getNextBytes(ByteSink &out) const { michael@0: const uint8_t *pos=pos_; michael@0: if(pos==NULL) { michael@0: return 0; michael@0: } michael@0: if(remainingMatchLength_>=0) { michael@0: append(out, *pos); // Next byte of a pending linear-match node. michael@0: return 1; michael@0: } michael@0: int32_t node=*pos++; michael@0: if(node>=kMinValueLead) { michael@0: if(node&kValueIsFinal) { michael@0: return 0; michael@0: } else { michael@0: pos=skipValue(pos, node); michael@0: node=*pos++; michael@0: U_ASSERT(nodekMaxBranchLinearSubNodeLength) { michael@0: ++pos; // ignore the comparison byte michael@0: getNextBranchBytes(jumpByDelta(pos), length>>1, out); michael@0: length=length-(length>>1); michael@0: pos=skipDelta(pos); michael@0: } michael@0: do { michael@0: append(out, *pos++); michael@0: pos=skipValue(pos); michael@0: } while(--length>1); michael@0: append(out, *pos); michael@0: } michael@0: michael@0: void michael@0: BytesTrie::append(ByteSink &out, int c) { michael@0: char ch=(char)c; michael@0: out.Append(&ch, 1); michael@0: } michael@0: michael@0: U_NAMESPACE_END