intl/icu/source/common/bytestriebuilder.cpp

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations of how to convince FindCookie users to test
condition and pass a nullptr when disabling double key logic.

     1 /*
     2 *******************************************************************************
     3 *   Copyright (C) 2010-2012, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 *******************************************************************************
     6 *   file name:  bytestriebuilder.cpp
     7 *   encoding:   US-ASCII
     8 *   tab size:   8 (not used)
     9 *   indentation:4
    10 *
    11 *   created on: 2010sep25
    12 *   created by: Markus W. Scherer
    13 */
    15 #include "unicode/utypes.h"
    16 #include "unicode/bytestrie.h"
    17 #include "unicode/bytestriebuilder.h"
    18 #include "unicode/stringpiece.h"
    19 #include "charstr.h"
    20 #include "cmemory.h"
    21 #include "uhash.h"
    22 #include "uarrsort.h"
    23 #include "uassert.h"
    24 #include "ustr_imp.h"
    26 U_NAMESPACE_BEGIN
    28 /*
    29  * Note: This builder implementation stores (bytes, value) pairs with full copies
    30  * of the byte sequences, until the BytesTrie is built.
    31  * It might(!) take less memory if we collected the data in a temporary, dynamic trie.
    32  */
    34 class BytesTrieElement : public UMemory {
    35 public:
    36     // Use compiler's default constructor, initializes nothing.
    38     void setTo(const StringPiece &s, int32_t val, CharString &strings, UErrorCode &errorCode);
    40     StringPiece getString(const CharString &strings) const {
    41         int32_t offset=stringOffset;
    42         int32_t length;
    43         if(offset>=0) {
    44             length=(uint8_t)strings[offset++];
    45         } else {
    46             offset=~offset;
    47             length=((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
    48             offset+=2;
    49         }
    50         return StringPiece(strings.data()+offset, length);
    51     }
    52     int32_t getStringLength(const CharString &strings) const {
    53         int32_t offset=stringOffset;
    54         if(offset>=0) {
    55             return (uint8_t)strings[offset];
    56         } else {
    57             offset=~offset;
    58             return ((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
    59         }
    60     }
    62     char charAt(int32_t index, const CharString &strings) const { return data(strings)[index]; }
    64     int32_t getValue() const { return value; }
    66     int32_t compareStringTo(const BytesTrieElement &o, const CharString &strings) const;
    68 private:
    69     const char *data(const CharString &strings) const {
    70         int32_t offset=stringOffset;
    71         if(offset>=0) {
    72             ++offset;
    73         } else {
    74             offset=~offset+2;
    75         }
    76         return strings.data()+offset;
    77     }
    79     // If the stringOffset is non-negative, then the first strings byte contains
    80     // the string length.
    81     // If the stringOffset is negative, then the first two strings bytes contain
    82     // the string length (big-endian), and the offset needs to be bit-inverted.
    83     // (Compared with a stringLength field here, this saves 3 bytes per string for most strings.)
    84     int32_t stringOffset;
    85     int32_t value;
    86 };
    88 void
    89 BytesTrieElement::setTo(const StringPiece &s, int32_t val,
    90                         CharString &strings, UErrorCode &errorCode) {
    91     if(U_FAILURE(errorCode)) {
    92         return;
    93     }
    94     int32_t length=s.length();
    95     if(length>0xffff) {
    96         // Too long: We store the length in 1 or 2 bytes.
    97         errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    98         return;
    99     }
   100     int32_t offset=strings.length();
   101     if(length>0xff) {
   102         offset=~offset;
   103         strings.append((char)(length>>8), errorCode);
   104     }
   105     strings.append((char)length, errorCode);
   106     stringOffset=offset;
   107     value=val;
   108     strings.append(s, errorCode);
   109 }
   111 int32_t
   112 BytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharString &strings) const {
   113     // TODO: add StringPiece::compare(), see ticket #8187
   114     StringPiece thisString=getString(strings);
   115     StringPiece otherString=other.getString(strings);
   116     int32_t lengthDiff=thisString.length()-otherString.length();
   117     int32_t commonLength;
   118     if(lengthDiff<=0) {
   119         commonLength=thisString.length();
   120     } else {
   121         commonLength=otherString.length();
   122     }
   123     int32_t diff=uprv_memcmp(thisString.data(), otherString.data(), commonLength);
   124     return diff!=0 ? diff : lengthDiff;
   125 }
   127 BytesTrieBuilder::BytesTrieBuilder(UErrorCode &errorCode)
   128         : strings(NULL), elements(NULL), elementsCapacity(0), elementsLength(0),
   129           bytes(NULL), bytesCapacity(0), bytesLength(0) {
   130     if(U_FAILURE(errorCode)) {
   131         return;
   132     }
   133     strings=new CharString();
   134     if(strings==NULL) {
   135         errorCode=U_MEMORY_ALLOCATION_ERROR;
   136     }
   137 }
   139 BytesTrieBuilder::~BytesTrieBuilder() {
   140     delete strings;
   141     delete[] elements;
   142     uprv_free(bytes);
   143 }
   145 BytesTrieBuilder &
   146 BytesTrieBuilder::add(const StringPiece &s, int32_t value, UErrorCode &errorCode) {
   147     if(U_FAILURE(errorCode)) {
   148         return *this;
   149     }
   150     if(bytesLength>0) {
   151         // Cannot add elements after building.
   152         errorCode=U_NO_WRITE_PERMISSION;
   153         return *this;
   154     }
   155     if(elementsLength==elementsCapacity) {
   156         int32_t newCapacity;
   157         if(elementsCapacity==0) {
   158             newCapacity=1024;
   159         } else {
   160             newCapacity=4*elementsCapacity;
   161         }
   162         BytesTrieElement *newElements=new BytesTrieElement[newCapacity];
   163         if(newElements==NULL) {
   164             errorCode=U_MEMORY_ALLOCATION_ERROR;
   165             return *this; // error instead of dereferencing null
   166         }
   167         if(elementsLength>0) {
   168             uprv_memcpy(newElements, elements, elementsLength*sizeof(BytesTrieElement));
   169         }
   170         delete[] elements;
   171         elements=newElements;
   172         elementsCapacity=newCapacity;
   173     }
   174     elements[elementsLength++].setTo(s, value, *strings, errorCode);
   175     return *this;
   176 }
   178 U_CDECL_BEGIN
   180 static int32_t U_CALLCONV
   181 compareElementStrings(const void *context, const void *left, const void *right) {
   182     const CharString *strings=static_cast<const CharString *>(context);
   183     const BytesTrieElement *leftElement=static_cast<const BytesTrieElement *>(left);
   184     const BytesTrieElement *rightElement=static_cast<const BytesTrieElement *>(right);
   185     return leftElement->compareStringTo(*rightElement, *strings);
   186 }
   188 U_CDECL_END
   190 BytesTrie *
   191 BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
   192     buildBytes(buildOption, errorCode);
   193     BytesTrie *newTrie=NULL;
   194     if(U_SUCCESS(errorCode)) {
   195         newTrie=new BytesTrie(bytes, bytes+(bytesCapacity-bytesLength));
   196         if(newTrie==NULL) {
   197             errorCode=U_MEMORY_ALLOCATION_ERROR;
   198         } else {
   199             bytes=NULL;  // The new trie now owns the array.
   200             bytesCapacity=0;
   201         }
   202     }
   203     return newTrie;
   204 }
   206 StringPiece
   207 BytesTrieBuilder::buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
   208     buildBytes(buildOption, errorCode);
   209     StringPiece result;
   210     if(U_SUCCESS(errorCode)) {
   211         result.set(bytes+(bytesCapacity-bytesLength), bytesLength);
   212     }
   213     return result;
   214 }
   216 void
   217 BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
   218     if(U_FAILURE(errorCode)) {
   219         return;
   220     }
   221     if(bytes!=NULL && bytesLength>0) {
   222         // Already built.
   223         return;
   224     }
   225     if(bytesLength==0) {
   226         if(elementsLength==0) {
   227             errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
   228             return;
   229         }
   230         uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement),
   231                       compareElementStrings, strings,
   232                       FALSE,  // need not be a stable sort
   233                       &errorCode);
   234         if(U_FAILURE(errorCode)) {
   235             return;
   236         }
   237         // Duplicate strings are not allowed.
   238         StringPiece prev=elements[0].getString(*strings);
   239         for(int32_t i=1; i<elementsLength; ++i) {
   240             StringPiece current=elements[i].getString(*strings);
   241             if(prev==current) {
   242                 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
   243                 return;
   244             }
   245             prev=current;
   246         }
   247     }
   248     // Create and byte-serialize the trie for the elements.
   249     bytesLength=0;
   250     int32_t capacity=strings->length();
   251     if(capacity<1024) {
   252         capacity=1024;
   253     }
   254     if(bytesCapacity<capacity) {
   255         uprv_free(bytes);
   256         bytes=static_cast<char *>(uprv_malloc(capacity));
   257         if(bytes==NULL) {
   258             errorCode=U_MEMORY_ALLOCATION_ERROR;
   259             bytesCapacity=0;
   260             return;
   261         }
   262         bytesCapacity=capacity;
   263     }
   264     StringTrieBuilder::build(buildOption, elementsLength, errorCode);
   265     if(bytes==NULL) {
   266         errorCode=U_MEMORY_ALLOCATION_ERROR;
   267     }
   268 }
   270 BytesTrieBuilder &
   271 BytesTrieBuilder::clear() {
   272     strings->clear();
   273     elementsLength=0;
   274     bytesLength=0;
   275     return *this;
   276 }
   278 int32_t
   279 BytesTrieBuilder::getElementStringLength(int32_t i) const {
   280     return elements[i].getStringLength(*strings);
   281 }
   283 UChar
   284 BytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const {
   285     return (uint8_t)elements[i].charAt(byteIndex, *strings);
   286 }
   288 int32_t
   289 BytesTrieBuilder::getElementValue(int32_t i) const {
   290     return elements[i].getValue();
   291 }
   293 int32_t
   294 BytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const {
   295     const BytesTrieElement &firstElement=elements[first];
   296     const BytesTrieElement &lastElement=elements[last];
   297     int32_t minStringLength=firstElement.getStringLength(*strings);
   298     while(++byteIndex<minStringLength &&
   299             firstElement.charAt(byteIndex, *strings)==
   300             lastElement.charAt(byteIndex, *strings)) {}
   301     return byteIndex;
   302 }
   304 int32_t
   305 BytesTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const {
   306     int32_t length=0;  // Number of different bytes at byteIndex.
   307     int32_t i=start;
   308     do {
   309         char byte=elements[i++].charAt(byteIndex, *strings);
   310         while(i<limit && byte==elements[i].charAt(byteIndex, *strings)) {
   311             ++i;
   312         }
   313         ++length;
   314     } while(i<limit);
   315     return length;
   316 }
   318 int32_t
   319 BytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const {
   320     do {
   321         char byte=elements[i++].charAt(byteIndex, *strings);
   322         while(byte==elements[i].charAt(byteIndex, *strings)) {
   323             ++i;
   324         }
   325     } while(--count>0);
   326     return i;
   327 }
   329 int32_t
   330 BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const {
   331     char b=(char)byte;
   332     while(b==elements[i].charAt(byteIndex, *strings)) {
   333         ++i;
   334     }
   335     return i;
   336 }
   338 BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
   339         : LinearMatchNode(len, nextNode), s(bytes) {
   340     hash=hash*37+ustr_hashCharsN(bytes, len);
   341 }
   343 UBool
   344 BytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
   345     if(this==&other) {
   346         return TRUE;
   347     }
   348     if(!LinearMatchNode::operator==(other)) {
   349         return FALSE;
   350     }
   351     const BTLinearMatchNode &o=(const BTLinearMatchNode &)other;
   352     return 0==uprv_memcmp(s, o.s, length);
   353 }
   355 void
   356 BytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) {
   357     BytesTrieBuilder &b=(BytesTrieBuilder &)builder;
   358     next->write(builder);
   359     b.write(s, length);
   360     offset=b.write(b.getMinLinearMatch()+length-1);
   361 }
   363 StringTrieBuilder::Node *
   364 BytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
   365                                         Node *nextNode) const {
   366     return new BTLinearMatchNode(
   367             elements[i].getString(*strings).data()+byteIndex,
   368             length,
   369             nextNode);
   370 }
   372 UBool
   373 BytesTrieBuilder::ensureCapacity(int32_t length) {
   374     if(bytes==NULL) {
   375         return FALSE;  // previous memory allocation had failed
   376     }
   377     if(length>bytesCapacity) {
   378         int32_t newCapacity=bytesCapacity;
   379         do {
   380             newCapacity*=2;
   381         } while(newCapacity<=length);
   382         char *newBytes=static_cast<char *>(uprv_malloc(newCapacity));
   383         if(newBytes==NULL) {
   384             // unable to allocate memory
   385             uprv_free(bytes);
   386             bytes=NULL;
   387             bytesCapacity=0;
   388             return FALSE;
   389         }
   390         uprv_memcpy(newBytes+(newCapacity-bytesLength),
   391                     bytes+(bytesCapacity-bytesLength), bytesLength);
   392         uprv_free(bytes);
   393         bytes=newBytes;
   394         bytesCapacity=newCapacity;
   395     }
   396     return TRUE;
   397 }
   399 int32_t
   400 BytesTrieBuilder::write(int32_t byte) {
   401     int32_t newLength=bytesLength+1;
   402     if(ensureCapacity(newLength)) {
   403         bytesLength=newLength;
   404         bytes[bytesCapacity-bytesLength]=(char)byte;
   405     }
   406     return bytesLength;
   407 }
   409 int32_t
   410 BytesTrieBuilder::write(const char *b, int32_t length) {
   411     int32_t newLength=bytesLength+length;
   412     if(ensureCapacity(newLength)) {
   413         bytesLength=newLength;
   414         uprv_memcpy(bytes+(bytesCapacity-bytesLength), b, length);
   415     }
   416     return bytesLength;
   417 }
   419 int32_t
   420 BytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) {
   421     return write(elements[i].getString(*strings).data()+byteIndex, length);
   422 }
   424 int32_t
   425 BytesTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) {
   426     if(0<=i && i<=BytesTrie::kMaxOneByteValue) {
   427         return write(((BytesTrie::kMinOneByteValueLead+i)<<1)|isFinal);
   428     }
   429     char intBytes[5];
   430     int32_t length=1;
   431     if(i<0 || i>0xffffff) {
   432         intBytes[0]=(char)BytesTrie::kFiveByteValueLead;
   433         intBytes[1]=(char)((uint32_t)i>>24);
   434         intBytes[2]=(char)((uint32_t)i>>16);
   435         intBytes[3]=(char)((uint32_t)i>>8);
   436         intBytes[4]=(char)i;
   437         length=5;
   438     // } else if(i<=BytesTrie::kMaxOneByteValue) {
   439     //     intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i);
   440     } else {
   441         if(i<=BytesTrie::kMaxTwoByteValue) {
   442             intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8));
   443         } else {
   444             if(i<=BytesTrie::kMaxThreeByteValue) {
   445                 intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16));
   446             } else {
   447                 intBytes[0]=(char)BytesTrie::kFourByteValueLead;
   448                 intBytes[1]=(char)(i>>16);
   449                 length=2;
   450             }
   451             intBytes[length++]=(char)(i>>8);
   452         }
   453         intBytes[length++]=(char)i;
   454     }
   455     intBytes[0]=(char)((intBytes[0]<<1)|isFinal);
   456     return write(intBytes, length);
   457 }
   459 int32_t
   460 BytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
   461     int32_t offset=write(node);
   462     if(hasValue) {
   463         offset=writeValueAndFinal(value, FALSE);
   464     }
   465     return offset;
   466 }
   468 int32_t
   469 BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
   470     int32_t i=bytesLength-jumpTarget;
   471     U_ASSERT(i>=0);
   472     if(i<=BytesTrie::kMaxOneByteDelta) {
   473         return write(i);
   474     }
   475     char intBytes[5];
   476     int32_t length;
   477     if(i<=BytesTrie::kMaxTwoByteDelta) {
   478         intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
   479         length=1;
   480     } else {
   481         if(i<=BytesTrie::kMaxThreeByteDelta) {
   482             intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
   483             length=2;
   484         } else {
   485             if(i<=0xffffff) {
   486                 intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
   487                 length=3;
   488             } else {
   489                 intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
   490                 intBytes[1]=(char)(i>>24);
   491                 length=4;
   492             }
   493             intBytes[1]=(char)(i>>16);
   494         }
   495         intBytes[1]=(char)(i>>8);
   496     }
   497     intBytes[length++]=(char)i;
   498     return write(intBytes, length);
   499 }
   501 U_NAMESPACE_END

mercurial