intl/icu/source/common/bytestrie.cpp

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations of how to convince FindCookie users to test
condition and pass a nullptr when disabling double key logic.

     1 /*
     2 *******************************************************************************
     3 *   Copyright (C) 2010-2011, International Business Machines
     4 *   Corporation and others.  All Rights Reserved.
     5 *******************************************************************************
     6 *   file name:  bytestrie.cpp
     7 *   encoding:   US-ASCII
     8 *   tab size:   8 (not used)
     9 *   indentation:4
    10 *
    11 *   created on: 2010sep25
    12 *   created by: Markus W. Scherer
    13 */
    15 #include "unicode/utypes.h"
    16 #include "unicode/bytestream.h"
    17 #include "unicode/bytestrie.h"
    18 #include "unicode/uobject.h"
    19 #include "cmemory.h"
    20 #include "uassert.h"
    22 U_NAMESPACE_BEGIN
    24 BytesTrie::~BytesTrie() {
    25     uprv_free(ownedArray_);
    26 }
    28 // lead byte already shifted right by 1.
    29 int32_t
    30 BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) {
    31     int32_t value;
    32     if(leadByte<kMinTwoByteValueLead) {
    33         value=leadByte-kMinOneByteValueLead;
    34     } else if(leadByte<kMinThreeByteValueLead) {
    35         value=((leadByte-kMinTwoByteValueLead)<<8)|*pos;
    36     } else if(leadByte<kFourByteValueLead) {
    37         value=((leadByte-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
    38     } else if(leadByte==kFourByteValueLead) {
    39         value=(pos[0]<<16)|(pos[1]<<8)|pos[2];
    40     } else {
    41         value=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
    42     }
    43     return value;
    44 }
    46 const uint8_t *
    47 BytesTrie::jumpByDelta(const uint8_t *pos) {
    48     int32_t delta=*pos++;
    49     if(delta<kMinTwoByteDeltaLead) {
    50         // nothing to do
    51     } else if(delta<kMinThreeByteDeltaLead) {
    52         delta=((delta-kMinTwoByteDeltaLead)<<8)|*pos++;
    53     } else if(delta<kFourByteDeltaLead) {
    54         delta=((delta-kMinThreeByteDeltaLead)<<16)|(pos[0]<<8)|pos[1];
    55         pos+=2;
    56     } else if(delta==kFourByteDeltaLead) {
    57         delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
    58         pos+=3;
    59     } else {
    60         delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
    61         pos+=4;
    62     }
    63     return pos+delta;
    64 }
    66 UStringTrieResult
    67 BytesTrie::current() const {
    68     const uint8_t *pos=pos_;
    69     if(pos==NULL) {
    70         return USTRINGTRIE_NO_MATCH;
    71     } else {
    72         int32_t node;
    73         return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
    74                 valueResult(node) : USTRINGTRIE_NO_VALUE;
    75     }
    76 }
    78 UStringTrieResult
    79 BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
    80     // Branch according to the current byte.
    81     if(length==0) {
    82         length=*pos++;
    83     }
    84     ++length;
    85     // The length of the branch is the number of bytes to select from.
    86     // The data structure encodes a binary search.
    87     while(length>kMaxBranchLinearSubNodeLength) {
    88         if(inByte<*pos++) {
    89             length>>=1;
    90             pos=jumpByDelta(pos);
    91         } else {
    92             length=length-(length>>1);
    93             pos=skipDelta(pos);
    94         }
    95     }
    96     // Drop down to linear search for the last few bytes.
    97     // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
    98     // and divides length by 2.
    99     do {
   100         if(inByte==*pos++) {
   101             UStringTrieResult result;
   102             int32_t node=*pos;
   103             U_ASSERT(node>=kMinValueLead);
   104             if(node&kValueIsFinal) {
   105                 // Leave the final value for getValue() to read.
   106                 result=USTRINGTRIE_FINAL_VALUE;
   107             } else {
   108                 // Use the non-final value as the jump delta.
   109                 ++pos;
   110                 // int32_t delta=readValue(pos, node>>1);
   111                 node>>=1;
   112                 int32_t delta;
   113                 if(node<kMinTwoByteValueLead) {
   114                     delta=node-kMinOneByteValueLead;
   115                 } else if(node<kMinThreeByteValueLead) {
   116                     delta=((node-kMinTwoByteValueLead)<<8)|*pos++;
   117                 } else if(node<kFourByteValueLead) {
   118                     delta=((node-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
   119                     pos+=2;
   120                 } else if(node==kFourByteValueLead) {
   121                     delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
   122                     pos+=3;
   123                 } else {
   124                     delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
   125                     pos+=4;
   126                 }
   127                 // end readValue()
   128                 pos+=delta;
   129                 node=*pos;
   130                 result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
   131             }
   132             pos_=pos;
   133             return result;
   134         }
   135         --length;
   136         pos=skipValue(pos);
   137     } while(length>1);
   138     if(inByte==*pos++) {
   139         pos_=pos;
   140         int32_t node=*pos;
   141         return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
   142     } else {
   143         stop();
   144         return USTRINGTRIE_NO_MATCH;
   145     }
   146 }
   148 UStringTrieResult
   149 BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
   150     for(;;) {
   151         int32_t node=*pos++;
   152         if(node<kMinLinearMatch) {
   153             return branchNext(pos, node, inByte);
   154         } else if(node<kMinValueLead) {
   155             // Match the first of length+1 bytes.
   156             int32_t length=node-kMinLinearMatch;  // Actual match length minus 1.
   157             if(inByte==*pos++) {
   158                 remainingMatchLength_=--length;
   159                 pos_=pos;
   160                 return (length<0 && (node=*pos)>=kMinValueLead) ?
   161                         valueResult(node) : USTRINGTRIE_NO_VALUE;
   162             } else {
   163                 // No match.
   164                 break;
   165             }
   166         } else if(node&kValueIsFinal) {
   167             // No further matching bytes.
   168             break;
   169         } else {
   170             // Skip intermediate value.
   171             pos=skipValue(pos, node);
   172             // The next node must not also be a value node.
   173             U_ASSERT(*pos<kMinValueLead);
   174         }
   175     }
   176     stop();
   177     return USTRINGTRIE_NO_MATCH;
   178 }
   180 UStringTrieResult
   181 BytesTrie::next(int32_t inByte) {
   182     const uint8_t *pos=pos_;
   183     if(pos==NULL) {
   184         return USTRINGTRIE_NO_MATCH;
   185     }
   186     if(inByte<0) {
   187         inByte+=0x100;
   188     }
   189     int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
   190     if(length>=0) {
   191         // Remaining part of a linear-match node.
   192         if(inByte==*pos++) {
   193             remainingMatchLength_=--length;
   194             pos_=pos;
   195             int32_t node;
   196             return (length<0 && (node=*pos)>=kMinValueLead) ?
   197                     valueResult(node) : USTRINGTRIE_NO_VALUE;
   198         } else {
   199             stop();
   200             return USTRINGTRIE_NO_MATCH;
   201         }
   202     }
   203     return nextImpl(pos, inByte);
   204 }
   206 UStringTrieResult
   207 BytesTrie::next(const char *s, int32_t sLength) {
   208     if(sLength<0 ? *s==0 : sLength==0) {
   209         // Empty input.
   210         return current();
   211     }
   212     const uint8_t *pos=pos_;
   213     if(pos==NULL) {
   214         return USTRINGTRIE_NO_MATCH;
   215     }
   216     int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
   217     for(;;) {
   218         // Fetch the next input byte, if there is one.
   219         // Continue a linear-match node without rechecking sLength<0.
   220         int32_t inByte;
   221         if(sLength<0) {
   222             for(;;) {
   223                 if((inByte=*s++)==0) {
   224                     remainingMatchLength_=length;
   225                     pos_=pos;
   226                     int32_t node;
   227                     return (length<0 && (node=*pos)>=kMinValueLead) ?
   228                             valueResult(node) : USTRINGTRIE_NO_VALUE;
   229                 }
   230                 if(length<0) {
   231                     remainingMatchLength_=length;
   232                     break;
   233                 }
   234                 if(inByte!=*pos) {
   235                     stop();
   236                     return USTRINGTRIE_NO_MATCH;
   237                 }
   238                 ++pos;
   239                 --length;
   240             }
   241         } else {
   242             for(;;) {
   243                 if(sLength==0) {
   244                     remainingMatchLength_=length;
   245                     pos_=pos;
   246                     int32_t node;
   247                     return (length<0 && (node=*pos)>=kMinValueLead) ?
   248                             valueResult(node) : USTRINGTRIE_NO_VALUE;
   249                 }
   250                 inByte=*s++;
   251                 --sLength;
   252                 if(length<0) {
   253                     remainingMatchLength_=length;
   254                     break;
   255                 }
   256                 if(inByte!=*pos) {
   257                     stop();
   258                     return USTRINGTRIE_NO_MATCH;
   259                 }
   260                 ++pos;
   261                 --length;
   262             }
   263         }
   264         for(;;) {
   265             int32_t node=*pos++;
   266             if(node<kMinLinearMatch) {
   267                 UStringTrieResult result=branchNext(pos, node, inByte);
   268                 if(result==USTRINGTRIE_NO_MATCH) {
   269                     return USTRINGTRIE_NO_MATCH;
   270                 }
   271                 // Fetch the next input byte, if there is one.
   272                 if(sLength<0) {
   273                     if((inByte=*s++)==0) {
   274                         return result;
   275                     }
   276                 } else {
   277                     if(sLength==0) {
   278                         return result;
   279                     }
   280                     inByte=*s++;
   281                     --sLength;
   282                 }
   283                 if(result==USTRINGTRIE_FINAL_VALUE) {
   284                     // No further matching bytes.
   285                     stop();
   286                     return USTRINGTRIE_NO_MATCH;
   287                 }
   288                 pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
   289             } else if(node<kMinValueLead) {
   290                 // Match length+1 bytes.
   291                 length=node-kMinLinearMatch;  // Actual match length minus 1.
   292                 if(inByte!=*pos) {
   293                     stop();
   294                     return USTRINGTRIE_NO_MATCH;
   295                 }
   296                 ++pos;
   297                 --length;
   298                 break;
   299             } else if(node&kValueIsFinal) {
   300                 // No further matching bytes.
   301                 stop();
   302                 return USTRINGTRIE_NO_MATCH;
   303             } else {
   304                 // Skip intermediate value.
   305                 pos=skipValue(pos, node);
   306                 // The next node must not also be a value node.
   307                 U_ASSERT(*pos<kMinValueLead);
   308             }
   309         }
   310     }
   311 }
   313 const uint8_t *
   314 BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
   315                                      UBool haveUniqueValue, int32_t &uniqueValue) {
   316     while(length>kMaxBranchLinearSubNodeLength) {
   317         ++pos;  // ignore the comparison byte
   318         if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
   319             return NULL;
   320         }
   321         length=length-(length>>1);
   322         pos=skipDelta(pos);
   323     }
   324     do {
   325         ++pos;  // ignore a comparison byte
   326         // handle its value
   327         int32_t node=*pos++;
   328         UBool isFinal=(UBool)(node&kValueIsFinal);
   329         int32_t value=readValue(pos, node>>1);
   330         pos=skipValue(pos, node);
   331         if(isFinal) {
   332             if(haveUniqueValue) {
   333                 if(value!=uniqueValue) {
   334                     return NULL;
   335                 }
   336             } else {
   337                 uniqueValue=value;
   338                 haveUniqueValue=TRUE;
   339             }
   340         } else {
   341             if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
   342                 return NULL;
   343             }
   344             haveUniqueValue=TRUE;
   345         }
   346     } while(--length>1);
   347     return pos+1;  // ignore the last comparison byte
   348 }
   350 UBool
   351 BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
   352     for(;;) {
   353         int32_t node=*pos++;
   354         if(node<kMinLinearMatch) {
   355             if(node==0) {
   356                 node=*pos++;
   357             }
   358             pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
   359             if(pos==NULL) {
   360                 return FALSE;
   361             }
   362             haveUniqueValue=TRUE;
   363         } else if(node<kMinValueLead) {
   364             // linear-match node
   365             pos+=node-kMinLinearMatch+1;  // Ignore the match bytes.
   366         } else {
   367             UBool isFinal=(UBool)(node&kValueIsFinal);
   368             int32_t value=readValue(pos, node>>1);
   369             if(haveUniqueValue) {
   370                 if(value!=uniqueValue) {
   371                     return FALSE;
   372                 }
   373             } else {
   374                 uniqueValue=value;
   375                 haveUniqueValue=TRUE;
   376             }
   377             if(isFinal) {
   378                 return TRUE;
   379             }
   380             pos=skipValue(pos, node);
   381         }
   382     }
   383 }
   385 int32_t
   386 BytesTrie::getNextBytes(ByteSink &out) const {
   387     const uint8_t *pos=pos_;
   388     if(pos==NULL) {
   389         return 0;
   390     }
   391     if(remainingMatchLength_>=0) {
   392         append(out, *pos);  // Next byte of a pending linear-match node.
   393         return 1;
   394     }
   395     int32_t node=*pos++;
   396     if(node>=kMinValueLead) {
   397         if(node&kValueIsFinal) {
   398             return 0;
   399         } else {
   400             pos=skipValue(pos, node);
   401             node=*pos++;
   402             U_ASSERT(node<kMinValueLead);
   403         }
   404     }
   405     if(node<kMinLinearMatch) {
   406         if(node==0) {
   407             node=*pos++;
   408         }
   409         getNextBranchBytes(pos, ++node, out);
   410         return node;
   411     } else {
   412         // First byte of the linear-match node.
   413         append(out, *pos);
   414         return 1;
   415     }
   416 }
   418 void
   419 BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {
   420     while(length>kMaxBranchLinearSubNodeLength) {
   421         ++pos;  // ignore the comparison byte
   422         getNextBranchBytes(jumpByDelta(pos), length>>1, out);
   423         length=length-(length>>1);
   424         pos=skipDelta(pos);
   425     }
   426     do {
   427         append(out, *pos++);
   428         pos=skipValue(pos);
   429     } while(--length>1);
   430     append(out, *pos);
   431 }
   433 void
   434 BytesTrie::append(ByteSink &out, int c) {
   435     char ch=(char)c;
   436     out.Append(&ch, 1);
   437 }
   439 U_NAMESPACE_END

mercurial