intl/icu/source/common/rbbirb.cpp

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double key logic according to private browsing
mode or the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts. Some reservations
remain about how to convince FindCookie callers to test the condition
and pass a nullptr when double key logic is disabled.

     1 //
     2 //  file:  rbbirb.cpp
     3 //
     4 //  Copyright (C) 2002-2011, International Business Machines Corporation and others.
     5 //  All Rights Reserved.
     6 //
     7 //  This file contains the RBBIRuleBuilder class implementation.  This is the main class for
     8 //    building (compiling) break rules into the tables required by the runtime
     9 //    RBBI engine.
    10 //
    12 #include "unicode/utypes.h"
    14 #if !UCONFIG_NO_BREAK_ITERATION
    16 #include "unicode/brkiter.h"
    17 #include "unicode/rbbi.h"
    18 #include "unicode/ubrk.h"
    19 #include "unicode/unistr.h"
    20 #include "unicode/uniset.h"
    21 #include "unicode/uchar.h"
    22 #include "unicode/uchriter.h"
    23 #include "unicode/parsepos.h"
    24 #include "unicode/parseerr.h"
    25 #include "cmemory.h"
    26 #include "cstring.h"
    28 #include "rbbirb.h"
    29 #include "rbbinode.h"
    31 #include "rbbiscan.h"
    32 #include "rbbisetb.h"
    33 #include "rbbitblb.h"
    34 #include "rbbidata.h"
    37 U_NAMESPACE_BEGIN
    40 //----------------------------------------------------------------------------------------
    41 //
    42 //  Constructor.
    43 //
    44 //----------------------------------------------------------------------------------------
    45 RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString   &rules,
    46                                        UParseError     *parseErr,
    47                                        UErrorCode      &status)
    48  : fRules(rules)
    49 {
    50     fStatus = &status; // status is checked below
    51     fParseError = parseErr;
    52     fDebugEnv   = NULL;
    53 #ifdef RBBI_DEBUG
    54     fDebugEnv   = getenv("U_RBBIDEBUG");
    55 #endif
    58     fForwardTree        = NULL;
    59     fReverseTree        = NULL;
    60     fSafeFwdTree        = NULL;
    61     fSafeRevTree        = NULL;
    62     fDefaultTree        = &fForwardTree;
    63     fForwardTables      = NULL;
    64     fReverseTables      = NULL;
    65     fSafeFwdTables      = NULL;
    66     fSafeRevTables      = NULL;
    67     fRuleStatusVals     = NULL;
    68     fChainRules         = FALSE;
    69     fLBCMNoChain        = FALSE;
    70     fLookAheadHardBreak = FALSE;
    71     fUSetNodes          = NULL;
    72     fRuleStatusVals     = NULL;
    73     fScanner            = NULL;
    74     fSetBuilder         = NULL;
    75     if (parseErr) {
    76         uprv_memset(parseErr, 0, sizeof(UParseError));
    77     }
    79     if (U_FAILURE(status)) {
    80         return;
    81     }
    83     fUSetNodes          = new UVector(status); // bcos status gets overwritten here
    84     fRuleStatusVals     = new UVector(status);
    85     fScanner            = new RBBIRuleScanner(this);
    86     fSetBuilder         = new RBBISetBuilder(this);
    87     if (U_FAILURE(status)) {
    88         return;
    89     }
    90     if(fSetBuilder == 0 || fScanner == 0 || fUSetNodes == 0 || fRuleStatusVals == 0) {
    91         status = U_MEMORY_ALLOCATION_ERROR;
    92     }
    93 }
    97 //----------------------------------------------------------------------------------------
    98 //
    99 //  Destructor
   100 //
   101 //----------------------------------------------------------------------------------------
   102 RBBIRuleBuilder::~RBBIRuleBuilder() {
   104     int        i;
   105     for (i=0; ; i++) {
   106         RBBINode *n = (RBBINode *)fUSetNodes->elementAt(i);
   107         if (n==NULL) {
   108             break;
   109         }
   110         delete n;
   111     }
   113     delete fUSetNodes;
   114     delete fSetBuilder;
   115     delete fForwardTables;
   116     delete fReverseTables;
   117     delete fSafeFwdTables;
   118     delete fSafeRevTables;
   120     delete fForwardTree;
   121     delete fReverseTree;
   122     delete fSafeFwdTree;
   123     delete fSafeRevTree;
   124     delete fScanner;
   125     delete fRuleStatusVals;
   126 }
   132 //----------------------------------------------------------------------------------------
   133 //
   134 //   flattenData() -  Collect up the compiled RBBI rule data and put it into
   135 //                    the format for saving in ICU data files,
   136 //                    which is also the format needed by the RBBI runtime engine.
   137 //
   138 //----------------------------------------------------------------------------------------
   139 static int32_t align8(int32_t i) {return (i+7) & 0xfffffff8;}
   141 RBBIDataHeader *RBBIRuleBuilder::flattenData() {
   142     int32_t    i;
   144     if (U_FAILURE(*fStatus)) {
   145         return NULL;
   146     }
   148     // Remove comments and whitespace from the rules to make it smaller.
   149     UnicodeString strippedRules((const UnicodeString&)RBBIRuleScanner::stripRules(fRules));
   151     // Calculate the size of each section in the data.
   152     //   Sizes here are padded up to a multiple of 8 for better memory alignment.
   153     //   Sections sizes actually stored in the header are for the actual data
   154     //     without the padding.
   155     //
   156     int32_t headerSize        = align8(sizeof(RBBIDataHeader));
   157     int32_t forwardTableSize  = align8(fForwardTables->getTableSize());
   158     int32_t reverseTableSize  = align8(fReverseTables->getTableSize());
   159     int32_t safeFwdTableSize  = align8(fSafeFwdTables->getTableSize());
   160     int32_t safeRevTableSize  = align8(fSafeRevTables->getTableSize());
   161     int32_t trieSize          = align8(fSetBuilder->getTrieSize());
   162     int32_t statusTableSize   = align8(fRuleStatusVals->size() * sizeof(int32_t));
   163     int32_t rulesSize         = align8((strippedRules.length()+1) * sizeof(UChar));
   165     int32_t         totalSize = headerSize + forwardTableSize + reverseTableSize
   166                                 + safeFwdTableSize + safeRevTableSize 
   167                                 + statusTableSize + trieSize + rulesSize;
   169     RBBIDataHeader  *data     = (RBBIDataHeader *)uprv_malloc(totalSize);
   170     if (data == NULL) {
   171         *fStatus = U_MEMORY_ALLOCATION_ERROR;
   172         return NULL;
   173     }
   174     uprv_memset(data, 0, totalSize);
   177     data->fMagic            = 0xb1a0;
   178     data->fFormatVersion[0] = 3;
   179     data->fFormatVersion[1] = 1;
   180     data->fFormatVersion[2] = 0;
   181     data->fFormatVersion[3] = 0;
   182     data->fLength           = totalSize;
   183     data->fCatCount         = fSetBuilder->getNumCharCategories();
   185     data->fFTable        = headerSize;
   186     data->fFTableLen     = forwardTableSize;
   187     data->fRTable        = data->fFTable  + forwardTableSize;
   188     data->fRTableLen     = reverseTableSize;
   189     data->fSFTable       = data->fRTable  + reverseTableSize;
   190     data->fSFTableLen    = safeFwdTableSize;
   191     data->fSRTable       = data->fSFTable + safeFwdTableSize;
   192     data->fSRTableLen    = safeRevTableSize;
   194     data->fTrie          = data->fSRTable + safeRevTableSize;
   195     data->fTrieLen       = fSetBuilder->getTrieSize();
   196     data->fStatusTable   = data->fTrie    + trieSize;
   197     data->fStatusTableLen= statusTableSize;
   198     data->fRuleSource    = data->fStatusTable + statusTableSize;
   199     data->fRuleSourceLen = strippedRules.length() * sizeof(UChar);
   201     uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
   203     fForwardTables->exportTable((uint8_t *)data + data->fFTable);
   204     fReverseTables->exportTable((uint8_t *)data + data->fRTable);
   205     fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable);
   206     fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable);
   207     fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);
   209     int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);
   210     for (i=0; i<fRuleStatusVals->size(); i++) {
   211         ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
   212     }
   214     strippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus);
   216     return data;
   217 }
   224 //----------------------------------------------------------------------------------------
   225 //
   226 //  createRuleBasedBreakIterator    construct from source rules that are passed in
   227 //                                  in a UnicodeString
   228 //
   229 //----------------------------------------------------------------------------------------
   230 BreakIterator *
   231 RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString    &rules,
   232                                     UParseError      *parseError,
   233                                     UErrorCode       &status)
   234 {
   235     // status checked below
   237     //
   238     // Read the input rules, generate a parse tree, symbol table,
   239     // and list of all Unicode Sets referenced by the rules.
   240     //
   241     RBBIRuleBuilder  builder(rules, parseError, status);
   242     if (U_FAILURE(status)) { // status checked here bcos build below doesn't
   243         return NULL;
   244     }
   245     builder.fScanner->parse();
   247     //
   248     // UnicodeSet processing.
   249     //    Munge the Unicode Sets to create a set of character categories.
   250     //    Generate the mapping tables (TRIE) from input 32-bit characters to
   251     //    the character categories.
   252     //
   253     builder.fSetBuilder->build();
   256     //
   257     //   Generate the DFA state transition table.
   258     //
   259     builder.fForwardTables = new RBBITableBuilder(&builder, &builder.fForwardTree);
   260     builder.fReverseTables = new RBBITableBuilder(&builder, &builder.fReverseTree);
   261     builder.fSafeFwdTables = new RBBITableBuilder(&builder, &builder.fSafeFwdTree);
   262     builder.fSafeRevTables = new RBBITableBuilder(&builder, &builder.fSafeRevTree);
   263     if (builder.fForwardTables == NULL || builder.fReverseTables == NULL ||
   264         builder.fSafeFwdTables == NULL || builder.fSafeRevTables == NULL)
   265     {
   266         status = U_MEMORY_ALLOCATION_ERROR;
   267         delete builder.fForwardTables; builder.fForwardTables = NULL;
   268         delete builder.fReverseTables; builder.fReverseTables = NULL;
   269         delete builder.fSafeFwdTables; builder.fSafeFwdTables = NULL;
   270         delete builder.fSafeRevTables; builder.fSafeRevTables = NULL;
   271         return NULL;
   272     }
   274     builder.fForwardTables->build();
   275     builder.fReverseTables->build();
   276     builder.fSafeFwdTables->build();
   277     builder.fSafeRevTables->build();
   279 #ifdef RBBI_DEBUG
   280     if (builder.fDebugEnv && uprv_strstr(builder.fDebugEnv, "states")) {
   281         builder.fForwardTables->printRuleStatusTable();
   282     }
   283 #endif
   285     //
   286     //   Package up the compiled data into a memory image
   287     //      in the run-time format.
   288     //
   289     RBBIDataHeader *data = builder.flattenData(); // returns NULL if error
   290     if (U_FAILURE(*builder.fStatus)) {
   291         return NULL;
   292     }
   295     //
   296     //  Clean up the compiler related stuff
   297     //
   300     //
   301     //  Create a break iterator from the compiled rules.
   302     //     (Identical to creation from stored pre-compiled rules)
   303     //
   304     // status is checked after init in construction.
   305     RuleBasedBreakIterator *This = new RuleBasedBreakIterator(data, status);
   306     if (U_FAILURE(status)) {
   307         delete This;
   308         This = NULL;
   309     } 
   310     else if(This == NULL) { // test for NULL
   311         status = U_MEMORY_ALLOCATION_ERROR;
   312     }
   313     return This;
   314 }
   316 U_NAMESPACE_END
   318 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */

mercurial