js/src/yarr/YarrInterpreter.h

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations of how to convince FindCookie users to test
condition and pass a nullptr when disabling double key logic.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
     2  * vim: set ts=8 sts=4 et sw=4 tw=99:
     3  *
     4  * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions
     8  * are met:
     9  * 1. Redistributions of source code must retain the above copyright
    10  *    notice, this list of conditions and the following disclaimer.
    11  * 2. Redistributions in binary form must reproduce the above copyright
    12  *    notice, this list of conditions and the following disclaimer in the
    13  *    documentation and/or other materials provided with the distribution.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
    16  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    18  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
    19  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    20  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    22  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
    23  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    25  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    26  */
    28 #ifndef yarr_YarrInterpreter_h
    29 #define yarr_YarrInterpreter_h
    31 #include "yarr/YarrPattern.h"
    33 namespace WTF {
    34 class BumpPointerAllocator;
    35 }
    36 using WTF::BumpPointerAllocator;
    38 namespace JSC { namespace Yarr {
    40 class ByteDisjunction;
    42 struct ByteTerm {
    43     enum Type {
    44         TypeBodyAlternativeBegin,
    45         TypeBodyAlternativeDisjunction,
    46         TypeBodyAlternativeEnd,
    47         TypeAlternativeBegin,
    48         TypeAlternativeDisjunction,
    49         TypeAlternativeEnd,
    50         TypeSubpatternBegin,
    51         TypeSubpatternEnd,
    52         TypeAssertionBOL,
    53         TypeAssertionEOL,
    54         TypeAssertionWordBoundary,
    55         TypePatternCharacterOnce,
    56         TypePatternCharacterFixed,
    57         TypePatternCharacterGreedy,
    58         TypePatternCharacterNonGreedy,
    59         TypePatternCasedCharacterOnce,
    60         TypePatternCasedCharacterFixed,
    61         TypePatternCasedCharacterGreedy,
    62         TypePatternCasedCharacterNonGreedy,
    63         TypeCharacterClass,
    64         TypeBackReference,
    65         TypeParenthesesSubpattern,
    66         TypeParenthesesSubpatternOnceBegin,
    67         TypeParenthesesSubpatternOnceEnd,
    68         TypeParenthesesSubpatternTerminalBegin,
    69         TypeParenthesesSubpatternTerminalEnd,
    70         TypeParentheticalAssertionBegin,
    71         TypeParentheticalAssertionEnd,
    72         TypeCheckInput,
    73         TypeUncheckInput,
    74         TypeDotStarEnclosure
    75     } type;
    76     union {
    77         struct {
    78             union {
    79                 UChar patternCharacter;
    80                 struct {
    81                     UChar lo;
    82                     UChar hi;
    83                 } casedCharacter;
    84                 CharacterClass* characterClass;
    85                 unsigned subpatternId;
    86             };
    87             union {
    88                 ByteDisjunction* parenthesesDisjunction;
    89                 unsigned parenthesesWidth;
    90             };
    91             QuantifierType quantityType;
    92             unsigned quantityCount;
    93         } atom;
    94         struct {
    95             int next;
    96             int end;
    97             bool onceThrough;
    98         } alternative;
    99         struct {
   100             bool m_bol : 1;
   101             bool m_eol : 1;
   102         } anchors;
   103         unsigned checkInputCount;
   104     };
   105     unsigned frameLocation;
   106     bool m_capture : 1;
   107     bool m_invert : 1;
   108     unsigned inputPosition;
   110     ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
   111         : frameLocation(frameLocation)
   112         , m_capture(false)
   113         , m_invert(false)
   114     {
   115         switch (quantityType) {
   116         case QuantifierFixedCount:
   117             type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
   118             break;
   119         case QuantifierGreedy:
   120             type = ByteTerm::TypePatternCharacterGreedy;
   121             break;
   122         case QuantifierNonGreedy:
   123             type = ByteTerm::TypePatternCharacterNonGreedy;
   124             break;
   125         }
   127         atom.patternCharacter = ch;
   128         atom.quantityType = quantityType;
   129         atom.quantityCount = quantityCount.unsafeGet();
   130         inputPosition = inputPos;
   131     }
   133     ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
   134         : frameLocation(frameLocation)
   135         , m_capture(false)
   136         , m_invert(false)
   137     {
   138         switch (quantityType) {
   139         case QuantifierFixedCount:
   140             type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
   141             break;
   142         case QuantifierGreedy:
   143             type = ByteTerm::TypePatternCasedCharacterGreedy;
   144             break;
   145         case QuantifierNonGreedy:
   146             type = ByteTerm::TypePatternCasedCharacterNonGreedy;
   147             break;
   148         }
   150         atom.casedCharacter.lo = lo;
   151         atom.casedCharacter.hi = hi;
   152         atom.quantityType = quantityType;
   153         atom.quantityCount = quantityCount.unsafeGet();
   154         inputPosition = inputPos;
   155     }
   157     ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
   158         : type(ByteTerm::TypeCharacterClass)
   159         , m_capture(false)
   160         , m_invert(invert)
   161     {
   162         atom.characterClass = characterClass;
   163         atom.quantityType = QuantifierFixedCount;
   164         atom.quantityCount = 1;
   165         inputPosition = inputPos;
   166     }
   168     ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos)
   169         : type(type)
   170         , m_capture(capture)
   171         , m_invert(false)
   172     {
   173         atom.subpatternId = subpatternId;
   174         atom.parenthesesDisjunction = parenthesesInfo;
   175         atom.quantityType = QuantifierFixedCount;
   176         atom.quantityCount = 1;
   177         inputPosition = inputPos;
   178     }
   180     ByteTerm(Type type, bool invert = false)
   181         : type(type)
   182         , m_capture(false)
   183         , m_invert(invert)
   184     {
   185         atom.quantityType = QuantifierFixedCount;
   186         atom.quantityCount = 1;
   187     }
   189     ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos)
   190         : type(type)
   191         , m_capture(capture)
   192         , m_invert(invert)
   193     {
   194         atom.subpatternId = subpatternId;
   195         atom.quantityType = QuantifierFixedCount;
   196         atom.quantityCount = 1;
   197         inputPosition = inputPos;
   198     }
   200     // For js::Vector. Does not create a valid object.
   201     ByteTerm()
   202     {
   203     }
   205     static ByteTerm BOL(int inputPos)
   206     {
   207         ByteTerm term(TypeAssertionBOL);
   208         term.inputPosition = inputPos;
   209         return term;
   210     }
   212     static ByteTerm CheckInput(Checked<unsigned> count)
   213     {
   214         ByteTerm term(TypeCheckInput);
   215         term.checkInputCount = count.unsafeGet();
   216         return term;
   217     }
   219     static ByteTerm UncheckInput(Checked<unsigned> count)
   220     {
   221         ByteTerm term(TypeUncheckInput);
   222         term.checkInputCount = count.unsafeGet();
   223         return term;
   224     }
   226     static ByteTerm EOL(int inputPos)
   227     {
   228         ByteTerm term(TypeAssertionEOL);
   229         term.inputPosition = inputPos;
   230         return term;
   231     }
   233     static ByteTerm WordBoundary(bool invert, int inputPos)
   234     {
   235         ByteTerm term(TypeAssertionWordBoundary, invert);
   236         term.inputPosition = inputPos;
   237         return term;
   238     }
   240     static ByteTerm BackReference(unsigned subpatternId, int inputPos)
   241     {
   242         return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
   243     }
   245     static ByteTerm BodyAlternativeBegin(bool onceThrough)
   246     {
   247         ByteTerm term(TypeBodyAlternativeBegin);
   248         term.alternative.next = 0;
   249         term.alternative.end = 0;
   250         term.alternative.onceThrough = onceThrough;
   251         return term;
   252     }
   254     static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
   255     {
   256         ByteTerm term(TypeBodyAlternativeDisjunction);
   257         term.alternative.next = 0;
   258         term.alternative.end = 0;
   259         term.alternative.onceThrough = onceThrough;
   260         return term;
   261     }
   263     static ByteTerm BodyAlternativeEnd()
   264     {
   265         ByteTerm term(TypeBodyAlternativeEnd);
   266         term.alternative.next = 0;
   267         term.alternative.end = 0;
   268         term.alternative.onceThrough = false;
   269         return term;
   270     }
   272     static ByteTerm AlternativeBegin()
   273     {
   274         ByteTerm term(TypeAlternativeBegin);
   275         term.alternative.next = 0;
   276         term.alternative.end = 0;
   277         term.alternative.onceThrough = false;
   278         return term;
   279     }
   281     static ByteTerm AlternativeDisjunction()
   282     {
   283         ByteTerm term(TypeAlternativeDisjunction);
   284         term.alternative.next = 0;
   285         term.alternative.end = 0;
   286         term.alternative.onceThrough = false;
   287         return term;
   288     }
   290     static ByteTerm AlternativeEnd()
   291     {
   292         ByteTerm term(TypeAlternativeEnd);
   293         term.alternative.next = 0;
   294         term.alternative.end = 0;
   295         term.alternative.onceThrough = false;
   296         return term;
   297     }
   299     static ByteTerm SubpatternBegin()
   300     {
   301         return ByteTerm(TypeSubpatternBegin);
   302     }
   304     static ByteTerm SubpatternEnd()
   305     {
   306         return ByteTerm(TypeSubpatternEnd);
   307     }
   309     static ByteTerm DotStarEnclosure(bool bolAnchor, bool eolAnchor)
   310     {
   311         ByteTerm term(TypeDotStarEnclosure);
   312         term.anchors.m_bol = bolAnchor;
   313         term.anchors.m_eol = eolAnchor;
   314         return term;
   315     }
   317     bool invert()
   318     {
   319         return m_invert;
   320     }
   322     bool capture()
   323     {
   324         return m_capture;
   325     }
   326 };
   328 class ByteDisjunction {
   329     WTF_MAKE_FAST_ALLOCATED;
   330 public:
   331     ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
   332         : m_numSubpatterns(numSubpatterns)
   333         , m_frameSize(frameSize)
   334     {
   335     }
   337     Vector<ByteTerm> terms;
   338     unsigned m_numSubpatterns;
   339     unsigned m_frameSize;
   340 };
   342 struct BytecodePattern {
   343     WTF_MAKE_FAST_ALLOCATED;
   344 public:
   345     BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<ByteDisjunction*> &allParenthesesInfo, YarrPattern& pattern, BumpPointerAllocator* allocator)
   346         : m_body(body)
   347         , m_ignoreCase(pattern.m_ignoreCase)
   348         , m_multiline(pattern.m_multiline)
   349         , m_allocator(allocator)
   350     {
   351         newlineCharacterClass = pattern.newlineCharacterClass();
   352         wordcharCharacterClass = pattern.wordcharCharacterClass();
   354         // Trick: 'Steal' the YarrPattern's ParenthesesInfo!
   355         // The input vector isn't used afterwards anymore,
   356         // that way we don't have to copy the input.
   357         JS_ASSERT(m_allParenthesesInfo.size() == 0);
   358         m_allParenthesesInfo.swap(allParenthesesInfo);
   360         // Trick: 'Steal' the YarrPattern's CharacterClasses!
   361         // The input vector isn't used afterwards anymore,
   362         // that way we don't have to copy the input.
   363         JS_ASSERT(m_userCharacterClasses.size() == 0);
   364         m_userCharacterClasses.swap(pattern.m_userCharacterClasses);
   365     }
   367     ~BytecodePattern()
   368     {
   369         deleteAllValues(m_allParenthesesInfo);
   370         deleteAllValues(m_userCharacterClasses);
   371     }
   373     OwnPtr<ByteDisjunction> m_body;
   374     bool m_ignoreCase;
   375     bool m_multiline;
   376     // Each BytecodePattern is associated with a RegExp, each RegExp is associated
   377     // with a JSGlobalData.  Cache a pointer to out JSGlobalData's m_regExpAllocator.
   378     BumpPointerAllocator* m_allocator;
   380     CharacterClass* newlineCharacterClass;
   381     CharacterClass* wordcharCharacterClass;
   383 private:
   384     Vector<ByteDisjunction*> m_allParenthesesInfo;
   385     Vector<CharacterClass*> m_userCharacterClasses;
   386 };
   388 JS_EXPORT_PRIVATE PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
   389 JS_EXPORT_PRIVATE unsigned interpret(JSContext *cx, BytecodePattern*, const String& input, unsigned start, unsigned* output);
   390 unsigned interpret(JSContext *cx, BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output);
   391 unsigned interpret(JSContext *cx, BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output);
   393 } } // namespace JSC::Yarr
   395 #endif /* yarr_YarrInterpreter_h */

mercurial