js/src/yarr/YarrPattern.h

Wed, 31 Dec 2014 07:53:36 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 07:53:36 +0100
branch
TOR_BUG_3246
changeset 5
4ab42b5ab56c
permissions
-rw-r--r--

Correct small whitespace inconsistency, lost while renaming variables.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
     2  * vim: set ts=8 sts=4 et sw=4 tw=99:
     3  *
     4  * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
     5  * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
     6  *
     7  * Redistribution and use in source and binary forms, with or without
     8  * modification, are permitted provided that the following conditions
     9  * are met:
    10  * 1. Redistributions of source code must retain the above copyright
    11  *    notice, this list of conditions and the following disclaimer.
    12  * 2. Redistributions in binary form must reproduce the above copyright
    13  *    notice, this list of conditions and the following disclaimer in the
    14  *    documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
    17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
    20  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    21  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    22  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    23  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
    24  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    27  */
    29 #ifndef yarr_YarrPattern_h
    30 #define yarr_YarrPattern_h
    32 #include "yarr/wtfbridge.h"
    33 #include "yarr/ASCIICType.h"
    35 namespace JSC { namespace Yarr {
    37 struct PatternDisjunction;
    39 enum ErrorCode {
    40     NoError,
    41     PatternTooLarge,
    42     QuantifierOutOfOrder,
    43     QuantifierWithoutAtom,
    44     QuantifierTooLarge,
    45     MissingParentheses,
    46     ParenthesesUnmatched,
    47     ParenthesesTypeInvalid,
    48     CharacterClassUnmatched,
    49     CharacterClassOutOfOrder,
    50     CharacterClassInvalidRange,
    51     EscapeUnterminated,
    52     RuntimeError,
    53     NumberOfErrorCodes
    54 };
    56 static inline const char* errorMessage(ErrorCode code)
    57 {
    59 #define REGEXP_ERROR_PREFIX "Invalid regular expression: "
    60    // The order of this array must match the ErrorCode enum.
    61    static const char* errorMessages[NumberOfErrorCodes] = {
    62        0, // NoError
    63        REGEXP_ERROR_PREFIX "regular expression too large",
    64        REGEXP_ERROR_PREFIX "numbers out of order in {} quantifier",
    65        REGEXP_ERROR_PREFIX "nothing to repeat",
    66        REGEXP_ERROR_PREFIX "number too large in {} quantifier",
    67        REGEXP_ERROR_PREFIX "missing )",
    68        REGEXP_ERROR_PREFIX "unmatched parentheses",
    69        REGEXP_ERROR_PREFIX "unrecognized character after (?",
    70        REGEXP_ERROR_PREFIX "missing terminating ] for character class",
    71        REGEXP_ERROR_PREFIX "character class out of order"
    72        REGEXP_ERROR_PREFIX "range out of order in character class",
    73        REGEXP_ERROR_PREFIX "\\ at end of pattern"
    74    };
    75 #undef REGEXP_ERROR_PREFIX
    77    return errorMessages[code];
    78 }
    80 struct CharacterRange {
    81     UChar begin;
    82     UChar end;
    84     CharacterRange(UChar begin, UChar end)
    85         : begin(begin)
    86         , end(end)
    87     {
    88     }
    89 };
    91 struct CharacterClass {
    92     WTF_MAKE_FAST_ALLOCATED;
    93 public:
    94     // All CharacterClass instances have to have the full set of matches and ranges,
    95     // they may have an optional m_table for faster lookups (which must match the
    96     // specified matches and ranges)
    97     CharacterClass()
    98         : m_table(0)
    99     {
   100     }
   101     CharacterClass(const char* table, bool inverted)
   102         : m_table(table)
   103         , m_tableInverted(inverted)
   104     {
   105     }
   106     Vector<UChar> m_matches;
   107     Vector<CharacterRange> m_ranges;
   108     Vector<UChar> m_matchesUnicode;
   109     Vector<CharacterRange> m_rangesUnicode;
   111     const char* m_table;
   112     bool m_tableInverted;
   113 };
   115 enum QuantifierType {
   116     QuantifierFixedCount,
   117     QuantifierGreedy,
   118     QuantifierNonGreedy
   119 };
   121 struct PatternTerm {
   122     enum Type {
   123         TypeAssertionBOL,
   124         TypeAssertionEOL,
   125         TypeAssertionWordBoundary,
   126         TypePatternCharacter,
   127         TypeCharacterClass,
   128         TypeBackReference,
   129         TypeForwardReference,
   130         TypeParenthesesSubpattern,
   131         TypeParentheticalAssertion,
   132         TypeDotStarEnclosure
   133     } type;
   134     bool m_capture :1;
   135     bool m_invert :1;
   136     union {
   137         UChar patternCharacter;
   138         CharacterClass* characterClass;
   139         unsigned backReferenceSubpatternId;
   140         struct {
   141             PatternDisjunction* disjunction;
   142             unsigned subpatternId;
   143             unsigned lastSubpatternId;
   144             bool isCopy;
   145             bool isTerminal;
   146         } parentheses;
   147         struct {
   148             bool bolAnchor : 1;
   149             bool eolAnchor : 1;
   150         } anchors;
   151     };
   152     QuantifierType quantityType;
   153     Checked<unsigned> quantityCount;
   154     int inputPosition;
   155     unsigned frameLocation;
   157     PatternTerm(UChar ch)
   158         : type(PatternTerm::TypePatternCharacter)
   159         , m_capture(false)
   160         , m_invert(false)
   161     {
   162         patternCharacter = ch;
   163         quantityType = QuantifierFixedCount;
   164         quantityCount = 1;
   165     }
   167     PatternTerm(CharacterClass* charClass, bool invert)
   168         : type(PatternTerm::TypeCharacterClass)
   169         , m_capture(false)
   170         , m_invert(invert)
   171     {
   172         characterClass = charClass;
   173         quantityType = QuantifierFixedCount;
   174         quantityCount = 1;
   175     }
   177     PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false)
   178         : type(type)
   179         , m_capture(capture)
   180         , m_invert(invert)
   181     {
   182         parentheses.disjunction = disjunction;
   183         parentheses.subpatternId = subpatternId;
   184         parentheses.isCopy = false;
   185         parentheses.isTerminal = false;
   186         quantityType = QuantifierFixedCount;
   187         quantityCount = 1;
   188     }
   190     PatternTerm(Type type, bool invert = false)
   191         : type(type)
   192         , m_capture(false)
   193         , m_invert(invert)
   194     {
   195         quantityType = QuantifierFixedCount;
   196         quantityCount = 1;
   197     }
   199     PatternTerm(unsigned spatternId)
   200         : type(TypeBackReference)
   201         , m_capture(false)
   202         , m_invert(false)
   203     {
   204         backReferenceSubpatternId = spatternId;
   205         quantityType = QuantifierFixedCount;
   206         quantityCount = 1;
   207     }
   209     PatternTerm(bool bolAnchor, bool eolAnchor)
   210         : type(TypeDotStarEnclosure)
   211         , m_capture(false)
   212         , m_invert(false)
   213     {
   214         anchors.bolAnchor = bolAnchor;
   215         anchors.eolAnchor = eolAnchor;
   216         quantityType = QuantifierFixedCount;
   217         quantityCount = 1;
   218     }
   220     // No-argument constructor for js::Vector.
   221     PatternTerm()
   222         : type(PatternTerm::TypePatternCharacter)
   223         , m_capture(false)
   224         , m_invert(false)
   225     {
   226         patternCharacter = 0;
   227         quantityType = QuantifierFixedCount;
   228         quantityCount = 1;
   229     }
   231     static PatternTerm ForwardReference()
   232     {
   233         return PatternTerm(TypeForwardReference);
   234     }
   236     static PatternTerm BOL()
   237     {
   238         return PatternTerm(TypeAssertionBOL);
   239     }
   241     static PatternTerm EOL()
   242     {
   243         return PatternTerm(TypeAssertionEOL);
   244     }
   246     static PatternTerm WordBoundary(bool invert)
   247     {
   248         return PatternTerm(TypeAssertionWordBoundary, invert);
   249     }
   251     bool invert()
   252     {
   253         return m_invert;
   254     }
   256     bool capture()
   257     {
   258         return m_capture;
   259     }
   261     void quantify(unsigned count, QuantifierType type)
   262     {
   263         quantityCount = count;
   264         quantityType = type;
   265     }
   266 };
   268 struct PatternAlternative {
   269     WTF_MAKE_FAST_ALLOCATED;
   270 public:
   271     PatternAlternative(PatternDisjunction* disjunction)
   272         : m_parent(disjunction)
   273         , m_onceThrough(false)
   274         , m_hasFixedSize(false)
   275         , m_startsWithBOL(false)
   276         , m_containsBOL(false)
   277     {
   278     }
   280     PatternTerm& lastTerm()
   281     {
   282         ASSERT(m_terms.size());
   283         return m_terms[m_terms.size() - 1];
   284     }
   286     void removeLastTerm()
   287     {
   288         ASSERT(m_terms.size());
   289         m_terms.shrink(m_terms.size() - 1);
   290     }
   292     void setOnceThrough()
   293     {
   294         m_onceThrough = true;
   295     }
   297     bool onceThrough()
   298     {
   299         return m_onceThrough;
   300     }
   302     Vector<PatternTerm> m_terms;
   303     PatternDisjunction* m_parent;
   304     unsigned m_minimumSize;
   305     bool m_onceThrough : 1;
   306     bool m_hasFixedSize : 1;
   307     bool m_startsWithBOL : 1;
   308     bool m_containsBOL : 1;
   309 };
   311 struct PatternDisjunction {
   312     WTF_MAKE_FAST_ALLOCATED;
   313 public:
   314     PatternDisjunction(PatternAlternative* parent = 0)
   315         : m_parent(parent)
   316         , m_hasFixedSize(false)
   317     {
   318     }
   320     ~PatternDisjunction()
   321     {
   322         deleteAllValues(m_alternatives);
   323     }
   325     PatternAlternative* addNewAlternative()
   326     {
   327         PatternAlternative* alternative = newOrCrash<PatternAlternative>(this);
   328         m_alternatives.append(alternative);
   329         return alternative;
   330     }
   332     Vector<PatternAlternative*> m_alternatives;
   333     PatternAlternative* m_parent;
   334     unsigned m_minimumSize;
   335     unsigned m_callFrameSize;
   336     bool m_hasFixedSize;
   337 };
   339 // You probably don't want to be calling these functions directly
   340 // (please to be calling newlineCharacterClass() et al on your
   341 // friendly neighborhood YarrPattern instance to get nicely
   342 // cached copies).
   343 CharacterClass* newlineCreate();
   344 CharacterClass* digitsCreate();
   345 CharacterClass* spacesCreate();
   346 CharacterClass* wordcharCreate();
   347 CharacterClass* nondigitsCreate();
   348 CharacterClass* nonspacesCreate();
   349 CharacterClass* nonwordcharCreate();
   351 struct TermChain {
   352     TermChain(PatternTerm term)
   353         : term(term)
   354     {}
   356     PatternTerm term;
   357     Vector<TermChain> hotTerms;
   358 };
   360 struct YarrPattern {
   361     YarrPattern(const String& pattern, bool ignoreCase, bool multiline, ErrorCode* error);
   363     ~YarrPattern()
   364     {
   365         deleteAllValues(m_disjunctions);
   366         deleteAllValues(m_userCharacterClasses);
   367     }
   369     void reset()
   370     {
   371         m_numSubpatterns = 0;
   372         m_maxBackReference = 0;
   374         m_containsBackreferences = false;
   375         m_containsBOL = false;
   377         newlineCached = 0;
   378         digitsCached = 0;
   379         spacesCached = 0;
   380         wordcharCached = 0;
   381         nondigitsCached = 0;
   382         nonspacesCached = 0;
   383         nonwordcharCached = 0;
   385         deleteAllValues(m_disjunctions);
   386         m_disjunctions.clear();
   387         deleteAllValues(m_userCharacterClasses);
   388         m_userCharacterClasses.clear();
   389     }
   391     bool containsIllegalBackReference()
   392     {
   393         return m_maxBackReference > m_numSubpatterns;
   394     }
   396     CharacterClass* newlineCharacterClass()
   397     {
   398         if (!newlineCached)
   399             m_userCharacterClasses.append(newlineCached = newlineCreate());
   400         return newlineCached;
   401     }
   402     CharacterClass* digitsCharacterClass()
   403     {
   404         if (!digitsCached)
   405             m_userCharacterClasses.append(digitsCached = digitsCreate());
   406         return digitsCached;
   407     }
   408     CharacterClass* spacesCharacterClass()
   409     {
   410         if (!spacesCached)
   411             m_userCharacterClasses.append(spacesCached = spacesCreate());
   412         return spacesCached;
   413     }
   414     CharacterClass* wordcharCharacterClass()
   415     {
   416         if (!wordcharCached)
   417             m_userCharacterClasses.append(wordcharCached = wordcharCreate());
   418         return wordcharCached;
   419     }
   420     CharacterClass* nondigitsCharacterClass()
   421     {
   422         if (!nondigitsCached)
   423             m_userCharacterClasses.append(nondigitsCached = nondigitsCreate());
   424         return nondigitsCached;
   425     }
   426     CharacterClass* nonspacesCharacterClass()
   427     {
   428         if (!nonspacesCached)
   429             m_userCharacterClasses.append(nonspacesCached = nonspacesCreate());
   430         return nonspacesCached;
   431     }
   432     CharacterClass* nonwordcharCharacterClass()
   433     {
   434         if (!nonwordcharCached)
   435             m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate());
   436         return nonwordcharCached;
   437     }
   439     bool m_ignoreCase : 1;
   440     bool m_multiline : 1;
   441     bool m_containsBackreferences : 1;
   442     bool m_containsBOL : 1;
   443     unsigned m_numSubpatterns;
   444     unsigned m_maxBackReference;
   445     PatternDisjunction* m_body;
   446     Vector<PatternDisjunction*, 4> m_disjunctions;
   447     Vector<CharacterClass*> m_userCharacterClasses;
   449 private:
   450     ErrorCode compile(const String& patternString);
   452     CharacterClass* newlineCached;
   453     CharacterClass* digitsCached;
   454     CharacterClass* spacesCached;
   455     CharacterClass* wordcharCached;
   456     CharacterClass* nondigitsCached;
   457     CharacterClass* nonspacesCached;
   458     CharacterClass* nonwordcharCached;
   459 };
   461 } } // namespace JSC::Yarr
   463 #endif /* yarr_YarrPattern_h */

mercurial