js/src/yarr/YarrInterpreter.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
     2  * vim: set ts=8 sts=4 et sw=4 tw=99:
     3  *
     4  * Copyright (C) 2009 Apple Inc. All rights reserved.
     5  * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
     6  *
     7  * Redistribution and use in source and binary forms, with or without
     8  * modification, are permitted provided that the following conditions
     9  * are met:
    10  * 1. Redistributions of source code must retain the above copyright
    11  *    notice, this list of conditions and the following disclaimer.
    12  * 2. Redistributions in binary form must reproduce the above copyright
    13  *    notice, this list of conditions and the following disclaimer in the
    14  *    documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
    17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
    20  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    21  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    22  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    23  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
    24  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    27  */
    29 #include "yarr/YarrInterpreter.h"
    31 #include "jscntxt.h"
    33 #include "yarr/Yarr.h"
    34 #include "yarr/YarrCanonicalizeUCS2.h"
    35 #include "yarr/BumpPointerAllocator.h"
    37 using namespace WTF;
    39 namespace JSC { namespace Yarr {
    41 template<typename CharType>
    42 class Interpreter {
    43 public:
    44     struct ParenthesesDisjunctionContext;
    46     struct BackTrackInfoPatternCharacter {
    47         uintptr_t matchAmount;
    48     };
    49     struct BackTrackInfoCharacterClass {
    50         uintptr_t matchAmount;
    51     };
    52     struct BackTrackInfoBackReference {
    53         uintptr_t begin; // Not really needed for greedy quantifiers.
    54         uintptr_t matchAmount; // Not really needed for fixed quantifiers.
    55     };
    56     struct BackTrackInfoAlternative {
    57         uintptr_t offset;
    58     };
    59     struct BackTrackInfoParentheticalAssertion {
    60         uintptr_t begin;
    61     };
    62     struct BackTrackInfoParenthesesOnce {
    63         uintptr_t begin;
    64     };
    65     struct BackTrackInfoParenthesesTerminal {
    66         uintptr_t begin;
    67     };
    68     struct BackTrackInfoParentheses {
    69         uintptr_t matchAmount;
    70         ParenthesesDisjunctionContext* lastContext;
    71     };
    73     static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
    74     {
    75         context->next = backTrack->lastContext;
    76         backTrack->lastContext = context;
    77         ++backTrack->matchAmount;
    78     }
    80     static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
    81     {
    82         ASSERT(backTrack->matchAmount);
    83         ASSERT(backTrack->lastContext);
    84         backTrack->lastContext = backTrack->lastContext->next;
    85         --backTrack->matchAmount;
    86     }
    88     struct DisjunctionContext
    89     {
    90         DisjunctionContext()
    91             : term(0)
    92         {
    93         }
    95         void* operator new(size_t, void* where)
    96         {
    97             return where;
    98         }
   100         int term;
   101         unsigned matchBegin;
   102         unsigned matchEnd;
   103         uintptr_t frame[1];
   104     };
   106     DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
   107     {
   108         size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
   109         allocatorPool = allocatorPool->ensureCapacity(size);
   110         if (!allocatorPool)
   111             CRASH();
   112         return new (allocatorPool->alloc(size)) DisjunctionContext();
   113     }
   115     void freeDisjunctionContext(DisjunctionContext* context)
   116     {
   117         allocatorPool = allocatorPool->dealloc(context);
   118     }
   120     struct ParenthesesDisjunctionContext
   121     {
   122         ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
   123             : next(0)
   124         {
   125             unsigned firstSubpatternId = term.atom.subpatternId;
   126             unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
   128             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
   129                 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
   130                 output[(firstSubpatternId << 1) + i] = offsetNoMatch;
   131             }
   133             new (getDisjunctionContext(term)) DisjunctionContext();
   134         }
   136         void* operator new(size_t, void* where)
   137         {
   138             return where;
   139         }
   141         void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
   142         {
   143             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
   144                 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
   145         }
   147         DisjunctionContext* getDisjunctionContext(ByteTerm& term)
   148         {
   149             return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
   150         }
   152         ParenthesesDisjunctionContext* next;
   153         unsigned subpatternBackup[1];
   154     };
   156     ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
   157     {
   158         size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
   159         size = JS_ROUNDUP(size, JS_ALIGNMENT_OF(ParenthesesDisjunctionContext));
   160         allocatorPool = allocatorPool->ensureCapacity(size);
   161         if (!allocatorPool)
   162             CRASH();
   163         return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
   164     }
   166     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
   167     {
   168         allocatorPool = allocatorPool->dealloc(context);
   169     }
   171     class InputStream {
   172     public:
   173         InputStream(const CharType* input, unsigned start, unsigned length)
   174             : input(input)
   175             , pos(start)
   176             , length(length)
   177         {
   178         }
   180         void next()
   181         {
   182             ++pos;
   183         }
   185         void rewind(unsigned amount)
   186         {
   187             ASSERT(pos >= amount);
   188             pos -= amount;
   189         }
   191         int read()
   192         {
   193             ASSERT(pos < length);
   194             if (pos < length)
   195                 return input[pos];
   196             return -1;
   197         }
   199         int readPair()
   200         {
   201             ASSERT(pos + 1 < length);
   202             return input[pos] | input[pos + 1] << 16;
   203         }
   205         int readChecked(unsigned negativePositionOffest)
   206         {
   207             if (pos < negativePositionOffest)
   208                 CRASH();
   209             unsigned p = pos - negativePositionOffest;
   210             ASSERT(p < length);
   211             return input[p];
   212         }
   214         int reread(unsigned from)
   215         {
   216             ASSERT(from < length);
   217             return input[from];
   218         }
   220         int prev()
   221         {
   222             ASSERT(!(pos > length));
   223             if (pos && length)
   224                 return input[pos - 1];
   225             return -1;
   226         }
   228         unsigned getPos()
   229         {
   230             return pos;
   231         }
   233         void setPos(unsigned p)
   234         {
   235             pos = p;
   236         }
   238         bool atStart()
   239         {
   240             return pos == 0;
   241         }
   243         bool atEnd()
   244         {
   245             return pos == length;
   246         }
   248         unsigned end()
   249         {
   250             return length;
   251         }
   253         bool checkInput(unsigned count)
   254         {
   255             if (((pos + count) <= length) && ((pos + count) >= pos)) {
   256                 pos += count;
   257                 return true;
   258             }
   259             return false;
   260         }
   262         void uncheckInput(unsigned count)
   263         {
   264             if (pos < count)
   265                 CRASH();
   266             pos -= count;
   267         }
   269         bool atStart(unsigned negativePositionOffest)
   270         {
   271             return pos == negativePositionOffest;
   272         }
   274         bool atEnd(unsigned negativePositionOffest)
   275         {
   276             if (pos < negativePositionOffest)
   277                 CRASH();
   278             return (pos - negativePositionOffest) == length;
   279         }
   281         bool isAvailableInput(unsigned offset)
   282         {
   283             return (((pos + offset) <= length) && ((pos + offset) >= pos));
   284         }
   286     private:
   287         const CharType* input;
   288         unsigned pos;
   289         unsigned length;
   290     };
   292     bool testCharacterClass(CharacterClass* characterClass, int ch)
   293     {
   294         if (ch & 0xFF80) {
   295             for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
   296                 if (ch == characterClass->m_matchesUnicode[i])
   297                     return true;
   298             for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
   299                 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
   300                     return true;
   301         } else {
   302             for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
   303                 if (ch == characterClass->m_matches[i])
   304                     return true;
   305             for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
   306                 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
   307                     return true;
   308         }
   310         return false;
   311     }
   313     bool checkCharacter(int testChar, unsigned negativeInputOffset)
   314     {
   315         return testChar == input.readChecked(negativeInputOffset);
   316     }
   318     bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
   319     {
   320         int ch = input.readChecked(negativeInputOffset);
   321         return (loChar == ch) || (hiChar == ch);
   322     }
   324     bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
   325     {
   326         bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
   327         return invert ? !match : match;
   328     }
   330     bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
   331     {
   332         unsigned matchSize = (unsigned)(matchEnd - matchBegin);
   334         if (!input.checkInput(matchSize))
   335             return false;
   337         if (pattern->m_ignoreCase) {
   338             for (unsigned i = 0; i < matchSize; ++i) {
   339                 int oldCh = input.reread(matchBegin + i);
   340                 int ch = input.readChecked(negativeInputOffset + matchSize - i);
   342                 if (oldCh == ch)
   343                     continue;
   345                 // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that
   346                 // unicode values are never allowed to match against ascii ones.
   347                 if (isASCII(oldCh) || isASCII(ch)) {
   348                     if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
   349                         continue;
   350                 } else if (areCanonicallyEquivalent(oldCh, ch))
   351                     continue;
   353                 input.uncheckInput(matchSize);
   354                 return false;
   355             }
   356         } else {
   357             for (unsigned i = 0; i < matchSize; ++i) {
   358                 if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) {
   359                     input.uncheckInput(matchSize);
   360                     return false;
   361                 }
   362             }
   363         }
   365         return true;
   366     }
   368     bool matchAssertionBOL(ByteTerm& term)
   369     {
   370         return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
   371     }
   373     bool matchAssertionEOL(ByteTerm& term)
   374     {
   375         if (term.inputPosition)
   376             return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
   378         return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
   379     }
   381     bool matchAssertionWordBoundary(ByteTerm& term)
   382     {
   383         bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1));
   384         bool readIsWordchar;
   385         if (term.inputPosition)
   386             readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
   387         else
   388             readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
   390         bool wordBoundary = prevIsWordchar != readIsWordchar;
   391         return term.invert() ? !wordBoundary : wordBoundary;
   392     }
   394     bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
   395     {
   396         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
   398         switch (term.atom.quantityType) {
   399         case QuantifierFixedCount:
   400             break;
   402         case QuantifierGreedy:
   403             if (backTrack->matchAmount) {
   404                 --backTrack->matchAmount;
   405                 input.uncheckInput(1);
   406                 return true;
   407             }
   408             break;
   410         case QuantifierNonGreedy:
   411             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
   412                 ++backTrack->matchAmount;
   413                 if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))
   414                     return true;
   415             }
   416             input.uncheckInput(backTrack->matchAmount);
   417             break;
   418         }
   420         return false;
   421     }
   423     bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
   424     {
   425         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
   427         switch (term.atom.quantityType) {
   428         case QuantifierFixedCount:
   429             break;
   431         case QuantifierGreedy:
   432             if (backTrack->matchAmount) {
   433                 --backTrack->matchAmount;
   434                 input.uncheckInput(1);
   435                 return true;
   436             }
   437             break;
   439         case QuantifierNonGreedy:
   440             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
   441                 ++backTrack->matchAmount;
   442                 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))
   443                     return true;
   444             }
   445             input.uncheckInput(backTrack->matchAmount);
   446             break;
   447         }
   449         return false;
   450     }
   452     bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
   453     {
   454         ASSERT(term.type == ByteTerm::TypeCharacterClass);
   455         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
   457         switch (term.atom.quantityType) {
   458         case QuantifierFixedCount: {
   459             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
   460                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
   461                     return false;
   462             }
   463             return true;
   464         }
   466         case QuantifierGreedy: {
   467             unsigned matchAmount = 0;
   468             while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
   469                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
   470                     input.uncheckInput(1);
   471                     break;
   472                 }
   473                 ++matchAmount;
   474             }
   475             backTrack->matchAmount = matchAmount;
   477             return true;
   478         }
   480         case QuantifierNonGreedy:
   481             backTrack->matchAmount = 0;
   482             return true;
   483         }
   485         ASSERT_NOT_REACHED();
   486         return false;
   487     }
   489     bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
   490     {
   491         ASSERT(term.type == ByteTerm::TypeCharacterClass);
   492         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
   494         switch (term.atom.quantityType) {
   495         case QuantifierFixedCount:
   496             break;
   498         case QuantifierGreedy:
   499             if (backTrack->matchAmount) {
   500                 --backTrack->matchAmount;
   501                 input.uncheckInput(1);
   502                 return true;
   503             }
   504             break;
   506         case QuantifierNonGreedy:
   507             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
   508                 ++backTrack->matchAmount;
   509                 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))
   510                     return true;
   511             }
   512             input.uncheckInput(backTrack->matchAmount);
   513             break;
   514         }
   516         return false;
   517     }
   519     bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
   520     {
   521         ASSERT(term.type == ByteTerm::TypeBackReference);
   522         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
   524         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
   525         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
   527         // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
   528         // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
   529         // Eg.: /(a\1)/
   530         if (matchEnd == offsetNoMatch)
   531             return true;
   533         if (matchBegin == offsetNoMatch)
   534             return true;
   536         ASSERT(matchBegin <= matchEnd);
   538         if (matchBegin == matchEnd)
   539             return true;
   541         switch (term.atom.quantityType) {
   542         case QuantifierFixedCount: {
   543             backTrack->begin = input.getPos();
   544             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
   545                 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
   546                     input.setPos(backTrack->begin);
   547                     return false;
   548                 }
   549             }
   550             return true;
   551         }
   553         case QuantifierGreedy: {
   554             unsigned matchAmount = 0;
   555             while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
   556                 ++matchAmount;
   557             backTrack->matchAmount = matchAmount;
   558             return true;
   559         }
   561         case QuantifierNonGreedy:
   562             backTrack->begin = input.getPos();
   563             backTrack->matchAmount = 0;
   564             return true;
   565         }
   567         ASSERT_NOT_REACHED();
   568         return false;
   569     }
   571     bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
   572     {
   573         ASSERT(term.type == ByteTerm::TypeBackReference);
   574         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
   576         unsigned matchBegin = output[(term.atom.subpatternId << 1)];
   577         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
   579         if (matchBegin == offsetNoMatch)
   580             return false;
   582         ASSERT(matchBegin <= matchEnd);
   584         if (matchBegin == matchEnd)
   585             return false;
   587         switch (term.atom.quantityType) {
   588         case QuantifierFixedCount:
   589             // for quantityCount == 1, could rewind.
   590             input.setPos(backTrack->begin);
   591             break;
   593         case QuantifierGreedy:
   594             if (backTrack->matchAmount) {
   595                 --backTrack->matchAmount;
   596                 input.rewind(matchEnd - matchBegin);
   597                 return true;
   598             }
   599             break;
   601         case QuantifierNonGreedy:
   602             if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
   603                 ++backTrack->matchAmount;
   604                 return true;
   605             }
   606             input.setPos(backTrack->begin);
   607             break;
   608         }
   610         return false;
   611     }
   613     void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
   614     {
   615         if (term.capture()) {
   616             unsigned subpatternId = term.atom.subpatternId;
   617             output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
   618             output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
   619         }
   620     }
   621     void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
   622     {
   623         unsigned firstSubpatternId = term.atom.subpatternId;
   624         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
   625         context->restoreOutput(output, firstSubpatternId, count);
   626     }
   627     JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
   628     {
   629         while (backTrack->matchAmount) {
   630             ParenthesesDisjunctionContext* context = backTrack->lastContext;
   632             JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
   633             if (result == JSRegExpMatch)
   634                 return JSRegExpMatch;
   636             resetMatches(term, context);
   637             popParenthesesDisjunctionContext(backTrack);
   638             freeParenthesesDisjunctionContext(context);
   640             if (result != JSRegExpNoMatch)
   641                 return result;
   642         }
   644         return JSRegExpNoMatch;
   645     }
   647     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
   648     {
   649         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
   650         ASSERT(term.atom.quantityCount == 1);
   652         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
   654         switch (term.atom.quantityType) {
   655         case QuantifierGreedy: {
   656             // set this speculatively; if we get to the parens end this will be true.
   657             backTrack->begin = input.getPos();
   658             break;
   659         }
   660         case QuantifierNonGreedy: {
   661             backTrack->begin = notFound;
   662             context->term += term.atom.parenthesesWidth;
   663             return true;
   664         }
   665         case QuantifierFixedCount:
   666             break;
   667         }
   669         if (term.capture()) {
   670             unsigned subpatternId = term.atom.subpatternId;
   671             output[(subpatternId << 1)] = input.getPos() - term.inputPosition;
   672         }
   674         return true;
   675     }
   677     bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
   678     {
   679         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
   680         ASSERT(term.atom.quantityCount == 1);
   682         if (term.capture()) {
   683             unsigned subpatternId = term.atom.subpatternId;
   684             output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
   685         }
   687         if (term.atom.quantityType == QuantifierFixedCount)
   688             return true;
   690         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
   691         return backTrack->begin != input.getPos();
   692     }
   694     bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
   695     {
   696         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
   697         ASSERT(term.atom.quantityCount == 1);
   699         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
   701         if (term.capture()) {
   702             unsigned subpatternId = term.atom.subpatternId;
   703             output[(subpatternId << 1)] = offsetNoMatch;
   704             output[(subpatternId << 1) + 1] = offsetNoMatch;
   705         }
   707         switch (term.atom.quantityType) {
   708         case QuantifierGreedy:
   709             // if we backtrack to this point, there is another chance - try matching nothing.
   710             ASSERT(backTrack->begin != notFound);
   711             backTrack->begin = notFound;
   712             context->term += term.atom.parenthesesWidth;
   713             return true;
   714         case QuantifierNonGreedy:
   715             ASSERT(backTrack->begin != notFound);
   716         case QuantifierFixedCount:
   717             break;
   718         }
   720         return false;
   721     }
   723     bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
   724     {
   725         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
   726         ASSERT(term.atom.quantityCount == 1);
   728         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
   730         switch (term.atom.quantityType) {
   731         case QuantifierGreedy:
   732             if (backTrack->begin == notFound) {
   733                 context->term -= term.atom.parenthesesWidth;
   734                 return false;
   735             }
   736         case QuantifierNonGreedy:
   737             if (backTrack->begin == notFound) {
   738                 backTrack->begin = input.getPos();
   739                 if (term.capture()) {
   740                     // Technically this access to inputPosition should be accessing the begin term's
   741                     // inputPosition, but for repeats other than fixed these values should be
   742                     // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
   743                     ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
   745 		    // Disabled, see bug 808478
   746 #if 0
   747                     ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
   748 #endif
   749                     unsigned subpatternId = term.atom.subpatternId;
   750                     output[subpatternId << 1] = input.getPos() + term.inputPosition;
   751                 }
   752                 context->term -= term.atom.parenthesesWidth;
   753                 return true;
   754             }
   755         case QuantifierFixedCount:
   756             break;
   757         }
   759         return false;
   760     }
   762     bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
   763     {
   764         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
   765         ASSERT(term.atom.quantityType == QuantifierGreedy);
   766         ASSERT(term.atom.quantityCount == quantifyInfinite);
   767         ASSERT(!term.capture());
   769         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
   770         backTrack->begin = input.getPos();
   771         return true;
   772     }
   774     bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
   775     {
   776         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
   778         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
   779         // Empty match is a failed match.
   780         if (backTrack->begin == input.getPos())
   781             return false;
   783         // Successful match! Okay, what's next? - loop around and try to match moar!
   784         context->term -= (term.atom.parenthesesWidth + 1);
   785         return true;
   786     }
   788     bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
   789     {
   790         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
   791         ASSERT(term.atom.quantityType == QuantifierGreedy);
   792         ASSERT(term.atom.quantityCount == quantifyInfinite);
   793         ASSERT(!term.capture());
   795         // If we backtrack to this point, we have failed to match this iteration of the parens.
   796         // Since this is greedy / zero minimum a failed is also accepted as a match!
   797         context->term += term.atom.parenthesesWidth;
   798         return true;
   799     }
   801     bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
   802     {
   803         // 'Terminal' parentheses are at the end of the regex, and as such a match past end
   804         // should always be returned as a successful match - we should never backtrack to here.
   805         ASSERT_NOT_REACHED();
   806         return false;
   807     }
   809     bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
   810     {
   811         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
   812         ASSERT(term.atom.quantityCount == 1);
   814         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
   816         backTrack->begin = input.getPos();
   817         return true;
   818     }
   820     bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
   821     {
   822         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
   823         ASSERT(term.atom.quantityCount == 1);
   825         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
   827         input.setPos(backTrack->begin);
   829         // We've reached the end of the parens; if they are inverted, this is failure.
   830         if (term.invert()) {
   831             context->term -= term.atom.parenthesesWidth;
   832             return false;
   833         }
   835         return true;
   836     }
   838     bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
   839     {
   840         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
   841         ASSERT(term.atom.quantityCount == 1);
   843         // We've failed to match parens; if they are inverted, this is win!
   844         if (term.invert()) {
   845             context->term += term.atom.parenthesesWidth;
   846             return true;
   847         }
   849         return false;
   850     }
   852     bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
   853     {
   854         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
   855         ASSERT(term.atom.quantityCount == 1);
   857         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
   859         input.setPos(backTrack->begin);
   861         context->term -= term.atom.parenthesesWidth;
   862         return false;
   863     }
   865     JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
   866     {
   867         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
   869         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
   870         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
   872         backTrack->matchAmount = 0;
   873         backTrack->lastContext = 0;
   875         switch (term.atom.quantityType) {
   876         case QuantifierFixedCount: {
   877             // While we haven't yet reached our fixed limit,
   878             while (backTrack->matchAmount < term.atom.quantityCount) {
   879                 // Try to do a match, and it it succeeds, add it to the list.
   880                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
   881                 JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
   882                 if (result == JSRegExpMatch)
   883                     appendParenthesesDisjunctionContext(backTrack, context);
   884                 else {
   885                     // The match failed; try to find an alternate point to carry on from.
   886                     resetMatches(term, context);
   887                     freeParenthesesDisjunctionContext(context);
   889                     if (result != JSRegExpNoMatch)
   890                         return result;
   891                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
   892                     if (backtrackResult != JSRegExpMatch)
   893                         return backtrackResult;
   894                 }
   895             }
   897             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
   898             ParenthesesDisjunctionContext* context = backTrack->lastContext;
   899             recordParenthesesMatch(term, context);
   900             return JSRegExpMatch;
   901         }
   903         case QuantifierGreedy: {
   904             while (backTrack->matchAmount < term.atom.quantityCount) {
   905                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
   906                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
   907                 if (result == JSRegExpMatch)
   908                     appendParenthesesDisjunctionContext(backTrack, context);
   909                 else {
   910                     resetMatches(term, context);
   911                     freeParenthesesDisjunctionContext(context);
   913                     if (result != JSRegExpNoMatch)
   914                         return result;
   916                     break;
   917                 }
   918             }
   920             if (backTrack->matchAmount) {
   921                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
   922                 recordParenthesesMatch(term, context);
   923             }
   924             return JSRegExpMatch;
   925         }
   927         case QuantifierNonGreedy:
   928             return JSRegExpMatch;
   929         }
   931         ASSERT_NOT_REACHED();
   932         return JSRegExpErrorNoMatch;
   933     }
   935     // Rules for backtracking differ depending on whether this is greedy or non-greedy.
   936     //
   937     // Greedy matches never should try just adding more - you should already have done
   938     // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where
   939     // you backtrack an item off the list needs checking, since we'll never have matched
   940     // the one less case.  Tracking forwards, still add as much as possible.
   941     //
   942     // Non-greedy, we've already done the one less case, so don't match on popping.
   943     // We haven't done the one more case, so always try to add that.
   944     //
   945     JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
   946     {
   947         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
   949         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
   950         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
   952         switch (term.atom.quantityType) {
   953         case QuantifierFixedCount: {
   954             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
   956             ParenthesesDisjunctionContext* context = 0;
   957             JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
   959             if (result != JSRegExpMatch)
   960                 return result;
   962             // While we haven't yet reached our fixed limit,
   963             while (backTrack->matchAmount < term.atom.quantityCount) {
   964                 // Try to do a match, and it it succeeds, add it to the list.
   965                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
   966                 result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
   968                 if (result == JSRegExpMatch)
   969                     appendParenthesesDisjunctionContext(backTrack, context);
   970                 else {
   971                     // The match failed; try to find an alternate point to carry on from.
   972                     resetMatches(term, context);
   973                     freeParenthesesDisjunctionContext(context);
   975                     if (result != JSRegExpNoMatch)
   976                         return result;
   977                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
   978                     if (backtrackResult != JSRegExpMatch)
   979                         return backtrackResult;
   980                 }
   981             }
   983             ASSERT(backTrack->matchAmount == term.atom.quantityCount);
   984             context = backTrack->lastContext;
   985             recordParenthesesMatch(term, context);
   986             return JSRegExpMatch;
   987         }
   989         case QuantifierGreedy: {
   990             if (!backTrack->matchAmount)
   991                 return JSRegExpNoMatch;
   993             ParenthesesDisjunctionContext* context = backTrack->lastContext;
   994             JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
   995             if (result == JSRegExpMatch) {
   996                 while (backTrack->matchAmount < term.atom.quantityCount) {
   997                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
   998                     JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
   999                     if (parenthesesResult == JSRegExpMatch)
  1000                         appendParenthesesDisjunctionContext(backTrack, context);
  1001                     else {
  1002                         resetMatches(term, context);
  1003                         freeParenthesesDisjunctionContext(context);
  1005                         if (parenthesesResult != JSRegExpNoMatch)
  1006                             return parenthesesResult;
  1008                         break;
  1011             } else {
  1012                 // Avoid a topcrash before it occurs.
  1013                 if (!backTrack->lastContext) {
  1014                     ASSERT(!"Tripped Bug 856796!");
  1015                     return JSRegExpErrorInternal;
  1018                 resetMatches(term, context);
  1019                 popParenthesesDisjunctionContext(backTrack);
  1020                 freeParenthesesDisjunctionContext(context);
  1022                 if (result != JSRegExpNoMatch)
  1023                     return result;
  1026             if (backTrack->matchAmount) {
  1027                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
  1028                 recordParenthesesMatch(term, context);
  1030             return JSRegExpMatch;
  1033         case QuantifierNonGreedy: {
  1034             // If we've not reached the limit, try to add one more match.
  1035             if (backTrack->matchAmount < term.atom.quantityCount) {
  1036                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
  1037                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
  1038                 if (result == JSRegExpMatch) {
  1039                     appendParenthesesDisjunctionContext(backTrack, context);
  1040                     recordParenthesesMatch(term, context);
  1041                     return JSRegExpMatch;
  1044                 resetMatches(term, context);
  1045                 freeParenthesesDisjunctionContext(context);
  1047                 if (result != JSRegExpNoMatch)
  1048                     return result;
  1051             // Nope - okay backtrack looking for an alternative.
  1052             while (backTrack->matchAmount) {
  1053                 ParenthesesDisjunctionContext* context = backTrack->lastContext;
  1054                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
  1055                 if (result == JSRegExpMatch) {
  1056                     // successful backtrack! we're back in the game!
  1057                     if (backTrack->matchAmount) {
  1058                         context = backTrack->lastContext;
  1059                         recordParenthesesMatch(term, context);
  1061                     return JSRegExpMatch;
  1064                 // Avoid a topcrash before it occurs.
  1065                 if (!backTrack->lastContext) {
  1066                     ASSERT(!"Tripped Bug 856796!");
  1067                     return JSRegExpErrorInternal;
  1070                 // pop a match off the stack
  1071                 resetMatches(term, context);
  1072                 popParenthesesDisjunctionContext(backTrack);
  1073                 freeParenthesesDisjunctionContext(context);
  1075                 if (result != JSRegExpNoMatch)
  1076                     return result;
  1079             return JSRegExpNoMatch;
  1083         ASSERT_NOT_REACHED();
  1084         return JSRegExpErrorNoMatch;
  1087     bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
  1089         UNUSED_PARAM(term);
  1090         unsigned matchBegin = context->matchBegin;
  1092         if (matchBegin) {
  1093             for (matchBegin--; true; matchBegin--) {
  1094                 if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
  1095                     ++matchBegin;
  1096                     break;
  1099                 if (!matchBegin)
  1100                     break;
  1104         unsigned matchEnd = input.getPos();
  1106         for (; (matchEnd != input.end())
  1107              && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
  1109         if (((matchBegin && term.anchors.m_bol)
  1110              || ((matchEnd != input.end()) && term.anchors.m_eol))
  1111             && !pattern->m_multiline)
  1112             return false;
  1114         context->matchBegin = matchBegin;
  1115         context->matchEnd = matchEnd;
  1116         return true;
  1119 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
  1120 #define BACKTRACK() { --context->term; goto backtrack; }
  1121 #define currentTerm() (disjunction->terms[context->term])
  1122     JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
  1124         if (!--remainingMatchCount)
  1125             return JSRegExpErrorHitLimit;
  1127         if (btrack)
  1128             BACKTRACK();
  1130         context->matchBegin = input.getPos();
  1131         context->term = 0;
  1133     matchAgain:
  1134         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
  1136         // Prevent jank resulting from getting stuck in Yarr for a long time.
  1137         if (!CheckForInterrupt(this->cx))
  1138             return JSRegExpErrorInternal;
  1140         switch (currentTerm().type) {
  1141         case ByteTerm::TypeSubpatternBegin:
  1142             MATCH_NEXT();
  1143         case ByteTerm::TypeSubpatternEnd:
  1144             context->matchEnd = input.getPos();
  1145             return JSRegExpMatch;
  1147         case ByteTerm::TypeBodyAlternativeBegin:
  1148             MATCH_NEXT();
  1149         case ByteTerm::TypeBodyAlternativeDisjunction:
  1150         case ByteTerm::TypeBodyAlternativeEnd:
  1151             context->matchEnd = input.getPos();
  1152             return JSRegExpMatch;
  1154         case ByteTerm::TypeAlternativeBegin:
  1155             MATCH_NEXT();
  1156         case ByteTerm::TypeAlternativeDisjunction:
  1157         case ByteTerm::TypeAlternativeEnd: {
  1158             int offset = currentTerm().alternative.end;
  1159             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
  1160             backTrack->offset = offset;
  1161             context->term += offset;
  1162             MATCH_NEXT();
  1165         case ByteTerm::TypeAssertionBOL:
  1166             if (matchAssertionBOL(currentTerm()))
  1167                 MATCH_NEXT();
  1168             BACKTRACK();
  1169         case ByteTerm::TypeAssertionEOL:
  1170             if (matchAssertionEOL(currentTerm()))
  1171                 MATCH_NEXT();
  1172             BACKTRACK();
  1173         case ByteTerm::TypeAssertionWordBoundary:
  1174             if (matchAssertionWordBoundary(currentTerm()))
  1175                 MATCH_NEXT();
  1176             BACKTRACK();
  1178         case ByteTerm::TypePatternCharacterOnce:
  1179         case ByteTerm::TypePatternCharacterFixed: {
  1180             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
  1181                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount))
  1182                     BACKTRACK();
  1184             MATCH_NEXT();
  1186         case ByteTerm::TypePatternCharacterGreedy: {
  1187             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
  1188             unsigned matchAmount = 0;
  1189             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
  1190                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {
  1191                     input.uncheckInput(1);
  1192                     break;
  1194                 ++matchAmount;
  1196             backTrack->matchAmount = matchAmount;
  1198             MATCH_NEXT();
  1200         case ByteTerm::TypePatternCharacterNonGreedy: {
  1201             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
  1202             backTrack->matchAmount = 0;
  1203             MATCH_NEXT();
  1206         case ByteTerm::TypePatternCasedCharacterOnce:
  1207         case ByteTerm::TypePatternCasedCharacterFixed: {
  1208             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
  1209                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
  1210                     BACKTRACK();
  1212             MATCH_NEXT();
  1214         case ByteTerm::TypePatternCasedCharacterGreedy: {
  1215             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
  1216             unsigned matchAmount = 0;
  1217             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
  1218                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {
  1219                     input.uncheckInput(1);
  1220                     break;
  1222                 ++matchAmount;
  1224             backTrack->matchAmount = matchAmount;
  1226             MATCH_NEXT();
  1228         case ByteTerm::TypePatternCasedCharacterNonGreedy: {
  1229             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
  1230             backTrack->matchAmount = 0;
  1231             MATCH_NEXT();
  1234         case ByteTerm::TypeCharacterClass:
  1235             if (matchCharacterClass(currentTerm(), context))
  1236                 MATCH_NEXT();
  1237             BACKTRACK();
  1238         case ByteTerm::TypeBackReference:
  1239             if (matchBackReference(currentTerm(), context))
  1240                 MATCH_NEXT();
  1241             BACKTRACK();
  1242         case ByteTerm::TypeParenthesesSubpattern: {
  1243             JSRegExpResult result = matchParentheses(currentTerm(), context);
  1245             if (result == JSRegExpMatch) {
  1246                 MATCH_NEXT();
  1247             }  else if (result != JSRegExpNoMatch)
  1248                 return result;
  1250             BACKTRACK();
  1252         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
  1253             if (matchParenthesesOnceBegin(currentTerm(), context))
  1254                 MATCH_NEXT();
  1255             BACKTRACK();
  1256         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
  1257             if (matchParenthesesOnceEnd(currentTerm(), context))
  1258                 MATCH_NEXT();
  1259             BACKTRACK();
  1260         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
  1261             if (matchParenthesesTerminalBegin(currentTerm(), context))
  1262                 MATCH_NEXT();
  1263             BACKTRACK();
  1264         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
  1265             if (matchParenthesesTerminalEnd(currentTerm(), context))
  1266                 MATCH_NEXT();
  1267             BACKTRACK();
  1268         case ByteTerm::TypeParentheticalAssertionBegin:
  1269             if (matchParentheticalAssertionBegin(currentTerm(), context))
  1270                 MATCH_NEXT();
  1271             BACKTRACK();
  1272         case ByteTerm::TypeParentheticalAssertionEnd:
  1273             if (matchParentheticalAssertionEnd(currentTerm(), context))
  1274                 MATCH_NEXT();
  1275             BACKTRACK();
  1277         case ByteTerm::TypeCheckInput:
  1278             if (input.checkInput(currentTerm().checkInputCount))
  1279                 MATCH_NEXT();
  1280             BACKTRACK();
  1282         case ByteTerm::TypeUncheckInput:
  1283             input.uncheckInput(currentTerm().checkInputCount);
  1284             MATCH_NEXT();
  1286         case ByteTerm::TypeDotStarEnclosure:
  1287             if (matchDotStarEnclosure(currentTerm(), context))
  1288                 return JSRegExpMatch;
  1289             BACKTRACK();
  1292         // We should never fall-through to here.
  1293         ASSERT_NOT_REACHED();
  1295     backtrack:
  1296         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
  1298         // Prevent jank resulting from getting stuck in Yarr for a long time.
  1299         if (!CheckForInterrupt(this->cx))
  1300             return JSRegExpErrorInternal;
  1302         switch (currentTerm().type) {
  1303         case ByteTerm::TypeSubpatternBegin:
  1304             return JSRegExpNoMatch;
  1305         case ByteTerm::TypeSubpatternEnd:
  1306             ASSERT_NOT_REACHED();
  1308         case ByteTerm::TypeBodyAlternativeBegin:
  1309         case ByteTerm::TypeBodyAlternativeDisjunction: {
  1310             int offset = currentTerm().alternative.next;
  1311             context->term += offset;
  1312             if (offset > 0)
  1313                 MATCH_NEXT();
  1315             if (input.atEnd())
  1316                 return JSRegExpNoMatch;
  1318             input.next();
  1320             context->matchBegin = input.getPos();
  1322             if (currentTerm().alternative.onceThrough)
  1323                 context->term += currentTerm().alternative.next;
  1325             MATCH_NEXT();
  1327         case ByteTerm::TypeBodyAlternativeEnd:
  1328             ASSERT_NOT_REACHED();
  1330         case ByteTerm::TypeAlternativeBegin:
  1331         case ByteTerm::TypeAlternativeDisjunction: {
  1332             int offset = currentTerm().alternative.next;
  1333             context->term += offset;
  1334             if (offset > 0)
  1335                 MATCH_NEXT();
  1336             BACKTRACK();
  1338         case ByteTerm::TypeAlternativeEnd: {
  1339             // We should never backtrack back into an alternative of the main body of the regex.
  1340             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
  1341             unsigned offset = backTrack->offset;
  1342             context->term -= offset;
  1343             BACKTRACK();
  1346         case ByteTerm::TypeAssertionBOL:
  1347         case ByteTerm::TypeAssertionEOL:
  1348         case ByteTerm::TypeAssertionWordBoundary:
  1349             BACKTRACK();
  1351         case ByteTerm::TypePatternCharacterOnce:
  1352         case ByteTerm::TypePatternCharacterFixed:
  1353         case ByteTerm::TypePatternCharacterGreedy:
  1354         case ByteTerm::TypePatternCharacterNonGreedy:
  1355             if (backtrackPatternCharacter(currentTerm(), context))
  1356                 MATCH_NEXT();
  1357             BACKTRACK();
  1358         case ByteTerm::TypePatternCasedCharacterOnce:
  1359         case ByteTerm::TypePatternCasedCharacterFixed:
  1360         case ByteTerm::TypePatternCasedCharacterGreedy:
  1361         case ByteTerm::TypePatternCasedCharacterNonGreedy:
  1362             if (backtrackPatternCasedCharacter(currentTerm(), context))
  1363                 MATCH_NEXT();
  1364             BACKTRACK();
  1365         case ByteTerm::TypeCharacterClass:
  1366             if (backtrackCharacterClass(currentTerm(), context))
  1367                 MATCH_NEXT();
  1368             BACKTRACK();
  1369         case ByteTerm::TypeBackReference:
  1370             if (backtrackBackReference(currentTerm(), context))
  1371                 MATCH_NEXT();
  1372             BACKTRACK();
  1373         case ByteTerm::TypeParenthesesSubpattern: {
  1374             JSRegExpResult result = backtrackParentheses(currentTerm(), context);
  1376             if (result == JSRegExpMatch) {
  1377                 MATCH_NEXT();
  1378             } else if (result != JSRegExpNoMatch)
  1379                 return result;
  1381             BACKTRACK();
  1383         case ByteTerm::TypeParenthesesSubpatternOnceBegin:
  1384             if (backtrackParenthesesOnceBegin(currentTerm(), context))
  1385                 MATCH_NEXT();
  1386             BACKTRACK();
  1387         case ByteTerm::TypeParenthesesSubpatternOnceEnd:
  1388             if (backtrackParenthesesOnceEnd(currentTerm(), context))
  1389                 MATCH_NEXT();
  1390             BACKTRACK();
  1391         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
  1392             if (backtrackParenthesesTerminalBegin(currentTerm(), context))
  1393                 MATCH_NEXT();
  1394             BACKTRACK();
  1395         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
  1396             if (backtrackParenthesesTerminalEnd(currentTerm(), context))
  1397                 MATCH_NEXT();
  1398             BACKTRACK();
  1399         case ByteTerm::TypeParentheticalAssertionBegin:
  1400             if (backtrackParentheticalAssertionBegin(currentTerm(), context))
  1401                 MATCH_NEXT();
  1402             BACKTRACK();
  1403         case ByteTerm::TypeParentheticalAssertionEnd:
  1404             if (backtrackParentheticalAssertionEnd(currentTerm(), context))
  1405                 MATCH_NEXT();
  1406             BACKTRACK();
  1408         case ByteTerm::TypeCheckInput:
  1409             input.uncheckInput(currentTerm().checkInputCount);
  1410             BACKTRACK();
  1412         case ByteTerm::TypeUncheckInput:
  1413             input.checkInput(currentTerm().checkInputCount);
  1414             BACKTRACK();
  1416         case ByteTerm::TypeDotStarEnclosure:
  1417             ASSERT_NOT_REACHED();
  1420         ASSERT_NOT_REACHED();
  1421         return JSRegExpErrorNoMatch;
  1424     JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
  1426         JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
  1428         if (result == JSRegExpMatch) {
  1429             while (context->matchBegin == context->matchEnd) {
  1430                 result = matchDisjunction(disjunction, context, true);
  1431                 if (result != JSRegExpMatch)
  1432                     return result;
  1434             return JSRegExpMatch;
  1437         return result;
  1440     unsigned interpret()
  1442         if (!input.isAvailableInput(0))
  1443             return offsetNoMatch;
  1445         for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)
  1446             output[i << 1] = offsetNoMatch;
  1448         allocatorPool = pattern->m_allocator->startAllocator();
  1449         if (!allocatorPool)
  1450             CRASH();
  1452         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
  1454         JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
  1455         if (result == JSRegExpMatch) {
  1456             output[0] = context->matchBegin;
  1457             output[1] = context->matchEnd;
  1460         freeDisjunctionContext(context);
  1462         pattern->m_allocator->stopAllocator();
  1464         ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));
  1465         return output[0];
  1468     Interpreter(JSContext *cx, BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
  1469         : cx(cx)
  1470         , pattern(pattern)
  1471         , output(output)
  1472         , input(input, start, length)
  1473         , allocatorPool(0)
  1474         , remainingMatchCount(matchLimit)
  1478 private:
  1479     JSContext *cx;
  1480     BytecodePattern* pattern;
  1481     unsigned* output;
  1482     InputStream input;
  1483     BumpPointerPool* allocatorPool;
  1484     unsigned remainingMatchCount;
  1485 };
  1489 class ByteCompiler {
  1490     struct ParenthesesStackEntry {
  1491         unsigned beginTerm;
  1492         unsigned savedAlternativeIndex;
  1494         // For js::Vector. Does not create a valid object.
  1495         ParenthesesStackEntry() { }
  1497         ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
  1498             : beginTerm(beginTerm)
  1499             , savedAlternativeIndex(savedAlternativeIndex)
  1502     };
  1504 public:
  1505     ByteCompiler(YarrPattern& pattern)
  1506         : m_pattern(pattern)
  1508         m_currentAlternativeIndex = 0;
  1511     PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
  1513         regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
  1514         emitDisjunction(m_pattern.m_body);
  1515         regexEnd();
  1517         return adoptPtr(newOrCrash<BytecodePattern>(m_bodyDisjunction.release(), m_allParenthesesInfo, Ref<YarrPattern>(m_pattern), allocator));
  1520     void checkInput(unsigned count)
  1522         m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
  1525     void uncheckInput(unsigned count)
  1527         m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
  1530     void assertionBOL(unsigned inputPosition)
  1532         m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
  1535     void assertionEOL(unsigned inputPosition)
  1537         m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
  1540     void assertionWordBoundary(bool invert, unsigned inputPosition)
  1542         m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
  1545     void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
  1547         if (m_pattern.m_ignoreCase) {
  1548             UChar lo = Unicode::toLower(ch);
  1549             UChar hi = Unicode::toUpper(ch);
  1551             if (lo != hi) {
  1552                 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
  1553                 return;
  1557         m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
  1560     void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
  1562         m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
  1564         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
  1565         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
  1566         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
  1569     void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
  1571         ASSERT(subpatternId);
  1573         m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
  1575         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
  1576         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
  1577         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
  1580     void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
  1582         int beginTerm = m_bodyDisjunction->terms.size();
  1584         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
  1585         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
  1586         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
  1587         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
  1589         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
  1590         m_currentAlternativeIndex = beginTerm + 1;
  1593     void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
  1595         int beginTerm = m_bodyDisjunction->terms.size();
  1597         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
  1598         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
  1599         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
  1600         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
  1602         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
  1603         m_currentAlternativeIndex = beginTerm + 1;
  1606     void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
  1608         // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
  1609         // then fix this up at the end! - simplifying this should make it much clearer.
  1610         // https://bugs.webkit.org/show_bug.cgi?id=50136
  1612         int beginTerm = m_bodyDisjunction->terms.size();
  1614         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
  1615         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
  1616         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
  1617         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
  1619         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
  1620         m_currentAlternativeIndex = beginTerm + 1;
  1623     void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
  1625         int beginTerm = m_bodyDisjunction->terms.size();
  1627         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
  1628         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
  1629         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
  1630         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
  1632         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
  1633         m_currentAlternativeIndex = beginTerm + 1;
  1636     void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
  1638         unsigned beginTerm = popParenthesesStack();
  1639         closeAlternative(beginTerm + 1);
  1640         unsigned endTerm = m_bodyDisjunction->terms.size();
  1642         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
  1644         bool invert = m_bodyDisjunction->terms[beginTerm].invert();
  1645         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
  1647         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
  1648         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
  1649         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
  1650         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
  1652         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
  1653         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
  1654         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
  1655         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
  1658     void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
  1660         m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
  1663     unsigned popParenthesesStack()
  1665         ASSERT(m_parenthesesStack.size());
  1666         int stackEnd = m_parenthesesStack.size() - 1;
  1667         unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
  1668         m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
  1669         m_parenthesesStack.shrink(stackEnd);
  1671         ASSERT(beginTerm < m_bodyDisjunction->terms.size());
  1672         ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
  1674         return beginTerm;
  1677 #ifndef NDEBUG
  1678     void dumpDisjunction(ByteDisjunction* disjunction)
  1680         dataLogF("ByteDisjunction(%p):\n\t", (void *)disjunction);
  1681         for (unsigned i = 0; i < disjunction->terms.size(); ++i)
  1682             dataLogF("{ %d } ", disjunction->terms[i].type);
  1683         dataLogF("\n");
  1685 #endif
  1687     void closeAlternative(int beginTerm)
  1689         int origBeginTerm = beginTerm;
  1690         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
  1691         int endIndex = m_bodyDisjunction->terms.size();
  1693         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
  1695         if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
  1696             m_bodyDisjunction->terms.remove(beginTerm);
  1697         else {
  1698             while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
  1699                 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
  1700                 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
  1701                 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
  1702                 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
  1705             m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
  1707             m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
  1708             m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
  1712     void closeBodyAlternative()
  1714         int beginTerm = 0;
  1715         int origBeginTerm = 0;
  1716         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
  1717         int endIndex = m_bodyDisjunction->terms.size();
  1719         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
  1721         while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
  1722             beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
  1723             ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
  1724             m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
  1725             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
  1728         m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
  1730         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
  1731         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
  1734     void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
  1736         unsigned beginTerm = popParenthesesStack();
  1737         closeAlternative(beginTerm + 1);
  1738         unsigned endTerm = m_bodyDisjunction->terms.size();
  1740         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
  1742         ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
  1744         bool capture = parenthesesBegin.capture();
  1745         unsigned subpatternId = parenthesesBegin.atom.subpatternId;
  1747         unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
  1748         ByteDisjunction* parenthesesDisjunction = newOrCrash<ByteDisjunction>(numSubpatterns, callFrameSize);
  1750         parenthesesDisjunction->terms.reserve(endTerm - beginTerm + 1);
  1751         parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
  1752         for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
  1753             parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
  1754         parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
  1756         m_bodyDisjunction->terms.shrink(beginTerm);
  1758         m_allParenthesesInfo.append(parenthesesDisjunction);
  1759         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition));
  1761         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
  1762         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
  1763         m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
  1766     void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
  1768         unsigned beginTerm = popParenthesesStack();
  1769         closeAlternative(beginTerm + 1);
  1770         unsigned endTerm = m_bodyDisjunction->terms.size();
  1772         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
  1774         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
  1775         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
  1777         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
  1778         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
  1779         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
  1780         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
  1782         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
  1783         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
  1784         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
  1785         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
  1788     void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
  1790         unsigned beginTerm = popParenthesesStack();
  1791         closeAlternative(beginTerm + 1);
  1792         unsigned endTerm = m_bodyDisjunction->terms.size();
  1794         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
  1796         bool capture = m_bodyDisjunction->terms[beginTerm].capture();
  1797         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
  1799         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
  1800         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
  1801         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
  1802         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
  1804         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
  1805         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
  1806         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
  1807         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
  1810     void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
  1812         m_bodyDisjunction = adoptPtr(newOrCrash<ByteDisjunction>(numSubpatterns, callFrameSize));
  1813         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
  1814         m_bodyDisjunction->terms[0].frameLocation = 0;
  1815         m_currentAlternativeIndex = 0;
  1818     void regexEnd()
  1820         closeBodyAlternative();
  1823     void alternativeBodyDisjunction(bool onceThrough)
  1825         int newAlternativeIndex = m_bodyDisjunction->terms.size();
  1826         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
  1827         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
  1829         m_currentAlternativeIndex = newAlternativeIndex;
  1832     void alternativeDisjunction()
  1834         int newAlternativeIndex = m_bodyDisjunction->terms.size();
  1835         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
  1836         m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
  1838         m_currentAlternativeIndex = newAlternativeIndex;
  1841     void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
  1843         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
  1844             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
  1846             PatternAlternative* alternative = disjunction->m_alternatives[alt];
  1848             if (alt) {
  1849                 if (disjunction == m_pattern.m_body)
  1850                     alternativeBodyDisjunction(alternative->onceThrough());
  1851                 else
  1852                     alternativeDisjunction();
  1855             unsigned minimumSize = alternative->m_minimumSize;
  1856             ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
  1857             unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
  1859             if (countToCheck) {
  1860                 checkInput(countToCheck);
  1861                 currentCountAlreadyChecked += countToCheck;
  1864             for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
  1865                 PatternTerm& term = alternative->m_terms[i];
  1867                 switch (term.type) {
  1868                 case PatternTerm::TypeAssertionBOL:
  1869                     assertionBOL(currentCountAlreadyChecked - term.inputPosition);
  1870                     break;
  1872                 case PatternTerm::TypeAssertionEOL:
  1873                     assertionEOL(currentCountAlreadyChecked - term.inputPosition);
  1874                     break;
  1876                 case PatternTerm::TypeAssertionWordBoundary:
  1877                     assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);
  1878                     break;
  1880                 case PatternTerm::TypePatternCharacter:
  1881                     atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
  1882                     break;
  1884                 case PatternTerm::TypeCharacterClass:
  1885                     atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
  1886                     break;
  1888                 case PatternTerm::TypeBackReference:
  1889                     atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
  1890                         break;
  1892                 case PatternTerm::TypeForwardReference:
  1893                     break;
  1895                 case PatternTerm::TypeParenthesesSubpattern: {
  1896                     unsigned disjunctionAlreadyCheckedCount = 0;
  1897                     if (term.quantityCount == 1 && !term.parentheses.isCopy) {
  1898                         unsigned alternativeFrameLocation = term.frameLocation;
  1899                         // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
  1900                         if (term.quantityType == QuantifierFixedCount)
  1901                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
  1902                         else
  1903                             alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
  1904                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
  1905                         atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
  1906                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
  1907                         atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
  1908                     } else if (term.parentheses.isTerminal) {
  1909                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
  1910                         atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
  1911                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
  1912                         atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
  1913                     } else {
  1914                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
  1915                         atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0);
  1916                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
  1917                         atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
  1919                     break;
  1922                 case PatternTerm::TypeParentheticalAssertion: {
  1923                     unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
  1925                     ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
  1926                     unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition);
  1927                     unsigned uncheckAmount = 0;
  1928                     if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
  1929                         uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
  1930                         uncheckInput(uncheckAmount);
  1931                         currentCountAlreadyChecked -= uncheckAmount;
  1934                     atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
  1935                     emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
  1936                     atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
  1937                     if (uncheckAmount) {
  1938                         checkInput(uncheckAmount);
  1939                         currentCountAlreadyChecked += uncheckAmount;
  1941                     break;
  1944                 case PatternTerm::TypeDotStarEnclosure:
  1945                     assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
  1946                     break;
  1952 private:
  1953     YarrPattern& m_pattern;
  1954     OwnPtr<ByteDisjunction> m_bodyDisjunction;
  1955     unsigned m_currentAlternativeIndex;
  1956     Vector<ParenthesesStackEntry> m_parenthesesStack;
  1957     Vector<ByteDisjunction*> m_allParenthesesInfo;
  1958 };
  1960 PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
  1962     return ByteCompiler(pattern).compile(allocator);
  1965 unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
  1967 #if YARR_8BIT_CHAR_SUPPORT
  1968     if (input.is8Bit())
  1969         return Interpreter<LChar>(cx, bytecode, output, input.characters8(), input.length(), start).interpret();
  1970 #endif
  1971     return Interpreter<UChar>(cx, bytecode, output, input.chars(), input.length(), start).interpret();
  1974 unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
  1976     return Interpreter<LChar>(cx, bytecode, output, input, length, start).interpret();
  1979 unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
  1981     return Interpreter<UChar>(cx, bytecode, output, input, length, start).interpret();
  1984 // These should be the same for both UChar & LChar.
  1985 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
  1986 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
  1987 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
  1988 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
  1989 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
  1990 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
  1991 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
  1994 } }

mercurial