The Tor Browser: js/src/yarr/YarrInterpreter.cpp@6474c204b198

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-

     2  * vim: set ts=8 sts=4 et sw=4 tw=99:

     3  *

     4  * Copyright (C) 2009 Apple Inc. All rights reserved.

     5  * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged

     6  *

     7  * Redistribution and use in source and binary forms, with or without

     8  * modification, are permitted provided that the following conditions

     9  * are met:

    10  * 1. Redistributions of source code must retain the above copyright

    11  *    notice, this list of conditions and the following disclaimer.

    12  * 2. Redistributions in binary form must reproduce the above copyright

    13  *    notice, this list of conditions and the following disclaimer in the

    14  *    documentation and/or other materials provided with the distribution.

    15  *

    16  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY

    17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR

    19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR

    20  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

    21  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

    22  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

    23  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY

    24  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

    25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

    26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    27  */

    29 #include "yarr/YarrInterpreter.h"

    31 #include "jscntxt.h"

    33 #include "yarr/Yarr.h"

    34 #include "yarr/YarrCanonicalizeUCS2.h"

    35 #include "yarr/BumpPointerAllocator.h"

    37 using namespace WTF;

    39 namespace JSC { namespace Yarr {

    41 template<typename CharType>

    42 class Interpreter {

    43 public:

    44     struct ParenthesesDisjunctionContext;

    46     struct BackTrackInfoPatternCharacter {

    47         uintptr_t matchAmount;

    48     };

    49     struct BackTrackInfoCharacterClass {

    50         uintptr_t matchAmount;

    51     };

    52     struct BackTrackInfoBackReference {

    53         uintptr_t begin; // Not really needed for greedy quantifiers.

    54         uintptr_t matchAmount; // Not really needed for fixed quantifiers.

    55     };

    56     struct BackTrackInfoAlternative {

    57         uintptr_t offset;

    58     };

    59     struct BackTrackInfoParentheticalAssertion {

    60         uintptr_t begin;

    61     };

    62     struct BackTrackInfoParenthesesOnce {

    63         uintptr_t begin;

    64     };

    65     struct BackTrackInfoParenthesesTerminal {

    66         uintptr_t begin;

    67     };

    68     struct BackTrackInfoParentheses {

    69         uintptr_t matchAmount;

    70         ParenthesesDisjunctionContext* lastContext;

    71     };

    73     static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)

    74     {

    75         context->next = backTrack->lastContext;

    76         backTrack->lastContext = context;

    77         ++backTrack->matchAmount;

    78     }

    80     static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)

    81     {

    82         ASSERT(backTrack->matchAmount);

    83         ASSERT(backTrack->lastContext);

    84         backTrack->lastContext = backTrack->lastContext->next;

    85         --backTrack->matchAmount;

    86     }

    88     struct DisjunctionContext

    89     {

    90         DisjunctionContext()

    91             : term(0)

    92         {

    93         }

    95         void* operator new(size_t, void* where)

    96         {

    97             return where;

    98         }

   100         int term;

   101         unsigned matchBegin;

   102         unsigned matchEnd;

   103         uintptr_t frame[1];

   104     };

   106     DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)

   107     {

   108         size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);

   109         allocatorPool = allocatorPool->ensureCapacity(size);

   110         if (!allocatorPool)

   111             CRASH();

   112         return new (allocatorPool->alloc(size)) DisjunctionContext();

   113     }

   115     void freeDisjunctionContext(DisjunctionContext* context)

   116     {

   117         allocatorPool = allocatorPool->dealloc(context);

   118     }

   120     struct ParenthesesDisjunctionContext

   121     {

   122         ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)

   123             : next(0)

   124         {

   125             unsigned firstSubpatternId = term.atom.subpatternId;

   126             unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;

   128             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {

   129                 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];

   130                 output[(firstSubpatternId << 1) + i] = offsetNoMatch;

   131             }

   133             new (getDisjunctionContext(term)) DisjunctionContext();

   134         }

   136         void* operator new(size_t, void* where)

   137         {

   138             return where;

   139         }

   141         void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)

   142         {

   143             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)

   144                 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];

   145         }

   147         DisjunctionContext* getDisjunctionContext(ByteTerm& term)

   148         {

   149             return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));

   150         }

   152         ParenthesesDisjunctionContext* next;

   153         unsigned subpatternBackup[1];

   154     };

   156     ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)

   157     {

   158         size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);

   159         size = JS_ROUNDUP(size, JS_ALIGNMENT_OF(ParenthesesDisjunctionContext));

   160         allocatorPool = allocatorPool->ensureCapacity(size);

   161         if (!allocatorPool)

   162             CRASH();

   163         return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);

   164     }

   166     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)

   167     {

   168         allocatorPool = allocatorPool->dealloc(context);

   169     }

   171     class InputStream {

   172     public:

   173         InputStream(const CharType* input, unsigned start, unsigned length)

   174             : input(input)

   175             , pos(start)

   176             , length(length)

   177         {

   178         }

   180         void next()

   181         {

   182             ++pos;

   183         }

   185         void rewind(unsigned amount)

   186         {

   187             ASSERT(pos >= amount);

   188             pos -= amount;

   189         }

   191         int read()

   192         {

   193             ASSERT(pos < length);

   194             if (pos < length)

   195                 return input[pos];

   196             return -1;

   197         }

   199         int readPair()

   200         {

   201             ASSERT(pos + 1 < length);

   202             return input[pos] | input[pos + 1] << 16;

   203         }

   205         int readChecked(unsigned negativePositionOffest)

   206         {

   207             if (pos < negativePositionOffest)

   208                 CRASH();

   209             unsigned p = pos - negativePositionOffest;

   210             ASSERT(p < length);

   211             return input[p];

   212         }

   214         int reread(unsigned from)

   215         {

   216             ASSERT(from < length);

   217             return input[from];

   218         }

   220         int prev()

   221         {

   222             ASSERT(!(pos > length));

   223             if (pos && length)

   224                 return input[pos - 1];

   225             return -1;

   226         }

   228         unsigned getPos()

   229         {

   230             return pos;

   231         }

   233         void setPos(unsigned p)

   234         {

   235             pos = p;

   236         }

   238         bool atStart()

   239         {

   240             return pos == 0;

   241         }

   243         bool atEnd()

   244         {

   245             return pos == length;

   246         }

   248         unsigned end()

   249         {

   250             return length;

   251         }

   253         bool checkInput(unsigned count)

   254         {

   255             if (((pos + count) <= length) && ((pos + count) >= pos)) {

   256                 pos += count;

   257                 return true;

   258             }

   259             return false;

   260         }

   262         void uncheckInput(unsigned count)

   263         {

   264             if (pos < count)

   265                 CRASH();

   266             pos -= count;

   267         }

   269         bool atStart(unsigned negativePositionOffest)

   270         {

   271             return pos == negativePositionOffest;

   272         }

   274         bool atEnd(unsigned negativePositionOffest)

   275         {

   276             if (pos < negativePositionOffest)

   277                 CRASH();

   278             return (pos - negativePositionOffest) == length;

   279         }

   281         bool isAvailableInput(unsigned offset)

   282         {

   283             return (((pos + offset) <= length) && ((pos + offset) >= pos));

   284         }

   286     private:

   287         const CharType* input;

   288         unsigned pos;

   289         unsigned length;

   290     };

   292     bool testCharacterClass(CharacterClass* characterClass, int ch)

   293     {

   294         if (ch & 0xFF80) {

   295             for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)

   296                 if (ch == characterClass->m_matchesUnicode[i])

   297                     return true;

   298             for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)

   299                 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))

   300                     return true;

   301         } else {

   302             for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)

   303                 if (ch == characterClass->m_matches[i])

   304                     return true;

   305             for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)

   306                 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))

   307                     return true;

   308         }

   310         return false;

   311     }

   313     bool checkCharacter(int testChar, unsigned negativeInputOffset)

   314     {

   315         return testChar == input.readChecked(negativeInputOffset);

   316     }

   318     bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)

   319     {

   320         int ch = input.readChecked(negativeInputOffset);

   321         return (loChar == ch) || (hiChar == ch);

   322     }

   324     bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)

   325     {

   326         bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));

   327         return invert ? !match : match;

   328     }

   330     bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)

   331     {

   332         unsigned matchSize = (unsigned)(matchEnd - matchBegin);

   334         if (!input.checkInput(matchSize))

   335             return false;

   337         if (pattern->m_ignoreCase) {

   338             for (unsigned i = 0; i < matchSize; ++i) {

   339                 int oldCh = input.reread(matchBegin + i);

   340                 int ch = input.readChecked(negativeInputOffset + matchSize - i);

   342                 if (oldCh == ch)

   343                     continue;

   345                 // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that

   346                 // unicode values are never allowed to match against ascii ones.

   347                 if (isASCII(oldCh) || isASCII(ch)) {

   348                     if (toASCIIUpper(oldCh) == toASCIIUpper(ch))

   349                         continue;

   350                 } else if (areCanonicallyEquivalent(oldCh, ch))

   351                     continue;

   353                 input.uncheckInput(matchSize);

   354                 return false;

   355             }

   356         } else {

   357             for (unsigned i = 0; i < matchSize; ++i) {

   358                 if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) {

   359                     input.uncheckInput(matchSize);

   360                     return false;

   361                 }

   362             }

   363         }

   365         return true;

   366     }

   368     bool matchAssertionBOL(ByteTerm& term)

   369     {

   370         return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));

   371     }

   373     bool matchAssertionEOL(ByteTerm& term)

   374     {

   375         if (term.inputPosition)

   376             return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));

   378         return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));

   379     }

   381     bool matchAssertionWordBoundary(ByteTerm& term)

   382     {

   383         bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1));

   384         bool readIsWordchar;

   385         if (term.inputPosition)

   386             readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));

   387         else

   388             readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());

   390         bool wordBoundary = prevIsWordchar != readIsWordchar;

   391         return term.invert() ? !wordBoundary : wordBoundary;

   392     }

   394     bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)

   395     {

   396         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);

   398         switch (term.atom.quantityType) {

   399         case QuantifierFixedCount:

   400             break;

   402         case QuantifierGreedy:

   403             if (backTrack->matchAmount) {

   404                 --backTrack->matchAmount;

   405                 input.uncheckInput(1);

   406                 return true;

   407             }

   408             break;

   410         case QuantifierNonGreedy:

   411             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {

   412                 ++backTrack->matchAmount;

   413                 if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))

   414                     return true;

   415             }

   416             input.uncheckInput(backTrack->matchAmount);

   417             break;

   418         }

   420         return false;

   421     }

   423     bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)

   424     {

   425         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);

   427         switch (term.atom.quantityType) {

   428         case QuantifierFixedCount:

   429             break;

   431         case QuantifierGreedy:

   432             if (backTrack->matchAmount) {

   433                 --backTrack->matchAmount;

   434                 input.uncheckInput(1);

   435                 return true;

   436             }

   437             break;

   439         case QuantifierNonGreedy:

   440             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {

   441                 ++backTrack->matchAmount;

   442                 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))

   443                     return true;

   444             }

   445             input.uncheckInput(backTrack->matchAmount);

   446             break;

   447         }

   449         return false;

   450     }

   452     bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)

   453     {

   454         ASSERT(term.type == ByteTerm::TypeCharacterClass);

   455         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);

   457         switch (term.atom.quantityType) {

   458         case QuantifierFixedCount: {

   459             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {

   460                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))

   461                     return false;

   462             }

   463             return true;

   464         }

   466         case QuantifierGreedy: {

   467             unsigned matchAmount = 0;

   468             while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {

   469                 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {

   470                     input.uncheckInput(1);

   471                     break;

   472                 }

   473                 ++matchAmount;

   474             }

   475             backTrack->matchAmount = matchAmount;

   477             return true;

   478         }

   480         case QuantifierNonGreedy:

   481             backTrack->matchAmount = 0;

   482             return true;

   483         }

   485         ASSERT_NOT_REACHED();

   486         return false;

   487     }

   489     bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)

   490     {

   491         ASSERT(term.type == ByteTerm::TypeCharacterClass);

   492         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);

   494         switch (term.atom.quantityType) {

   495         case QuantifierFixedCount:

   496             break;

   498         case QuantifierGreedy:

   499             if (backTrack->matchAmount) {

   500                 --backTrack->matchAmount;

   501                 input.uncheckInput(1);

   502                 return true;

   503             }

   504             break;

   506         case QuantifierNonGreedy:

   507             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {

   508                 ++backTrack->matchAmount;

   509                 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))

   510                     return true;

   511             }

   512             input.uncheckInput(backTrack->matchAmount);

   513             break;

   514         }

   516         return false;

   517     }

   519     bool matchBackReference(ByteTerm& term, DisjunctionContext* context)

   520     {

   521         ASSERT(term.type == ByteTerm::TypeBackReference);

   522         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);

   524         unsigned matchBegin = output[(term.atom.subpatternId << 1)];

   525         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];

   527         // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.

   528         // In this case the result of match is empty string like when it references to a parentheses with zero-width match.

   529         // Eg.: /(a\1)/

   530         if (matchEnd == offsetNoMatch)

   531             return true;

   533         if (matchBegin == offsetNoMatch)

   534             return true;

   536         ASSERT(matchBegin <= matchEnd);

   538         if (matchBegin == matchEnd)

   539             return true;

   541         switch (term.atom.quantityType) {

   542         case QuantifierFixedCount: {

   543             backTrack->begin = input.getPos();

   544             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {

   545                 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {

   546                     input.setPos(backTrack->begin);

   547                     return false;

   548                 }

   549             }

   550             return true;

   551         }

   553         case QuantifierGreedy: {

   554             unsigned matchAmount = 0;

   555             while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))

   556                 ++matchAmount;

   557             backTrack->matchAmount = matchAmount;

   558             return true;

   559         }

   561         case QuantifierNonGreedy:

   562             backTrack->begin = input.getPos();

   563             backTrack->matchAmount = 0;

   564             return true;

   565         }

   567         ASSERT_NOT_REACHED();

   568         return false;

   569     }

   571     bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)

   572     {

   573         ASSERT(term.type == ByteTerm::TypeBackReference);

   574         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);

   576         unsigned matchBegin = output[(term.atom.subpatternId << 1)];

   577         unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];

   579         if (matchBegin == offsetNoMatch)

   580             return false;

   582         ASSERT(matchBegin <= matchEnd);

   584         if (matchBegin == matchEnd)

   585             return false;

   587         switch (term.atom.quantityType) {

   588         case QuantifierFixedCount:

   589             // for quantityCount == 1, could rewind.

   590             input.setPos(backTrack->begin);

   591             break;

   593         case QuantifierGreedy:

   594             if (backTrack->matchAmount) {

   595                 --backTrack->matchAmount;

   596                 input.rewind(matchEnd - matchBegin);

   597                 return true;

   598             }

   599             break;

   601         case QuantifierNonGreedy:

   602             if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {

   603                 ++backTrack->matchAmount;

   604                 return true;

   605             }

   606             input.setPos(backTrack->begin);

   607             break;

   608         }

   610         return false;

   611     }

   613     void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)

   614     {

   615         if (term.capture()) {

   616             unsigned subpatternId = term.atom.subpatternId;

   617             output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;

   618             output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;

   619         }

   620     }

   621     void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)

   622     {

   623         unsigned firstSubpatternId = term.atom.subpatternId;

   624         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;

   625         context->restoreOutput(output, firstSubpatternId, count);

   626     }

   627     JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)

   628     {

   629         while (backTrack->matchAmount) {

   630             ParenthesesDisjunctionContext* context = backTrack->lastContext;

   632             JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);

   633             if (result == JSRegExpMatch)

   634                 return JSRegExpMatch;

   636             resetMatches(term, context);

   637             popParenthesesDisjunctionContext(backTrack);

   638             freeParenthesesDisjunctionContext(context);

   640             if (result != JSRegExpNoMatch)

   641                 return result;

   642         }

   644         return JSRegExpNoMatch;

   645     }

   647     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)

   648     {

   649         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);

   650         ASSERT(term.atom.quantityCount == 1);

   652         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);

   654         switch (term.atom.quantityType) {

   655         case QuantifierGreedy: {

   656             // set this speculatively; if we get to the parens end this will be true.

   657             backTrack->begin = input.getPos();

   658             break;

   659         }

   660         case QuantifierNonGreedy: {

   661             backTrack->begin = notFound;

   662             context->term += term.atom.parenthesesWidth;

   663             return true;

   664         }

   665         case QuantifierFixedCount:

   666             break;

   667         }

   669         if (term.capture()) {

   670             unsigned subpatternId = term.atom.subpatternId;

   671             output[(subpatternId << 1)] = input.getPos() - term.inputPosition;

   672         }

   674         return true;

   675     }

   677     bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)

   678     {

   679         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);

   680         ASSERT(term.atom.quantityCount == 1);

   682         if (term.capture()) {

   683             unsigned subpatternId = term.atom.subpatternId;

   684             output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;

   685         }

   687         if (term.atom.quantityType == QuantifierFixedCount)

   688             return true;

   690         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);

   691         return backTrack->begin != input.getPos();

   692     }

   694     bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)

   695     {

   696         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);

   697         ASSERT(term.atom.quantityCount == 1);

   699         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);

   701         if (term.capture()) {

   702             unsigned subpatternId = term.atom.subpatternId;

   703             output[(subpatternId << 1)] = offsetNoMatch;

   704             output[(subpatternId << 1) + 1] = offsetNoMatch;

   705         }

   707         switch (term.atom.quantityType) {

   708         case QuantifierGreedy:

   709             // if we backtrack to this point, there is another chance - try matching nothing.

   710             ASSERT(backTrack->begin != notFound);

   711             backTrack->begin = notFound;

   712             context->term += term.atom.parenthesesWidth;

   713             return true;

   714         case QuantifierNonGreedy:

   715             ASSERT(backTrack->begin != notFound);

   716         case QuantifierFixedCount:

   717             break;

   718         }

   720         return false;

   721     }

   723     bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)

   724     {

   725         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);

   726         ASSERT(term.atom.quantityCount == 1);

   728         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);

   730         switch (term.atom.quantityType) {

   731         case QuantifierGreedy:

   732             if (backTrack->begin == notFound) {

   733                 context->term -= term.atom.parenthesesWidth;

   734                 return false;

   735             }

   736         case QuantifierNonGreedy:

   737             if (backTrack->begin == notFound) {

   738                 backTrack->begin = input.getPos();

   739                 if (term.capture()) {

   740                     // Technically this access to inputPosition should be accessing the begin term's

   741                     // inputPosition, but for repeats other than fixed these values should be

   742                     // the same anyway! (We don't pre-check for greedy or non-greedy matches.)

   743                     ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);

   745 		    // Disabled, see bug 808478

   746 #if 0

   747                     ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);

   748 #endif

   749                     unsigned subpatternId = term.atom.subpatternId;

   750                     output[subpatternId << 1] = input.getPos() + term.inputPosition;

   751                 }

   752                 context->term -= term.atom.parenthesesWidth;

   753                 return true;

   754             }

   755         case QuantifierFixedCount:

   756             break;

   757         }

   759         return false;

   760     }

   762     bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)

   763     {

   764         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);

   765         ASSERT(term.atom.quantityType == QuantifierGreedy);

   766         ASSERT(term.atom.quantityCount == quantifyInfinite);

   767         ASSERT(!term.capture());

   769         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);

   770         backTrack->begin = input.getPos();

   771         return true;

   772     }

   774     bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)

   775     {

   776         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);

   778         BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);

   779         // Empty match is a failed match.

   780         if (backTrack->begin == input.getPos())

   781             return false;

   783         // Successful match! Okay, what's next? - loop around and try to match moar!

   784         context->term -= (term.atom.parenthesesWidth + 1);

   785         return true;

   786     }

   788     bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)

   789     {

   790         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);

   791         ASSERT(term.atom.quantityType == QuantifierGreedy);

   792         ASSERT(term.atom.quantityCount == quantifyInfinite);

   793         ASSERT(!term.capture());

   795         // If we backtrack to this point, we have failed to match this iteration of the parens.

   796         // Since this is greedy / zero minimum a failed is also accepted as a match!

   797         context->term += term.atom.parenthesesWidth;

   798         return true;

   799     }

   801     bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)

   802     {

   803         // 'Terminal' parentheses are at the end of the regex, and as such a match past end

   804         // should always be returned as a successful match - we should never backtrack to here.

   805         ASSERT_NOT_REACHED();

   806         return false;

   807     }

   809     bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)

   810     {

   811         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);

   812         ASSERT(term.atom.quantityCount == 1);

   814         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);

   816         backTrack->begin = input.getPos();

   817         return true;

   818     }

   820     bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)

   821     {

   822         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);

   823         ASSERT(term.atom.quantityCount == 1);

   825         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);

   827         input.setPos(backTrack->begin);

   829         // We've reached the end of the parens; if they are inverted, this is failure.

   830         if (term.invert()) {

   831             context->term -= term.atom.parenthesesWidth;

   832             return false;

   833         }

   835         return true;

   836     }

   838     bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)

   839     {

   840         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);

   841         ASSERT(term.atom.quantityCount == 1);

   843         // We've failed to match parens; if they are inverted, this is win!

   844         if (term.invert()) {

   845             context->term += term.atom.parenthesesWidth;

   846             return true;

   847         }

   849         return false;

   850     }

   852     bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)

   853     {

   854         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);

   855         ASSERT(term.atom.quantityCount == 1);

   857         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);

   859         input.setPos(backTrack->begin);

   861         context->term -= term.atom.parenthesesWidth;

   862         return false;

   863     }

   865     JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)

   866     {

   867         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);

   869         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);

   870         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;

   872         backTrack->matchAmount = 0;

   873         backTrack->lastContext = 0;

   875         switch (term.atom.quantityType) {

   876         case QuantifierFixedCount: {

   877             // While we haven't yet reached our fixed limit,

   878             while (backTrack->matchAmount < term.atom.quantityCount) {

   879                 // Try to do a match, and it it succeeds, add it to the list.

   880                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);

   881                 JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));

   882                 if (result == JSRegExpMatch)

   883                     appendParenthesesDisjunctionContext(backTrack, context);

   884                 else {

   885                     // The match failed; try to find an alternate point to carry on from.

   886                     resetMatches(term, context);

   887                     freeParenthesesDisjunctionContext(context);

   889                     if (result != JSRegExpNoMatch)

   890                         return result;

   891                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);

   892                     if (backtrackResult != JSRegExpMatch)

   893                         return backtrackResult;

   894                 }

   895             }

   897             ASSERT(backTrack->matchAmount == term.atom.quantityCount);

   898             ParenthesesDisjunctionContext* context = backTrack->lastContext;

   899             recordParenthesesMatch(term, context);

   900             return JSRegExpMatch;

   901         }

   903         case QuantifierGreedy: {

   904             while (backTrack->matchAmount < term.atom.quantityCount) {

   905                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);

   906                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));

   907                 if (result == JSRegExpMatch)

   908                     appendParenthesesDisjunctionContext(backTrack, context);

   909                 else {

   910                     resetMatches(term, context);

   911                     freeParenthesesDisjunctionContext(context);

   913                     if (result != JSRegExpNoMatch)

   914                         return result;

   916                     break;

   917                 }

   918             }

   920             if (backTrack->matchAmount) {

   921                 ParenthesesDisjunctionContext* context = backTrack->lastContext;

   922                 recordParenthesesMatch(term, context);

   923             }

   924             return JSRegExpMatch;

   925         }

   927         case QuantifierNonGreedy:

   928             return JSRegExpMatch;

   929         }

   931         ASSERT_NOT_REACHED();

   932         return JSRegExpErrorNoMatch;

   933     }

   935     // Rules for backtracking differ depending on whether this is greedy or non-greedy.

   936     //

   937     // Greedy matches never should try just adding more - you should already have done

   938     // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where

   939     // you backtrack an item off the list needs checking, since we'll never have matched

   940     // the one less case.  Tracking forwards, still add as much as possible.

   941     //

   942     // Non-greedy, we've already done the one less case, so don't match on popping.

   943     // We haven't done the one more case, so always try to add that.

   944     //

   945     JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)

   946     {

   947         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);

   949         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);

   950         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;

   952         switch (term.atom.quantityType) {

   953         case QuantifierFixedCount: {

   954             ASSERT(backTrack->matchAmount == term.atom.quantityCount);

   956             ParenthesesDisjunctionContext* context = 0;

   957             JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);

   959             if (result != JSRegExpMatch)

   960                 return result;

   962             // While we haven't yet reached our fixed limit,

   963             while (backTrack->matchAmount < term.atom.quantityCount) {

   964                 // Try to do a match, and it it succeeds, add it to the list.

   965                 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);

   966                 result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));

   968                 if (result == JSRegExpMatch)

   969                     appendParenthesesDisjunctionContext(backTrack, context);

   970                 else {

   971                     // The match failed; try to find an alternate point to carry on from.

   972                     resetMatches(term, context);

   973                     freeParenthesesDisjunctionContext(context);

   975                     if (result != JSRegExpNoMatch)

   976                         return result;

   977                     JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);

   978                     if (backtrackResult != JSRegExpMatch)

   979                         return backtrackResult;

   980                 }

   981             }

   983             ASSERT(backTrack->matchAmount == term.atom.quantityCount);

   984             context = backTrack->lastContext;

   985             recordParenthesesMatch(term, context);

   986             return JSRegExpMatch;

   987         }

   989         case QuantifierGreedy: {

   990             if (!backTrack->matchAmount)

   991                 return JSRegExpNoMatch;

   993             ParenthesesDisjunctionContext* context = backTrack->lastContext;

   994             JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);

   995             if (result == JSRegExpMatch) {

   996                 while (backTrack->matchAmount < term.atom.quantityCount) {

   997                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);

   998                     JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));

   999                     if (parenthesesResult == JSRegExpMatch)

  1000                         appendParenthesesDisjunctionContext(backTrack, context);

  1001                     else {

  1002                         resetMatches(term, context);

  1003                         freeParenthesesDisjunctionContext(context);

  1005                         if (parenthesesResult != JSRegExpNoMatch)

  1006                             return parenthesesResult;

  1008                         break;

  1009                     }

  1010                 }

  1011             } else {

  1012                 // Avoid a topcrash before it occurs.

  1013                 if (!backTrack->lastContext) {

  1014                     ASSERT(!"Tripped Bug 856796!");

  1015                     return JSRegExpErrorInternal;

  1016                 }

  1018                 resetMatches(term, context);

  1019                 popParenthesesDisjunctionContext(backTrack);

  1020                 freeParenthesesDisjunctionContext(context);

  1022                 if (result != JSRegExpNoMatch)

  1023                     return result;

  1024             }

  1026             if (backTrack->matchAmount) {

  1027                 ParenthesesDisjunctionContext* context = backTrack->lastContext;

  1028                 recordParenthesesMatch(term, context);

  1029             }

  1030             return JSRegExpMatch;

  1031         }

  1033         case QuantifierNonGreedy: {

  1034             // If we've not reached the limit, try to add one more match.

  1035             if (backTrack->matchAmount < term.atom.quantityCount) {

  1036                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);

  1037                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));

  1038                 if (result == JSRegExpMatch) {

  1039                     appendParenthesesDisjunctionContext(backTrack, context);

  1040                     recordParenthesesMatch(term, context);

  1041                     return JSRegExpMatch;

  1042                 }

  1044                 resetMatches(term, context);

  1045                 freeParenthesesDisjunctionContext(context);

  1047                 if (result != JSRegExpNoMatch)

  1048                     return result;

  1049             }

  1051             // Nope - okay backtrack looking for an alternative.

  1052             while (backTrack->matchAmount) {

  1053                 ParenthesesDisjunctionContext* context = backTrack->lastContext;

  1054                 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);

  1055                 if (result == JSRegExpMatch) {

  1056                     // successful backtrack! we're back in the game!

  1057                     if (backTrack->matchAmount) {

  1058                         context = backTrack->lastContext;

  1059                         recordParenthesesMatch(term, context);

  1060                     }

  1061                     return JSRegExpMatch;

  1062                 }

  1064                 // Avoid a topcrash before it occurs.

  1065                 if (!backTrack->lastContext) {

  1066                     ASSERT(!"Tripped Bug 856796!");

  1067                     return JSRegExpErrorInternal;

  1068                 }

  1070                 // pop a match off the stack

  1071                 resetMatches(term, context);

  1072                 popParenthesesDisjunctionContext(backTrack);

  1073                 freeParenthesesDisjunctionContext(context);

  1075                 if (result != JSRegExpNoMatch)

  1076                     return result;

  1077             }

  1079             return JSRegExpNoMatch;

  1080         }

  1081         }

  1083         ASSERT_NOT_REACHED();

  1084         return JSRegExpErrorNoMatch;

  1085     }

  1087     bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)

  1088     {

  1089         UNUSED_PARAM(term);

  1090         unsigned matchBegin = context->matchBegin;

  1092         if (matchBegin) {

  1093             for (matchBegin--; true; matchBegin--) {

  1094                 if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {

  1095                     ++matchBegin;

  1096                     break;

  1097                 }

  1099                 if (!matchBegin)

  1100                     break;

  1101             }

  1102         }

  1104         unsigned matchEnd = input.getPos();

  1106         for (; (matchEnd != input.end())

  1107              && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }

  1109         if (((matchBegin && term.anchors.m_bol)

  1110              || ((matchEnd != input.end()) && term.anchors.m_eol))

  1111             && !pattern->m_multiline)

  1112             return false;

  1114         context->matchBegin = matchBegin;

  1115         context->matchEnd = matchEnd;

  1116         return true;

  1117     }

  1119 #define MATCH_NEXT() { ++context->term; goto matchAgain; }

  1120 #define BACKTRACK() { --context->term; goto backtrack; }

  1121 #define currentTerm() (disjunction->terms[context->term])

  1122     JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)

  1123     {

  1124         if (!--remainingMatchCount)

  1125             return JSRegExpErrorHitLimit;

  1127         if (btrack)

  1128             BACKTRACK();

  1130         context->matchBegin = input.getPos();

  1131         context->term = 0;

  1133     matchAgain:

  1134         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));

  1136         // Prevent jank resulting from getting stuck in Yarr for a long time.

  1137         if (!CheckForInterrupt(this->cx))

  1138             return JSRegExpErrorInternal;

  1140         switch (currentTerm().type) {

  1141         case ByteTerm::TypeSubpatternBegin:

  1142             MATCH_NEXT();

  1143         case ByteTerm::TypeSubpatternEnd:

  1144             context->matchEnd = input.getPos();

  1145             return JSRegExpMatch;

  1147         case ByteTerm::TypeBodyAlternativeBegin:

  1148             MATCH_NEXT();

  1149         case ByteTerm::TypeBodyAlternativeDisjunction:

  1150         case ByteTerm::TypeBodyAlternativeEnd:

  1151             context->matchEnd = input.getPos();

  1152             return JSRegExpMatch;

  1154         case ByteTerm::TypeAlternativeBegin:

  1155             MATCH_NEXT();

  1156         case ByteTerm::TypeAlternativeDisjunction:

  1157         case ByteTerm::TypeAlternativeEnd: {

  1158             int offset = currentTerm().alternative.end;

  1159             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);

  1160             backTrack->offset = offset;

  1161             context->term += offset;

  1162             MATCH_NEXT();

  1163         }

  1165         case ByteTerm::TypeAssertionBOL:

  1166             if (matchAssertionBOL(currentTerm()))

  1167                 MATCH_NEXT();

  1168             BACKTRACK();

  1169         case ByteTerm::TypeAssertionEOL:

  1170             if (matchAssertionEOL(currentTerm()))

  1171                 MATCH_NEXT();

  1172             BACKTRACK();

  1173         case ByteTerm::TypeAssertionWordBoundary:

  1174             if (matchAssertionWordBoundary(currentTerm()))

  1175                 MATCH_NEXT();

  1176             BACKTRACK();

  1178         case ByteTerm::TypePatternCharacterOnce:

  1179         case ByteTerm::TypePatternCharacterFixed: {

  1180             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {

  1181                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount))

  1182                     BACKTRACK();

  1183             }

  1184             MATCH_NEXT();

  1185         }

  1186         case ByteTerm::TypePatternCharacterGreedy: {

  1187             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);

  1188             unsigned matchAmount = 0;

  1189             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {

  1190                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {

  1191                     input.uncheckInput(1);

  1192                     break;

  1193                 }

  1194                 ++matchAmount;

  1195             }

  1196             backTrack->matchAmount = matchAmount;

  1198             MATCH_NEXT();

  1199         }

  1200         case ByteTerm::TypePatternCharacterNonGreedy: {

  1201             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);

  1202             backTrack->matchAmount = 0;

  1203             MATCH_NEXT();

  1204         }

  1206         case ByteTerm::TypePatternCasedCharacterOnce:

  1207         case ByteTerm::TypePatternCasedCharacterFixed: {

  1208             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {

  1209                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))

  1210                     BACKTRACK();

  1211             }

  1212             MATCH_NEXT();

  1213         }

  1214         case ByteTerm::TypePatternCasedCharacterGreedy: {

  1215             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);

  1216             unsigned matchAmount = 0;

  1217             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {

  1218                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {

  1219                     input.uncheckInput(1);

  1220                     break;

  1221                 }

  1222                 ++matchAmount;

  1223             }

  1224             backTrack->matchAmount = matchAmount;

  1226             MATCH_NEXT();

  1227         }

  1228         case ByteTerm::TypePatternCasedCharacterNonGreedy: {

  1229             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);

  1230             backTrack->matchAmount = 0;

  1231             MATCH_NEXT();

  1232         }

  1234         case ByteTerm::TypeCharacterClass:

  1235             if (matchCharacterClass(currentTerm(), context))

  1236                 MATCH_NEXT();

  1237             BACKTRACK();

  1238         case ByteTerm::TypeBackReference:

  1239             if (matchBackReference(currentTerm(), context))

  1240                 MATCH_NEXT();

  1241             BACKTRACK();

  1242         case ByteTerm::TypeParenthesesSubpattern: {

  1243             JSRegExpResult result = matchParentheses(currentTerm(), context);

  1245             if (result == JSRegExpMatch) {

  1246                 MATCH_NEXT();

  1247             }  else if (result != JSRegExpNoMatch)

  1248                 return result;

  1250             BACKTRACK();

  1251         }

  1252         case ByteTerm::TypeParenthesesSubpatternOnceBegin:

  1253             if (matchParenthesesOnceBegin(currentTerm(), context))

  1254                 MATCH_NEXT();

  1255             BACKTRACK();

  1256         case ByteTerm::TypeParenthesesSubpatternOnceEnd:

  1257             if (matchParenthesesOnceEnd(currentTerm(), context))

  1258                 MATCH_NEXT();

  1259             BACKTRACK();

  1260         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:

  1261             if (matchParenthesesTerminalBegin(currentTerm(), context))

  1262                 MATCH_NEXT();

  1263             BACKTRACK();

  1264         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:

  1265             if (matchParenthesesTerminalEnd(currentTerm(), context))

  1266                 MATCH_NEXT();

  1267             BACKTRACK();

  1268         case ByteTerm::TypeParentheticalAssertionBegin:

  1269             if (matchParentheticalAssertionBegin(currentTerm(), context))

  1270                 MATCH_NEXT();

  1271             BACKTRACK();

  1272         case ByteTerm::TypeParentheticalAssertionEnd:

  1273             if (matchParentheticalAssertionEnd(currentTerm(), context))

  1274                 MATCH_NEXT();

  1275             BACKTRACK();

  1277         case ByteTerm::TypeCheckInput:

  1278             if (input.checkInput(currentTerm().checkInputCount))

  1279                 MATCH_NEXT();

  1280             BACKTRACK();

  1282         case ByteTerm::TypeUncheckInput:

  1283             input.uncheckInput(currentTerm().checkInputCount);

  1284             MATCH_NEXT();

  1286         case ByteTerm::TypeDotStarEnclosure:

  1287             if (matchDotStarEnclosure(currentTerm(), context))

  1288                 return JSRegExpMatch;

  1289             BACKTRACK();

  1290         }

  1292         // We should never fall-through to here.

  1293         ASSERT_NOT_REACHED();

  1295     backtrack:

  1296         ASSERT(context->term < static_cast<int>(disjunction->terms.size()));

  1298         // Prevent jank resulting from getting stuck in Yarr for a long time.

  1299         if (!CheckForInterrupt(this->cx))

  1300             return JSRegExpErrorInternal;

  1302         switch (currentTerm().type) {

  1303         case ByteTerm::TypeSubpatternBegin:

  1304             return JSRegExpNoMatch;

  1305         case ByteTerm::TypeSubpatternEnd:

  1306             ASSERT_NOT_REACHED();

  1308         case ByteTerm::TypeBodyAlternativeBegin:

  1309         case ByteTerm::TypeBodyAlternativeDisjunction: {

  1310             int offset = currentTerm().alternative.next;

  1311             context->term += offset;

  1312             if (offset > 0)

  1313                 MATCH_NEXT();

  1315             if (input.atEnd())

  1316                 return JSRegExpNoMatch;

  1318             input.next();

  1320             context->matchBegin = input.getPos();

  1322             if (currentTerm().alternative.onceThrough)

  1323                 context->term += currentTerm().alternative.next;

  1325             MATCH_NEXT();

  1326         }

  1327         case ByteTerm::TypeBodyAlternativeEnd:

  1328             ASSERT_NOT_REACHED();

  1330         case ByteTerm::TypeAlternativeBegin:

  1331         case ByteTerm::TypeAlternativeDisjunction: {

  1332             int offset = currentTerm().alternative.next;

  1333             context->term += offset;

  1334             if (offset > 0)

  1335                 MATCH_NEXT();

  1336             BACKTRACK();

  1337         }

  1338         case ByteTerm::TypeAlternativeEnd: {

  1339             // We should never backtrack back into an alternative of the main body of the regex.

  1340             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);

  1341             unsigned offset = backTrack->offset;

  1342             context->term -= offset;

  1343             BACKTRACK();

  1344         }

  1346         case ByteTerm::TypeAssertionBOL:

  1347         case ByteTerm::TypeAssertionEOL:

  1348         case ByteTerm::TypeAssertionWordBoundary:

  1349             BACKTRACK();

  1351         case ByteTerm::TypePatternCharacterOnce:

  1352         case ByteTerm::TypePatternCharacterFixed:

  1353         case ByteTerm::TypePatternCharacterGreedy:

  1354         case ByteTerm::TypePatternCharacterNonGreedy:

  1355             if (backtrackPatternCharacter(currentTerm(), context))

  1356                 MATCH_NEXT();

  1357             BACKTRACK();

  1358         case ByteTerm::TypePatternCasedCharacterOnce:

  1359         case ByteTerm::TypePatternCasedCharacterFixed:

  1360         case ByteTerm::TypePatternCasedCharacterGreedy:

  1361         case ByteTerm::TypePatternCasedCharacterNonGreedy:

  1362             if (backtrackPatternCasedCharacter(currentTerm(), context))

  1363                 MATCH_NEXT();

  1364             BACKTRACK();

  1365         case ByteTerm::TypeCharacterClass:

  1366             if (backtrackCharacterClass(currentTerm(), context))

  1367                 MATCH_NEXT();

  1368             BACKTRACK();

  1369         case ByteTerm::TypeBackReference:

  1370             if (backtrackBackReference(currentTerm(), context))

  1371                 MATCH_NEXT();

  1372             BACKTRACK();

  1373         case ByteTerm::TypeParenthesesSubpattern: {

  1374             JSRegExpResult result = backtrackParentheses(currentTerm(), context);

  1376             if (result == JSRegExpMatch) {

  1377                 MATCH_NEXT();

  1378             } else if (result != JSRegExpNoMatch)

  1379                 return result;

  1381             BACKTRACK();

  1382         }

  1383         case ByteTerm::TypeParenthesesSubpatternOnceBegin:

  1384             if (backtrackParenthesesOnceBegin(currentTerm(), context))

  1385                 MATCH_NEXT();

  1386             BACKTRACK();

  1387         case ByteTerm::TypeParenthesesSubpatternOnceEnd:

  1388             if (backtrackParenthesesOnceEnd(currentTerm(), context))

  1389                 MATCH_NEXT();

  1390             BACKTRACK();

  1391         case ByteTerm::TypeParenthesesSubpatternTerminalBegin:

  1392             if (backtrackParenthesesTerminalBegin(currentTerm(), context))

  1393                 MATCH_NEXT();

  1394             BACKTRACK();

  1395         case ByteTerm::TypeParenthesesSubpatternTerminalEnd:

  1396             if (backtrackParenthesesTerminalEnd(currentTerm(), context))

  1397                 MATCH_NEXT();

  1398             BACKTRACK();

  1399         case ByteTerm::TypeParentheticalAssertionBegin:

  1400             if (backtrackParentheticalAssertionBegin(currentTerm(), context))

  1401                 MATCH_NEXT();

  1402             BACKTRACK();

  1403         case ByteTerm::TypeParentheticalAssertionEnd:

  1404             if (backtrackParentheticalAssertionEnd(currentTerm(), context))

  1405                 MATCH_NEXT();

  1406             BACKTRACK();

  1408         case ByteTerm::TypeCheckInput:

  1409             input.uncheckInput(currentTerm().checkInputCount);

  1410             BACKTRACK();

  1412         case ByteTerm::TypeUncheckInput:

  1413             input.checkInput(currentTerm().checkInputCount);

  1414             BACKTRACK();

  1416         case ByteTerm::TypeDotStarEnclosure:

  1417             ASSERT_NOT_REACHED();

  1418         }

  1420         ASSERT_NOT_REACHED();

  1421         return JSRegExpErrorNoMatch;

  1422     }

  1424     JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)

  1425     {

  1426         JSRegExpResult result = matchDisjunction(disjunction, context, btrack);

  1428         if (result == JSRegExpMatch) {

  1429             while (context->matchBegin == context->matchEnd) {

  1430                 result = matchDisjunction(disjunction, context, true);

  1431                 if (result != JSRegExpMatch)

  1432                     return result;

  1433             }

  1434             return JSRegExpMatch;

  1435         }

  1437         return result;

  1438     }

  1440     unsigned interpret()

  1441     {

  1442         if (!input.isAvailableInput(0))

  1443             return offsetNoMatch;

  1445         for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)

  1446             output[i << 1] = offsetNoMatch;

  1448         allocatorPool = pattern->m_allocator->startAllocator();

  1449         if (!allocatorPool)

  1450             CRASH();

  1452         DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());

  1454         JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);

  1455         if (result == JSRegExpMatch) {

  1456             output[0] = context->matchBegin;

  1457             output[1] = context->matchEnd;

  1458         }

  1460         freeDisjunctionContext(context);

  1462         pattern->m_allocator->stopAllocator();

  1464         ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));

  1465         return output[0];

  1466     }

  1468     Interpreter(JSContext *cx, BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)

  1469         : cx(cx)

  1470         , pattern(pattern)

  1471         , output(output)

  1472         , input(input, start, length)

  1473         , allocatorPool(0)

  1474         , remainingMatchCount(matchLimit)

  1475     {

  1476     }

  1478 private:

  1479     JSContext *cx;

  1480     BytecodePattern* pattern;

  1481     unsigned* output;

  1482     InputStream input;

  1483     BumpPointerPool* allocatorPool;

  1484     unsigned remainingMatchCount;

  1485 };

  1489 class ByteCompiler {

  1490     struct ParenthesesStackEntry {

  1491         unsigned beginTerm;

  1492         unsigned savedAlternativeIndex;

  1494         // For js::Vector. Does not create a valid object.

  1495         ParenthesesStackEntry() { }

  1497         ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)

  1498             : beginTerm(beginTerm)

  1499             , savedAlternativeIndex(savedAlternativeIndex)

  1500         {

  1501         }

  1502     };

  1504 public:

  1505     ByteCompiler(YarrPattern& pattern)

  1506         : m_pattern(pattern)

  1507     {

  1508         m_currentAlternativeIndex = 0;

  1509     }

  1511     PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)

  1512     {

  1513         regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());

  1514         emitDisjunction(m_pattern.m_body);

  1515         regexEnd();

  1517         return adoptPtr(newOrCrash<BytecodePattern>(m_bodyDisjunction.release(), m_allParenthesesInfo, Ref<YarrPattern>(m_pattern), allocator));

  1518     }

  1520     void checkInput(unsigned count)

  1521     {

  1522         m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));

  1523     }

  1525     void uncheckInput(unsigned count)

  1526     {

  1527         m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));

  1528     }

  1530     void assertionBOL(unsigned inputPosition)

  1531     {

  1532         m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));

  1533     }

  1535     void assertionEOL(unsigned inputPosition)

  1536     {

  1537         m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));

  1538     }

  1540     void assertionWordBoundary(bool invert, unsigned inputPosition)

  1541     {

  1542         m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));

  1543     }

  1545     void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)

  1546     {

  1547         if (m_pattern.m_ignoreCase) {

  1548             UChar lo = Unicode::toLower(ch);

  1549             UChar hi = Unicode::toUpper(ch);

  1551             if (lo != hi) {

  1552                 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));

  1553                 return;

  1554             }

  1555         }

  1557         m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));

  1558     }

  1560     void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)

  1561     {

  1562         m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));

  1564         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();

  1565         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;

  1566         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;

  1567     }

  1569     void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)

  1570     {

  1571         ASSERT(subpatternId);

  1573         m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));

  1575         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();

  1576         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;

  1577         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;

  1578     }

  1580     void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)

  1581     {

  1582         int beginTerm = m_bodyDisjunction->terms.size();

  1584         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));

  1585         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;

  1586         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());

  1587         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;

  1589         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));

  1590         m_currentAlternativeIndex = beginTerm + 1;

  1591     }

  1593     void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)

  1594     {

  1595         int beginTerm = m_bodyDisjunction->terms.size();

  1597         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));

  1598         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;

  1599         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());

  1600         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;

  1602         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));

  1603         m_currentAlternativeIndex = beginTerm + 1;

  1604     }

  1606     void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)

  1607     {

  1608         // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,

  1609         // then fix this up at the end! - simplifying this should make it much clearer.

  1610         // https://bugs.webkit.org/show_bug.cgi?id=50136

  1612         int beginTerm = m_bodyDisjunction->terms.size();

  1614         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));

  1615         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;

  1616         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());

  1617         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;

  1619         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));

  1620         m_currentAlternativeIndex = beginTerm + 1;

  1621     }

  1623     void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)

  1624     {

  1625         int beginTerm = m_bodyDisjunction->terms.size();

  1627         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));

  1628         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;

  1629         m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());

  1630         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;

  1632         m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));

  1633         m_currentAlternativeIndex = beginTerm + 1;

  1634     }

  1636     void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)

  1637     {

  1638         unsigned beginTerm = popParenthesesStack();

  1639         closeAlternative(beginTerm + 1);

  1640         unsigned endTerm = m_bodyDisjunction->terms.size();

  1642         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);

  1644         bool invert = m_bodyDisjunction->terms[beginTerm].invert();

  1645         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;

  1647         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));

  1648         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;

  1649         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;

  1650         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;

  1652         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();

  1653         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;

  1654         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();

  1655         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;

  1656     }

  1658     void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)

  1659     {

  1660         m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));

  1661     }

  1663     unsigned popParenthesesStack()

  1664     {

  1665         ASSERT(m_parenthesesStack.size());

  1666         int stackEnd = m_parenthesesStack.size() - 1;

  1667         unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;

  1668         m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;

  1669         m_parenthesesStack.shrink(stackEnd);

  1671         ASSERT(beginTerm < m_bodyDisjunction->terms.size());

  1672         ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());

  1674         return beginTerm;

  1675     }

  1677 #ifndef NDEBUG

  1678     void dumpDisjunction(ByteDisjunction* disjunction)

  1679     {

  1680         dataLogF("ByteDisjunction(%p):\n\t", (void *)disjunction);

  1681         for (unsigned i = 0; i < disjunction->terms.size(); ++i)

  1682             dataLogF("{ %d } ", disjunction->terms[i].type);

  1683         dataLogF("\n");

  1684     }

  1685 #endif

  1687     void closeAlternative(int beginTerm)

  1688     {

  1689         int origBeginTerm = beginTerm;

  1690         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);

  1691         int endIndex = m_bodyDisjunction->terms.size();

  1693         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;

  1695         if (!m_bodyDisjunction->terms[beginTerm].alternative.next)

  1696             m_bodyDisjunction->terms.remove(beginTerm);

  1697         else {

  1698             while (m_bodyDisjunction->terms[beginTerm].alternative.next) {

  1699                 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;

  1700                 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);

  1701                 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;

  1702                 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;

  1703             }

  1705             m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;

  1707             m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());

  1708             m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;

  1709         }

  1710     }

  1712     void closeBodyAlternative()

  1713     {

  1714         int beginTerm = 0;

  1715         int origBeginTerm = 0;

  1716         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);

  1717         int endIndex = m_bodyDisjunction->terms.size();

  1719         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;

  1721         while (m_bodyDisjunction->terms[beginTerm].alternative.next) {

  1722             beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;

  1723             ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);

  1724             m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;

  1725             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;

  1726         }

  1728         m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;

  1730         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());

  1731         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;

  1732     }

  1734     void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)

  1735     {

  1736         unsigned beginTerm = popParenthesesStack();

  1737         closeAlternative(beginTerm + 1);

  1738         unsigned endTerm = m_bodyDisjunction->terms.size();

  1740         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);

  1742         ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];

  1744         bool capture = parenthesesBegin.capture();

  1745         unsigned subpatternId = parenthesesBegin.atom.subpatternId;

  1747         unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;

  1748         ByteDisjunction* parenthesesDisjunction = newOrCrash<ByteDisjunction>(numSubpatterns, callFrameSize);

  1750         parenthesesDisjunction->terms.reserve(endTerm - beginTerm + 1);

  1751         parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());

  1752         for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)

  1753             parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);

  1754         parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());

  1756         m_bodyDisjunction->terms.shrink(beginTerm);

  1758         m_allParenthesesInfo.append(parenthesesDisjunction);

  1759         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition));

  1761         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();

  1762         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;

  1763         m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;

  1764     }

  1766     void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)

  1767     {

  1768         unsigned beginTerm = popParenthesesStack();

  1769         closeAlternative(beginTerm + 1);

  1770         unsigned endTerm = m_bodyDisjunction->terms.size();

  1772         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);

  1774         bool capture = m_bodyDisjunction->terms[beginTerm].capture();

  1775         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;

  1777         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));

  1778         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;

  1779         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;

  1780         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;

  1782         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();

  1783         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;

  1784         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();

  1785         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;

  1786     }

  1788     void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)

  1789     {

  1790         unsigned beginTerm = popParenthesesStack();

  1791         closeAlternative(beginTerm + 1);

  1792         unsigned endTerm = m_bodyDisjunction->terms.size();

  1794         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);

  1796         bool capture = m_bodyDisjunction->terms[beginTerm].capture();

  1797         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;

  1799         m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));

  1800         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;

  1801         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;

  1802         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;

  1804         m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();

  1805         m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;

  1806         m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();

  1807         m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;

  1808     }

  1810     void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)

  1811     {

  1812         m_bodyDisjunction = adoptPtr(newOrCrash<ByteDisjunction>(numSubpatterns, callFrameSize));

  1813         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));

  1814         m_bodyDisjunction->terms[0].frameLocation = 0;

  1815         m_currentAlternativeIndex = 0;

  1816     }

  1818     void regexEnd()

  1819     {

  1820         closeBodyAlternative();

  1821     }

  1823     void alternativeBodyDisjunction(bool onceThrough)

  1824     {

  1825         int newAlternativeIndex = m_bodyDisjunction->terms.size();

  1826         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;

  1827         m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));

  1829         m_currentAlternativeIndex = newAlternativeIndex;

  1830     }

  1832     void alternativeDisjunction()

  1833     {

  1834         int newAlternativeIndex = m_bodyDisjunction->terms.size();

  1835         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;

  1836         m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());

  1838         m_currentAlternativeIndex = newAlternativeIndex;

  1839     }

  1841     void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)

  1842     {

  1843         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {

  1844             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;

  1846             PatternAlternative* alternative = disjunction->m_alternatives[alt];

  1848             if (alt) {

  1849                 if (disjunction == m_pattern.m_body)

  1850                     alternativeBodyDisjunction(alternative->onceThrough());

  1851                 else

  1852                     alternativeDisjunction();

  1853             }

  1855             unsigned minimumSize = alternative->m_minimumSize;

  1856             ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);

  1857             unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;

  1859             if (countToCheck) {

  1860                 checkInput(countToCheck);

  1861                 currentCountAlreadyChecked += countToCheck;

  1862             }

  1864             for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {

  1865                 PatternTerm& term = alternative->m_terms[i];

  1867                 switch (term.type) {

  1868                 case PatternTerm::TypeAssertionBOL:

  1869                     assertionBOL(currentCountAlreadyChecked - term.inputPosition);

  1870                     break;

  1872                 case PatternTerm::TypeAssertionEOL:

  1873                     assertionEOL(currentCountAlreadyChecked - term.inputPosition);

  1874                     break;

  1876                 case PatternTerm::TypeAssertionWordBoundary:

  1877                     assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);

  1878                     break;

  1880                 case PatternTerm::TypePatternCharacter:

  1881                     atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);

  1882                     break;

  1884                 case PatternTerm::TypeCharacterClass:

  1885                     atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);

  1886                     break;

  1888                 case PatternTerm::TypeBackReference:

  1889                     atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);

  1890                         break;

  1892                 case PatternTerm::TypeForwardReference:

  1893                     break;

  1895                 case PatternTerm::TypeParenthesesSubpattern: {

  1896                     unsigned disjunctionAlreadyCheckedCount = 0;

  1897                     if (term.quantityCount == 1 && !term.parentheses.isCopy) {

  1898                         unsigned alternativeFrameLocation = term.frameLocation;

  1899                         // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.

  1900                         if (term.quantityType == QuantifierFixedCount)

  1901                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;

  1902                         else

  1903                             alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;

  1904                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;

  1905                         atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);

  1906                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);

  1907                         atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);

  1908                     } else if (term.parentheses.isTerminal) {

  1909                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;

  1910                         atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);

  1911                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);

  1912                         atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);

  1913                     } else {

  1914                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;

  1915                         atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0);

  1916                         emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);

  1917                         atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);

  1918                     }

  1919                     break;

  1920                 }

  1922                 case PatternTerm::TypeParentheticalAssertion: {

  1923                     unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;

  1925                     ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));

  1926                     unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition);

  1927                     unsigned uncheckAmount = 0;

  1928                     if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {

  1929                         uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;

  1930                         uncheckInput(uncheckAmount);

  1931                         currentCountAlreadyChecked -= uncheckAmount;

  1932                     }

  1934                     atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);

  1935                     emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);

  1936                     atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);

  1937                     if (uncheckAmount) {

  1938                         checkInput(uncheckAmount);

  1939                         currentCountAlreadyChecked += uncheckAmount;

  1940                     }

  1941                     break;

  1942                 }

  1944                 case PatternTerm::TypeDotStarEnclosure:

  1945                     assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);

  1946                     break;

  1947                 }

  1948             }

  1949         }

  1950     }

  1952 private:

  1953     YarrPattern& m_pattern;

  1954     OwnPtr<ByteDisjunction> m_bodyDisjunction;

  1955     unsigned m_currentAlternativeIndex;

  1956     Vector<ParenthesesStackEntry> m_parenthesesStack;

  1957     Vector<ByteDisjunction*> m_allParenthesesInfo;

  1958 };

  1960 PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)

  1961 {

  1962     return ByteCompiler(pattern).compile(allocator);

  1963 }

  1965 unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)

  1966 {

  1967 #if YARR_8BIT_CHAR_SUPPORT

  1968     if (input.is8Bit())

  1969         return Interpreter<LChar>(cx, bytecode, output, input.characters8(), input.length(), start).interpret();

  1970 #endif

  1971     return Interpreter<UChar>(cx, bytecode, output, input.chars(), input.length(), start).interpret();

  1972 }

  1974 unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)

  1975 {

  1976     return Interpreter<LChar>(cx, bytecode, output, input, length, start).interpret();

  1977 }

  1979 unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)

  1980 {

  1981     return Interpreter<UChar>(cx, bytecode, output, input, length, start).interpret();

  1982 }

  1984 // These should be the same for both UChar & LChar.

  1985 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);

  1986 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);

  1987 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);

  1988 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);

  1989 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);

  1990 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);

  1991 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);

  1994 } }

The Tor Browser / file revision

js/src/yarr/YarrInterpreter.cpp@6474c204b198

js/src/yarr/YarrInterpreter.cpp