js/src/yarr/YarrJIT.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
     2  * vim: set ts=8 sts=4 et sw=4 tw=99:
     3  *
     4  * Copyright (C) 2009 Apple Inc. All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions
     8  * are met:
     9  * 1. Redistributions of source code must retain the above copyright
    10  *    notice, this list of conditions and the following disclaimer.
    11  * 2. Redistributions in binary form must reproduce the above copyright
    12  *    notice, this list of conditions and the following disclaimer in the
    13  *    documentation and/or other materials provided with the distribution.
    14  *
    15  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
    16  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    18  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
    19  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    20  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    22  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
    23  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    25  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    26  */
    28 #include "yarr/YarrJIT.h"
    30 #include "assembler/assembler/LinkBuffer.h"
    31 #include "yarr/Yarr.h"
    32 #include "yarr/YarrCanonicalizeUCS2.h"
    34 #if ENABLE_YARR_JIT
    36 using namespace WTF;
    38 namespace JSC { namespace Yarr {
    40 template<YarrJITCompileMode compileMode>
    41 class YarrGenerator : private MacroAssembler {
    42     friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const String& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
    44 #if WTF_CPU_ARM
    45     static const RegisterID input = ARMRegisters::r0;
    46     static const RegisterID index = ARMRegisters::r1;
    47     static const RegisterID length = ARMRegisters::r2;
    48     static const RegisterID output = ARMRegisters::r4;
    50     static const RegisterID regT0 = ARMRegisters::r5;
    51     static const RegisterID regT1 = ARMRegisters::r6;
    53     static const RegisterID returnRegister = ARMRegisters::r0;
    54     static const RegisterID returnRegister2 = ARMRegisters::r1;
    55 #elif WTF_CPU_MIPS
    56     static const RegisterID input = MIPSRegisters::a0;
    57     static const RegisterID index = MIPSRegisters::a1;
    58     static const RegisterID length = MIPSRegisters::a2;
    59     static const RegisterID output = MIPSRegisters::a3;
    61     static const RegisterID regT0 = MIPSRegisters::t4;
    62     static const RegisterID regT1 = MIPSRegisters::t5;
    64     static const RegisterID returnRegister = MIPSRegisters::v0;
    65     static const RegisterID returnRegister2 = MIPSRegisters::v1;
    66 #elif WTF_CPU_SH4
    67     static const RegisterID input = SH4Registers::r4;
    68     static const RegisterID index = SH4Registers::r5;
    69     static const RegisterID length = SH4Registers::r6;
    70     static const RegisterID output = SH4Registers::r7;
    72     static const RegisterID regT0 = SH4Registers::r0;
    73     static const RegisterID regT1 = SH4Registers::r1;
    75     static const RegisterID returnRegister = SH4Registers::r0;
    76     static const RegisterID returnRegister2 = SH4Registers::r1;
    77 #elif WTF_CPU_SPARC
    78     static const RegisterID input = SparcRegisters::i0;
    79     static const RegisterID index = SparcRegisters::i1;
    80     static const RegisterID length = SparcRegisters::i2;
    81     static const RegisterID output = SparcRegisters::i3;
    83     static const RegisterID regT0 = SparcRegisters::i4;
    84     static const RegisterID regT1 = SparcRegisters::i5;
    86     static const RegisterID returnRegister = SparcRegisters::i0;
    87 #elif WTF_CPU_X86
    88     static const RegisterID input = X86Registers::eax;
    89     static const RegisterID index = X86Registers::edx;
    90     static const RegisterID length = X86Registers::ecx;
    91     static const RegisterID output = X86Registers::edi;
    93     static const RegisterID regT0 = X86Registers::ebx;
    94     static const RegisterID regT1 = X86Registers::esi;
    96     static const RegisterID returnRegister = X86Registers::eax;
    97     static const RegisterID returnRegister2 = X86Registers::edx;
    98 #elif WTF_CPU_X86_64
    99 # if WTF_PLATFORM_WIN
   100     static const RegisterID input = X86Registers::ecx;
   101     static const RegisterID index = X86Registers::edx;
   102     static const RegisterID length = X86Registers::r8;
   103     static const RegisterID output = X86Registers::r9;
   104 # else
   105     static const RegisterID input = X86Registers::edi;
   106     static const RegisterID index = X86Registers::esi;
   107     static const RegisterID length = X86Registers::edx;
   108     static const RegisterID output = X86Registers::ecx;
   109 # endif
   111     static const RegisterID regT0 = X86Registers::eax;
   112     static const RegisterID regT1 = X86Registers::ebx;
   114     static const RegisterID returnRegister = X86Registers::eax;
   116 # if !WTF_PLATFORM_WIN
   117     // no way to use int128_t as return value on Win64 ABI
   118     static const RegisterID returnRegister2 = X86Registers::edx;
   119 # endif
   120 #endif
   122     void optimizeAlternative(PatternAlternative* alternative)
   123     {
   124         if (!alternative->m_terms.size())
   125             return;
   127         for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
   128             PatternTerm& term = alternative->m_terms[i];
   129             PatternTerm& nextTerm = alternative->m_terms[i + 1];
   131             if ((term.type == PatternTerm::TypeCharacterClass)
   132                 && (term.quantityType == QuantifierFixedCount)
   133                 && (nextTerm.type == PatternTerm::TypePatternCharacter)
   134                 && (nextTerm.quantityType == QuantifierFixedCount)) {
   135                 PatternTerm termCopy = term;
   136                 alternative->m_terms[i] = nextTerm;
   137                 alternative->m_terms[i + 1] = termCopy;
   138             }
   139         }
   140     }
   142     void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
   143     {
   144         do {
   145             // pick which range we're going to generate
   146             int which = count >> 1;
   147             char lo = ranges[which].begin;
   148             char hi = ranges[which].end;
   150             // check if there are any ranges or matches below lo.  If not, just jl to failure -
   151             // if there is anything else to check, check that first, if it falls through jmp to failure.
   152             if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
   153                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
   155                 // generate code for all ranges before this one
   156                 if (which)
   157                     matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
   159                 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
   160                     matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
   161                     ++*matchIndex;
   162                 }
   163                 failures.append(jump());
   165                 loOrAbove.link(this);
   166             } else if (which) {
   167                 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
   169                 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
   170                 failures.append(jump());
   172                 loOrAbove.link(this);
   173             } else
   174                 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
   176             while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
   177                 ++*matchIndex;
   179             matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
   180             // fall through to here, the value is above hi.
   182             // shuffle along & loop around if there are any more matches to handle.
   183             unsigned next = which + 1;
   184             ranges += next;
   185             count -= next;
   186         } while (count);
   187     }
   189     void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
   190     {
   191         if (charClass->m_table) {
   192             ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
   193             matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
   194             return;
   195         }
   196         Jump unicodeFail;
   197         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
   198             Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f));
   200             if (charClass->m_matchesUnicode.size()) {
   201                 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
   202                     UChar ch = charClass->m_matchesUnicode[i];
   203                     matchDest.append(branch32(Equal, character, Imm32(ch)));
   204                 }
   205             }
   207             if (charClass->m_rangesUnicode.size()) {
   208                 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
   209                     UChar lo = charClass->m_rangesUnicode[i].begin;
   210                     UChar hi = charClass->m_rangesUnicode[i].end;
   212                     Jump below = branch32(LessThan, character, Imm32(lo));
   213                     matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
   214                     below.link(this);
   215                 }
   216             }
   218             unicodeFail = jump();
   219             isAscii.link(this);
   220         }
   222         if (charClass->m_ranges.size()) {
   223             unsigned matchIndex = 0;
   224             JumpList failures;
   225             matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
   226             while (matchIndex < charClass->m_matches.size())
   227                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
   229             failures.link(this);
   230         } else if (charClass->m_matches.size()) {
   231             // optimization: gather 'a','A' etc back together, can mask & test once.
   232             Vector<char> matchesAZaz;
   234             for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
   235                 char ch = charClass->m_matches[i];
   236                 if (m_pattern.m_ignoreCase) {
   237                     if (isASCIILower(ch)) {
   238                         matchesAZaz.append(ch);
   239                         continue;
   240                     }
   241                     if (isASCIIUpper(ch))
   242                         continue;
   243                 }
   244                 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
   245             }
   247             if (unsigned countAZaz = matchesAZaz.size()) {
   248                 or32(TrustedImm32(32), character);
   249                 for (unsigned i = 0; i < countAZaz; ++i)
   250                     matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
   251             }
   252         }
   254         if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
   255             unicodeFail.link(this);
   256     }
   258     // Jumps if input not available; will have (incorrectly) incremented already!
   259     Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
   260     {
   261         if (countToCheck)
   262             add32(Imm32(countToCheck), index);
   263         return branch32(Above, index, length);
   264     }
   266     Jump jumpIfAvailableInput(unsigned countToCheck)
   267     {
   268         add32(Imm32(countToCheck), index);
   269         return branch32(BelowOrEqual, index, length);
   270     }
   272     Jump checkInput()
   273     {
   274         return branch32(BelowOrEqual, index, length);
   275     }
   277     Jump atEndOfInput()
   278     {
   279         return branch32(Equal, index, length);
   280     }
   282     Jump notAtEndOfInput()
   283     {
   284         return branch32(NotEqual, index, length);
   285     }
   287     Jump jumpIfCharNotEquals(UChar ch, int inputPosition, RegisterID character)
   288     {
   289         readCharacter(inputPosition, character);
   291         // For case-insesitive compares, non-ascii characters that have different
   292         // upper & lower case representations are converted to a character class.
   293         ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
   294         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
   295             or32(TrustedImm32(0x20), character);
   296             ch |= 0x20;
   297         }
   299         return branch32(NotEqual, character, Imm32(ch));
   300     }
   302     void readCharacter(int inputPosition, RegisterID reg)
   303     {
   304         if (m_charSize == Char8)
   305             load8(BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), reg);
   306         else
   307             load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
   308     }
   310     void storeToFrame(RegisterID reg, unsigned frameLocation)
   311     {
   312         poke(reg, frameLocation);
   313     }
   315     void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
   316     {
   317         poke(imm, frameLocation);
   318     }
   320     DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
   321     {
   322         return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
   323     }
   325     void loadFromFrame(unsigned frameLocation, RegisterID reg)
   326     {
   327         peek(reg, frameLocation);
   328     }
   330     void loadFromFrameAndJump(unsigned frameLocation)
   331     {
   332         jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
   333     }
   335     void initCallFrame()
   336     {
   337         unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
   338         if (callFrameSize)
   339             subPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister);
   340     }
   341     void removeCallFrame()
   342     {
   343         unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
   344         if (callFrameSize)
   345             addPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister);
   346     }
   348     // Used to record subpatters, should only be called if compileMode is IncludeSubpatterns.
   349     void setSubpatternStart(RegisterID reg, unsigned subpattern)
   350     {
   351         ASSERT(subpattern);
   352         // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
   353         store32(reg, Address(output, (subpattern << 1) * sizeof(int)));
   354     }
   355     void setSubpatternEnd(RegisterID reg, unsigned subpattern)
   356     {
   357         ASSERT(subpattern);
   358         // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
   359         store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int)));
   360     }
   361     void clearSubpatternStart(unsigned subpattern)
   362     {
   363         ASSERT(subpattern);
   364         // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
   365         store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
   366     }
   368     // We use one of three different strategies to track the start of the current match,
   369     // while matching.
   370     // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
   371     //    at the end of matching. This is irrespective of compileMode, and in this case
   372     //    these methods should never be called.
   373     // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
   374     //    vector, store the match start in the output vector.
   375     // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
   376     //    in this register.
   377     void setMatchStart(RegisterID reg)
   378     {
   379         ASSERT(!m_pattern.m_body->m_hasFixedSize);
   380         if (compileMode == IncludeSubpatterns)
   381             store32(reg, output);
   382         else
   383             move(reg, output);
   384     }
   385     void getMatchStart(RegisterID reg)
   386     {
   387         ASSERT(!m_pattern.m_body->m_hasFixedSize);
   388         if (compileMode == IncludeSubpatterns)
   389             load32(output, reg);
   390         else
   391             move(output, reg);
   392     }
   394     enum YarrOpCode {
   395         // These nodes wrap body alternatives - those in the main disjunction,
   396         // rather than subpatterns or assertions. These are chained together in
   397         // a doubly linked list, with a 'begin' node for the first alternative,
   398         // a 'next' node for each subsequent alternative, and an 'end' node at
   399         // the end. In the case of repeating alternatives, the 'end' node also
   400         // has a reference back to 'begin'.
   401         OpBodyAlternativeBegin,
   402         OpBodyAlternativeNext,
   403         OpBodyAlternativeEnd,
   404         // Similar to the body alternatives, but used for subpatterns with two
   405         // or more alternatives.
   406         OpNestedAlternativeBegin,
   407         OpNestedAlternativeNext,
   408         OpNestedAlternativeEnd,
   409         // Used for alternatives in subpatterns where there is only a single
   410         // alternative (backtrackingis easier in these cases), or for alternatives
   411         // which never need to be backtracked (those in parenthetical assertions,
   412         // terminal subpatterns).
   413         OpSimpleNestedAlternativeBegin,
   414         OpSimpleNestedAlternativeNext,
   415         OpSimpleNestedAlternativeEnd,
   416         // Used to wrap 'Once' subpattern matches (quantityCount == 1).
   417         OpParenthesesSubpatternOnceBegin,
   418         OpParenthesesSubpatternOnceEnd,
   419         // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
   420         OpParenthesesSubpatternTerminalBegin,
   421         OpParenthesesSubpatternTerminalEnd,
   422         // Used to wrap parenthetical assertions.
   423         OpParentheticalAssertionBegin,
   424         OpParentheticalAssertionEnd,
   425         // Wraps all simple terms (pattern characters, character classes).
   426         OpTerm,
   427         // Where an expression contains only 'once through' body alternatives
   428         // and no repeating ones, this op is used to return match failure.
   429         OpMatchFailed
   430     };
   432     // This structure is used to hold the compiled opcode information,
   433     // including reference back to the original PatternTerm/PatternAlternatives,
   434     // and JIT compilation data structures.
   435     struct YarrOp {
   436         explicit YarrOp(PatternTerm* term)
   437             : m_op(OpTerm)
   438             , m_term(term)
   439             , m_isDeadCode(false)
   440         {
   441         }
   443         explicit YarrOp(YarrOpCode op)
   444             : m_op(op)
   445             , m_isDeadCode(false)
   446         {
   447         }
   449         // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
   450         YarrOpCode m_op;
   451         PatternTerm* m_term;
   453         // For alternatives, this holds the PatternAlternative and doubly linked
   454         // references to this alternative's siblings. In the case of the
   455         // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
   456         // m_nextOp will reference the OpBodyAlternativeBegin node of the first
   457         // repeating alternative.
   458         PatternAlternative* m_alternative;
   459         size_t m_previousOp;
   460         size_t m_nextOp;
   462         // Used to record a set of Jumps out of the generated code, typically
   463         // used for jumps out to backtracking code, and a single reentry back
   464         // into the code for a node (likely where a backtrack will trigger
   465         // rematching).
   466         Label m_reentry;
   467         JumpList m_jumps;
   469         // Used for backtracking when the prior alternative did not consume any
   470         // characters but matched.
   471         Jump m_zeroLengthMatch;
   473         // This flag is used to null out the second pattern character, when
   474         // two are fused to match a pair together.
   475         bool m_isDeadCode;
   477         // Currently used in the case of some of the more complex management of
   478         // 'm_checked', to cache the offset used in this alternative, to avoid
   479         // recalculating it.
   480         int m_checkAdjust;
   482         // Used by OpNestedAlternativeNext/End to hold the pointer to the
   483         // value that will be pushed into the pattern's frame to return to,
   484         // upon backtracking back into the disjunction.
   485         DataLabelPtr m_returnAddress;
   486     };
   488     // BacktrackingState
   489     // This class encapsulates information about the state of code generation
   490     // whilst generating the code for backtracking, when a term fails to match.
   491     // Upon entry to code generation of the backtracking code for a given node,
   492     // the Backtracking state will hold references to all control flow sources
   493     // that are outputs in need of further backtracking from the prior node
   494     // generated (which is the subsequent operation in the regular expression,
   495     // and in the m_ops Vector, since we generated backtracking backwards).
   496     // These references to control flow take the form of:
   497     //  - A jump list of jumps, to be linked to code that will backtrack them
   498     //    further.
   499     //  - A set of DataLabelPtr values, to be populated with values to be
   500     //    treated effectively as return addresses backtracking into complex
   501     //    subpatterns.
   502     //  - A flag indicating that the current sequence of generated code up to
   503     //    this point requires backtracking.
   504     class BacktrackingState {
   505     public:
   506         BacktrackingState()
   507             : m_pendingFallthrough(false)
   508         {
   509         }
   511         // Add a jump or jumps, a return address, or set the flag indicating
   512         // that the current 'fallthrough' control flow requires backtracking.
   513         void append(const Jump& jump)
   514         {
   515             m_laterFailures.append(jump);
   516         }
   517         void append(JumpList& jumpList)
   518         {
   519             m_laterFailures.append(jumpList);
   520         }
   521         void append(const DataLabelPtr& returnAddress)
   522         {
   523             m_pendingReturns.append(returnAddress);
   524         }
   525         void fallthrough()
   526         {
   527             ASSERT(!m_pendingFallthrough);
   528             m_pendingFallthrough = true;
   529         }
   531         // These methods clear the backtracking state, either linking to the
   532         // current location, a provided label, or copying the backtracking out
   533         // to a JumpList. All actions may require code generation to take place,
   534         // and as such are passed a pointer to the assembler.
   535         void link(MacroAssembler* assembler)
   536         {
   537             if (m_pendingReturns.size()) {
   538                 Label here(assembler);
   539                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
   540                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
   541                 m_pendingReturns.clear();
   542             }
   543             m_laterFailures.link(assembler);
   544             m_laterFailures.clear();
   545             m_pendingFallthrough = false;
   546         }
   547         void linkTo(Label label, MacroAssembler* assembler)
   548         {
   549             if (m_pendingReturns.size()) {
   550                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
   551                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
   552                 m_pendingReturns.clear();
   553             }
   554             if (m_pendingFallthrough)
   555                 assembler->jump(label);
   556             m_laterFailures.linkTo(label, assembler);
   557             m_laterFailures.clear();
   558             m_pendingFallthrough = false;
   559         }
   560         void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
   561         {
   562             if (m_pendingReturns.size()) {
   563                 Label here(assembler);
   564                 for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
   565                     m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
   566                 m_pendingReturns.clear();
   567                 m_pendingFallthrough = true;
   568             }
   569             if (m_pendingFallthrough)
   570                 jumpList.append(assembler->jump());
   571             jumpList.append(m_laterFailures);
   572             m_laterFailures.clear();
   573             m_pendingFallthrough = false;
   574         }
   576         bool isEmpty()
   577         {
   578             return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
   579         }
   581         // Called at the end of code generation to link all return addresses.
   582         void linkDataLabels(LinkBuffer& linkBuffer)
   583         {
   584             ASSERT(isEmpty());
   585             for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
   586                 linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation));
   587         }
   589     private:
   590         struct ReturnAddressRecord {
   591             ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
   592                 : m_dataLabel(dataLabel)
   593                 , m_backtrackLocation(backtrackLocation)
   594             {
   595             }
   597             DataLabelPtr m_dataLabel;
   598             Label m_backtrackLocation;
   599         };
   601         JumpList m_laterFailures;
   602         bool m_pendingFallthrough;
   603         Vector<DataLabelPtr, 4> m_pendingReturns;
   604         Vector<ReturnAddressRecord, 4> m_backtrackRecords;
   605     };
   607     // Generation methods:
   608     // ===================
   610     // This method provides a default implementation of backtracking common
   611     // to many terms; terms commonly jump out of the forwards  matching path
   612     // on any failed conditions, and add these jumps to the m_jumps list. If
   613     // no special handling is required we can often just backtrack to m_jumps.
   614     bool backtrackTermDefault(size_t opIndex)
   615     {
   616         YarrOp& op = m_ops[opIndex];
   617         m_backtrackingState.append(op.m_jumps);
   618         return true;
   619     }
   621     bool generateAssertionBOL(size_t opIndex)
   622     {
   623         YarrOp& op = m_ops[opIndex];
   624         PatternTerm* term = op.m_term;
   626         if (m_pattern.m_multiline) {
   627             const RegisterID character = regT0;
   629             JumpList matchDest;
   630             if (!term->inputPosition)
   631                 matchDest.append(branch32(Equal, index, Imm32(m_checked)));
   633             readCharacter((term->inputPosition - m_checked) - 1, character);
   634             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
   635             op.m_jumps.append(jump());
   637             matchDest.link(this);
   638         } else {
   639             // Erk, really should poison out these alternatives early. :-/
   640             if (term->inputPosition)
   641                 op.m_jumps.append(jump());
   642             else
   643                 op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked)));
   644         }
   645         return true;
   646     }
   647     bool backtrackAssertionBOL(size_t opIndex)
   648     {
   649         return backtrackTermDefault(opIndex);
   650     }
   652     bool generateAssertionEOL(size_t opIndex)
   653     {
   654         YarrOp& op = m_ops[opIndex];
   655         PatternTerm* term = op.m_term;
   657         if (m_pattern.m_multiline) {
   658             const RegisterID character = regT0;
   660             JumpList matchDest;
   661             if (term->inputPosition == m_checked)
   662                 matchDest.append(atEndOfInput());
   664             readCharacter(term->inputPosition - m_checked, character);
   665             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
   666             op.m_jumps.append(jump());
   668             matchDest.link(this);
   669         } else {
   670             if (term->inputPosition == m_checked)
   671                 op.m_jumps.append(notAtEndOfInput());
   672             // Erk, really should poison out these alternatives early. :-/
   673             else
   674                 op.m_jumps.append(jump());
   675         }
   676         return true;
   677     }
   678     bool backtrackAssertionEOL(size_t opIndex)
   679     {
   680         return backtrackTermDefault(opIndex);
   681     }
   683     // Also falls though on nextIsNotWordChar.
   684     void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
   685     {
   686         YarrOp& op = m_ops[opIndex];
   687         PatternTerm* term = op.m_term;
   689         const RegisterID character = regT0;
   691         if (term->inputPosition == m_checked)
   692             nextIsNotWordChar.append(atEndOfInput());
   694         readCharacter((term->inputPosition - m_checked), character);
   695         matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
   696     }
   698     bool generateAssertionWordBoundary(size_t opIndex)
   699     {
   700         YarrOp& op = m_ops[opIndex];
   701         PatternTerm* term = op.m_term;
   703         const RegisterID character = regT0;
   705         Jump atBegin;
   706         JumpList matchDest;
   707         if (!term->inputPosition)
   708             atBegin = branch32(Equal, index, Imm32(m_checked));
   709         readCharacter((term->inputPosition - m_checked) - 1, character);
   710         matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
   711         if (!term->inputPosition)
   712             atBegin.link(this);
   714         // We fall through to here if the last character was not a wordchar.
   715         JumpList nonWordCharThenWordChar;
   716         JumpList nonWordCharThenNonWordChar;
   717         if (term->invert()) {
   718             matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
   719             nonWordCharThenWordChar.append(jump());
   720         } else {
   721             matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
   722             nonWordCharThenNonWordChar.append(jump());
   723         }
   724         op.m_jumps.append(nonWordCharThenNonWordChar);
   726         // We jump here if the last character was a wordchar.
   727         matchDest.link(this);
   728         JumpList wordCharThenWordChar;
   729         JumpList wordCharThenNonWordChar;
   730         if (term->invert()) {
   731             matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
   732             wordCharThenWordChar.append(jump());
   733         } else {
   734             matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
   735             // This can fall-though!
   736         }
   738         op.m_jumps.append(wordCharThenWordChar);
   740         nonWordCharThenWordChar.link(this);
   741         wordCharThenNonWordChar.link(this);
   742         return true;
   743     }
   744     bool backtrackAssertionWordBoundary(size_t opIndex)
   745     {
   746         return backtrackTermDefault(opIndex);
   747     }
   749     bool generatePatternCharacterOnce(size_t opIndex)
   750     {
   751         YarrOp& op = m_ops[opIndex];
   753         if (op.m_isDeadCode)
   754             return true;
   756         // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
   757         // node, so there must always be at least one more node.
   758         ASSERT(opIndex + 1 < m_ops.size());
   759         YarrOp* nextOp = &m_ops[opIndex + 1];
   761         PatternTerm* term = op.m_term;
   762         UChar ch = term->patternCharacter;
   764         if ((ch > 0xff) && (m_charSize == Char8)) {
   765             // Have a 16 bit pattern character and an 8 bit string - short circuit
   766             op.m_jumps.append(jump());
   767             return true;
   768         }
   770         const RegisterID character = regT0;
   771         int maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
   772         unsigned ignoreCaseMask = 0;
   773 #if CPU(BIG_ENDIAN)
   774         int allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
   775 #else
   776         int allCharacters = ch;
   777 #endif
   778         int numberCharacters;
   779         int startTermPosition = term->inputPosition;
   781         // For case-insesitive compares, non-ascii characters that have different
   782         // upper & lower case representations are converted to a character class.
   783         ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
   785         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch))
   786 #if CPU(BIG_ENDIAN)
   787             ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
   788 #else
   789             ignoreCaseMask |= 32;
   790 #endif
   792         for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
   793             PatternTerm* nextTerm = nextOp->m_term;
   795             if (nextTerm->type != PatternTerm::TypePatternCharacter
   796                 || nextTerm->quantityType != QuantifierFixedCount
   797                 || nextTerm->quantityCount != 1
   798                 || nextTerm->inputPosition != (startTermPosition + numberCharacters))
   799                 break;
   801             nextOp->m_isDeadCode = true;
   803 #if CPU(BIG_ENDIAN)
   804             int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters);
   805 #else
   806             int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
   807 #endif
   809             UChar currentCharacter = nextTerm->patternCharacter;
   811             if ((currentCharacter > 0xff) && (m_charSize == Char8)) {
   812                 // Have a 16 bit pattern character and an 8 bit string - short circuit
   813                 op.m_jumps.append(jump());
   814                 return true;
   815             }
   817             // For case-insesitive compares, non-ascii characters that have different
   818             // upper & lower case representations are converted to a character class.
   819             ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter));
   821             allCharacters |= (currentCharacter << shiftAmount);
   823             if ((m_pattern.m_ignoreCase) && (isASCIIAlpha(currentCharacter)))
   824                 ignoreCaseMask |= 32 << shiftAmount;
   825         }
   827         if (m_charSize == Char8) {
   828             switch (numberCharacters) {
   829             case 1:
   830                 op.m_jumps.append(jumpIfCharNotEquals(ch, startTermPosition - m_checked, character));
   831                 return true;
   832             case 2: {
   833                 BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
   834                 load16Unaligned(address, character);
   835                 break;
   836             }
   837             case 3: {
   838                 BaseIndex highAddress(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
   839                 load16Unaligned(highAddress, character);
   840                 if (ignoreCaseMask)
   841                     or32(Imm32(ignoreCaseMask), character);
   842                 op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask)));
   843                 op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, startTermPosition + 2 - m_checked, character));
   844                 return true;
   845             }
   846             case 4: {
   847                 BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
   848                 load32WithUnalignedHalfWords(address, character);
   849                 break;
   850             }
   851             }
   852         } else {
   853             switch (numberCharacters) {
   854             case 1:
   855                 op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
   856                 return true;
   857             case 2:
   858                 BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
   859                 load32WithUnalignedHalfWords(address, character);
   860                 break;
   861             }
   862         }
   864         if (ignoreCaseMask)
   865             or32(Imm32(ignoreCaseMask), character);
   866         op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask)));
   867         return true;
   868     }
   869     bool backtrackPatternCharacterOnce(size_t opIndex)
   870     {
   871         return backtrackTermDefault(opIndex);
   872     }
   874     bool generatePatternCharacterFixed(size_t opIndex)
   875     {
   876         YarrOp& op = m_ops[opIndex];
   877         PatternTerm* term = op.m_term;
   878         UChar ch = term->patternCharacter;
   880         const RegisterID character = regT0;
   881         const RegisterID countRegister = regT1;
   883         move(index, countRegister);
   884         if (term->quantityCount.hasOverflowed())
   885             return false;
   886         sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);
   888         Label loop(this);
   889         int offset;
   890         if ((Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(m_charSize == Char8 ? sizeof(char) : sizeof(UChar))).safeGet(offset))
   891             return false;
   892         BaseIndex address(input, countRegister, m_charScale, offset);
   894         if (m_charSize == Char8)
   895             load8(address, character);
   896         else
   897             load16(address, character);
   899         // For case-insesitive compares, non-ascii characters that have different
   900         // upper & lower case representations are converted to a character class.
   901         ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
   902         if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
   903             or32(TrustedImm32(0x20), character);
   904             ch |= 0x20;
   905         }
   907         op.m_jumps.append(branch32(NotEqual, character, Imm32(ch)));
   908         add32(TrustedImm32(1), countRegister);
   909         branch32(NotEqual, countRegister, index).linkTo(loop, this);
   911         return true;
   912     }
   913     bool backtrackPatternCharacterFixed(size_t opIndex)
   914     {
   915         return backtrackTermDefault(opIndex);
   916     }
   918     bool generatePatternCharacterGreedy(size_t opIndex)
   919     {
   920         YarrOp& op = m_ops[opIndex];
   921         PatternTerm* term = op.m_term;
   922         UChar ch = term->patternCharacter;
   924         const RegisterID character = regT0;
   925         const RegisterID countRegister = regT1;
   927         move(TrustedImm32(0), countRegister);
   929         // Unless have a 16 bit pattern character and an 8 bit string - short circuit
   930         if (!((ch > 0xff) && (m_charSize == Char8))) {
   931             JumpList failures;
   932             Label loop(this);
   933             failures.append(atEndOfInput());
   934             failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
   936             add32(TrustedImm32(1), countRegister);
   937             add32(TrustedImm32(1), index);
   938             if (term->quantityCount == quantifyInfinite) {
   939                 jump(loop);
   940             } else {
   941                 if (term->quantityCount.hasOverflowed())
   942                     return false;
   943                 branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
   944             }
   946             failures.link(this);
   947         }
   948         op.m_reentry = label();
   950         storeToFrame(countRegister, term->frameLocation);
   951         return true;
   952     }
   953     bool backtrackPatternCharacterGreedy(size_t opIndex)
   954     {
   955         YarrOp& op = m_ops[opIndex];
   956         PatternTerm* term = op.m_term;
   958         const RegisterID countRegister = regT1;
   960         m_backtrackingState.link(this);
   962         loadFromFrame(term->frameLocation, countRegister);
   963         m_backtrackingState.append(branchTest32(Zero, countRegister));
   964         sub32(TrustedImm32(1), countRegister);
   965         sub32(TrustedImm32(1), index);
   966         jump(op.m_reentry);
   968         return true;
   969     }
   971     bool generatePatternCharacterNonGreedy(size_t opIndex)
   972     {
   973         YarrOp& op = m_ops[opIndex];
   974         PatternTerm* term = op.m_term;
   976         const RegisterID countRegister = regT1;
   978         move(TrustedImm32(0), countRegister);
   979         op.m_reentry = label();
   980         storeToFrame(countRegister, term->frameLocation);
   981         return true;
   982     }
   983     bool backtrackPatternCharacterNonGreedy(size_t opIndex)
   984     {
   985         YarrOp& op = m_ops[opIndex];
   986         PatternTerm* term = op.m_term;
   987         UChar ch = term->patternCharacter;
   989         const RegisterID character = regT0;
   990         const RegisterID countRegister = regT1;
   992         m_backtrackingState.link(this);
   994         loadFromFrame(term->frameLocation, countRegister);
   996         // Unless have a 16 bit pattern character and an 8 bit string - short circuit
   997         if (!((ch > 0xff) && (m_charSize == Char8))) {
   998             JumpList nonGreedyFailures;
   999             nonGreedyFailures.append(atEndOfInput());
  1000             if (term->quantityCount != quantifyInfinite) {
  1001                 if (term->quantityCount.hasOverflowed())
  1002                     return false;
  1003                 nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
  1005             nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
  1007             add32(TrustedImm32(1), countRegister);
  1008             add32(TrustedImm32(1), index);
  1010             jump(op.m_reentry);
  1011             nonGreedyFailures.link(this);
  1014         sub32(countRegister, index);
  1015         m_backtrackingState.fallthrough();
  1017         return true;
  1020     bool generateCharacterClassOnce(size_t opIndex)
  1022         YarrOp& op = m_ops[opIndex];
  1023         PatternTerm* term = op.m_term;
  1025         const RegisterID character = regT0;
  1027         JumpList matchDest;
  1028         readCharacter(term->inputPosition - m_checked, character);
  1029         matchCharacterClass(character, matchDest, term->characterClass);
  1031         if (term->invert())
  1032             op.m_jumps.append(matchDest);
  1033         else {
  1034             op.m_jumps.append(jump());
  1035             matchDest.link(this);
  1037         return true;
  1039     bool backtrackCharacterClassOnce(size_t opIndex)
  1041         return backtrackTermDefault(opIndex);
  1044     bool generateCharacterClassFixed(size_t opIndex)
  1046         YarrOp& op = m_ops[opIndex];
  1047         PatternTerm* term = op.m_term;
  1049         const RegisterID character = regT0;
  1050         const RegisterID countRegister = regT1;
  1052         move(index, countRegister);
  1053         if (term->quantityCount.hasOverflowed())
  1054             return false;
  1055         sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);
  1057         Label loop(this);
  1058         JumpList matchDest;
  1060         int offset;
  1061         Checked<int64_t> checkedOffset(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount));
  1063         if (m_charSize == Char8) {
  1064             if ((Checked<int>(checkedOffset) * static_cast<int>(sizeof(char))).safeGet(offset))
  1065                 return false;
  1066             load8(BaseIndex(input, countRegister, TimesOne, offset), character);
  1067         } else {
  1068             if ((Checked<int>(checkedOffset) * static_cast<int>(sizeof(UChar))).safeGet(offset))
  1069                 return false;
  1070             load16(BaseIndex(input, countRegister, TimesTwo, offset), character);
  1072         matchCharacterClass(character, matchDest, term->characterClass);
  1074         if (term->invert())
  1075             op.m_jumps.append(matchDest);
  1076         else {
  1077             op.m_jumps.append(jump());
  1078             matchDest.link(this);
  1081         add32(TrustedImm32(1), countRegister);
  1082         branch32(NotEqual, countRegister, index).linkTo(loop, this);
  1083         return true;
  1085     bool backtrackCharacterClassFixed(size_t opIndex)
  1087         return backtrackTermDefault(opIndex);
  1090     bool generateCharacterClassGreedy(size_t opIndex)
  1092         YarrOp& op = m_ops[opIndex];
  1093         PatternTerm* term = op.m_term;
  1095         const RegisterID character = regT0;
  1096         const RegisterID countRegister = regT1;
  1098         move(TrustedImm32(0), countRegister);
  1100         JumpList failures;
  1101         Label loop(this);
  1102         failures.append(atEndOfInput());
  1104         if (term->invert()) {
  1105             readCharacter(term->inputPosition - m_checked, character);
  1106             matchCharacterClass(character, failures, term->characterClass);
  1107         } else {
  1108             JumpList matchDest;
  1109             readCharacter(term->inputPosition - m_checked, character);
  1110             matchCharacterClass(character, matchDest, term->characterClass);
  1111             failures.append(jump());
  1112             matchDest.link(this);
  1115         add32(TrustedImm32(1), countRegister);
  1116         add32(TrustedImm32(1), index);
  1117         if (term->quantityCount != quantifyInfinite) {
  1118             unsigned quantityCount;
  1119             if (term->quantityCount.safeGet(quantityCount))
  1120                 return false;
  1121             branch32(NotEqual, countRegister, Imm32(quantityCount)).linkTo(loop, this);
  1122             failures.append(jump());
  1123         } else
  1124             jump(loop);
  1126         failures.link(this);
  1127         op.m_reentry = label();
  1129         storeToFrame(countRegister, term->frameLocation);
  1130         return true;
  1132     bool backtrackCharacterClassGreedy(size_t opIndex)
  1134         YarrOp& op = m_ops[opIndex];
  1135         PatternTerm* term = op.m_term;
  1137         const RegisterID countRegister = regT1;
  1139         m_backtrackingState.link(this);
  1141         loadFromFrame(term->frameLocation, countRegister);
  1142         m_backtrackingState.append(branchTest32(Zero, countRegister));
  1143         sub32(TrustedImm32(1), countRegister);
  1144         sub32(TrustedImm32(1), index);
  1145         jump(op.m_reentry);
  1147         return true;
  1150     bool generateCharacterClassNonGreedy(size_t opIndex)
  1152         YarrOp& op = m_ops[opIndex];
  1153         PatternTerm* term = op.m_term;
  1155         const RegisterID countRegister = regT1;
  1157         move(TrustedImm32(0), countRegister);
  1158         op.m_reentry = label();
  1159         storeToFrame(countRegister, term->frameLocation);
  1160         return true;
  1162     bool backtrackCharacterClassNonGreedy(size_t opIndex)
  1164         YarrOp& op = m_ops[opIndex];
  1165         PatternTerm* term = op.m_term;
  1167         const RegisterID character = regT0;
  1168         const RegisterID countRegister = regT1;
  1170         JumpList nonGreedyFailures;
  1172         m_backtrackingState.link(this);
  1174         loadFromFrame(term->frameLocation, countRegister);
  1176         nonGreedyFailures.append(atEndOfInput());
  1177         if (term->quantityCount.hasOverflowed())
  1178             return false;
  1179         nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
  1181         JumpList matchDest;
  1182         readCharacter(term->inputPosition - m_checked, character);
  1183         matchCharacterClass(character, matchDest, term->characterClass);
  1185         if (term->invert())
  1186             nonGreedyFailures.append(matchDest);
  1187         else {
  1188             nonGreedyFailures.append(jump());
  1189             matchDest.link(this);
  1192         add32(TrustedImm32(1), countRegister);
  1193         add32(TrustedImm32(1), index);
  1195         jump(op.m_reentry);
  1197         nonGreedyFailures.link(this);
  1198         sub32(countRegister, index);
  1199         m_backtrackingState.fallthrough();
  1201         return true;
  1204     bool generateDotStarEnclosure(size_t opIndex)
  1206         YarrOp& op = m_ops[opIndex];
  1207         PatternTerm* term = op.m_term;
  1209         const RegisterID character = regT0;
  1210         const RegisterID matchPos = regT1;
  1212         JumpList foundBeginningNewLine;
  1213         JumpList saveStartIndex;
  1214         JumpList foundEndingNewLine;
  1216         ASSERT(!m_pattern.m_body->m_hasFixedSize);
  1217         getMatchStart(matchPos);
  1219         saveStartIndex.append(branchTest32(Zero, matchPos));
  1220         Label findBOLLoop(this);
  1221         sub32(TrustedImm32(1), matchPos);
  1222         if (m_charSize == Char8)
  1223             load8(BaseIndex(input, matchPos, TimesOne, 0), character);
  1224         else
  1225             load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
  1226         matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
  1227         branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this);
  1228         saveStartIndex.append(jump());
  1230         foundBeginningNewLine.link(this);
  1231         add32(TrustedImm32(1), matchPos); // Advance past newline
  1232         saveStartIndex.link(this);
  1234         if (!m_pattern.m_multiline && term->anchors.bolAnchor)
  1235             op.m_jumps.append(branchTest32(NonZero, matchPos));
  1237         ASSERT(!m_pattern.m_body->m_hasFixedSize);
  1238         setMatchStart(matchPos);
  1240         move(index, matchPos);
  1242         Label findEOLLoop(this);
  1243         foundEndingNewLine.append(branch32(Equal, matchPos, length));
  1244         if (m_charSize == Char8)
  1245             load8(BaseIndex(input, matchPos, TimesOne, 0), character);
  1246         else
  1247             load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
  1248         matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
  1249         add32(TrustedImm32(1), matchPos);
  1250         jump(findEOLLoop);
  1252         foundEndingNewLine.link(this);
  1254         if (!m_pattern.m_multiline && term->anchors.eolAnchor)
  1255             op.m_jumps.append(branch32(NotEqual, matchPos, length));
  1257         move(matchPos, index);
  1258         return true;
  1261     bool backtrackDotStarEnclosure(size_t opIndex)
  1263         return backtrackTermDefault(opIndex);
  1266     // Code generation/backtracking for simple terms
  1267     // (pattern characters, character classes, and assertions).
  1268     // These methods farm out work to the set of functions above.
  1269     bool generateTerm(size_t opIndex)
  1271         YarrOp& op = m_ops[opIndex];
  1272         PatternTerm* term = op.m_term;
  1274         switch (term->type) {
  1275         case PatternTerm::TypePatternCharacter:
  1276             switch (term->quantityType) {
  1277             case QuantifierFixedCount:
  1278                 if (term->quantityCount == 1)
  1279                     return generatePatternCharacterOnce(opIndex);
  1280                 else
  1281                     return generatePatternCharacterFixed(opIndex);
  1282                 break;
  1283             case QuantifierGreedy:
  1284                 return generatePatternCharacterGreedy(opIndex);
  1285             case QuantifierNonGreedy:
  1286                 return generatePatternCharacterNonGreedy(opIndex);
  1288             break;
  1290         case PatternTerm::TypeCharacterClass:
  1291             switch (term->quantityType) {
  1292             case QuantifierFixedCount:
  1293                 if (term->quantityCount == 1)
  1294                     return generateCharacterClassOnce(opIndex);
  1295                 else
  1296                     return generateCharacterClassFixed(opIndex);
  1297                 break;
  1298             case QuantifierGreedy:
  1299                 return generateCharacterClassGreedy(opIndex);
  1300             case QuantifierNonGreedy:
  1301                 return generateCharacterClassNonGreedy(opIndex);
  1303             break;
  1305         case PatternTerm::TypeAssertionBOL:
  1306             return generateAssertionBOL(opIndex);
  1308         case PatternTerm::TypeAssertionEOL:
  1309             return generateAssertionEOL(opIndex);
  1311         case PatternTerm::TypeAssertionWordBoundary:
  1312             return generateAssertionWordBoundary(opIndex);
  1314         case PatternTerm::TypeForwardReference:
  1315             return true;
  1317         case PatternTerm::TypeParenthesesSubpattern:
  1318         case PatternTerm::TypeParentheticalAssertion:
  1319             ASSERT_NOT_REACHED();
  1320             return false;
  1321         case PatternTerm::TypeBackReference:
  1322             return false;
  1323         case PatternTerm::TypeDotStarEnclosure:
  1324             return generateDotStarEnclosure(opIndex);
  1327         return false;
  1329     bool backtrackTerm(size_t opIndex)
  1331         YarrOp& op = m_ops[opIndex];
  1332         PatternTerm* term = op.m_term;
  1334         switch (term->type) {
  1335         case PatternTerm::TypePatternCharacter:
  1336             switch (term->quantityType) {
  1337             case QuantifierFixedCount:
  1338                 if (term->quantityCount == 1)
  1339                     return backtrackPatternCharacterOnce(opIndex);
  1340                 else
  1341                     return backtrackPatternCharacterFixed(opIndex);
  1342             case QuantifierGreedy:
  1343                 return backtrackPatternCharacterGreedy(opIndex);
  1344             case QuantifierNonGreedy:
  1345                 return backtrackPatternCharacterNonGreedy(opIndex);
  1347             break;
  1349         case PatternTerm::TypeCharacterClass:
  1350             switch (term->quantityType) {
  1351             case QuantifierFixedCount:
  1352                 if (term->quantityCount == 1)
  1353                     return backtrackCharacterClassOnce(opIndex);
  1354                 else
  1355                     return backtrackCharacterClassFixed(opIndex);
  1356             case QuantifierGreedy:
  1357                 return backtrackCharacterClassGreedy(opIndex);
  1358             case QuantifierNonGreedy:
  1359                 return backtrackCharacterClassNonGreedy(opIndex);
  1361             break;
  1363         case PatternTerm::TypeAssertionBOL:
  1364             return backtrackAssertionBOL(opIndex);
  1366         case PatternTerm::TypeAssertionEOL:
  1367             return backtrackAssertionEOL(opIndex);
  1369         case PatternTerm::TypeAssertionWordBoundary:
  1370             return backtrackAssertionWordBoundary(opIndex);
  1372         case PatternTerm::TypeForwardReference:
  1373             return true;
  1375         case PatternTerm::TypeParenthesesSubpattern:
  1376         case PatternTerm::TypeParentheticalAssertion:
  1377             ASSERT_NOT_REACHED();
  1378             return false;
  1380         case PatternTerm::TypeDotStarEnclosure:
  1381             return backtrackDotStarEnclosure(opIndex);
  1383         case PatternTerm::TypeBackReference:
  1384             return false;
  1386         return true;
  1389     bool generate()
  1391         // Forwards generate the matching code.
  1392         ASSERT(m_ops.size());
  1393         size_t opIndex = 0;
  1395         do {
  1396             YarrOp& op = m_ops[opIndex];
  1397             switch (op.m_op) {
  1399             case OpTerm:
  1400                 if (!generateTerm(opIndex))
  1401                     return false;
  1402                 break;
  1404             // OpBodyAlternativeBegin/Next/End
  1405             //
  1406             // These nodes wrap the set of alternatives in the body of the regular expression.
  1407             // There may be either one or two chains of OpBodyAlternative nodes, one representing
  1408             // the 'once through' sequence of alternatives (if any exist), and one representing
  1409             // the repeating alternatives (again, if any exist).
  1410             //
  1411             // Upon normal entry to the Begin alternative, we will check that input is available.
  1412             // Reentry to the Begin alternative will take place after the check has taken place,
  1413             // and will assume that the input position has already been progressed as appropriate.
  1414             //
  1415             // Entry to subsequent Next/End alternatives occurs when the prior alternative has
  1416             // successfully completed a match - return a success state from JIT code.
  1417             //
  1418             // Next alternatives allow for reentry optimized to suit backtracking from its
  1419             // preceding alternative. It expects the input position to still be set to a position
  1420             // appropriate to its predecessor, and it will only perform an input check if the
  1421             // predecessor had a minimum size less than its own.
  1422             //
  1423             // In the case 'once through' expressions, the End node will also have a reentry
  1424             // point to jump to when the last alternative fails. Again, this expects the input
  1425             // position to still reflect that expected by the prior alternative.
  1426             case OpBodyAlternativeBegin: {
  1427                 PatternAlternative* alternative = op.m_alternative;
  1429                 // Upon entry at the head of the set of alternatives, check if input is available
  1430                 // to run the first alternative. (This progresses the input position).
  1431                 op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
  1432                 // We will reenter after the check, and assume the input position to have been
  1433                 // set as appropriate to this alternative.
  1434                 op.m_reentry = label();
  1436                 if (alternative->m_minimumSize > INT_MAX)
  1437                     return false;
  1438                 m_checked = alternative->m_minimumSize;
  1439                 break;
  1441             case OpBodyAlternativeNext:
  1442             case OpBodyAlternativeEnd: {
  1443                 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
  1444                 PatternAlternative* alternative = op.m_alternative;
  1446                 // If we get here, the prior alternative matched - return success.
  1448                 // Adjust the stack pointer to remove the pattern's frame.
  1449 #if !WTF_CPU_SPARC
  1450                 removeCallFrame();
  1451 #endif
  1453                 // Load appropriate values into the return register and the first output
  1454                 // slot, and return. In the case of pattern with a fixed size, we will
  1455                 // not have yet set the value in the first
  1456                 ASSERT(index != returnRegister);
  1457                 if (m_pattern.m_body->m_hasFixedSize) {
  1458                     move(index, returnRegister);
  1459                     if (priorAlternative->m_minimumSize)
  1460                         sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
  1461                     if (compileMode == IncludeSubpatterns)
  1462                         store32(returnRegister, output);
  1463                 } else
  1464                     getMatchStart(returnRegister);
  1465                 if (compileMode == IncludeSubpatterns)
  1466                     store32(index, Address(output, 4));
  1467 #if WTF_CPU_X86_64
  1468                 // upper 32bit to 0
  1469                 move32(returnRegister, returnRegister);
  1470                 lshiftPtr(Imm32(32), index);
  1471                 orPtr(index, returnRegister);
  1472 #else
  1473                 move(index, returnRegister2);
  1474 #endif
  1476                 generateReturn();
  1478                 // This is the divide between the tail of the prior alternative, above, and
  1479                 // the head of the subsequent alternative, below.
  1481                 if (op.m_op == OpBodyAlternativeNext) {
  1482                     // This is the reentry point for the Next alternative. We expect any code
  1483                     // that jumps here to do so with the input position matching that of the
  1484                     // PRIOR alteranative, and we will only check input availability if we
  1485                     // need to progress it forwards.
  1486                     op.m_reentry = label();
  1487                     if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
  1488                         add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
  1489                         op.m_jumps.append(jumpIfNoAvailableInput());
  1490                     } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
  1491                         sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
  1492                 } else if (op.m_nextOp == notFound) {
  1493                     // This is the reentry point for the End of 'once through' alternatives,
  1494                     // jumped to when the last alternative fails to match.
  1495                     op.m_reentry = label();
  1496                     sub32(Imm32(priorAlternative->m_minimumSize), index);
  1499                 if (op.m_op == OpBodyAlternativeNext)
  1500                     m_checked += alternative->m_minimumSize;
  1501                 m_checked -= priorAlternative->m_minimumSize;
  1502                 break;
  1505             // OpSimpleNestedAlternativeBegin/Next/End
  1506             // OpNestedAlternativeBegin/Next/End
  1507             //
  1508             // These nodes are used to handle sets of alternatives that are nested within
  1509             // subpatterns and parenthetical assertions. The 'simple' forms are used where
  1510             // we do not need to be able to backtrack back into any alternative other than
  1511             // the last, the normal forms allow backtracking into any alternative.
  1512             //
  1513             // Each Begin/Next node is responsible for planting an input check to ensure
  1514             // sufficient input is available on entry. Next nodes additionally need to
  1515             // jump to the end - Next nodes use the End node's m_jumps list to hold this
  1516             // set of jumps.
  1517             //
  1518             // In the non-simple forms, successful alternative matches must store a
  1519             // 'return address' using a DataLabelPtr, used to store the address to jump
  1520             // to when backtracking, to get to the code for the appropriate alternative.
  1521             case OpSimpleNestedAlternativeBegin:
  1522             case OpNestedAlternativeBegin: {
  1523                 PatternTerm* term = op.m_term;
  1524                 PatternAlternative* alternative = op.m_alternative;
  1525                 PatternDisjunction* disjunction = term->parentheses.disjunction;
  1527                 // Calculate how much input we need to check for, and if non-zero check.
  1528                 op.m_checkAdjust = alternative->m_minimumSize;
  1529                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
  1530                     op.m_checkAdjust -= disjunction->m_minimumSize;
  1531                 if (op.m_checkAdjust)
  1532                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
  1534                 m_checked += op.m_checkAdjust;
  1535                 break;
  1537             case OpSimpleNestedAlternativeNext:
  1538             case OpNestedAlternativeNext: {
  1539                 PatternTerm* term = op.m_term;
  1540                 PatternAlternative* alternative = op.m_alternative;
  1541                 PatternDisjunction* disjunction = term->parentheses.disjunction;
  1543                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
  1544                 if (op.m_op == OpNestedAlternativeNext) {
  1545                     unsigned parenthesesFrameLocation = term->frameLocation;
  1546                     unsigned alternativeFrameLocation = parenthesesFrameLocation;
  1547                     if (term->quantityType != QuantifierFixedCount)
  1548                         alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
  1549                     op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
  1552                 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
  1553                     // If the previous alternative matched without consuming characters then
  1554                     // backtrack to try to match while consumming some input.
  1555                     op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
  1558                 // If we reach here then the last alternative has matched - jump to the
  1559                 // End node, to skip over any further alternatives.
  1560                 //
  1561                 // FIXME: this is logically O(N^2) (though N can be expected to be very
  1562                 // small). We could avoid this either by adding an extra jump to the JIT
  1563                 // data structures, or by making backtracking code that jumps to Next
  1564                 // alternatives are responsible for checking that input is available (if
  1565                 // we didn't need to plant the input checks, then m_jumps would be free).
  1566                 YarrOp* endOp = &m_ops[op.m_nextOp];
  1567                 while (endOp->m_nextOp != notFound) {
  1568                     ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
  1569                     endOp = &m_ops[endOp->m_nextOp];
  1571                 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
  1572                 endOp->m_jumps.append(jump());
  1574                 // This is the entry point for the next alternative.
  1575                 op.m_reentry = label();
  1577                 // Calculate how much input we need to check for, and if non-zero check.
  1578                 op.m_checkAdjust = alternative->m_minimumSize;
  1579                 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
  1580                     op.m_checkAdjust -= disjunction->m_minimumSize;
  1581                 if (op.m_checkAdjust)
  1582                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
  1584                 YarrOp& lastOp = m_ops[op.m_previousOp];
  1585                 m_checked -= lastOp.m_checkAdjust;
  1586                 m_checked += op.m_checkAdjust;
  1587                 break;
  1589             case OpSimpleNestedAlternativeEnd:
  1590             case OpNestedAlternativeEnd: {
  1591                 PatternTerm* term = op.m_term;
  1593                 // In the non-simple case, store a 'return address' so we can backtrack correctly.
  1594                 if (op.m_op == OpNestedAlternativeEnd) {
  1595                     unsigned parenthesesFrameLocation = term->frameLocation;
  1596                     unsigned alternativeFrameLocation = parenthesesFrameLocation;
  1597                     if (term->quantityType != QuantifierFixedCount)
  1598                         alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
  1599                     op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
  1602                 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
  1603                     // If the previous alternative matched without consuming characters then
  1604                     // backtrack to try to match while consumming some input.
  1605                     op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
  1608                 // If this set of alternatives contains more than one alternative,
  1609                 // then the Next nodes will have planted jumps to the End, and added
  1610                 // them to this node's m_jumps list.
  1611                 op.m_jumps.link(this);
  1612                 op.m_jumps.clear();
  1614                 YarrOp& lastOp = m_ops[op.m_previousOp];
  1615                 m_checked -= lastOp.m_checkAdjust;
  1616                 break;
  1619             // OpParenthesesSubpatternOnceBegin/End
  1620             //
  1621             // These nodes support (optionally) capturing subpatterns, that have a
  1622             // quantity count of 1 (this covers fixed once, and ?/?? quantifiers).
  1623             case OpParenthesesSubpatternOnceBegin: {
  1624                 PatternTerm* term = op.m_term;
  1625                 unsigned parenthesesFrameLocation = term->frameLocation;
  1626                 const RegisterID indexTemporary = regT0;
  1627                 ASSERT(term->quantityCount == 1);
  1629                 // Upon entry to a Greedy quantified set of parenthese store the index.
  1630                 // We'll use this for two purposes:
  1631                 //  - To indicate which iteration we are on of mathing the remainder of
  1632                 //    the expression after the parentheses - the first, including the
  1633                 //    match within the parentheses, or the second having skipped over them.
  1634                 //  - To check for empty matches, which must be rejected.
  1635                 //
  1636                 // At the head of a NonGreedy set of parentheses we'll immediately set the
  1637                 // value on the stack to -1 (indicating a match skipping the subpattern),
  1638                 // and plant a jump to the end. We'll also plant a label to backtrack to
  1639                 // to reenter the subpattern later, with a store to set up index on the
  1640                 // second iteration.
  1641                 //
  1642                 // FIXME: for capturing parens, could use the index in the capture array?
  1643                 if (term->quantityType == QuantifierGreedy)
  1644                     storeToFrame(index, parenthesesFrameLocation);
  1645                 else if (term->quantityType == QuantifierNonGreedy) {
  1646                     storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
  1647                     op.m_jumps.append(jump());
  1648                     op.m_reentry = label();
  1649                     storeToFrame(index, parenthesesFrameLocation);
  1652                 // If the parenthese are capturing, store the starting index value to the
  1653                 // captures array, offsetting as necessary.
  1654                 //
  1655                 // FIXME: could avoid offsetting this value in JIT code, apply
  1656                 // offsets only afterwards, at the point the results array is
  1657                 // being accessed.
  1658                 if (term->capture() && compileMode == IncludeSubpatterns) {
  1659                     int inputOffset = term->inputPosition - m_checked;
  1660                     if (term->quantityType == QuantifierFixedCount)
  1661                         inputOffset -= term->parentheses.disjunction->m_minimumSize;
  1662                     if (inputOffset) {
  1663                         move(index, indexTemporary);
  1664                         add32(Imm32(inputOffset), indexTemporary);
  1665                         setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
  1666                     } else
  1667                         setSubpatternStart(index, term->parentheses.subpatternId);
  1669                 break;
  1671             case OpParenthesesSubpatternOnceEnd: {
  1672                 PatternTerm* term = op.m_term;
  1673                 const RegisterID indexTemporary = regT0;
  1674                 ASSERT(term->quantityCount == 1);
  1676 #ifndef NDEBUG
  1677                 // Runtime ASSERT to make sure that the nested alternative handled the
  1678                 // "no input consumed" check.
  1679                 if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
  1680                     Jump pastBreakpoint;
  1681                     pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
  1682                     breakpoint();
  1683                     pastBreakpoint.link(this);
  1685 #endif
  1687                 // If the parenthese are capturing, store the ending index value to the
  1688                 // captures array, offsetting as necessary.
  1689                 //
  1690                 // FIXME: could avoid offsetting this value in JIT code, apply
  1691                 // offsets only afterwards, at the point the results array is
  1692                 // being accessed.
  1693                 if (term->capture() && compileMode == IncludeSubpatterns) {
  1694                     int inputOffset = term->inputPosition - m_checked;
  1695                     if (inputOffset) {
  1696                         move(index, indexTemporary);
  1697                         add32(Imm32(inputOffset), indexTemporary);
  1698                         setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
  1699                     } else
  1700                         setSubpatternEnd(index, term->parentheses.subpatternId);
  1703                 // If the parentheses are quantified Greedy then add a label to jump back
  1704                 // to if get a failed match from after the parentheses. For NonGreedy
  1705                 // parentheses, link the jump from before the subpattern to here.
  1706                 if (term->quantityType == QuantifierGreedy)
  1707                     op.m_reentry = label();
  1708                 else if (term->quantityType == QuantifierNonGreedy) {
  1709                     YarrOp& beginOp = m_ops[op.m_previousOp];
  1710                     beginOp.m_jumps.link(this);
  1712                 break;
  1715             // OpParenthesesSubpatternTerminalBegin/End
  1716             case OpParenthesesSubpatternTerminalBegin: {
  1717                 PatternTerm* term = op.m_term;
  1718                 ASSERT(term->quantityType == QuantifierGreedy);
  1719                 ASSERT(term->quantityCount == quantifyInfinite);
  1720                 ASSERT(!term->capture());
  1722                 // Upon entry set a label to loop back to.
  1723                 op.m_reentry = label();
  1725                 // Store the start index of the current match; we need to reject zero
  1726                 // length matches.
  1727                 storeToFrame(index, term->frameLocation);
  1728                 break;
  1730             case OpParenthesesSubpatternTerminalEnd: {
  1731                 YarrOp& beginOp = m_ops[op.m_previousOp];
  1732 #ifndef NDEBUG
  1733                 PatternTerm* term = op.m_term;
  1735                 // Runtime ASSERT to make sure that the nested alternative handled the
  1736                 // "no input consumed" check.
  1737                 Jump pastBreakpoint;
  1738                 pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
  1739                 breakpoint();
  1740                 pastBreakpoint.link(this);
  1741 #endif
  1743                 // We know that the match is non-zero, we can accept it  and
  1744                 // loop back up to the head of the subpattern.
  1745                 jump(beginOp.m_reentry);
  1747                 // This is the entry point to jump to when we stop matching - we will
  1748                 // do so once the subpattern cannot match any more.
  1749                 op.m_reentry = label();
  1750                 break;
  1753             // OpParentheticalAssertionBegin/End
  1754             case OpParentheticalAssertionBegin: {
  1755                 PatternTerm* term = op.m_term;
  1757                 // Store the current index - assertions should not update index, so
  1758                 // we will need to restore it upon a successful match.
  1759                 unsigned parenthesesFrameLocation = term->frameLocation;
  1760                 storeToFrame(index, parenthesesFrameLocation);
  1762                 // Check
  1763                 op.m_checkAdjust = m_checked - term->inputPosition;
  1764                 if (op.m_checkAdjust)
  1765                     sub32(Imm32(op.m_checkAdjust), index);
  1767                 m_checked -= op.m_checkAdjust;
  1768                 break;
  1770             case OpParentheticalAssertionEnd: {
  1771                 PatternTerm* term = op.m_term;
  1773                 // Restore the input index value.
  1774                 unsigned parenthesesFrameLocation = term->frameLocation;
  1775                 loadFromFrame(parenthesesFrameLocation, index);
  1777                 // If inverted, a successful match of the assertion must be treated
  1778                 // as a failure, so jump to backtracking.
  1779                 if (term->invert()) {
  1780                     op.m_jumps.append(jump());
  1781                     op.m_reentry = label();
  1784                 YarrOp& lastOp = m_ops[op.m_previousOp];
  1785                 m_checked += lastOp.m_checkAdjust;
  1786                 break;
  1789             case OpMatchFailed:
  1790 #if !WTF_CPU_SPARC
  1791                 removeCallFrame();
  1792 #endif
  1793 #if WTF_CPU_X86_64
  1794                 move(TrustedImm32(int(WTF::notFound)), returnRegister);
  1795 #else
  1796                 move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
  1797                 move(TrustedImm32(0), returnRegister2);
  1798 #endif
  1799                 generateReturn();
  1800                 break;
  1803             ++opIndex;
  1804         } while (opIndex < m_ops.size());
  1806         return true;
  1809     bool backtrack()
  1811         // Backwards generate the backtracking code.
  1812         size_t opIndex = m_ops.size();
  1813         ASSERT(opIndex);
  1815         do {
  1816             --opIndex;
  1817             YarrOp& op = m_ops[opIndex];
  1818             switch (op.m_op) {
  1820             case OpTerm:
  1821                 if (!backtrackTerm(opIndex))
  1822                     return false;
  1823                 break;
  1825             // OpBodyAlternativeBegin/Next/End
  1826             //
  1827             // For each Begin/Next node representing an alternative, we need to decide what to do
  1828             // in two circumstances:
  1829             //  - If we backtrack back into this node, from within the alternative.
  1830             //  - If the input check at the head of the alternative fails (if this exists).
  1831             //
  1832             // We treat these two cases differently since in the former case we have slightly
  1833             // more information - since we are backtracking out of a prior alternative we know
  1834             // that at least enough input was available to run it. For example, given the regular
  1835             // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
  1836             // character match of 'a'), then we need not perform an additional input availability
  1837             // check before running the second alternative.
  1838             //
  1839             // Backtracking required differs for the last alternative, which in the case of the
  1840             // repeating set of alternatives must loop. The code generated for the last alternative
  1841             // will also be used to handle all input check failures from any prior alternatives -
  1842             // these require similar functionality, in seeking the next available alternative for
  1843             // which there is sufficient input.
  1844             //
  1845             // Since backtracking of all other alternatives simply requires us to link backtracks
  1846             // to the reentry point for the subsequent alternative, we will only be generating any
  1847             // code when backtracking the last alternative.
  1848             case OpBodyAlternativeBegin:
  1849             case OpBodyAlternativeNext: {
  1850                 PatternAlternative* alternative = op.m_alternative;
  1852                 if (op.m_op == OpBodyAlternativeNext) {
  1853                     PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
  1854                     m_checked += priorAlternative->m_minimumSize;
  1856                 m_checked -= alternative->m_minimumSize;
  1858                 // Is this the last alternative? If not, then if we backtrack to this point we just
  1859                 // need to jump to try to match the next alternative.
  1860                 if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
  1861                     m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
  1862                     break;
  1864                 YarrOp& endOp = m_ops[op.m_nextOp];
  1866                 YarrOp* beginOp = &op;
  1867                 while (beginOp->m_op != OpBodyAlternativeBegin) {
  1868                     ASSERT(beginOp->m_op == OpBodyAlternativeNext);
  1869                     beginOp = &m_ops[beginOp->m_previousOp];
  1872                 bool onceThrough = endOp.m_nextOp == notFound;
  1874                 // First, generate code to handle cases where we backtrack out of an attempted match
  1875                 // of the last alternative. If this is a 'once through' set of alternatives then we
  1876                 // have nothing to do - link this straight through to the End.
  1877                 if (onceThrough)
  1878                     m_backtrackingState.linkTo(endOp.m_reentry, this);
  1879                 else {
  1880                     // If we don't need to move the input poistion, and the pattern has a fixed size
  1881                     // (in which case we omit the store of the start index until the pattern has matched)
  1882                     // then we can just link the backtrack out of the last alternative straight to the
  1883                     // head of the first alternative.
  1884                     if (m_pattern.m_body->m_hasFixedSize
  1885                         && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
  1886                         && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
  1887                         m_backtrackingState.linkTo(beginOp->m_reentry, this);
  1888                     else {
  1889                         // We need to generate a trampoline of code to execute before looping back
  1890                         // around to the first alternative.
  1891                         m_backtrackingState.link(this);
  1893                         // If the pattern size is not fixed, then store the start index, for use if we match.
  1894                         if (!m_pattern.m_body->m_hasFixedSize) {
  1895                             if (alternative->m_minimumSize == 1)
  1896                                 setMatchStart(index);
  1897                             else {
  1898                                 move(index, regT0);
  1899                                 if (alternative->m_minimumSize)
  1900                                     sub32(Imm32(alternative->m_minimumSize - 1), regT0);
  1901                                 else
  1902                                     add32(TrustedImm32(1), regT0);
  1903                                 setMatchStart(regT0);
  1907                         // Generate code to loop. Check whether the last alternative is longer than the
  1908                         // first (e.g. /a|xy/ or /a|xyz/).
  1909                         if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
  1910                             // We want to loop, and increment input position. If the delta is 1, it is
  1911                             // already correctly incremented, if more than one then decrement as appropriate.
  1912                             unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
  1913                             ASSERT(delta);
  1914                             if (delta != 1)
  1915                                 sub32(Imm32(delta - 1), index);
  1916                             jump(beginOp->m_reentry);
  1917                         } else {
  1918                             // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
  1919                             // be sufficent input available to handle this, so just fall through.
  1920                             unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
  1921                             if (delta != 0xFFFFFFFFu) {
  1922                                 // We need to check input because we are incrementing the input.
  1923                                 add32(Imm32(delta + 1), index);
  1924                                 checkInput().linkTo(beginOp->m_reentry, this);
  1930                 // We can reach this point in the code in two ways:
  1931                 //  - Fallthrough from the code above (a repeating alternative backtracked out of its
  1932                 //    last alternative, and did not have sufficent input to run the first).
  1933                 //  - We will loop back up to the following label when a releating alternative loops,
  1934                 //    following a failed input check.
  1935                 //
  1936                 // Either way, we have just failed the input check for the first alternative.
  1937                 Label firstInputCheckFailed(this);
  1939                 // Generate code to handle input check failures from alternatives except the last.
  1940                 // prevOp is the alternative we're handling a bail out from (initially Begin), and
  1941                 // nextOp is the alternative we will be attempting to reenter into.
  1942                 //
  1943                 // We will link input check failures from the forwards matching path back to the code
  1944                 // that can handle them.
  1945                 YarrOp* prevOp = beginOp;
  1946                 YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
  1947                 while (nextOp->m_op != OpBodyAlternativeEnd) {
  1948                     prevOp->m_jumps.link(this);
  1950                     // We only get here if an input check fails, it is only worth checking again
  1951                     // if the next alternative has a minimum size less than the last.
  1952                     if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
  1953                         // FIXME: if we added an extra label to YarrOp, we could avoid needing to
  1954                         // subtract delta back out, and reduce this code. Should performance test
  1955                         // the benefit of this.
  1956                         unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
  1957                         sub32(Imm32(delta), index);
  1958                         Jump fail = jumpIfNoAvailableInput();
  1959                         add32(Imm32(delta), index);
  1960                         jump(nextOp->m_reentry);
  1961                         fail.link(this);
  1962                     } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
  1963                         add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
  1964                     prevOp = nextOp;
  1965                     nextOp = &m_ops[nextOp->m_nextOp];
  1968                 // We fall through to here if there is insufficient input to run the last alternative.
  1970                 // If there is insufficient input to run the last alternative, then for 'once through'
  1971                 // alternatives we are done - just jump back up into the forwards matching path at the End.
  1972                 if (onceThrough) {
  1973                     op.m_jumps.linkTo(endOp.m_reentry, this);
  1974                     jump(endOp.m_reentry);
  1975                     break;
  1978                 // For repeating alternatives, link any input check failure from the last alternative to
  1979                 // this point.
  1980                 op.m_jumps.link(this);
  1982                 bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
  1984                 // Check for cases where input position is already incremented by 1 for the last
  1985                 // alternative (this is particularly useful where the minimum size of the body
  1986                 // disjunction is 0, e.g. /a*|b/).
  1987                 if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
  1988                     // index is already incremented by 1, so just store it now!
  1989                     setMatchStart(index);
  1990                     needsToUpdateMatchStart = false;
  1993                 // Check whether there is sufficient input to loop. Increment the input position by
  1994                 // one, and check. Also add in the minimum disjunction size before checking - there
  1995                 // is no point in looping if we're just going to fail all the input checks around
  1996                 // the next iteration.
  1997                 ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
  1998                 if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
  1999                     // If the last alternative had the same minimum size as the disjunction,
  2000                     // just simply increment input pos by 1, no adjustment based on minimum size.
  2001                     add32(TrustedImm32(1), index);
  2002                 } else {
  2003                     // If the minumum for the last alternative was one greater than than that
  2004                     // for the disjunction, we're already progressed by 1, nothing to do!
  2005                     unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
  2006                     if (delta)
  2007                         sub32(Imm32(delta), index);
  2009                 Jump matchFailed = jumpIfNoAvailableInput();
  2011                 if (needsToUpdateMatchStart) {
  2012                     if (!m_pattern.m_body->m_minimumSize)
  2013                         setMatchStart(index);
  2014                     else {
  2015                         move(index, regT0);
  2016                         sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
  2017                         setMatchStart(regT0);
  2021                 // Calculate how much more input the first alternative requires than the minimum
  2022                 // for the body as a whole. If no more is needed then we dont need an additional
  2023                 // input check here - jump straight back up to the start of the first alternative.
  2024                 if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
  2025                     jump(beginOp->m_reentry);
  2026                 else {
  2027                     if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
  2028                         add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
  2029                     else
  2030                         sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
  2031                     checkInput().linkTo(beginOp->m_reentry, this);
  2032                     jump(firstInputCheckFailed);
  2035                 // We jump to here if we iterate to the point that there is insufficient input to
  2036                 // run any matches, and need to return a failure state from JIT code.
  2037                 matchFailed.link(this);
  2039 #if !WTF_CPU_SPARC
  2040                 removeCallFrame();
  2041 #endif
  2042 #if WTF_CPU_X86_64
  2043                 move(TrustedImm32(int(WTF::notFound)), returnRegister);
  2044 #else
  2045                 move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
  2046                 move(TrustedImm32(0), returnRegister2);
  2047 #endif
  2048                 generateReturn();
  2049                 break;
  2051             case OpBodyAlternativeEnd: {
  2052                 // We should never backtrack back into a body disjunction.
  2053                 ASSERT(m_backtrackingState.isEmpty());
  2055                 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
  2056                 m_checked += priorAlternative->m_minimumSize;
  2057                 break;
  2060             // OpSimpleNestedAlternativeBegin/Next/End
  2061             // OpNestedAlternativeBegin/Next/End
  2062             //
  2063             // Generate code for when we backtrack back out of an alternative into
  2064             // a Begin or Next node, or when the entry input count check fails. If
  2065             // there are more alternatives we need to jump to the next alternative,
  2066             // if not we backtrack back out of the current set of parentheses.
  2067             //
  2068             // In the case of non-simple nested assertions we need to also link the
  2069             // 'return address' appropriately to backtrack back out into the correct
  2070             // alternative.
  2071             case OpSimpleNestedAlternativeBegin:
  2072             case OpSimpleNestedAlternativeNext:
  2073             case OpNestedAlternativeBegin:
  2074             case OpNestedAlternativeNext: {
  2075                 YarrOp& nextOp = m_ops[op.m_nextOp];
  2076                 bool isBegin = op.m_previousOp == notFound;
  2077                 bool isLastAlternative = nextOp.m_nextOp == notFound;
  2078                 ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
  2079                 ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
  2081                 // Treat an input check failure the same as a failed match.
  2082                 m_backtrackingState.append(op.m_jumps);
  2084                 // Set the backtracks to jump to the appropriate place. We may need
  2085                 // to link the backtracks in one of three different way depending on
  2086                 // the type of alternative we are dealing with:
  2087                 //  - A single alternative, with no simplings.
  2088                 //  - The last alternative of a set of two or more.
  2089                 //  - An alternative other than the last of a set of two or more.
  2090                 //
  2091                 // In the case of a single alternative on its own, we don't need to
  2092                 // jump anywhere - if the alternative fails to match we can just
  2093                 // continue to backtrack out of the parentheses without jumping.
  2094                 //
  2095                 // In the case of the last alternative in a set of more than one, we
  2096                 // need to jump to return back out to the beginning. We'll do so by
  2097                 // adding a jump to the End node's m_jumps list, and linking this
  2098                 // when we come to generate the Begin node. For alternatives other
  2099                 // than the last, we need to jump to the next alternative.
  2100                 //
  2101                 // If the alternative had adjusted the input position we must link
  2102                 // backtracking to here, correct, and then jump on. If not we can
  2103                 // link the backtracks directly to their destination.
  2104                 if (op.m_checkAdjust) {
  2105                     // Handle the cases where we need to link the backtracks here.
  2106                     m_backtrackingState.link(this);
  2107                     sub32(Imm32(op.m_checkAdjust), index);
  2108                     if (!isLastAlternative) {
  2109                         // An alternative that is not the last should jump to its successor.
  2110                         jump(nextOp.m_reentry);
  2111                     } else if (!isBegin) {
  2112                         // The last of more than one alternatives must jump back to the beginning.
  2113                         nextOp.m_jumps.append(jump());
  2114                     } else {
  2115                         // A single alternative on its own can fall through.
  2116                         m_backtrackingState.fallthrough();
  2118                 } else {
  2119                     // Handle the cases where we can link the backtracks directly to their destinations.
  2120                     if (!isLastAlternative) {
  2121                         // An alternative that is not the last should jump to its successor.
  2122                         m_backtrackingState.linkTo(nextOp.m_reentry, this);
  2123                     } else if (!isBegin) {
  2124                         // The last of more than one alternatives must jump back to the beginning.
  2125                         m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
  2127                     // In the case of a single alternative on its own do nothing - it can fall through.
  2130                 // If there is a backtrack jump from a zero length match link it here.
  2131                 if (op.m_zeroLengthMatch.isSet())
  2132                     m_backtrackingState.append(op.m_zeroLengthMatch);
  2134                 // At this point we've handled the backtracking back into this node.
  2135                 // Now link any backtracks that need to jump to here.
  2137                 // For non-simple alternatives, link the alternative's 'return address'
  2138                 // so that we backtrack back out into the previous alternative.
  2139                 if (op.m_op == OpNestedAlternativeNext)
  2140                     m_backtrackingState.append(op.m_returnAddress);
  2142                 // If there is more than one alternative, then the last alternative will
  2143                 // have planted a jump to be linked to the end. This jump was added to the
  2144                 // End node's m_jumps list. If we are back at the beginning, link it here.
  2145                 if (isBegin) {
  2146                     YarrOp* endOp = &m_ops[op.m_nextOp];
  2147                     while (endOp->m_nextOp != notFound) {
  2148                         ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
  2149                         endOp = &m_ops[endOp->m_nextOp];
  2151                     ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
  2152                     m_backtrackingState.append(endOp->m_jumps);
  2155                 if (!isBegin) {
  2156                     YarrOp& lastOp = m_ops[op.m_previousOp];
  2157                     m_checked += lastOp.m_checkAdjust;
  2159                 m_checked -= op.m_checkAdjust;
  2160                 break;
  2162             case OpSimpleNestedAlternativeEnd:
  2163             case OpNestedAlternativeEnd: {
  2164                 PatternTerm* term = op.m_term;
  2166                 // If there is a backtrack jump from a zero length match link it here.
  2167                 if (op.m_zeroLengthMatch.isSet())
  2168                     m_backtrackingState.append(op.m_zeroLengthMatch);
  2170                 // If we backtrack into the end of a simple subpattern do nothing;
  2171                 // just continue through into the last alternative. If we backtrack
  2172                 // into the end of a non-simple set of alterntives we need to jump
  2173                 // to the backtracking return address set up during generation.
  2174                 if (op.m_op == OpNestedAlternativeEnd) {
  2175                     m_backtrackingState.link(this);
  2177                     // Plant a jump to the return address.
  2178                     unsigned parenthesesFrameLocation = term->frameLocation;
  2179                     unsigned alternativeFrameLocation = parenthesesFrameLocation;
  2180                     if (term->quantityType != QuantifierFixedCount)
  2181                         alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
  2182                     loadFromFrameAndJump(alternativeFrameLocation);
  2184                     // Link the DataLabelPtr associated with the end of the last
  2185                     // alternative to this point.
  2186                     m_backtrackingState.append(op.m_returnAddress);
  2189                 YarrOp& lastOp = m_ops[op.m_previousOp];
  2190                 m_checked += lastOp.m_checkAdjust;
  2191                 break;
  2194             // OpParenthesesSubpatternOnceBegin/End
  2195             //
  2196             // When we are backtracking back out of a capturing subpattern we need
  2197             // to clear the start index in the matches output array, to record that
  2198             // this subpattern has not been captured.
  2199             //
  2200             // When backtracking back out of a Greedy quantified subpattern we need
  2201             // to catch this, and try running the remainder of the alternative after
  2202             // the subpattern again, skipping the parentheses.
  2203             //
  2204             // Upon backtracking back into a quantified set of parentheses we need to
  2205             // check whether we were currently skipping the subpattern. If not, we
  2206             // can backtrack into them, if we were we need to either backtrack back
  2207             // out of the start of the parentheses, or jump back to the forwards
  2208             // matching start, depending of whether the match is Greedy or NonGreedy.
  2209             case OpParenthesesSubpatternOnceBegin: {
  2210                 PatternTerm* term = op.m_term;
  2211                 ASSERT(term->quantityCount == 1);
  2213                 // We only need to backtrack to thispoint if capturing or greedy.
  2214                 if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) {
  2215                     m_backtrackingState.link(this);
  2217                     // If capturing, clear the capture (we only need to reset start).
  2218                     if (term->capture() && compileMode == IncludeSubpatterns)
  2219                         clearSubpatternStart(term->parentheses.subpatternId);
  2221                     // If Greedy, jump to the end.
  2222                     if (term->quantityType == QuantifierGreedy) {
  2223                         // Clear the flag in the stackframe indicating we ran through the subpattern.
  2224                         unsigned parenthesesFrameLocation = term->frameLocation;
  2225                         storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
  2226                         // Jump to after the parentheses, skipping the subpattern.
  2227                         jump(m_ops[op.m_nextOp].m_reentry);
  2228                         // A backtrack from after the parentheses, when skipping the subpattern,
  2229                         // will jump back to here.
  2230                         op.m_jumps.link(this);
  2233                     m_backtrackingState.fallthrough();
  2235                 break;
  2237             case OpParenthesesSubpatternOnceEnd: {
  2238                 PatternTerm* term = op.m_term;
  2240                 if (term->quantityType != QuantifierFixedCount) {
  2241                     m_backtrackingState.link(this);
  2243                     // Check whether we should backtrack back into the parentheses, or if we
  2244                     // are currently in a state where we had skipped over the subpattern
  2245                     // (in which case the flag value on the stack will be -1).
  2246                     unsigned parenthesesFrameLocation = term->frameLocation;
  2247                     Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1));
  2249                     if (term->quantityType == QuantifierGreedy) {
  2250                         // For Greedy parentheses, we skip after having already tried going
  2251                         // through the subpattern, so if we get here we're done.
  2252                         YarrOp& beginOp = m_ops[op.m_previousOp];
  2253                         beginOp.m_jumps.append(hadSkipped);
  2254                     } else {
  2255                         // For NonGreedy parentheses, we try skipping the subpattern first,
  2256                         // so if we get here we need to try running through the subpattern
  2257                         // next. Jump back to the start of the parentheses in the forwards
  2258                         // matching path.
  2259                         ASSERT(term->quantityType == QuantifierNonGreedy);
  2260                         YarrOp& beginOp = m_ops[op.m_previousOp];
  2261                         hadSkipped.linkTo(beginOp.m_reentry, this);
  2264                     m_backtrackingState.fallthrough();
  2267                 m_backtrackingState.append(op.m_jumps);
  2268                 break;
  2271             // OpParenthesesSubpatternTerminalBegin/End
  2272             //
  2273             // Terminal subpatterns will always match - there is nothing after them to
  2274             // force a backtrack, and they have a minimum count of 0, and as such will
  2275             // always produce an acceptable result.
  2276             case OpParenthesesSubpatternTerminalBegin: {
  2277                 // We will backtrack to this point once the subpattern cannot match any
  2278                 // more. Since no match is accepted as a successful match (we are Greedy
  2279                 // quantified with a minimum of zero) jump back to the forwards matching
  2280                 // path at the end.
  2281                 YarrOp& endOp = m_ops[op.m_nextOp];
  2282                 m_backtrackingState.linkTo(endOp.m_reentry, this);
  2283                 break;
  2285             case OpParenthesesSubpatternTerminalEnd:
  2286                 // We should never be backtracking to here (hence the 'terminal' in the name).
  2287                 ASSERT(m_backtrackingState.isEmpty());
  2288                 m_backtrackingState.append(op.m_jumps);
  2289                 break;
  2291             // OpParentheticalAssertionBegin/End
  2292             case OpParentheticalAssertionBegin: {
  2293                 PatternTerm* term = op.m_term;
  2294                 YarrOp& endOp = m_ops[op.m_nextOp];
  2296                 // We need to handle the backtracks upon backtracking back out
  2297                 // of a parenthetical assertion if either we need to correct
  2298                 // the input index, or the assertion was inverted.
  2299                 if (op.m_checkAdjust || term->invert()) {
  2300                      m_backtrackingState.link(this);
  2302                     if (op.m_checkAdjust)
  2303                         add32(Imm32(op.m_checkAdjust), index);
  2305                     // In an inverted assertion failure to match the subpattern
  2306                     // is treated as a successful match - jump to the end of the
  2307                     // subpattern. We already have adjusted the input position
  2308                     // back to that before the assertion, which is correct.
  2309                     if (term->invert())
  2310                         jump(endOp.m_reentry);
  2312                     m_backtrackingState.fallthrough();
  2315                 // The End node's jump list will contain any backtracks into
  2316                 // the end of the assertion. Also, if inverted, we will have
  2317                 // added the failure caused by a successful match to this.
  2318                 m_backtrackingState.append(endOp.m_jumps);
  2320                 m_checked += op.m_checkAdjust;
  2321                 break;
  2323             case OpParentheticalAssertionEnd: {
  2324                 // FIXME: We should really be clearing any nested subpattern
  2325                 // matches on bailing out from after the pattern. Firefox has
  2326                 // this bug too (presumably because they use YARR!)
  2328                 // Never backtrack into an assertion; later failures bail to before the begin.
  2329                 m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);
  2331                 YarrOp& lastOp = m_ops[op.m_previousOp];
  2332                 m_checked -= lastOp.m_checkAdjust;
  2333                 break;
  2336             case OpMatchFailed:
  2337                 break;
  2340         } while (opIndex);
  2342         return true;
  2345     // Compilation methods:
  2346     // ====================
  2348     // opCompileParenthesesSubpattern
  2349     // Emits ops for a subpattern (set of parentheses). These consist
  2350     // of a set of alternatives wrapped in an outer set of nodes for
  2351     // the parentheses.
  2352     // Supported types of parentheses are 'Once' (quantityCount == 1)
  2353     // and 'Terminal' (non-capturing parentheses quantified as greedy
  2354     // and infinite).
  2355     // Alternatives will use the 'Simple' set of ops if either the
  2356     // subpattern is terminal (in which case we will never need to
  2357     // backtrack), or if the subpattern only contains one alternative.
  2358     void opCompileParenthesesSubpattern(PatternTerm* term)
  2360         YarrOpCode parenthesesBeginOpCode;
  2361         YarrOpCode parenthesesEndOpCode;
  2362         YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
  2363         YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
  2364         YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
  2366         // We can currently only compile quantity 1 subpatterns that are
  2367         // not copies. We generate a copy in the case of a range quantifier,
  2368         // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
  2369         // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ repectively). The problem
  2370         // comes where the subpattern is capturing, in which case we would
  2371         // need to restore the capture from the first subpattern upon a
  2372         // failure in the second.
  2373         if (term->quantityCount == 1 && !term->parentheses.isCopy) {
  2374             // Select the 'Once' nodes.
  2375             parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
  2376             parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
  2378             // If there is more than one alternative we cannot use the 'simple' nodes.
  2379             if (term->parentheses.disjunction->m_alternatives.size() != 1) {
  2380                 alternativeBeginOpCode = OpNestedAlternativeBegin;
  2381                 alternativeNextOpCode = OpNestedAlternativeNext;
  2382                 alternativeEndOpCode = OpNestedAlternativeEnd;
  2384         } else if (term->parentheses.isTerminal) {
  2385             // Terminal groups are optimized on the assumption that matching will never
  2386             // backtrack into the terminal group. But this is false if there is more
  2387             // than one alternative and one of the alternatives can match empty. In that
  2388             // case, the empty match is counted as a failure, so we would need to backtrack.
  2389             // The backtracking code doesn't handle this case correctly, so we fall back
  2390             // to the interpreter.
  2391             Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives;
  2392             if (alternatives.size() != 1) {
  2393                 for (unsigned i = 0; i < alternatives.size(); ++i) {
  2394                     if (alternatives[i]->m_minimumSize == 0) {
  2395                         m_shouldFallBack = true;
  2396                         return;
  2401             // Select the 'Terminal' nodes.
  2402             parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
  2403             parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
  2404         } else {
  2405             // This subpattern is not supported by the JIT.
  2406             m_shouldFallBack = true;
  2407             return;
  2410         size_t parenBegin = m_ops.size();
  2411         m_ops.append(parenthesesBeginOpCode);
  2413         m_ops.append(alternativeBeginOpCode);
  2414         m_ops.last().m_previousOp = notFound;
  2415         m_ops.last().m_term = term;
  2416         Vector<PatternAlternative*>& alternatives =  term->parentheses.disjunction->m_alternatives;
  2417         for (unsigned i = 0; i < alternatives.size(); ++i) {
  2418             size_t lastOpIndex = m_ops.size() - 1;
  2420             PatternAlternative* nestedAlternative = alternatives[i];
  2421             opCompileAlternative(nestedAlternative);
  2423             size_t thisOpIndex = m_ops.size();
  2424             m_ops.append(YarrOp(alternativeNextOpCode));
  2426             YarrOp& lastOp = m_ops[lastOpIndex];
  2427             YarrOp& thisOp = m_ops[thisOpIndex];
  2429             lastOp.m_alternative = nestedAlternative;
  2430             lastOp.m_nextOp = thisOpIndex;
  2431             thisOp.m_previousOp = lastOpIndex;
  2432             thisOp.m_term = term;
  2434         YarrOp& lastOp = m_ops.last();
  2435         ASSERT(lastOp.m_op == alternativeNextOpCode);
  2436         lastOp.m_op = alternativeEndOpCode;
  2437         lastOp.m_alternative = 0;
  2438         lastOp.m_nextOp = notFound;
  2440         size_t parenEnd = m_ops.size();
  2441         m_ops.append(parenthesesEndOpCode);
  2443         m_ops[parenBegin].m_term = term;
  2444         m_ops[parenBegin].m_previousOp = notFound;
  2445         m_ops[parenBegin].m_nextOp = parenEnd;
  2446         m_ops[parenEnd].m_term = term;
  2447         m_ops[parenEnd].m_previousOp = parenBegin;
  2448         m_ops[parenEnd].m_nextOp = notFound;
  2451     // opCompileParentheticalAssertion
  2452     // Emits ops for a parenthetical assertion. These consist of an
  2453     // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
  2454     // the alternatives, with these wrapped by an outer pair of
  2455     // OpParentheticalAssertionBegin/End nodes.
  2456     // We can always use the OpSimpleNestedAlternative nodes in the
  2457     // case of parenthetical assertions since these only ever match
  2458     // once, and will never backtrack back into the assertion.
  2459     void opCompileParentheticalAssertion(PatternTerm* term)
  2461         size_t parenBegin = m_ops.size();
  2462         m_ops.append(OpParentheticalAssertionBegin);
  2464         m_ops.append(OpSimpleNestedAlternativeBegin);
  2465         m_ops.last().m_previousOp = notFound;
  2466         m_ops.last().m_term = term;
  2467         Vector<PatternAlternative*>& alternatives =  term->parentheses.disjunction->m_alternatives;
  2468         for (unsigned i = 0; i < alternatives.size(); ++i) {
  2469             size_t lastOpIndex = m_ops.size() - 1;
  2471             PatternAlternative* nestedAlternative = alternatives[i];
  2472             opCompileAlternative(nestedAlternative);
  2474             size_t thisOpIndex = m_ops.size();
  2475             m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
  2477             YarrOp& lastOp = m_ops[lastOpIndex];
  2478             YarrOp& thisOp = m_ops[thisOpIndex];
  2480             lastOp.m_alternative = nestedAlternative;
  2481             lastOp.m_nextOp = thisOpIndex;
  2482             thisOp.m_previousOp = lastOpIndex;
  2483             thisOp.m_term = term;
  2485         YarrOp& lastOp = m_ops.last();
  2486         ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
  2487         lastOp.m_op = OpSimpleNestedAlternativeEnd;
  2488         lastOp.m_alternative = 0;
  2489         lastOp.m_nextOp = notFound;
  2491         size_t parenEnd = m_ops.size();
  2492         m_ops.append(OpParentheticalAssertionEnd);
  2494         m_ops[parenBegin].m_term = term;
  2495         m_ops[parenBegin].m_previousOp = notFound;
  2496         m_ops[parenBegin].m_nextOp = parenEnd;
  2497         m_ops[parenEnd].m_term = term;
  2498         m_ops[parenEnd].m_previousOp = parenBegin;
  2499         m_ops[parenEnd].m_nextOp = notFound;
  2502     // opCompileAlternative
  2503     // Called to emit nodes for all terms in an alternative.
  2504     void opCompileAlternative(PatternAlternative* alternative)
  2506         optimizeAlternative(alternative);
  2508         for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
  2509             PatternTerm* term = &alternative->m_terms[i];
  2511             switch (term->type) {
  2512             case PatternTerm::TypeParenthesesSubpattern:
  2513                 opCompileParenthesesSubpattern(term);
  2514                 break;
  2516             case PatternTerm::TypeParentheticalAssertion:
  2517                 opCompileParentheticalAssertion(term);
  2518                 break;
  2520             default:
  2521                 m_ops.append(term);
  2526     // opCompileBody
  2527     // This method compiles the body disjunction of the regular expression.
  2528     // The body consists of two sets of alternatives - zero or more 'once
  2529     // through' (BOL anchored) alternatives, followed by zero or more
  2530     // repeated alternatives.
  2531     // For each of these two sets of alteratives, if not empty they will be
  2532     // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
  2533     // 'begin' node referencing the first alternative, and 'next' nodes
  2534     // referencing any further alternatives. The begin/next/end nodes are
  2535     // linked together in a doubly linked list. In the case of repeating
  2536     // alternatives, the end node is also linked back to the beginning.
  2537     // If no repeating alternatives exist, then a OpMatchFailed node exists
  2538     // to return the failing result.
  2539     void opCompileBody(PatternDisjunction* disjunction)
  2541         Vector<PatternAlternative*>& alternatives =  disjunction->m_alternatives;
  2542         size_t currentAlternativeIndex = 0;
  2544         // Emit the 'once through' alternatives.
  2545         if (alternatives.size() && alternatives[0]->onceThrough()) {
  2546             m_ops.append(YarrOp(OpBodyAlternativeBegin));
  2547             m_ops.last().m_previousOp = notFound;
  2549             do {
  2550                 size_t lastOpIndex = m_ops.size() - 1;
  2551                 PatternAlternative* alternative = alternatives[currentAlternativeIndex];
  2552                 opCompileAlternative(alternative);
  2554                 size_t thisOpIndex = m_ops.size();
  2555                 m_ops.append(YarrOp(OpBodyAlternativeNext));
  2557                 YarrOp& lastOp = m_ops[lastOpIndex];
  2558                 YarrOp& thisOp = m_ops[thisOpIndex];
  2560                 lastOp.m_alternative = alternative;
  2561                 lastOp.m_nextOp = thisOpIndex;
  2562                 thisOp.m_previousOp = lastOpIndex;
  2564                 ++currentAlternativeIndex;
  2565             } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());
  2567             YarrOp& lastOp = m_ops.last();
  2569             ASSERT(lastOp.m_op == OpBodyAlternativeNext);
  2570             lastOp.m_op = OpBodyAlternativeEnd;
  2571             lastOp.m_alternative = 0;
  2572             lastOp.m_nextOp = notFound;
  2575         if (currentAlternativeIndex == alternatives.size()) {
  2576             m_ops.append(YarrOp(OpMatchFailed));
  2577             return;
  2580         // Emit the repeated alternatives.
  2581         size_t repeatLoop = m_ops.size();
  2582         m_ops.append(YarrOp(OpBodyAlternativeBegin));
  2583         m_ops.last().m_previousOp = notFound;
  2584         do {
  2585             size_t lastOpIndex = m_ops.size() - 1;
  2586             PatternAlternative* alternative = alternatives[currentAlternativeIndex];
  2587             ASSERT(!alternative->onceThrough());
  2588             opCompileAlternative(alternative);
  2590             size_t thisOpIndex = m_ops.size();
  2591             m_ops.append(YarrOp(OpBodyAlternativeNext));
  2593             YarrOp& lastOp = m_ops[lastOpIndex];
  2594             YarrOp& thisOp = m_ops[thisOpIndex];
  2596             lastOp.m_alternative = alternative;
  2597             lastOp.m_nextOp = thisOpIndex;
  2598             thisOp.m_previousOp = lastOpIndex;
  2600             ++currentAlternativeIndex;
  2601         } while (currentAlternativeIndex < alternatives.size());
  2602         YarrOp& lastOp = m_ops.last();
  2603         ASSERT(lastOp.m_op == OpBodyAlternativeNext);
  2604         lastOp.m_op = OpBodyAlternativeEnd;
  2605         lastOp.m_alternative = 0;
  2606         lastOp.m_nextOp = repeatLoop;
  2609     void generateEnter()
  2611 #if WTF_CPU_X86_64
  2612         push(X86Registers::ebp);
  2613         move(stackPointerRegister, X86Registers::ebp);
  2614         push(X86Registers::ebx);
  2615         // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
  2616         zeroExtend32ToPtr(index, index);
  2617         zeroExtend32ToPtr(length, length);
  2618 #elif WTF_CPU_X86
  2619         push(X86Registers::ebp);
  2620         move(stackPointerRegister, X86Registers::ebp);
  2621         // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
  2622         push(X86Registers::ebx);
  2623         push(X86Registers::edi);
  2624         push(X86Registers::esi);
  2625         // load output into edi (2 = saved ebp + return address).
  2626 # if WTF_COMPILER_MSVC || WTF_COMPILER_SUNCC
  2627         loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
  2628         loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
  2629         loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
  2630         if (compileMode == IncludeSubpatterns)
  2631             loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
  2632 # else
  2633         if (compileMode == IncludeSubpatterns)
  2634             loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
  2635 # endif
  2636 #elif WTF_CPU_ARM
  2637         push(ARMRegisters::r4);
  2638         push(ARMRegisters::r5);
  2639         push(ARMRegisters::r6);
  2640 # if WTF_CPU_ARM_TRADITIONAL
  2641         push(ARMRegisters::r8); // scratch register
  2642 # endif
  2643         if (compileMode == IncludeSubpatterns)
  2644             move(ARMRegisters::r3, output);
  2645 #elif WTF_CPU_SH4
  2646         push(SH4Registers::r11);
  2647         push(SH4Registers::r13);
  2648 #elif WTF_CPU_SPARC
  2649         save(Imm32(-m_pattern.m_body->m_callFrameSize * sizeof(void*)));
  2650 #elif WTF_CPU_MIPS
  2651         // Do nothing.
  2652 #endif
  2655     void generateReturn()
  2657 #if WTF_CPU_X86_64
  2658         pop(X86Registers::ebx);
  2659         pop(X86Registers::ebp);
  2660 #elif WTF_CPU_X86
  2661         pop(X86Registers::esi);
  2662         pop(X86Registers::edi);
  2663         pop(X86Registers::ebx);
  2664         pop(X86Registers::ebp);
  2665 #elif WTF_CPU_ARM
  2666 # if WTF_CPU_ARM_TRADITIONAL
  2667         pop(ARMRegisters::r8); // scratch register
  2668 # endif
  2669         pop(ARMRegisters::r6);
  2670         pop(ARMRegisters::r5);
  2671         pop(ARMRegisters::r4);
  2672 #elif WTF_CPU_SH4
  2673         pop(SH4Registers::r13);
  2674         pop(SH4Registers::r11);
  2675 #elif WTF_CPU_SPARC
  2676         ret_and_restore();
  2677         return;
  2678 #elif WTF_CPU_MIPS
  2679         // Do nothing
  2680 #endif
  2681         ret();
  2684 public:
  2685     YarrGenerator(YarrPattern& pattern, YarrCharSize charSize)
  2686         : m_pattern(pattern)
  2687         , m_charSize(charSize)
  2688         , m_charScale(m_charSize == Char8 ? TimesOne: TimesTwo)
  2689         , m_shouldFallBack(false)
  2690         , m_checked(0)
  2694     void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject)
  2696         generateEnter();
  2698         Jump hasInput = checkInput();
  2699 #if WTF_CPU_X86_64
  2700         move(TrustedImm32(int(WTF::notFound)), returnRegister);
  2701 #else
  2702         move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
  2703         move(TrustedImm32(0), returnRegister2);
  2704 #endif
  2705         generateReturn();
  2706         hasInput.link(this);
  2708         if (compileMode == IncludeSubpatterns) {
  2709             for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i)
  2710                 store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int)));
  2713         if (!m_pattern.m_body->m_hasFixedSize)
  2714             setMatchStart(index);
  2716         initCallFrame();
  2718         // Compile the pattern to the internal 'YarrOp' representation.
  2719         opCompileBody(m_pattern.m_body);
  2721         // If we encountered anything we can't handle in the JIT code
  2722         // (e.g. backreferences) then return early.
  2723         if (m_shouldFallBack) {
  2724             jitObject.setFallBack(true);
  2725             return;
  2728         if (!generate() || !backtrack()) {
  2729             jitObject.setFallBack(true);
  2730             return;
  2733         // Link & finalize the code.
  2734         ExecutablePool *pool;
  2735         bool ok;
  2736         LinkBuffer linkBuffer(this, globalData->regexAllocator, &pool, &ok, REGEXP_CODE);
  2738         // Attempt to detect OOM during linkBuffer creation.
  2739         if (linkBuffer.unsafeCode() == nullptr) {
  2740             jitObject.setFallBack(true);
  2741             return;
  2744         m_backtrackingState.linkDataLabels(linkBuffer);
  2746         if (compileMode == MatchOnly) {
  2747 #if YARR_8BIT_CHAR_SUPPORT
  2748             if (m_charSize == Char8)
  2749                 jitObject.set8BitCodeMatchOnly(linkBuffer.finalizeCode());
  2750             else
  2751 #endif
  2752                 jitObject.set16BitCodeMatchOnly(linkBuffer.finalizeCode());
  2753         } else {
  2754 #if YARR_8BIT_CHAR_SUPPORT
  2755             if (m_charSize == Char8)
  2756                 jitObject.set8BitCode(linkBuffer.finalizeCode());
  2757             else
  2758 #endif
  2759                 jitObject.set16BitCode(linkBuffer.finalizeCode());
  2761         jitObject.setFallBack(m_shouldFallBack);
  2764 private:
  2765     YarrPattern& m_pattern;
  2767     YarrCharSize m_charSize;
  2769     Scale m_charScale;
  2771     // Used to detect regular expression constructs that are not currently
  2772     // supported in the JIT; fall back to the interpreter when this is detected.
  2773     bool m_shouldFallBack;
  2775     // The regular expression expressed as a linear sequence of operations.
  2776     Vector<YarrOp, 128> m_ops;
  2778     // This records the current input offset being applied due to the current
  2779     // set of alternatives we are nested within. E.g. when matching the
  2780     // character 'b' within the regular expression /abc/, we will know that
  2781     // the minimum size for the alternative is 3, checked upon entry to the
  2782     // alternative, and that 'b' is at offset 1 from the start, and as such
  2783     // when matching 'b' we need to apply an offset of -2 to the load.
  2784     //
  2785     // FIXME: This should go away. Rather than tracking this value throughout
  2786     // code generation, we should gather this information up front & store it
  2787     // on the YarrOp structure.
  2788     int m_checked;
  2790     // This class records state whilst generating the backtracking path of code.
  2791     BacktrackingState m_backtrackingState;
  2792 };
  2794 void jitCompile(YarrPattern& pattern, YarrCharSize charSize, JSGlobalData* globalData, YarrCodeBlock& jitObject, YarrJITCompileMode mode)
  2796     if (mode == MatchOnly)
  2797         YarrGenerator<MatchOnly>(pattern, charSize).compile(globalData, jitObject);
  2798     else
  2799         YarrGenerator<IncludeSubpatterns>(pattern, charSize).compile(globalData, jitObject);
  2802 }}
  2804 #endif

mercurial