michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- michael@0: * vim: set ts=8 sts=4 et sw=4 tw=99: michael@0: * michael@0: * Copyright (C) 2009 Apple Inc. All rights reserved. michael@0: * michael@0: * Redistribution and use in source and binary forms, with or without michael@0: * modification, are permitted provided that the following conditions michael@0: * are met: michael@0: * 1. Redistributions of source code must retain the above copyright michael@0: * notice, this list of conditions and the following disclaimer. michael@0: * 2. Redistributions in binary form must reproduce the above copyright michael@0: * notice, this list of conditions and the following disclaimer in the michael@0: * documentation and/or other materials provided with the distribution. michael@0: * michael@0: * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY michael@0: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE michael@0: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR michael@0: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR michael@0: * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, michael@0: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, michael@0: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR michael@0: * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY michael@0: * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT michael@0: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE michael@0: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
michael@0: */ michael@0: michael@0: #include "yarr/YarrJIT.h" michael@0: michael@0: #include "assembler/assembler/LinkBuffer.h" michael@0: #include "yarr/Yarr.h" michael@0: #include "yarr/YarrCanonicalizeUCS2.h" michael@0: michael@0: #if ENABLE_YARR_JIT michael@0: michael@0: using namespace WTF; michael@0: michael@0: namespace JSC { namespace Yarr { michael@0: michael@0: template michael@0: class YarrGenerator : private MacroAssembler { michael@0: friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const String& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline); michael@0: michael@0: #if WTF_CPU_ARM michael@0: static const RegisterID input = ARMRegisters::r0; michael@0: static const RegisterID index = ARMRegisters::r1; michael@0: static const RegisterID length = ARMRegisters::r2; michael@0: static const RegisterID output = ARMRegisters::r4; michael@0: michael@0: static const RegisterID regT0 = ARMRegisters::r5; michael@0: static const RegisterID regT1 = ARMRegisters::r6; michael@0: michael@0: static const RegisterID returnRegister = ARMRegisters::r0; michael@0: static const RegisterID returnRegister2 = ARMRegisters::r1; michael@0: #elif WTF_CPU_MIPS michael@0: static const RegisterID input = MIPSRegisters::a0; michael@0: static const RegisterID index = MIPSRegisters::a1; michael@0: static const RegisterID length = MIPSRegisters::a2; michael@0: static const RegisterID output = MIPSRegisters::a3; michael@0: michael@0: static const RegisterID regT0 = MIPSRegisters::t4; michael@0: static const RegisterID regT1 = MIPSRegisters::t5; michael@0: michael@0: static const RegisterID returnRegister = MIPSRegisters::v0; michael@0: static const RegisterID returnRegister2 = MIPSRegisters::v1; michael@0: #elif WTF_CPU_SH4 michael@0: static const RegisterID input = SH4Registers::r4; michael@0: static const RegisterID index = SH4Registers::r5; michael@0: static const RegisterID length = SH4Registers::r6; michael@0: static const 
RegisterID output = SH4Registers::r7; michael@0: michael@0: static const RegisterID regT0 = SH4Registers::r0; michael@0: static const RegisterID regT1 = SH4Registers::r1; michael@0: michael@0: static const RegisterID returnRegister = SH4Registers::r0; michael@0: static const RegisterID returnRegister2 = SH4Registers::r1; michael@0: #elif WTF_CPU_SPARC michael@0: static const RegisterID input = SparcRegisters::i0; michael@0: static const RegisterID index = SparcRegisters::i1; michael@0: static const RegisterID length = SparcRegisters::i2; michael@0: static const RegisterID output = SparcRegisters::i3; michael@0: michael@0: static const RegisterID regT0 = SparcRegisters::i4; michael@0: static const RegisterID regT1 = SparcRegisters::i5; michael@0: michael@0: static const RegisterID returnRegister = SparcRegisters::i0; michael@0: #elif WTF_CPU_X86 michael@0: static const RegisterID input = X86Registers::eax; michael@0: static const RegisterID index = X86Registers::edx; michael@0: static const RegisterID length = X86Registers::ecx; michael@0: static const RegisterID output = X86Registers::edi; michael@0: michael@0: static const RegisterID regT0 = X86Registers::ebx; michael@0: static const RegisterID regT1 = X86Registers::esi; michael@0: michael@0: static const RegisterID returnRegister = X86Registers::eax; michael@0: static const RegisterID returnRegister2 = X86Registers::edx; michael@0: #elif WTF_CPU_X86_64 michael@0: # if WTF_PLATFORM_WIN michael@0: static const RegisterID input = X86Registers::ecx; michael@0: static const RegisterID index = X86Registers::edx; michael@0: static const RegisterID length = X86Registers::r8; michael@0: static const RegisterID output = X86Registers::r9; michael@0: # else michael@0: static const RegisterID input = X86Registers::edi; michael@0: static const RegisterID index = X86Registers::esi; michael@0: static const RegisterID length = X86Registers::edx; michael@0: static const RegisterID output = X86Registers::ecx; michael@0: # endif 
michael@0: michael@0: static const RegisterID regT0 = X86Registers::eax; michael@0: static const RegisterID regT1 = X86Registers::ebx; michael@0: michael@0: static const RegisterID returnRegister = X86Registers::eax; michael@0: michael@0: # if !WTF_PLATFORM_WIN michael@0: // no way to use int128_t as return value on Win64 ABI michael@0: static const RegisterID returnRegister2 = X86Registers::edx; michael@0: # endif michael@0: #endif michael@0: michael@0: void optimizeAlternative(PatternAlternative* alternative) michael@0: { michael@0: if (!alternative->m_terms.size()) michael@0: return; michael@0: michael@0: for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) { michael@0: PatternTerm& term = alternative->m_terms[i]; michael@0: PatternTerm& nextTerm = alternative->m_terms[i + 1]; michael@0: michael@0: if ((term.type == PatternTerm::TypeCharacterClass) michael@0: && (term.quantityType == QuantifierFixedCount) michael@0: && (nextTerm.type == PatternTerm::TypePatternCharacter) michael@0: && (nextTerm.quantityType == QuantifierFixedCount)) { michael@0: PatternTerm termCopy = term; michael@0: alternative->m_terms[i] = nextTerm; michael@0: alternative->m_terms[i + 1] = termCopy; michael@0: } michael@0: } michael@0: } michael@0: michael@0: void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount) michael@0: { michael@0: do { michael@0: // pick which range we're going to generate michael@0: int which = count >> 1; michael@0: char lo = ranges[which].begin; michael@0: char hi = ranges[which].end; michael@0: michael@0: // check if there are any ranges or matches below lo. If not, just jl to failure - michael@0: // if there is anything else to check, check that first, if it falls through jmp to failure. 
michael@0: if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { michael@0: Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); michael@0: michael@0: // generate code for all ranges before this one michael@0: if (which) michael@0: matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); michael@0: michael@0: while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { michael@0: matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex]))); michael@0: ++*matchIndex; michael@0: } michael@0: failures.append(jump()); michael@0: michael@0: loOrAbove.link(this); michael@0: } else if (which) { michael@0: Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); michael@0: michael@0: matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); michael@0: failures.append(jump()); michael@0: michael@0: loOrAbove.link(this); michael@0: } else michael@0: failures.append(branch32(LessThan, character, Imm32((unsigned short)lo))); michael@0: michael@0: while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi)) michael@0: ++*matchIndex; michael@0: michael@0: matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi))); michael@0: // fall through to here, the value is above hi. michael@0: michael@0: // shuffle along & loop around if there are any more matches to handle. michael@0: unsigned next = which + 1; michael@0: ranges += next; michael@0: count -= next; michael@0: } while (count); michael@0: } michael@0: michael@0: void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass) michael@0: { michael@0: if (charClass->m_table) { michael@0: ExtendedAddress tableEntry(character, reinterpret_cast(charClass->m_table)); michael@0: matchDest.append(branchTest8(charClass->m_tableInverted ? 
Zero : NonZero, tableEntry)); michael@0: return; michael@0: } michael@0: Jump unicodeFail; michael@0: if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) { michael@0: Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f)); michael@0: michael@0: if (charClass->m_matchesUnicode.size()) { michael@0: for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) { michael@0: UChar ch = charClass->m_matchesUnicode[i]; michael@0: matchDest.append(branch32(Equal, character, Imm32(ch))); michael@0: } michael@0: } michael@0: michael@0: if (charClass->m_rangesUnicode.size()) { michael@0: for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) { michael@0: UChar lo = charClass->m_rangesUnicode[i].begin; michael@0: UChar hi = charClass->m_rangesUnicode[i].end; michael@0: michael@0: Jump below = branch32(LessThan, character, Imm32(lo)); michael@0: matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi))); michael@0: below.link(this); michael@0: } michael@0: } michael@0: michael@0: unicodeFail = jump(); michael@0: isAscii.link(this); michael@0: } michael@0: michael@0: if (charClass->m_ranges.size()) { michael@0: unsigned matchIndex = 0; michael@0: JumpList failures; michael@0: matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size()); michael@0: while (matchIndex < charClass->m_matches.size()) michael@0: matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++]))); michael@0: michael@0: failures.link(this); michael@0: } else if (charClass->m_matches.size()) { michael@0: // optimization: gather 'a','A' etc back together, can mask & test once. 
michael@0: Vector matchesAZaz; michael@0: michael@0: for (unsigned i = 0; i < charClass->m_matches.size(); ++i) { michael@0: char ch = charClass->m_matches[i]; michael@0: if (m_pattern.m_ignoreCase) { michael@0: if (isASCIILower(ch)) { michael@0: matchesAZaz.append(ch); michael@0: continue; michael@0: } michael@0: if (isASCIIUpper(ch)) michael@0: continue; michael@0: } michael@0: matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch))); michael@0: } michael@0: michael@0: if (unsigned countAZaz = matchesAZaz.size()) { michael@0: or32(TrustedImm32(32), character); michael@0: for (unsigned i = 0; i < countAZaz; ++i) michael@0: matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i]))); michael@0: } michael@0: } michael@0: michael@0: if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) michael@0: unicodeFail.link(this); michael@0: } michael@0: michael@0: // Jumps if input not available; will have (incorrectly) incremented already! michael@0: Jump jumpIfNoAvailableInput(unsigned countToCheck = 0) michael@0: { michael@0: if (countToCheck) michael@0: add32(Imm32(countToCheck), index); michael@0: return branch32(Above, index, length); michael@0: } michael@0: michael@0: Jump jumpIfAvailableInput(unsigned countToCheck) michael@0: { michael@0: add32(Imm32(countToCheck), index); michael@0: return branch32(BelowOrEqual, index, length); michael@0: } michael@0: michael@0: Jump checkInput() michael@0: { michael@0: return branch32(BelowOrEqual, index, length); michael@0: } michael@0: michael@0: Jump atEndOfInput() michael@0: { michael@0: return branch32(Equal, index, length); michael@0: } michael@0: michael@0: Jump notAtEndOfInput() michael@0: { michael@0: return branch32(NotEqual, index, length); michael@0: } michael@0: michael@0: Jump jumpIfCharNotEquals(UChar ch, int inputPosition, RegisterID character) michael@0: { michael@0: readCharacter(inputPosition, character); michael@0: michael@0: // For case-insesitive compares, 
non-ascii characters that have different michael@0: // upper & lower case representations are converted to a character class. michael@0: ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch)); michael@0: if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { michael@0: or32(TrustedImm32(0x20), character); michael@0: ch |= 0x20; michael@0: } michael@0: michael@0: return branch32(NotEqual, character, Imm32(ch)); michael@0: } michael@0: michael@0: void readCharacter(int inputPosition, RegisterID reg) michael@0: { michael@0: if (m_charSize == Char8) michael@0: load8(BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), reg); michael@0: else michael@0: load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg); michael@0: } michael@0: michael@0: void storeToFrame(RegisterID reg, unsigned frameLocation) michael@0: { michael@0: poke(reg, frameLocation); michael@0: } michael@0: michael@0: void storeToFrame(TrustedImm32 imm, unsigned frameLocation) michael@0: { michael@0: poke(imm, frameLocation); michael@0: } michael@0: michael@0: DataLabelPtr storeToFrameWithPatch(unsigned frameLocation) michael@0: { michael@0: return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*))); michael@0: } michael@0: michael@0: void loadFromFrame(unsigned frameLocation, RegisterID reg) michael@0: { michael@0: peek(reg, frameLocation); michael@0: } michael@0: michael@0: void loadFromFrameAndJump(unsigned frameLocation) michael@0: { michael@0: jump(Address(stackPointerRegister, frameLocation * sizeof(void*))); michael@0: } michael@0: michael@0: void initCallFrame() michael@0: { michael@0: unsigned callFrameSize = m_pattern.m_body->m_callFrameSize; michael@0: if (callFrameSize) michael@0: subPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister); michael@0: } michael@0: void removeCallFrame() michael@0: { michael@0: unsigned callFrameSize = m_pattern.m_body->m_callFrameSize; michael@0: if 
(callFrameSize) michael@0: addPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister); michael@0: } michael@0: michael@0: // Used to record subpatters, should only be called if compileMode is IncludeSubpatterns. michael@0: void setSubpatternStart(RegisterID reg, unsigned subpattern) michael@0: { michael@0: ASSERT(subpattern); michael@0: // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-( michael@0: store32(reg, Address(output, (subpattern << 1) * sizeof(int))); michael@0: } michael@0: void setSubpatternEnd(RegisterID reg, unsigned subpattern) michael@0: { michael@0: ASSERT(subpattern); michael@0: // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-( michael@0: store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int))); michael@0: } michael@0: void clearSubpatternStart(unsigned subpattern) michael@0: { michael@0: ASSERT(subpattern); michael@0: // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-( michael@0: store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int))); michael@0: } michael@0: michael@0: // We use one of three different strategies to track the start of the current match, michael@0: // while matching. michael@0: // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily michael@0: // at the end of matching. This is irrespective of compileMode, and in this case michael@0: // these methods should never be called. michael@0: // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output michael@0: // vector, store the match start in the output vector. michael@0: // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly michael@0: // in this register. 
michael@0: void setMatchStart(RegisterID reg) michael@0: { michael@0: ASSERT(!m_pattern.m_body->m_hasFixedSize); michael@0: if (compileMode == IncludeSubpatterns) michael@0: store32(reg, output); michael@0: else michael@0: move(reg, output); michael@0: } michael@0: void getMatchStart(RegisterID reg) michael@0: { michael@0: ASSERT(!m_pattern.m_body->m_hasFixedSize); michael@0: if (compileMode == IncludeSubpatterns) michael@0: load32(output, reg); michael@0: else michael@0: move(output, reg); michael@0: } michael@0: michael@0: enum YarrOpCode { michael@0: // These nodes wrap body alternatives - those in the main disjunction, michael@0: // rather than subpatterns or assertions. These are chained together in michael@0: // a doubly linked list, with a 'begin' node for the first alternative, michael@0: // a 'next' node for each subsequent alternative, and an 'end' node at michael@0: // the end. In the case of repeating alternatives, the 'end' node also michael@0: // has a reference back to 'begin'. michael@0: OpBodyAlternativeBegin, michael@0: OpBodyAlternativeNext, michael@0: OpBodyAlternativeEnd, michael@0: // Similar to the body alternatives, but used for subpatterns with two michael@0: // or more alternatives. michael@0: OpNestedAlternativeBegin, michael@0: OpNestedAlternativeNext, michael@0: OpNestedAlternativeEnd, michael@0: // Used for alternatives in subpatterns where there is only a single michael@0: // alternative (backtrackingis easier in these cases), or for alternatives michael@0: // which never need to be backtracked (those in parenthetical assertions, michael@0: // terminal subpatterns). michael@0: OpSimpleNestedAlternativeBegin, michael@0: OpSimpleNestedAlternativeNext, michael@0: OpSimpleNestedAlternativeEnd, michael@0: // Used to wrap 'Once' subpattern matches (quantityCount == 1). michael@0: OpParenthesesSubpatternOnceBegin, michael@0: OpParenthesesSubpatternOnceEnd, michael@0: // Used to wrap 'Terminal' subpattern matches (at the end of the regexp). 
michael@0: OpParenthesesSubpatternTerminalBegin, michael@0: OpParenthesesSubpatternTerminalEnd, michael@0: // Used to wrap parenthetical assertions. michael@0: OpParentheticalAssertionBegin, michael@0: OpParentheticalAssertionEnd, michael@0: // Wraps all simple terms (pattern characters, character classes). michael@0: OpTerm, michael@0: // Where an expression contains only 'once through' body alternatives michael@0: // and no repeating ones, this op is used to return match failure. michael@0: OpMatchFailed michael@0: }; michael@0: michael@0: // This structure is used to hold the compiled opcode information, michael@0: // including reference back to the original PatternTerm/PatternAlternatives, michael@0: // and JIT compilation data structures. michael@0: struct YarrOp { michael@0: explicit YarrOp(PatternTerm* term) michael@0: : m_op(OpTerm) michael@0: , m_term(term) michael@0: , m_isDeadCode(false) michael@0: { michael@0: } michael@0: michael@0: explicit YarrOp(YarrOpCode op) michael@0: : m_op(op) michael@0: , m_isDeadCode(false) michael@0: { michael@0: } michael@0: michael@0: // The operation, as a YarrOpCode, and also a reference to the PatternTerm. michael@0: YarrOpCode m_op; michael@0: PatternTerm* m_term; michael@0: michael@0: // For alternatives, this holds the PatternAlternative and doubly linked michael@0: // references to this alternative's siblings. In the case of the michael@0: // OpBodyAlternativeEnd node at the end of a section of repeating nodes, michael@0: // m_nextOp will reference the OpBodyAlternativeBegin node of the first michael@0: // repeating alternative. michael@0: PatternAlternative* m_alternative; michael@0: size_t m_previousOp; michael@0: size_t m_nextOp; michael@0: michael@0: // Used to record a set of Jumps out of the generated code, typically michael@0: // used for jumps out to backtracking code, and a single reentry back michael@0: // into the code for a node (likely where a backtrack will trigger michael@0: // rematching). 
michael@0: Label m_reentry; michael@0: JumpList m_jumps; michael@0: michael@0: // Used for backtracking when the prior alternative did not consume any michael@0: // characters but matched. michael@0: Jump m_zeroLengthMatch; michael@0: michael@0: // This flag is used to null out the second pattern character, when michael@0: // two are fused to match a pair together. michael@0: bool m_isDeadCode; michael@0: michael@0: // Currently used in the case of some of the more complex management of michael@0: // 'm_checked', to cache the offset used in this alternative, to avoid michael@0: // recalculating it. michael@0: int m_checkAdjust; michael@0: michael@0: // Used by OpNestedAlternativeNext/End to hold the pointer to the michael@0: // value that will be pushed into the pattern's frame to return to, michael@0: // upon backtracking back into the disjunction. michael@0: DataLabelPtr m_returnAddress; michael@0: }; michael@0: michael@0: // BacktrackingState michael@0: // This class encapsulates information about the state of code generation michael@0: // whilst generating the code for backtracking, when a term fails to match. michael@0: // Upon entry to code generation of the backtracking code for a given node, michael@0: // the Backtracking state will hold references to all control flow sources michael@0: // that are outputs in need of further backtracking from the prior node michael@0: // generated (which is the subsequent operation in the regular expression, michael@0: // and in the m_ops Vector, since we generated backtracking backwards). michael@0: // These references to control flow take the form of: michael@0: // - A jump list of jumps, to be linked to code that will backtrack them michael@0: // further. michael@0: // - A set of DataLabelPtr values, to be populated with values to be michael@0: // treated effectively as return addresses backtracking into complex michael@0: // subpatterns. 
michael@0: // - A flag indicating that the current sequence of generated code up to michael@0: // this point requires backtracking. michael@0: class BacktrackingState { michael@0: public: michael@0: BacktrackingState() michael@0: : m_pendingFallthrough(false) michael@0: { michael@0: } michael@0: michael@0: // Add a jump or jumps, a return address, or set the flag indicating michael@0: // that the current 'fallthrough' control flow requires backtracking. michael@0: void append(const Jump& jump) michael@0: { michael@0: m_laterFailures.append(jump); michael@0: } michael@0: void append(JumpList& jumpList) michael@0: { michael@0: m_laterFailures.append(jumpList); michael@0: } michael@0: void append(const DataLabelPtr& returnAddress) michael@0: { michael@0: m_pendingReturns.append(returnAddress); michael@0: } michael@0: void fallthrough() michael@0: { michael@0: ASSERT(!m_pendingFallthrough); michael@0: m_pendingFallthrough = true; michael@0: } michael@0: michael@0: // These methods clear the backtracking state, either linking to the michael@0: // current location, a provided label, or copying the backtracking out michael@0: // to a JumpList. All actions may require code generation to take place, michael@0: // and as such are passed a pointer to the assembler. 
michael@0: void link(MacroAssembler* assembler) michael@0: { michael@0: if (m_pendingReturns.size()) { michael@0: Label here(assembler); michael@0: for (unsigned i = 0; i < m_pendingReturns.size(); ++i) michael@0: m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here)); michael@0: m_pendingReturns.clear(); michael@0: } michael@0: m_laterFailures.link(assembler); michael@0: m_laterFailures.clear(); michael@0: m_pendingFallthrough = false; michael@0: } michael@0: void linkTo(Label label, MacroAssembler* assembler) michael@0: { michael@0: if (m_pendingReturns.size()) { michael@0: for (unsigned i = 0; i < m_pendingReturns.size(); ++i) michael@0: m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label)); michael@0: m_pendingReturns.clear(); michael@0: } michael@0: if (m_pendingFallthrough) michael@0: assembler->jump(label); michael@0: m_laterFailures.linkTo(label, assembler); michael@0: m_laterFailures.clear(); michael@0: m_pendingFallthrough = false; michael@0: } michael@0: void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler) michael@0: { michael@0: if (m_pendingReturns.size()) { michael@0: Label here(assembler); michael@0: for (unsigned i = 0; i < m_pendingReturns.size(); ++i) michael@0: m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here)); michael@0: m_pendingReturns.clear(); michael@0: m_pendingFallthrough = true; michael@0: } michael@0: if (m_pendingFallthrough) michael@0: jumpList.append(assembler->jump()); michael@0: jumpList.append(m_laterFailures); michael@0: m_laterFailures.clear(); michael@0: m_pendingFallthrough = false; michael@0: } michael@0: michael@0: bool isEmpty() michael@0: { michael@0: return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough; michael@0: } michael@0: michael@0: // Called at the end of code generation to link all return addresses. 
michael@0: void linkDataLabels(LinkBuffer& linkBuffer) michael@0: { michael@0: ASSERT(isEmpty()); michael@0: for (unsigned i = 0; i < m_backtrackRecords.size(); ++i) michael@0: linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation)); michael@0: } michael@0: michael@0: private: michael@0: struct ReturnAddressRecord { michael@0: ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation) michael@0: : m_dataLabel(dataLabel) michael@0: , m_backtrackLocation(backtrackLocation) michael@0: { michael@0: } michael@0: michael@0: DataLabelPtr m_dataLabel; michael@0: Label m_backtrackLocation; michael@0: }; michael@0: michael@0: JumpList m_laterFailures; michael@0: bool m_pendingFallthrough; michael@0: Vector m_pendingReturns; michael@0: Vector m_backtrackRecords; michael@0: }; michael@0: michael@0: // Generation methods: michael@0: // =================== michael@0: michael@0: // This method provides a default implementation of backtracking common michael@0: // to many terms; terms commonly jump out of the forwards matching path michael@0: // on any failed conditions, and add these jumps to the m_jumps list. If michael@0: // no special handling is required we can often just backtrack to m_jumps. 
michael@0: bool backtrackTermDefault(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: m_backtrackingState.append(op.m_jumps); michael@0: return true; michael@0: } michael@0: michael@0: bool generateAssertionBOL(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: if (m_pattern.m_multiline) { michael@0: const RegisterID character = regT0; michael@0: michael@0: JumpList matchDest; michael@0: if (!term->inputPosition) michael@0: matchDest.append(branch32(Equal, index, Imm32(m_checked))); michael@0: michael@0: readCharacter((term->inputPosition - m_checked) - 1, character); michael@0: matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); michael@0: op.m_jumps.append(jump()); michael@0: michael@0: matchDest.link(this); michael@0: } else { michael@0: // Erk, really should poison out these alternatives early. :-/ michael@0: if (term->inputPosition) michael@0: op.m_jumps.append(jump()); michael@0: else michael@0: op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked))); michael@0: } michael@0: return true; michael@0: } michael@0: bool backtrackAssertionBOL(size_t opIndex) michael@0: { michael@0: return backtrackTermDefault(opIndex); michael@0: } michael@0: michael@0: bool generateAssertionEOL(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: if (m_pattern.m_multiline) { michael@0: const RegisterID character = regT0; michael@0: michael@0: JumpList matchDest; michael@0: if (term->inputPosition == m_checked) michael@0: matchDest.append(atEndOfInput()); michael@0: michael@0: readCharacter(term->inputPosition - m_checked, character); michael@0: matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); michael@0: op.m_jumps.append(jump()); michael@0: michael@0: matchDest.link(this); michael@0: } else { michael@0: if (term->inputPosition == 
m_checked) michael@0: op.m_jumps.append(notAtEndOfInput()); michael@0: // Erk, really should poison out these alternatives early. :-/ michael@0: else michael@0: op.m_jumps.append(jump()); michael@0: } michael@0: return true; michael@0: } michael@0: bool backtrackAssertionEOL(size_t opIndex) michael@0: { michael@0: return backtrackTermDefault(opIndex); michael@0: } michael@0: michael@0: // Also falls though on nextIsNotWordChar. michael@0: void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: const RegisterID character = regT0; michael@0: michael@0: if (term->inputPosition == m_checked) michael@0: nextIsNotWordChar.append(atEndOfInput()); michael@0: michael@0: readCharacter((term->inputPosition - m_checked), character); michael@0: matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass()); michael@0: } michael@0: michael@0: bool generateAssertionWordBoundary(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: const RegisterID character = regT0; michael@0: michael@0: Jump atBegin; michael@0: JumpList matchDest; michael@0: if (!term->inputPosition) michael@0: atBegin = branch32(Equal, index, Imm32(m_checked)); michael@0: readCharacter((term->inputPosition - m_checked) - 1, character); michael@0: matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass()); michael@0: if (!term->inputPosition) michael@0: atBegin.link(this); michael@0: michael@0: // We fall through to here if the last character was not a wordchar. 
michael@0: JumpList nonWordCharThenWordChar; michael@0: JumpList nonWordCharThenNonWordChar; michael@0: if (term->invert()) { michael@0: matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar); michael@0: nonWordCharThenWordChar.append(jump()); michael@0: } else { michael@0: matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar); michael@0: nonWordCharThenNonWordChar.append(jump()); michael@0: } michael@0: op.m_jumps.append(nonWordCharThenNonWordChar); michael@0: michael@0: // We jump here if the last character was a wordchar. michael@0: matchDest.link(this); michael@0: JumpList wordCharThenWordChar; michael@0: JumpList wordCharThenNonWordChar; michael@0: if (term->invert()) { michael@0: matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar); michael@0: wordCharThenWordChar.append(jump()); michael@0: } else { michael@0: matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar); michael@0: // This can fall-though! michael@0: } michael@0: michael@0: op.m_jumps.append(wordCharThenWordChar); michael@0: michael@0: nonWordCharThenWordChar.link(this); michael@0: wordCharThenNonWordChar.link(this); michael@0: return true; michael@0: } michael@0: bool backtrackAssertionWordBoundary(size_t opIndex) michael@0: { michael@0: return backtrackTermDefault(opIndex); michael@0: } michael@0: michael@0: bool generatePatternCharacterOnce(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: michael@0: if (op.m_isDeadCode) michael@0: return true; michael@0: michael@0: // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed michael@0: // node, so there must always be at least one more node. 
michael@0: ASSERT(opIndex + 1 < m_ops.size()); michael@0: YarrOp* nextOp = &m_ops[opIndex + 1]; michael@0: michael@0: PatternTerm* term = op.m_term; michael@0: UChar ch = term->patternCharacter; michael@0: michael@0: if ((ch > 0xff) && (m_charSize == Char8)) { michael@0: // Have a 16 bit pattern character and an 8 bit string - short circuit michael@0: op.m_jumps.append(jump()); michael@0: return true; michael@0: } michael@0: michael@0: const RegisterID character = regT0; michael@0: int maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2; michael@0: unsigned ignoreCaseMask = 0; michael@0: #if CPU(BIG_ENDIAN) michael@0: int allCharacters = ch << (m_charSize == Char8 ? 24 : 16); michael@0: #else michael@0: int allCharacters = ch; michael@0: #endif michael@0: int numberCharacters; michael@0: int startTermPosition = term->inputPosition; michael@0: michael@0: // For case-insesitive compares, non-ascii characters that have different michael@0: // upper & lower case representations are converted to a character class. michael@0: ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch)); michael@0: michael@0: if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) michael@0: #if CPU(BIG_ENDIAN) michael@0: ignoreCaseMask |= 32 << (m_charSize == Char8 ? 
24 : 16); michael@0: #else michael@0: ignoreCaseMask |= 32; michael@0: #endif michael@0: michael@0: for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) { michael@0: PatternTerm* nextTerm = nextOp->m_term; michael@0: michael@0: if (nextTerm->type != PatternTerm::TypePatternCharacter michael@0: || nextTerm->quantityType != QuantifierFixedCount michael@0: || nextTerm->quantityCount != 1 michael@0: || nextTerm->inputPosition != (startTermPosition + numberCharacters)) michael@0: break; michael@0: michael@0: nextOp->m_isDeadCode = true; michael@0: michael@0: #if CPU(BIG_ENDIAN) michael@0: int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters); michael@0: #else michael@0: int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters; michael@0: #endif michael@0: michael@0: UChar currentCharacter = nextTerm->patternCharacter; michael@0: michael@0: if ((currentCharacter > 0xff) && (m_charSize == Char8)) { michael@0: // Have a 16 bit pattern character and an 8 bit string - short circuit michael@0: op.m_jumps.append(jump()); michael@0: return true; michael@0: } michael@0: michael@0: // For case-insesitive compares, non-ascii characters that have different michael@0: // upper & lower case representations are converted to a character class. 
michael@0: ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter)); michael@0: michael@0: allCharacters |= (currentCharacter << shiftAmount); michael@0: michael@0: if ((m_pattern.m_ignoreCase) && (isASCIIAlpha(currentCharacter))) michael@0: ignoreCaseMask |= 32 << shiftAmount; michael@0: } michael@0: michael@0: if (m_charSize == Char8) { michael@0: switch (numberCharacters) { michael@0: case 1: michael@0: op.m_jumps.append(jumpIfCharNotEquals(ch, startTermPosition - m_checked, character)); michael@0: return true; michael@0: case 2: { michael@0: BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar)); michael@0: load16Unaligned(address, character); michael@0: break; michael@0: } michael@0: case 3: { michael@0: BaseIndex highAddress(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar)); michael@0: load16Unaligned(highAddress, character); michael@0: if (ignoreCaseMask) michael@0: or32(Imm32(ignoreCaseMask), character); michael@0: op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask))); michael@0: op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, startTermPosition + 2 - m_checked, character)); michael@0: return true; michael@0: } michael@0: case 4: { michael@0: BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar)); michael@0: load32WithUnalignedHalfWords(address, character); michael@0: break; michael@0: } michael@0: } michael@0: } else { michael@0: switch (numberCharacters) { michael@0: case 1: michael@0: op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character)); michael@0: return true; michael@0: case 2: michael@0: BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar)); michael@0: load32WithUnalignedHalfWords(address, character); michael@0: break; michael@0: } michael@0: } michael@0: michael@0: if 
(ignoreCaseMask) michael@0: or32(Imm32(ignoreCaseMask), character); michael@0: op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask))); michael@0: return true; michael@0: } michael@0: bool backtrackPatternCharacterOnce(size_t opIndex) michael@0: { michael@0: return backtrackTermDefault(opIndex); michael@0: } michael@0: michael@0: bool generatePatternCharacterFixed(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: UChar ch = term->patternCharacter; michael@0: michael@0: const RegisterID character = regT0; michael@0: const RegisterID countRegister = regT1; michael@0: michael@0: move(index, countRegister); michael@0: if (term->quantityCount.hasOverflowed()) michael@0: return false; michael@0: sub32(Imm32(term->quantityCount.unsafeGet()), countRegister); michael@0: michael@0: Label loop(this); michael@0: int offset; michael@0: if ((Checked(term->inputPosition - m_checked + Checked(term->quantityCount)) * static_cast(m_charSize == Char8 ? sizeof(char) : sizeof(UChar))).safeGet(offset)) michael@0: return false; michael@0: BaseIndex address(input, countRegister, m_charScale, offset); michael@0: michael@0: if (m_charSize == Char8) michael@0: load8(address, character); michael@0: else michael@0: load16(address, character); michael@0: michael@0: // For case-insesitive compares, non-ascii characters that have different michael@0: // upper & lower case representations are converted to a character class. 
michael@0: ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch)); michael@0: if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { michael@0: or32(TrustedImm32(0x20), character); michael@0: ch |= 0x20; michael@0: } michael@0: michael@0: op.m_jumps.append(branch32(NotEqual, character, Imm32(ch))); michael@0: add32(TrustedImm32(1), countRegister); michael@0: branch32(NotEqual, countRegister, index).linkTo(loop, this); michael@0: michael@0: return true; michael@0: } michael@0: bool backtrackPatternCharacterFixed(size_t opIndex) michael@0: { michael@0: return backtrackTermDefault(opIndex); michael@0: } michael@0: michael@0: bool generatePatternCharacterGreedy(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: UChar ch = term->patternCharacter; michael@0: michael@0: const RegisterID character = regT0; michael@0: const RegisterID countRegister = regT1; michael@0: michael@0: move(TrustedImm32(0), countRegister); michael@0: michael@0: // Unless have a 16 bit pattern character and an 8 bit string - short circuit michael@0: if (!((ch > 0xff) && (m_charSize == Char8))) { michael@0: JumpList failures; michael@0: Label loop(this); michael@0: failures.append(atEndOfInput()); michael@0: failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character)); michael@0: michael@0: add32(TrustedImm32(1), countRegister); michael@0: add32(TrustedImm32(1), index); michael@0: if (term->quantityCount == quantifyInfinite) { michael@0: jump(loop); michael@0: } else { michael@0: if (term->quantityCount.hasOverflowed()) michael@0: return false; michael@0: branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this); michael@0: } michael@0: michael@0: failures.link(this); michael@0: } michael@0: op.m_reentry = label(); michael@0: michael@0: storeToFrame(countRegister, term->frameLocation); michael@0: return true; michael@0: } michael@0: bool 
backtrackPatternCharacterGreedy(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: const RegisterID countRegister = regT1; michael@0: michael@0: m_backtrackingState.link(this); michael@0: michael@0: loadFromFrame(term->frameLocation, countRegister); michael@0: m_backtrackingState.append(branchTest32(Zero, countRegister)); michael@0: sub32(TrustedImm32(1), countRegister); michael@0: sub32(TrustedImm32(1), index); michael@0: jump(op.m_reentry); michael@0: michael@0: return true; michael@0: } michael@0: michael@0: bool generatePatternCharacterNonGreedy(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: const RegisterID countRegister = regT1; michael@0: michael@0: move(TrustedImm32(0), countRegister); michael@0: op.m_reentry = label(); michael@0: storeToFrame(countRegister, term->frameLocation); michael@0: return true; michael@0: } michael@0: bool backtrackPatternCharacterNonGreedy(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: UChar ch = term->patternCharacter; michael@0: michael@0: const RegisterID character = regT0; michael@0: const RegisterID countRegister = regT1; michael@0: michael@0: m_backtrackingState.link(this); michael@0: michael@0: loadFromFrame(term->frameLocation, countRegister); michael@0: michael@0: // Unless have a 16 bit pattern character and an 8 bit string - short circuit michael@0: if (!((ch > 0xff) && (m_charSize == Char8))) { michael@0: JumpList nonGreedyFailures; michael@0: nonGreedyFailures.append(atEndOfInput()); michael@0: if (term->quantityCount != quantifyInfinite) { michael@0: if (term->quantityCount.hasOverflowed()) michael@0: return false; michael@0: nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet()))); michael@0: } michael@0: 
nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character)); michael@0: michael@0: add32(TrustedImm32(1), countRegister); michael@0: add32(TrustedImm32(1), index); michael@0: michael@0: jump(op.m_reentry); michael@0: nonGreedyFailures.link(this); michael@0: } michael@0: michael@0: sub32(countRegister, index); michael@0: m_backtrackingState.fallthrough(); michael@0: michael@0: return true; michael@0: } michael@0: michael@0: bool generateCharacterClassOnce(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: const RegisterID character = regT0; michael@0: michael@0: JumpList matchDest; michael@0: readCharacter(term->inputPosition - m_checked, character); michael@0: matchCharacterClass(character, matchDest, term->characterClass); michael@0: michael@0: if (term->invert()) michael@0: op.m_jumps.append(matchDest); michael@0: else { michael@0: op.m_jumps.append(jump()); michael@0: matchDest.link(this); michael@0: } michael@0: return true; michael@0: } michael@0: bool backtrackCharacterClassOnce(size_t opIndex) michael@0: { michael@0: return backtrackTermDefault(opIndex); michael@0: } michael@0: michael@0: bool generateCharacterClassFixed(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: const RegisterID character = regT0; michael@0: const RegisterID countRegister = regT1; michael@0: michael@0: move(index, countRegister); michael@0: if (term->quantityCount.hasOverflowed()) michael@0: return false; michael@0: sub32(Imm32(term->quantityCount.unsafeGet()), countRegister); michael@0: michael@0: Label loop(this); michael@0: JumpList matchDest; michael@0: michael@0: int offset; michael@0: Checked checkedOffset(term->inputPosition - m_checked + Checked(term->quantityCount)); michael@0: michael@0: if (m_charSize == Char8) { michael@0: if ((Checked(checkedOffset) * 
static_cast(sizeof(char))).safeGet(offset)) michael@0: return false; michael@0: load8(BaseIndex(input, countRegister, TimesOne, offset), character); michael@0: } else { michael@0: if ((Checked(checkedOffset) * static_cast(sizeof(UChar))).safeGet(offset)) michael@0: return false; michael@0: load16(BaseIndex(input, countRegister, TimesTwo, offset), character); michael@0: } michael@0: matchCharacterClass(character, matchDest, term->characterClass); michael@0: michael@0: if (term->invert()) michael@0: op.m_jumps.append(matchDest); michael@0: else { michael@0: op.m_jumps.append(jump()); michael@0: matchDest.link(this); michael@0: } michael@0: michael@0: add32(TrustedImm32(1), countRegister); michael@0: branch32(NotEqual, countRegister, index).linkTo(loop, this); michael@0: return true; michael@0: } michael@0: bool backtrackCharacterClassFixed(size_t opIndex) michael@0: { michael@0: return backtrackTermDefault(opIndex); michael@0: } michael@0: michael@0: bool generateCharacterClassGreedy(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: const RegisterID character = regT0; michael@0: const RegisterID countRegister = regT1; michael@0: michael@0: move(TrustedImm32(0), countRegister); michael@0: michael@0: JumpList failures; michael@0: Label loop(this); michael@0: failures.append(atEndOfInput()); michael@0: michael@0: if (term->invert()) { michael@0: readCharacter(term->inputPosition - m_checked, character); michael@0: matchCharacterClass(character, failures, term->characterClass); michael@0: } else { michael@0: JumpList matchDest; michael@0: readCharacter(term->inputPosition - m_checked, character); michael@0: matchCharacterClass(character, matchDest, term->characterClass); michael@0: failures.append(jump()); michael@0: matchDest.link(this); michael@0: } michael@0: michael@0: add32(TrustedImm32(1), countRegister); michael@0: add32(TrustedImm32(1), index); michael@0: if (term->quantityCount != 
quantifyInfinite) { michael@0: unsigned quantityCount; michael@0: if (term->quantityCount.safeGet(quantityCount)) michael@0: return false; michael@0: branch32(NotEqual, countRegister, Imm32(quantityCount)).linkTo(loop, this); michael@0: failures.append(jump()); michael@0: } else michael@0: jump(loop); michael@0: michael@0: failures.link(this); michael@0: op.m_reentry = label(); michael@0: michael@0: storeToFrame(countRegister, term->frameLocation); michael@0: return true; michael@0: } michael@0: bool backtrackCharacterClassGreedy(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: const RegisterID countRegister = regT1; michael@0: michael@0: m_backtrackingState.link(this); michael@0: michael@0: loadFromFrame(term->frameLocation, countRegister); michael@0: m_backtrackingState.append(branchTest32(Zero, countRegister)); michael@0: sub32(TrustedImm32(1), countRegister); michael@0: sub32(TrustedImm32(1), index); michael@0: jump(op.m_reentry); michael@0: michael@0: return true; michael@0: } michael@0: michael@0: bool generateCharacterClassNonGreedy(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: const RegisterID countRegister = regT1; michael@0: michael@0: move(TrustedImm32(0), countRegister); michael@0: op.m_reentry = label(); michael@0: storeToFrame(countRegister, term->frameLocation); michael@0: return true; michael@0: } michael@0: bool backtrackCharacterClassNonGreedy(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: const RegisterID character = regT0; michael@0: const RegisterID countRegister = regT1; michael@0: michael@0: JumpList nonGreedyFailures; michael@0: michael@0: m_backtrackingState.link(this); michael@0: michael@0: loadFromFrame(term->frameLocation, countRegister); michael@0: michael@0: 
nonGreedyFailures.append(atEndOfInput()); michael@0: if (term->quantityCount.hasOverflowed()) michael@0: return false; michael@0: nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet()))); michael@0: michael@0: JumpList matchDest; michael@0: readCharacter(term->inputPosition - m_checked, character); michael@0: matchCharacterClass(character, matchDest, term->characterClass); michael@0: michael@0: if (term->invert()) michael@0: nonGreedyFailures.append(matchDest); michael@0: else { michael@0: nonGreedyFailures.append(jump()); michael@0: matchDest.link(this); michael@0: } michael@0: michael@0: add32(TrustedImm32(1), countRegister); michael@0: add32(TrustedImm32(1), index); michael@0: michael@0: jump(op.m_reentry); michael@0: michael@0: nonGreedyFailures.link(this); michael@0: sub32(countRegister, index); michael@0: m_backtrackingState.fallthrough(); michael@0: michael@0: return true; michael@0: } michael@0: michael@0: bool generateDotStarEnclosure(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: const RegisterID character = regT0; michael@0: const RegisterID matchPos = regT1; michael@0: michael@0: JumpList foundBeginningNewLine; michael@0: JumpList saveStartIndex; michael@0: JumpList foundEndingNewLine; michael@0: michael@0: ASSERT(!m_pattern.m_body->m_hasFixedSize); michael@0: getMatchStart(matchPos); michael@0: michael@0: saveStartIndex.append(branchTest32(Zero, matchPos)); michael@0: Label findBOLLoop(this); michael@0: sub32(TrustedImm32(1), matchPos); michael@0: if (m_charSize == Char8) michael@0: load8(BaseIndex(input, matchPos, TimesOne, 0), character); michael@0: else michael@0: load16(BaseIndex(input, matchPos, TimesTwo, 0), character); michael@0: matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass()); michael@0: branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this); michael@0: 
saveStartIndex.append(jump()); michael@0: michael@0: foundBeginningNewLine.link(this); michael@0: add32(TrustedImm32(1), matchPos); // Advance past newline michael@0: saveStartIndex.link(this); michael@0: michael@0: if (!m_pattern.m_multiline && term->anchors.bolAnchor) michael@0: op.m_jumps.append(branchTest32(NonZero, matchPos)); michael@0: michael@0: ASSERT(!m_pattern.m_body->m_hasFixedSize); michael@0: setMatchStart(matchPos); michael@0: michael@0: move(index, matchPos); michael@0: michael@0: Label findEOLLoop(this); michael@0: foundEndingNewLine.append(branch32(Equal, matchPos, length)); michael@0: if (m_charSize == Char8) michael@0: load8(BaseIndex(input, matchPos, TimesOne, 0), character); michael@0: else michael@0: load16(BaseIndex(input, matchPos, TimesTwo, 0), character); michael@0: matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass()); michael@0: add32(TrustedImm32(1), matchPos); michael@0: jump(findEOLLoop); michael@0: michael@0: foundEndingNewLine.link(this); michael@0: michael@0: if (!m_pattern.m_multiline && term->anchors.eolAnchor) michael@0: op.m_jumps.append(branch32(NotEqual, matchPos, length)); michael@0: michael@0: move(matchPos, index); michael@0: return true; michael@0: } michael@0: michael@0: bool backtrackDotStarEnclosure(size_t opIndex) michael@0: { michael@0: return backtrackTermDefault(opIndex); michael@0: } michael@0: michael@0: // Code generation/backtracking for simple terms michael@0: // (pattern characters, character classes, and assertions). michael@0: // These methods farm out work to the set of functions above. 
michael@0: bool generateTerm(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: switch (term->type) { michael@0: case PatternTerm::TypePatternCharacter: michael@0: switch (term->quantityType) { michael@0: case QuantifierFixedCount: michael@0: if (term->quantityCount == 1) michael@0: return generatePatternCharacterOnce(opIndex); michael@0: else michael@0: return generatePatternCharacterFixed(opIndex); michael@0: break; michael@0: case QuantifierGreedy: michael@0: return generatePatternCharacterGreedy(opIndex); michael@0: case QuantifierNonGreedy: michael@0: return generatePatternCharacterNonGreedy(opIndex); michael@0: } michael@0: break; michael@0: michael@0: case PatternTerm::TypeCharacterClass: michael@0: switch (term->quantityType) { michael@0: case QuantifierFixedCount: michael@0: if (term->quantityCount == 1) michael@0: return generateCharacterClassOnce(opIndex); michael@0: else michael@0: return generateCharacterClassFixed(opIndex); michael@0: break; michael@0: case QuantifierGreedy: michael@0: return generateCharacterClassGreedy(opIndex); michael@0: case QuantifierNonGreedy: michael@0: return generateCharacterClassNonGreedy(opIndex); michael@0: } michael@0: break; michael@0: michael@0: case PatternTerm::TypeAssertionBOL: michael@0: return generateAssertionBOL(opIndex); michael@0: michael@0: case PatternTerm::TypeAssertionEOL: michael@0: return generateAssertionEOL(opIndex); michael@0: michael@0: case PatternTerm::TypeAssertionWordBoundary: michael@0: return generateAssertionWordBoundary(opIndex); michael@0: michael@0: case PatternTerm::TypeForwardReference: michael@0: return true; michael@0: michael@0: case PatternTerm::TypeParenthesesSubpattern: michael@0: case PatternTerm::TypeParentheticalAssertion: michael@0: ASSERT_NOT_REACHED(); michael@0: return false; michael@0: case PatternTerm::TypeBackReference: michael@0: return false; michael@0: case 
PatternTerm::TypeDotStarEnclosure: michael@0: return generateDotStarEnclosure(opIndex); michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: bool backtrackTerm(size_t opIndex) michael@0: { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: switch (term->type) { michael@0: case PatternTerm::TypePatternCharacter: michael@0: switch (term->quantityType) { michael@0: case QuantifierFixedCount: michael@0: if (term->quantityCount == 1) michael@0: return backtrackPatternCharacterOnce(opIndex); michael@0: else michael@0: return backtrackPatternCharacterFixed(opIndex); michael@0: case QuantifierGreedy: michael@0: return backtrackPatternCharacterGreedy(opIndex); michael@0: case QuantifierNonGreedy: michael@0: return backtrackPatternCharacterNonGreedy(opIndex); michael@0: } michael@0: break; michael@0: michael@0: case PatternTerm::TypeCharacterClass: michael@0: switch (term->quantityType) { michael@0: case QuantifierFixedCount: michael@0: if (term->quantityCount == 1) michael@0: return backtrackCharacterClassOnce(opIndex); michael@0: else michael@0: return backtrackCharacterClassFixed(opIndex); michael@0: case QuantifierGreedy: michael@0: return backtrackCharacterClassGreedy(opIndex); michael@0: case QuantifierNonGreedy: michael@0: return backtrackCharacterClassNonGreedy(opIndex); michael@0: } michael@0: break; michael@0: michael@0: case PatternTerm::TypeAssertionBOL: michael@0: return backtrackAssertionBOL(opIndex); michael@0: michael@0: case PatternTerm::TypeAssertionEOL: michael@0: return backtrackAssertionEOL(opIndex); michael@0: michael@0: case PatternTerm::TypeAssertionWordBoundary: michael@0: return backtrackAssertionWordBoundary(opIndex); michael@0: michael@0: case PatternTerm::TypeForwardReference: michael@0: return true; michael@0: michael@0: case PatternTerm::TypeParenthesesSubpattern: michael@0: case PatternTerm::TypeParentheticalAssertion: michael@0: ASSERT_NOT_REACHED(); michael@0: return 
false; michael@0: michael@0: case PatternTerm::TypeDotStarEnclosure: michael@0: return backtrackDotStarEnclosure(opIndex); michael@0: michael@0: case PatternTerm::TypeBackReference: michael@0: return false; michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: bool generate() michael@0: { michael@0: // Forwards generate the matching code. michael@0: ASSERT(m_ops.size()); michael@0: size_t opIndex = 0; michael@0: michael@0: do { michael@0: YarrOp& op = m_ops[opIndex]; michael@0: switch (op.m_op) { michael@0: michael@0: case OpTerm: michael@0: if (!generateTerm(opIndex)) michael@0: return false; michael@0: break; michael@0: michael@0: // OpBodyAlternativeBegin/Next/End michael@0: // michael@0: // These nodes wrap the set of alternatives in the body of the regular expression. michael@0: // There may be either one or two chains of OpBodyAlternative nodes, one representing michael@0: // the 'once through' sequence of alternatives (if any exist), and one representing michael@0: // the repeating alternatives (again, if any exist). michael@0: // michael@0: // Upon normal entry to the Begin alternative, we will check that input is available. michael@0: // Reentry to the Begin alternative will take place after the check has taken place, michael@0: // and will assume that the input position has already been progressed as appropriate. michael@0: // michael@0: // Entry to subsequent Next/End alternatives occurs when the prior alternative has michael@0: // successfully completed a match - return a success state from JIT code. michael@0: // michael@0: // Next alternatives allow for reentry optimized to suit backtracking from its michael@0: // preceding alternative. It expects the input position to still be set to a position michael@0: // appropriate to its predecessor, and it will only perform an input check if the michael@0: // predecessor had a minimum size less than its own. 
michael@0: // michael@0: // In the case 'once through' expressions, the End node will also have a reentry michael@0: // point to jump to when the last alternative fails. Again, this expects the input michael@0: // position to still reflect that expected by the prior alternative. michael@0: case OpBodyAlternativeBegin: { michael@0: PatternAlternative* alternative = op.m_alternative; michael@0: michael@0: // Upon entry at the head of the set of alternatives, check if input is available michael@0: // to run the first alternative. (This progresses the input position). michael@0: op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize)); michael@0: // We will reenter after the check, and assume the input position to have been michael@0: // set as appropriate to this alternative. michael@0: op.m_reentry = label(); michael@0: michael@0: if (alternative->m_minimumSize > INT_MAX) michael@0: return false; michael@0: m_checked = alternative->m_minimumSize; michael@0: break; michael@0: } michael@0: case OpBodyAlternativeNext: michael@0: case OpBodyAlternativeEnd: { michael@0: PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; michael@0: PatternAlternative* alternative = op.m_alternative; michael@0: michael@0: // If we get here, the prior alternative matched - return success. michael@0: michael@0: // Adjust the stack pointer to remove the pattern's frame. michael@0: #if !WTF_CPU_SPARC michael@0: removeCallFrame(); michael@0: #endif michael@0: michael@0: // Load appropriate values into the return register and the first output michael@0: // slot, and return. 
In the case of pattern with a fixed size, we will michael@0: // not have yet set the value in the first michael@0: ASSERT(index != returnRegister); michael@0: if (m_pattern.m_body->m_hasFixedSize) { michael@0: move(index, returnRegister); michael@0: if (priorAlternative->m_minimumSize) michael@0: sub32(Imm32(priorAlternative->m_minimumSize), returnRegister); michael@0: if (compileMode == IncludeSubpatterns) michael@0: store32(returnRegister, output); michael@0: } else michael@0: getMatchStart(returnRegister); michael@0: if (compileMode == IncludeSubpatterns) michael@0: store32(index, Address(output, 4)); michael@0: #if WTF_CPU_X86_64 michael@0: // upper 32bit to 0 michael@0: move32(returnRegister, returnRegister); michael@0: lshiftPtr(Imm32(32), index); michael@0: orPtr(index, returnRegister); michael@0: #else michael@0: move(index, returnRegister2); michael@0: #endif michael@0: michael@0: generateReturn(); michael@0: michael@0: // This is the divide between the tail of the prior alternative, above, and michael@0: // the head of the subsequent alternative, below. michael@0: michael@0: if (op.m_op == OpBodyAlternativeNext) { michael@0: // This is the reentry point for the Next alternative. We expect any code michael@0: // that jumps here to do so with the input position matching that of the michael@0: // PRIOR alteranative, and we will only check input availability if we michael@0: // need to progress it forwards. 
michael@0: op.m_reentry = label(); michael@0: if (alternative->m_minimumSize > priorAlternative->m_minimumSize) { michael@0: add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index); michael@0: op.m_jumps.append(jumpIfNoAvailableInput()); michael@0: } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize) michael@0: sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index); michael@0: } else if (op.m_nextOp == notFound) { michael@0: // This is the reentry point for the End of 'once through' alternatives, michael@0: // jumped to when the last alternative fails to match. michael@0: op.m_reentry = label(); michael@0: sub32(Imm32(priorAlternative->m_minimumSize), index); michael@0: } michael@0: michael@0: if (op.m_op == OpBodyAlternativeNext) michael@0: m_checked += alternative->m_minimumSize; michael@0: m_checked -= priorAlternative->m_minimumSize; michael@0: break; michael@0: } michael@0: michael@0: // OpSimpleNestedAlternativeBegin/Next/End michael@0: // OpNestedAlternativeBegin/Next/End michael@0: // michael@0: // These nodes are used to handle sets of alternatives that are nested within michael@0: // subpatterns and parenthetical assertions. The 'simple' forms are used where michael@0: // we do not need to be able to backtrack back into any alternative other than michael@0: // the last, the normal forms allow backtracking into any alternative. michael@0: // michael@0: // Each Begin/Next node is responsible for planting an input check to ensure michael@0: // sufficient input is available on entry. Next nodes additionally need to michael@0: // jump to the end - Next nodes use the End node's m_jumps list to hold this michael@0: // set of jumps. 
michael@0: // michael@0: // In the non-simple forms, successful alternative matches must store a michael@0: // 'return address' using a DataLabelPtr, used to store the address to jump michael@0: // to when backtracking, to get to the code for the appropriate alternative. michael@0: case OpSimpleNestedAlternativeBegin: michael@0: case OpNestedAlternativeBegin: { michael@0: PatternTerm* term = op.m_term; michael@0: PatternAlternative* alternative = op.m_alternative; michael@0: PatternDisjunction* disjunction = term->parentheses.disjunction; michael@0: michael@0: // Calculate how much input we need to check for, and if non-zero check. michael@0: op.m_checkAdjust = alternative->m_minimumSize; michael@0: if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion)) michael@0: op.m_checkAdjust -= disjunction->m_minimumSize; michael@0: if (op.m_checkAdjust) michael@0: op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust)); michael@0: michael@0: m_checked += op.m_checkAdjust; michael@0: break; michael@0: } michael@0: case OpSimpleNestedAlternativeNext: michael@0: case OpNestedAlternativeNext: { michael@0: PatternTerm* term = op.m_term; michael@0: PatternAlternative* alternative = op.m_alternative; michael@0: PatternDisjunction* disjunction = term->parentheses.disjunction; michael@0: michael@0: // In the non-simple case, store a 'return address' so we can backtrack correctly. 
michael@0: if (op.m_op == OpNestedAlternativeNext) { michael@0: unsigned parenthesesFrameLocation = term->frameLocation; michael@0: unsigned alternativeFrameLocation = parenthesesFrameLocation; michael@0: if (term->quantityType != QuantifierFixedCount) michael@0: alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; michael@0: op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation); michael@0: } michael@0: michael@0: if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) { michael@0: // If the previous alternative matched without consuming characters then michael@0: // backtrack to try to match while consumming some input. michael@0: op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); michael@0: } michael@0: michael@0: // If we reach here then the last alternative has matched - jump to the michael@0: // End node, to skip over any further alternatives. michael@0: // michael@0: // FIXME: this is logically O(N^2) (though N can be expected to be very michael@0: // small). We could avoid this either by adding an extra jump to the JIT michael@0: // data structures, or by making backtracking code that jumps to Next michael@0: // alternatives are responsible for checking that input is available (if michael@0: // we didn't need to plant the input checks, then m_jumps would be free). michael@0: YarrOp* endOp = &m_ops[op.m_nextOp]; michael@0: while (endOp->m_nextOp != notFound) { michael@0: ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext); michael@0: endOp = &m_ops[endOp->m_nextOp]; michael@0: } michael@0: ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd); michael@0: endOp->m_jumps.append(jump()); michael@0: michael@0: // This is the entry point for the next alternative. 
michael@0: op.m_reentry = label(); michael@0: michael@0: // Calculate how much input we need to check for, and if non-zero check. michael@0: op.m_checkAdjust = alternative->m_minimumSize; michael@0: if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion)) michael@0: op.m_checkAdjust -= disjunction->m_minimumSize; michael@0: if (op.m_checkAdjust) michael@0: op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust)); michael@0: michael@0: YarrOp& lastOp = m_ops[op.m_previousOp]; michael@0: m_checked -= lastOp.m_checkAdjust; michael@0: m_checked += op.m_checkAdjust; michael@0: break; michael@0: } michael@0: case OpSimpleNestedAlternativeEnd: michael@0: case OpNestedAlternativeEnd: { michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: // In the non-simple case, store a 'return address' so we can backtrack correctly. michael@0: if (op.m_op == OpNestedAlternativeEnd) { michael@0: unsigned parenthesesFrameLocation = term->frameLocation; michael@0: unsigned alternativeFrameLocation = parenthesesFrameLocation; michael@0: if (term->quantityType != QuantifierFixedCount) michael@0: alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; michael@0: op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation); michael@0: } michael@0: michael@0: if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) { michael@0: // If the previous alternative matched without consuming characters then michael@0: // backtrack to try to match while consumming some input. michael@0: op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); michael@0: } michael@0: michael@0: // If this set of alternatives contains more than one alternative, michael@0: // then the Next nodes will have planted jumps to the End, and added michael@0: // them to this node's m_jumps list. 
michael@0: op.m_jumps.link(this); michael@0: op.m_jumps.clear(); michael@0: michael@0: YarrOp& lastOp = m_ops[op.m_previousOp]; michael@0: m_checked -= lastOp.m_checkAdjust; michael@0: break; michael@0: } michael@0: michael@0: // OpParenthesesSubpatternOnceBegin/End michael@0: // michael@0: // These nodes support (optionally) capturing subpatterns, that have a michael@0: // quantity count of 1 (this covers fixed once, and ?/?? quantifiers). michael@0: case OpParenthesesSubpatternOnceBegin: { michael@0: PatternTerm* term = op.m_term; michael@0: unsigned parenthesesFrameLocation = term->frameLocation; michael@0: const RegisterID indexTemporary = regT0; michael@0: ASSERT(term->quantityCount == 1); michael@0: michael@0: // Upon entry to a Greedy quantified set of parenthese store the index. michael@0: // We'll use this for two purposes: michael@0: // - To indicate which iteration we are on of mathing the remainder of michael@0: // the expression after the parentheses - the first, including the michael@0: // match within the parentheses, or the second having skipped over them. michael@0: // - To check for empty matches, which must be rejected. michael@0: // michael@0: // At the head of a NonGreedy set of parentheses we'll immediately set the michael@0: // value on the stack to -1 (indicating a match skipping the subpattern), michael@0: // and plant a jump to the end. We'll also plant a label to backtrack to michael@0: // to reenter the subpattern later, with a store to set up index on the michael@0: // second iteration. michael@0: // michael@0: // FIXME: for capturing parens, could use the index in the capture array? 
michael@0: if (term->quantityType == QuantifierGreedy) michael@0: storeToFrame(index, parenthesesFrameLocation); michael@0: else if (term->quantityType == QuantifierNonGreedy) { michael@0: storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); michael@0: op.m_jumps.append(jump()); michael@0: op.m_reentry = label(); michael@0: storeToFrame(index, parenthesesFrameLocation); michael@0: } michael@0: michael@0: // If the parenthese are capturing, store the starting index value to the michael@0: // captures array, offsetting as necessary. michael@0: // michael@0: // FIXME: could avoid offsetting this value in JIT code, apply michael@0: // offsets only afterwards, at the point the results array is michael@0: // being accessed. michael@0: if (term->capture() && compileMode == IncludeSubpatterns) { michael@0: int inputOffset = term->inputPosition - m_checked; michael@0: if (term->quantityType == QuantifierFixedCount) michael@0: inputOffset -= term->parentheses.disjunction->m_minimumSize; michael@0: if (inputOffset) { michael@0: move(index, indexTemporary); michael@0: add32(Imm32(inputOffset), indexTemporary); michael@0: setSubpatternStart(indexTemporary, term->parentheses.subpatternId); michael@0: } else michael@0: setSubpatternStart(index, term->parentheses.subpatternId); michael@0: } michael@0: break; michael@0: } michael@0: case OpParenthesesSubpatternOnceEnd: { michael@0: PatternTerm* term = op.m_term; michael@0: const RegisterID indexTemporary = regT0; michael@0: ASSERT(term->quantityCount == 1); michael@0: michael@0: #ifndef NDEBUG michael@0: // Runtime ASSERT to make sure that the nested alternative handled the michael@0: // "no input consumed" check. 
michael@0: if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) { michael@0: Jump pastBreakpoint; michael@0: pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); michael@0: breakpoint(); michael@0: pastBreakpoint.link(this); michael@0: } michael@0: #endif michael@0: michael@0: // If the parenthese are capturing, store the ending index value to the michael@0: // captures array, offsetting as necessary. michael@0: // michael@0: // FIXME: could avoid offsetting this value in JIT code, apply michael@0: // offsets only afterwards, at the point the results array is michael@0: // being accessed. michael@0: if (term->capture() && compileMode == IncludeSubpatterns) { michael@0: int inputOffset = term->inputPosition - m_checked; michael@0: if (inputOffset) { michael@0: move(index, indexTemporary); michael@0: add32(Imm32(inputOffset), indexTemporary); michael@0: setSubpatternEnd(indexTemporary, term->parentheses.subpatternId); michael@0: } else michael@0: setSubpatternEnd(index, term->parentheses.subpatternId); michael@0: } michael@0: michael@0: // If the parentheses are quantified Greedy then add a label to jump back michael@0: // to if get a failed match from after the parentheses. For NonGreedy michael@0: // parentheses, link the jump from before the subpattern to here. 
michael@0: if (term->quantityType == QuantifierGreedy) michael@0: op.m_reentry = label(); michael@0: else if (term->quantityType == QuantifierNonGreedy) { michael@0: YarrOp& beginOp = m_ops[op.m_previousOp]; michael@0: beginOp.m_jumps.link(this); michael@0: } michael@0: break; michael@0: } michael@0: michael@0: // OpParenthesesSubpatternTerminalBegin/End michael@0: case OpParenthesesSubpatternTerminalBegin: { michael@0: PatternTerm* term = op.m_term; michael@0: ASSERT(term->quantityType == QuantifierGreedy); michael@0: ASSERT(term->quantityCount == quantifyInfinite); michael@0: ASSERT(!term->capture()); michael@0: michael@0: // Upon entry set a label to loop back to. michael@0: op.m_reentry = label(); michael@0: michael@0: // Store the start index of the current match; we need to reject zero michael@0: // length matches. michael@0: storeToFrame(index, term->frameLocation); michael@0: break; michael@0: } michael@0: case OpParenthesesSubpatternTerminalEnd: { michael@0: YarrOp& beginOp = m_ops[op.m_previousOp]; michael@0: #ifndef NDEBUG michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: // Runtime ASSERT to make sure that the nested alternative handled the michael@0: // "no input consumed" check. michael@0: Jump pastBreakpoint; michael@0: pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); michael@0: breakpoint(); michael@0: pastBreakpoint.link(this); michael@0: #endif michael@0: michael@0: // We know that the match is non-zero, we can accept it and michael@0: // loop back up to the head of the subpattern. michael@0: jump(beginOp.m_reentry); michael@0: michael@0: // This is the entry point to jump to when we stop matching - we will michael@0: // do so once the subpattern cannot match any more. 
michael@0: op.m_reentry = label(); michael@0: break; michael@0: } michael@0: michael@0: // OpParentheticalAssertionBegin/End michael@0: case OpParentheticalAssertionBegin: { michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: // Store the current index - assertions should not update index, so michael@0: // we will need to restore it upon a successful match. michael@0: unsigned parenthesesFrameLocation = term->frameLocation; michael@0: storeToFrame(index, parenthesesFrameLocation); michael@0: michael@0: // Check michael@0: op.m_checkAdjust = m_checked - term->inputPosition; michael@0: if (op.m_checkAdjust) michael@0: sub32(Imm32(op.m_checkAdjust), index); michael@0: michael@0: m_checked -= op.m_checkAdjust; michael@0: break; michael@0: } michael@0: case OpParentheticalAssertionEnd: { michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: // Restore the input index value. michael@0: unsigned parenthesesFrameLocation = term->frameLocation; michael@0: loadFromFrame(parenthesesFrameLocation, index); michael@0: michael@0: // If inverted, a successful match of the assertion must be treated michael@0: // as a failure, so jump to backtracking. 
michael@0: if (term->invert()) { michael@0: op.m_jumps.append(jump()); michael@0: op.m_reentry = label(); michael@0: } michael@0: michael@0: YarrOp& lastOp = m_ops[op.m_previousOp]; michael@0: m_checked += lastOp.m_checkAdjust; michael@0: break; michael@0: } michael@0: michael@0: case OpMatchFailed: michael@0: #if !WTF_CPU_SPARC michael@0: removeCallFrame(); michael@0: #endif michael@0: #if WTF_CPU_X86_64 michael@0: move(TrustedImm32(int(WTF::notFound)), returnRegister); michael@0: #else michael@0: move(TrustedImmPtr((void*)WTF::notFound), returnRegister); michael@0: move(TrustedImm32(0), returnRegister2); michael@0: #endif michael@0: generateReturn(); michael@0: break; michael@0: } michael@0: michael@0: ++opIndex; michael@0: } while (opIndex < m_ops.size()); michael@0: michael@0: return true; michael@0: } michael@0: michael@0: bool backtrack() michael@0: { michael@0: // Backwards generate the backtracking code. michael@0: size_t opIndex = m_ops.size(); michael@0: ASSERT(opIndex); michael@0: michael@0: do { michael@0: --opIndex; michael@0: YarrOp& op = m_ops[opIndex]; michael@0: switch (op.m_op) { michael@0: michael@0: case OpTerm: michael@0: if (!backtrackTerm(opIndex)) michael@0: return false; michael@0: break; michael@0: michael@0: // OpBodyAlternativeBegin/Next/End michael@0: // michael@0: // For each Begin/Next node representing an alternative, we need to decide what to do michael@0: // in two circumstances: michael@0: // - If we backtrack back into this node, from within the alternative. michael@0: // - If the input check at the head of the alternative fails (if this exists). michael@0: // michael@0: // We treat these two cases differently since in the former case we have slightly michael@0: // more information - since we are backtracking out of a prior alternative we know michael@0: // that at least enough input was available to run it. 
For example, given the regular michael@0: // expression /a|b/, if we backtrack out of the first alternative (a failed pattern michael@0: // character match of 'a'), then we need not perform an additional input availability michael@0: // check before running the second alternative. michael@0: // michael@0: // Backtracking required differs for the last alternative, which in the case of the michael@0: // repeating set of alternatives must loop. The code generated for the last alternative michael@0: // will also be used to handle all input check failures from any prior alternatives - michael@0: // these require similar functionality, in seeking the next available alternative for michael@0: // which there is sufficient input. michael@0: // michael@0: // Since backtracking of all other alternatives simply requires us to link backtracks michael@0: // to the reentry point for the subsequent alternative, we will only be generating any michael@0: // code when backtracking the last alternative. michael@0: case OpBodyAlternativeBegin: michael@0: case OpBodyAlternativeNext: { michael@0: PatternAlternative* alternative = op.m_alternative; michael@0: michael@0: if (op.m_op == OpBodyAlternativeNext) { michael@0: PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; michael@0: m_checked += priorAlternative->m_minimumSize; michael@0: } michael@0: m_checked -= alternative->m_minimumSize; michael@0: michael@0: // Is this the last alternative? If not, then if we backtrack to this point we just michael@0: // need to jump to try to match the next alternative. 
michael@0: if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) { michael@0: m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this); michael@0: break; michael@0: } michael@0: YarrOp& endOp = m_ops[op.m_nextOp]; michael@0: michael@0: YarrOp* beginOp = &op; michael@0: while (beginOp->m_op != OpBodyAlternativeBegin) { michael@0: ASSERT(beginOp->m_op == OpBodyAlternativeNext); michael@0: beginOp = &m_ops[beginOp->m_previousOp]; michael@0: } michael@0: michael@0: bool onceThrough = endOp.m_nextOp == notFound; michael@0: michael@0: // First, generate code to handle cases where we backtrack out of an attempted match michael@0: // of the last alternative. If this is a 'once through' set of alternatives then we michael@0: // have nothing to do - link this straight through to the End. michael@0: if (onceThrough) michael@0: m_backtrackingState.linkTo(endOp.m_reentry, this); michael@0: else { michael@0: // If we don't need to move the input poistion, and the pattern has a fixed size michael@0: // (in which case we omit the store of the start index until the pattern has matched) michael@0: // then we can just link the backtrack out of the last alternative straight to the michael@0: // head of the first alternative. michael@0: if (m_pattern.m_body->m_hasFixedSize michael@0: && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) michael@0: && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1)) michael@0: m_backtrackingState.linkTo(beginOp->m_reentry, this); michael@0: else { michael@0: // We need to generate a trampoline of code to execute before looping back michael@0: // around to the first alternative. michael@0: m_backtrackingState.link(this); michael@0: michael@0: // If the pattern size is not fixed, then store the start index, for use if we match. 
michael@0: if (!m_pattern.m_body->m_hasFixedSize) { michael@0: if (alternative->m_minimumSize == 1) michael@0: setMatchStart(index); michael@0: else { michael@0: move(index, regT0); michael@0: if (alternative->m_minimumSize) michael@0: sub32(Imm32(alternative->m_minimumSize - 1), regT0); michael@0: else michael@0: add32(TrustedImm32(1), regT0); michael@0: setMatchStart(regT0); michael@0: } michael@0: } michael@0: michael@0: // Generate code to loop. Check whether the last alternative is longer than the michael@0: // first (e.g. /a|xy/ or /a|xyz/). michael@0: if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) { michael@0: // We want to loop, and increment input position. If the delta is 1, it is michael@0: // already correctly incremented, if more than one then decrement as appropriate. michael@0: unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize; michael@0: ASSERT(delta); michael@0: if (delta != 1) michael@0: sub32(Imm32(delta - 1), index); michael@0: jump(beginOp->m_reentry); michael@0: } else { michael@0: // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot michael@0: // be sufficent input available to handle this, so just fall through. michael@0: unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize; michael@0: if (delta != 0xFFFFFFFFu) { michael@0: // We need to check input because we are incrementing the input. michael@0: add32(Imm32(delta + 1), index); michael@0: checkInput().linkTo(beginOp->m_reentry, this); michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: // We can reach this point in the code in two ways: michael@0: // - Fallthrough from the code above (a repeating alternative backtracked out of its michael@0: // last alternative, and did not have sufficent input to run the first). michael@0: // - We will loop back up to the following label when a releating alternative loops, michael@0: // following a failed input check. 
michael@0: // michael@0: // Either way, we have just failed the input check for the first alternative. michael@0: Label firstInputCheckFailed(this); michael@0: michael@0: // Generate code to handle input check failures from alternatives except the last. michael@0: // prevOp is the alternative we're handling a bail out from (initially Begin), and michael@0: // nextOp is the alternative we will be attempting to reenter into. michael@0: // michael@0: // We will link input check failures from the forwards matching path back to the code michael@0: // that can handle them. michael@0: YarrOp* prevOp = beginOp; michael@0: YarrOp* nextOp = &m_ops[beginOp->m_nextOp]; michael@0: while (nextOp->m_op != OpBodyAlternativeEnd) { michael@0: prevOp->m_jumps.link(this); michael@0: michael@0: // We only get here if an input check fails, it is only worth checking again michael@0: // if the next alternative has a minimum size less than the last. michael@0: if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) { michael@0: // FIXME: if we added an extra label to YarrOp, we could avoid needing to michael@0: // subtract delta back out, and reduce this code. Should performance test michael@0: // the benefit of this. michael@0: unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize; michael@0: sub32(Imm32(delta), index); michael@0: Jump fail = jumpIfNoAvailableInput(); michael@0: add32(Imm32(delta), index); michael@0: jump(nextOp->m_reentry); michael@0: fail.link(this); michael@0: } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize) michael@0: add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index); michael@0: prevOp = nextOp; michael@0: nextOp = &m_ops[nextOp->m_nextOp]; michael@0: } michael@0: michael@0: // We fall through to here if there is insufficient input to run the last alternative. 
michael@0: michael@0: // If there is insufficient input to run the last alternative, then for 'once through' michael@0: // alternatives we are done - just jump back up into the forwards matching path at the End. michael@0: if (onceThrough) { michael@0: op.m_jumps.linkTo(endOp.m_reentry, this); michael@0: jump(endOp.m_reentry); michael@0: break; michael@0: } michael@0: michael@0: // For repeating alternatives, link any input check failure from the last alternative to michael@0: // this point. michael@0: op.m_jumps.link(this); michael@0: michael@0: bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize; michael@0: michael@0: // Check for cases where input position is already incremented by 1 for the last michael@0: // alternative (this is particularly useful where the minimum size of the body michael@0: // disjunction is 0, e.g. /a*|b/). michael@0: if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) { michael@0: // index is already incremented by 1, so just store it now! michael@0: setMatchStart(index); michael@0: needsToUpdateMatchStart = false; michael@0: } michael@0: michael@0: // Check whether there is sufficient input to loop. Increment the input position by michael@0: // one, and check. Also add in the minimum disjunction size before checking - there michael@0: // is no point in looping if we're just going to fail all the input checks around michael@0: // the next iteration. michael@0: ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize); michael@0: if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) { michael@0: // If the last alternative had the same minimum size as the disjunction, michael@0: // just simply increment input pos by 1, no adjustment based on minimum size. michael@0: add32(TrustedImm32(1), index); michael@0: } else { michael@0: // If the minumum for the last alternative was one greater than than that michael@0: // for the disjunction, we're already progressed by 1, nothing to do! 
michael@0: unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1; michael@0: if (delta) michael@0: sub32(Imm32(delta), index); michael@0: } michael@0: Jump matchFailed = jumpIfNoAvailableInput(); michael@0: michael@0: if (needsToUpdateMatchStart) { michael@0: if (!m_pattern.m_body->m_minimumSize) michael@0: setMatchStart(index); michael@0: else { michael@0: move(index, regT0); michael@0: sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0); michael@0: setMatchStart(regT0); michael@0: } michael@0: } michael@0: michael@0: // Calculate how much more input the first alternative requires than the minimum michael@0: // for the body as a whole. If no more is needed then we dont need an additional michael@0: // input check here - jump straight back up to the start of the first alternative. michael@0: if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) michael@0: jump(beginOp->m_reentry); michael@0: else { michael@0: if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize) michael@0: add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index); michael@0: else michael@0: sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index); michael@0: checkInput().linkTo(beginOp->m_reentry, this); michael@0: jump(firstInputCheckFailed); michael@0: } michael@0: michael@0: // We jump to here if we iterate to the point that there is insufficient input to michael@0: // run any matches, and need to return a failure state from JIT code. 
michael@0: matchFailed.link(this); michael@0: michael@0: #if !WTF_CPU_SPARC michael@0: removeCallFrame(); michael@0: #endif michael@0: #if WTF_CPU_X86_64 michael@0: move(TrustedImm32(int(WTF::notFound)), returnRegister); michael@0: #else michael@0: move(TrustedImmPtr((void*)WTF::notFound), returnRegister); michael@0: move(TrustedImm32(0), returnRegister2); michael@0: #endif michael@0: generateReturn(); michael@0: break; michael@0: } michael@0: case OpBodyAlternativeEnd: { michael@0: // We should never backtrack back into a body disjunction. michael@0: ASSERT(m_backtrackingState.isEmpty()); michael@0: michael@0: PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; michael@0: m_checked += priorAlternative->m_minimumSize; michael@0: break; michael@0: } michael@0: michael@0: // OpSimpleNestedAlternativeBegin/Next/End michael@0: // OpNestedAlternativeBegin/Next/End michael@0: // michael@0: // Generate code for when we backtrack back out of an alternative into michael@0: // a Begin or Next node, or when the entry input count check fails. If michael@0: // there are more alternatives we need to jump to the next alternative, michael@0: // if not we backtrack back out of the current set of parentheses. michael@0: // michael@0: // In the case of non-simple nested assertions we need to also link the michael@0: // 'return address' appropriately to backtrack back out into the correct michael@0: // alternative. 
michael@0: case OpSimpleNestedAlternativeBegin: michael@0: case OpSimpleNestedAlternativeNext: michael@0: case OpNestedAlternativeBegin: michael@0: case OpNestedAlternativeNext: { michael@0: YarrOp& nextOp = m_ops[op.m_nextOp]; michael@0: bool isBegin = op.m_previousOp == notFound; michael@0: bool isLastAlternative = nextOp.m_nextOp == notFound; michael@0: ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin)); michael@0: ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd)); michael@0: michael@0: // Treat an input check failure the same as a failed match. michael@0: m_backtrackingState.append(op.m_jumps); michael@0: michael@0: // Set the backtracks to jump to the appropriate place. We may need michael@0: // to link the backtracks in one of three different way depending on michael@0: // the type of alternative we are dealing with: michael@0: // - A single alternative, with no simplings. michael@0: // - The last alternative of a set of two or more. michael@0: // - An alternative other than the last of a set of two or more. michael@0: // michael@0: // In the case of a single alternative on its own, we don't need to michael@0: // jump anywhere - if the alternative fails to match we can just michael@0: // continue to backtrack out of the parentheses without jumping. michael@0: // michael@0: // In the case of the last alternative in a set of more than one, we michael@0: // need to jump to return back out to the beginning. We'll do so by michael@0: // adding a jump to the End node's m_jumps list, and linking this michael@0: // when we come to generate the Begin node. For alternatives other michael@0: // than the last, we need to jump to the next alternative. michael@0: // michael@0: // If the alternative had adjusted the input position we must link michael@0: // backtracking to here, correct, and then jump on. 
If not we can michael@0: // link the backtracks directly to their destination. michael@0: if (op.m_checkAdjust) { michael@0: // Handle the cases where we need to link the backtracks here. michael@0: m_backtrackingState.link(this); michael@0: sub32(Imm32(op.m_checkAdjust), index); michael@0: if (!isLastAlternative) { michael@0: // An alternative that is not the last should jump to its successor. michael@0: jump(nextOp.m_reentry); michael@0: } else if (!isBegin) { michael@0: // The last of more than one alternatives must jump back to the beginning. michael@0: nextOp.m_jumps.append(jump()); michael@0: } else { michael@0: // A single alternative on its own can fall through. michael@0: m_backtrackingState.fallthrough(); michael@0: } michael@0: } else { michael@0: // Handle the cases where we can link the backtracks directly to their destinations. michael@0: if (!isLastAlternative) { michael@0: // An alternative that is not the last should jump to its successor. michael@0: m_backtrackingState.linkTo(nextOp.m_reentry, this); michael@0: } else if (!isBegin) { michael@0: // The last of more than one alternatives must jump back to the beginning. michael@0: m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this); michael@0: } michael@0: // In the case of a single alternative on its own do nothing - it can fall through. michael@0: } michael@0: michael@0: // If there is a backtrack jump from a zero length match link it here. michael@0: if (op.m_zeroLengthMatch.isSet()) michael@0: m_backtrackingState.append(op.m_zeroLengthMatch); michael@0: michael@0: // At this point we've handled the backtracking back into this node. michael@0: // Now link any backtracks that need to jump to here. michael@0: michael@0: // For non-simple alternatives, link the alternative's 'return address' michael@0: // so that we backtrack back out into the previous alternative. 
michael@0: if (op.m_op == OpNestedAlternativeNext) michael@0: m_backtrackingState.append(op.m_returnAddress); michael@0: michael@0: // If there is more than one alternative, then the last alternative will michael@0: // have planted a jump to be linked to the end. This jump was added to the michael@0: // End node's m_jumps list. If we are back at the beginning, link it here. michael@0: if (isBegin) { michael@0: YarrOp* endOp = &m_ops[op.m_nextOp]; michael@0: while (endOp->m_nextOp != notFound) { michael@0: ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext); michael@0: endOp = &m_ops[endOp->m_nextOp]; michael@0: } michael@0: ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd); michael@0: m_backtrackingState.append(endOp->m_jumps); michael@0: } michael@0: michael@0: if (!isBegin) { michael@0: YarrOp& lastOp = m_ops[op.m_previousOp]; michael@0: m_checked += lastOp.m_checkAdjust; michael@0: } michael@0: m_checked -= op.m_checkAdjust; michael@0: break; michael@0: } michael@0: case OpSimpleNestedAlternativeEnd: michael@0: case OpNestedAlternativeEnd: { michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: // If there is a backtrack jump from a zero length match link it here. michael@0: if (op.m_zeroLengthMatch.isSet()) michael@0: m_backtrackingState.append(op.m_zeroLengthMatch); michael@0: michael@0: // If we backtrack into the end of a simple subpattern do nothing; michael@0: // just continue through into the last alternative. If we backtrack michael@0: // into the end of a non-simple set of alterntives we need to jump michael@0: // to the backtracking return address set up during generation. michael@0: if (op.m_op == OpNestedAlternativeEnd) { michael@0: m_backtrackingState.link(this); michael@0: michael@0: // Plant a jump to the return address. 
// NOTE(review): This file is an `hg annotate` dump (each original source line is prefixed
// with "michael@0: " and many are fused onto one physical line), so it is not compilable
// C++ as-is — TODO: recover the un-annotated upstream YarrJIT.cpp. The segment below is
// the tail of the backtracking code generator: the remaining backtrack cases for
// once-through parentheses (OnceBegin/OnceEnd), terminal parentheses, parenthetical
// assertions and OpMatchFailed, followed by the start of opCompileParenthesesSubpattern.
michael@0: unsigned parenthesesFrameLocation = term->frameLocation; michael@0: unsigned alternativeFrameLocation = parenthesesFrameLocation; michael@0: if (term->quantityType != QuantifierFixedCount) michael@0: alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; michael@0: loadFromFrameAndJump(alternativeFrameLocation); michael@0: michael@0: // Link the DataLabelPtr associated with the end of the last michael@0: // alternative to this point. michael@0: m_backtrackingState.append(op.m_returnAddress); michael@0: } michael@0: michael@0: YarrOp& lastOp = m_ops[op.m_previousOp]; michael@0: m_checked += lastOp.m_checkAdjust; michael@0: break; michael@0: } michael@0: michael@0: // OpParenthesesSubpatternOnceBegin/End michael@0: // michael@0: // When we are backtracking back out of a capturing subpattern we need michael@0: // to clear the start index in the matches output array, to record that michael@0: // this subpattern has not been captured. michael@0: // michael@0: // When backtracking back out of a Greedy quantified subpattern we need michael@0: // to catch this, and try running the remainder of the alternative after michael@0: // the subpattern again, skipping the parentheses. michael@0: // michael@0: // Upon backtracking back into a quantified set of parentheses we need to michael@0: // check whether we were currently skipping the subpattern. If not, we michael@0: // can backtrack into them, if we were we need to either backtrack back michael@0: // out of the start of the parentheses, or jump back to the forwards michael@0: // matching start, depending on whether the match is Greedy or NonGreedy. michael@0: case OpParenthesesSubpatternOnceBegin: { michael@0: PatternTerm* term = op.m_term; michael@0: ASSERT(term->quantityCount == 1); michael@0: michael@0: // We only need to backtrack to this point if capturing or greedy. 
// Backtracking into OnceBegin: clear the capture (if any) and, for Greedy, mark the
// stack-frame flag -1 ("skipped") and retry the continuation after the parentheses.
michael@0: if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) { michael@0: m_backtrackingState.link(this); michael@0: michael@0: // If capturing, clear the capture (we only need to reset start). michael@0: if (term->capture() && compileMode == IncludeSubpatterns) michael@0: clearSubpatternStart(term->parentheses.subpatternId); michael@0: michael@0: // If Greedy, jump to the end. michael@0: if (term->quantityType == QuantifierGreedy) { michael@0: // Clear the flag in the stackframe indicating we ran through the subpattern. michael@0: unsigned parenthesesFrameLocation = term->frameLocation; michael@0: storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); michael@0: // Jump to after the parentheses, skipping the subpattern. michael@0: jump(m_ops[op.m_nextOp].m_reentry); michael@0: // A backtrack from after the parentheses, when skipping the subpattern, michael@0: // will jump back to here. michael@0: op.m_jumps.link(this); michael@0: } michael@0: michael@0: m_backtrackingState.fallthrough(); michael@0: } michael@0: break; michael@0: } michael@0: case OpParenthesesSubpatternOnceEnd: { michael@0: PatternTerm* term = op.m_term; michael@0: michael@0: if (term->quantityType != QuantifierFixedCount) { michael@0: m_backtrackingState.link(this); michael@0: michael@0: // Check whether we should backtrack back into the parentheses, or if we michael@0: // are currently in a state where we had skipped over the subpattern michael@0: // (in which case the flag value on the stack will be -1). michael@0: unsigned parenthesesFrameLocation = term->frameLocation; michael@0: Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1)); michael@0: michael@0: if (term->quantityType == QuantifierGreedy) { michael@0: // For Greedy parentheses, we skip after having already tried going michael@0: // through the subpattern, so if we get here we're done. 
michael@0: YarrOp& beginOp = m_ops[op.m_previousOp]; michael@0: beginOp.m_jumps.append(hadSkipped); michael@0: } else { michael@0: // For NonGreedy parentheses, we try skipping the subpattern first, michael@0: // so if we get here we need to try running through the subpattern michael@0: // next. Jump back to the start of the parentheses in the forwards michael@0: // matching path. michael@0: ASSERT(term->quantityType == QuantifierNonGreedy); michael@0: YarrOp& beginOp = m_ops[op.m_previousOp]; michael@0: hadSkipped.linkTo(beginOp.m_reentry, this); michael@0: } michael@0: michael@0: m_backtrackingState.fallthrough(); michael@0: } michael@0: michael@0: m_backtrackingState.append(op.m_jumps); michael@0: break; michael@0: } michael@0: michael@0: // OpParenthesesSubpatternTerminalBegin/End michael@0: // michael@0: // Terminal subpatterns will always match - there is nothing after them to michael@0: // force a backtrack, and they have a minimum count of 0, and as such will michael@0: // always produce an acceptable result. michael@0: case OpParenthesesSubpatternTerminalBegin: { michael@0: // We will backtrack to this point once the subpattern cannot match any michael@0: // more. Since no match is accepted as a successful match (we are Greedy michael@0: // quantified with a minimum of zero) jump back to the forwards matching michael@0: // path at the end. michael@0: YarrOp& endOp = m_ops[op.m_nextOp]; michael@0: m_backtrackingState.linkTo(endOp.m_reentry, this); michael@0: break; michael@0: } michael@0: case OpParenthesesSubpatternTerminalEnd: michael@0: // We should never be backtracking to here (hence the 'terminal' in the name). 
michael@0: ASSERT(m_backtrackingState.isEmpty()); michael@0: m_backtrackingState.append(op.m_jumps); michael@0: break; michael@0: michael@0: // OpParentheticalAssertionBegin/End michael@0: case OpParentheticalAssertionBegin: { michael@0: PatternTerm* term = op.m_term; michael@0: YarrOp& endOp = m_ops[op.m_nextOp]; michael@0: michael@0: // We need to handle the backtracks upon backtracking back out michael@0: // of a parenthetical assertion if either we need to correct michael@0: // the input index, or the assertion was inverted. michael@0: if (op.m_checkAdjust || term->invert()) { michael@0: m_backtrackingState.link(this); michael@0: michael@0: if (op.m_checkAdjust) michael@0: add32(Imm32(op.m_checkAdjust), index); michael@0: michael@0: // In an inverted assertion failure to match the subpattern michael@0: // is treated as a successful match - jump to the end of the michael@0: // subpattern. We already have adjusted the input position michael@0: // back to that before the assertion, which is correct. michael@0: if (term->invert()) michael@0: jump(endOp.m_reentry); michael@0: michael@0: m_backtrackingState.fallthrough(); michael@0: } michael@0: michael@0: // The End node's jump list will contain any backtracks into michael@0: // the end of the assertion. Also, if inverted, we will have michael@0: // added the failure caused by a successful match to this. michael@0: m_backtrackingState.append(endOp.m_jumps); michael@0: michael@0: m_checked += op.m_checkAdjust; michael@0: break; michael@0: } michael@0: case OpParentheticalAssertionEnd: { michael@0: // FIXME: We should really be clearing any nested subpattern michael@0: // matches on bailing out from after the pattern. Firefox has michael@0: // this bug too (presumably because they use YARR!) michael@0: michael@0: // Never backtrack into an assertion; later failures bail to before the begin. 
// End of the backtrack() case dispatch; the loop runs while opIndex is non-zero and the
// function reports success. The start of backtrack() is outside this view.
michael@0: m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this); michael@0: michael@0: YarrOp& lastOp = m_ops[op.m_previousOp]; michael@0: m_checked -= lastOp.m_checkAdjust; michael@0: break; michael@0: } michael@0: michael@0: case OpMatchFailed: michael@0: break; michael@0: } michael@0: michael@0: } while (opIndex); michael@0: michael@0: return true; michael@0: } michael@0: michael@0: // Compilation methods: michael@0: // ==================== michael@0: michael@0: // opCompileParenthesesSubpattern michael@0: // Emits ops for a subpattern (set of parentheses). These consist michael@0: // of a set of alternatives wrapped in an outer set of nodes for michael@0: // the parentheses. michael@0: // Supported types of parentheses are 'Once' (quantityCount == 1) michael@0: // and 'Terminal' (non-capturing parentheses quantified as greedy michael@0: // and infinite). michael@0: // Alternatives will use the 'Simple' set of ops if either the michael@0: // subpattern is terminal (in which case we will never need to michael@0: // backtrack), or if the subpattern only contains one alternative. michael@0: void opCompileParenthesesSubpattern(PatternTerm* term) michael@0: { michael@0: YarrOpCode parenthesesBeginOpCode; michael@0: YarrOpCode parenthesesEndOpCode; michael@0: YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin; michael@0: YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext; michael@0: YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd; michael@0: michael@0: // We can currently only compile quantity 1 subpatterns that are michael@0: // not copies. We generate a copy in the case of a range quantifier, michael@0: // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to michael@0: // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ respectively). 
// (hg-annotate dump continues.) Body of opCompileParenthesesSubpattern: choose the
// 'Once' op codes for quantity-1 non-copy subpatterns (with non-simple alternative ops
// when there is more than one alternative), the 'Terminal' op codes for terminal groups
// (falling back to the interpreter when any of several alternatives can match empty),
// and set m_shouldFallBack for everything else; then emit and doubly-link the op nodes.
The problem michael@0: // comes where the subpattern is capturing, in which case we would michael@0: // need to restore the capture from the first subpattern upon a michael@0: // failure in the second. michael@0: if (term->quantityCount == 1 && !term->parentheses.isCopy) { michael@0: // Select the 'Once' nodes. michael@0: parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin; michael@0: parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd; michael@0: michael@0: // If there is more than one alternative we cannot use the 'simple' nodes. michael@0: if (term->parentheses.disjunction->m_alternatives.size() != 1) { michael@0: alternativeBeginOpCode = OpNestedAlternativeBegin; michael@0: alternativeNextOpCode = OpNestedAlternativeNext; michael@0: alternativeEndOpCode = OpNestedAlternativeEnd; michael@0: } michael@0: } else if (term->parentheses.isTerminal) { michael@0: // Terminal groups are optimized on the assumption that matching will never michael@0: // backtrack into the terminal group. But this is false if there is more michael@0: // than one alternative and one of the alternatives can match empty. In that michael@0: // case, the empty match is counted as a failure, so we would need to backtrack. michael@0: // The backtracking code doesn't handle this case correctly, so we fall back michael@0: // to the interpreter. michael@0: Vector& alternatives = term->parentheses.disjunction->m_alternatives; michael@0: if (alternatives.size() != 1) { michael@0: for (unsigned i = 0; i < alternatives.size(); ++i) { michael@0: if (alternatives[i]->m_minimumSize == 0) { michael@0: m_shouldFallBack = true; michael@0: return; michael@0: } michael@0: } michael@0: } michael@0: michael@0: // Select the 'Terminal' nodes. michael@0: parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin; michael@0: parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd; michael@0: } else { michael@0: // This subpattern is not supported by the JIT. 
// NOTE(review): `Vector&` here (and elsewhere) has lost its template argument list in
// this dump — presumably Vector<PatternAlternative*>; confirm against upstream source.
michael@0: m_shouldFallBack = true; michael@0: return; michael@0: } michael@0: michael@0: size_t parenBegin = m_ops.size(); michael@0: m_ops.append(parenthesesBeginOpCode); michael@0: michael@0: m_ops.append(alternativeBeginOpCode); michael@0: m_ops.last().m_previousOp = notFound; michael@0: m_ops.last().m_term = term; michael@0: Vector& alternatives = term->parentheses.disjunction->m_alternatives; michael@0: for (unsigned i = 0; i < alternatives.size(); ++i) { michael@0: size_t lastOpIndex = m_ops.size() - 1; michael@0: michael@0: PatternAlternative* nestedAlternative = alternatives[i]; michael@0: opCompileAlternative(nestedAlternative); michael@0: michael@0: size_t thisOpIndex = m_ops.size(); michael@0: m_ops.append(YarrOp(alternativeNextOpCode)); michael@0: michael@0: YarrOp& lastOp = m_ops[lastOpIndex]; michael@0: YarrOp& thisOp = m_ops[thisOpIndex]; michael@0: michael@0: lastOp.m_alternative = nestedAlternative; michael@0: lastOp.m_nextOp = thisOpIndex; michael@0: thisOp.m_previousOp = lastOpIndex; michael@0: thisOp.m_term = term; michael@0: } michael@0: YarrOp& lastOp = m_ops.last(); michael@0: ASSERT(lastOp.m_op == alternativeNextOpCode); michael@0: lastOp.m_op = alternativeEndOpCode; michael@0: lastOp.m_alternative = 0; michael@0: lastOp.m_nextOp = notFound; michael@0: michael@0: size_t parenEnd = m_ops.size(); michael@0: m_ops.append(parenthesesEndOpCode); michael@0: michael@0: m_ops[parenBegin].m_term = term; michael@0: m_ops[parenBegin].m_previousOp = notFound; michael@0: m_ops[parenBegin].m_nextOp = parenEnd; michael@0: m_ops[parenEnd].m_term = term; michael@0: m_ops[parenEnd].m_previousOp = parenBegin; michael@0: m_ops[parenEnd].m_nextOp = notFound; michael@0: } michael@0: michael@0: // opCompileParentheticalAssertion michael@0: // Emits ops for a parenthetical assertion. 
// (hg-annotate dump continues.) opCompileParentheticalAssertion: wraps the assertion's
// alternatives in OpSimpleNestedAlternativeBegin/Next/End nodes inside an outer
// OpParentheticalAssertionBegin/End pair, doubly-linking begin/end via m_ops indices.
These consist of an michael@0: // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping michael@0: // the alternatives, with these wrapped by an outer pair of michael@0: // OpParentheticalAssertionBegin/End nodes. michael@0: // We can always use the OpSimpleNestedAlternative nodes in the michael@0: // case of parenthetical assertions since these only ever match michael@0: // once, and will never backtrack back into the assertion. michael@0: void opCompileParentheticalAssertion(PatternTerm* term) michael@0: { michael@0: size_t parenBegin = m_ops.size(); michael@0: m_ops.append(OpParentheticalAssertionBegin); michael@0: michael@0: m_ops.append(OpSimpleNestedAlternativeBegin); michael@0: m_ops.last().m_previousOp = notFound; michael@0: m_ops.last().m_term = term; michael@0: Vector& alternatives = term->parentheses.disjunction->m_alternatives; michael@0: for (unsigned i = 0; i < alternatives.size(); ++i) { michael@0: size_t lastOpIndex = m_ops.size() - 1; michael@0: michael@0: PatternAlternative* nestedAlternative = alternatives[i]; michael@0: opCompileAlternative(nestedAlternative); michael@0: michael@0: size_t thisOpIndex = m_ops.size(); michael@0: m_ops.append(YarrOp(OpSimpleNestedAlternativeNext)); michael@0: michael@0: YarrOp& lastOp = m_ops[lastOpIndex]; michael@0: YarrOp& thisOp = m_ops[thisOpIndex]; michael@0: michael@0: lastOp.m_alternative = nestedAlternative; michael@0: lastOp.m_nextOp = thisOpIndex; michael@0: thisOp.m_previousOp = lastOpIndex; michael@0: thisOp.m_term = term; michael@0: } michael@0: YarrOp& lastOp = m_ops.last(); michael@0: ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext); michael@0: lastOp.m_op = OpSimpleNestedAlternativeEnd; michael@0: lastOp.m_alternative = 0; michael@0: lastOp.m_nextOp = notFound; michael@0: michael@0: size_t parenEnd = m_ops.size(); michael@0: m_ops.append(OpParentheticalAssertionEnd); michael@0: michael@0: m_ops[parenBegin].m_term = term; michael@0: m_ops[parenBegin].m_previousOp = notFound; michael@0: 
// opCompileAlternative: per-term dispatch — subpatterns and assertions recurse into
// their own op compilers; every other term type becomes a single appended op node.
m_ops[parenBegin].m_nextOp = parenEnd; michael@0: m_ops[parenEnd].m_term = term; michael@0: m_ops[parenEnd].m_previousOp = parenBegin; michael@0: m_ops[parenEnd].m_nextOp = notFound; michael@0: } michael@0: michael@0: // opCompileAlternative michael@0: // Called to emit nodes for all terms in an alternative. michael@0: void opCompileAlternative(PatternAlternative* alternative) michael@0: { michael@0: optimizeAlternative(alternative); michael@0: michael@0: for (unsigned i = 0; i < alternative->m_terms.size(); ++i) { michael@0: PatternTerm* term = &alternative->m_terms[i]; michael@0: michael@0: switch (term->type) { michael@0: case PatternTerm::TypeParenthesesSubpattern: michael@0: opCompileParenthesesSubpattern(term); michael@0: break; michael@0: michael@0: case PatternTerm::TypeParentheticalAssertion: michael@0: opCompileParentheticalAssertion(term); michael@0: break; michael@0: michael@0: default: michael@0: m_ops.append(term); michael@0: } michael@0: } michael@0: } michael@0: michael@0: // opCompileBody michael@0: // This method compiles the body disjunction of the regular expression. michael@0: // The body consists of two sets of alternatives - zero or more 'once michael@0: // through' (BOL anchored) alternatives, followed by zero or more michael@0: // repeated alternatives. michael@0: // For each of these two sets of alternatives, if not empty they will be michael@0: // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the michael@0: // 'begin' node referencing the first alternative, and 'next' nodes michael@0: // referencing any further alternatives. The begin/next/end nodes are michael@0: // linked together in a doubly linked list. In the case of repeating michael@0: // alternatives, the end node is also linked back to the beginning. michael@0: // If no repeating alternatives exist, then a OpMatchFailed node exists michael@0: // to return the failing result. 
// (hg-annotate dump continues.) opCompileBody: first emits the 'once through'
// (BOL-anchored) alternatives as one Begin/Next/End group, then the repeated
// alternatives as a second group whose End node links back to its Begin (repeatLoop);
// if only once-through alternatives exist, an OpMatchFailed node terminates matching.
michael@0: void opCompileBody(PatternDisjunction* disjunction) michael@0: { michael@0: Vector& alternatives = disjunction->m_alternatives; michael@0: size_t currentAlternativeIndex = 0; michael@0: michael@0: // Emit the 'once through' alternatives. michael@0: if (alternatives.size() && alternatives[0]->onceThrough()) { michael@0: m_ops.append(YarrOp(OpBodyAlternativeBegin)); michael@0: m_ops.last().m_previousOp = notFound; michael@0: michael@0: do { michael@0: size_t lastOpIndex = m_ops.size() - 1; michael@0: PatternAlternative* alternative = alternatives[currentAlternativeIndex]; michael@0: opCompileAlternative(alternative); michael@0: michael@0: size_t thisOpIndex = m_ops.size(); michael@0: m_ops.append(YarrOp(OpBodyAlternativeNext)); michael@0: michael@0: YarrOp& lastOp = m_ops[lastOpIndex]; michael@0: YarrOp& thisOp = m_ops[thisOpIndex]; michael@0: michael@0: lastOp.m_alternative = alternative; michael@0: lastOp.m_nextOp = thisOpIndex; michael@0: thisOp.m_previousOp = lastOpIndex; michael@0: michael@0: ++currentAlternativeIndex; michael@0: } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough()); michael@0: michael@0: YarrOp& lastOp = m_ops.last(); michael@0: michael@0: ASSERT(lastOp.m_op == OpBodyAlternativeNext); michael@0: lastOp.m_op = OpBodyAlternativeEnd; michael@0: lastOp.m_alternative = 0; michael@0: lastOp.m_nextOp = notFound; michael@0: } michael@0: michael@0: if (currentAlternativeIndex == alternatives.size()) { michael@0: m_ops.append(YarrOp(OpMatchFailed)); michael@0: return; michael@0: } michael@0: michael@0: // Emit the repeated alternatives. 
// generateEnter() begins at the end of this segment: the per-architecture function
// prologue (callee-saved register pushes, argument register setup).
michael@0: size_t repeatLoop = m_ops.size(); michael@0: m_ops.append(YarrOp(OpBodyAlternativeBegin)); michael@0: m_ops.last().m_previousOp = notFound; michael@0: do { michael@0: size_t lastOpIndex = m_ops.size() - 1; michael@0: PatternAlternative* alternative = alternatives[currentAlternativeIndex]; michael@0: ASSERT(!alternative->onceThrough()); michael@0: opCompileAlternative(alternative); michael@0: michael@0: size_t thisOpIndex = m_ops.size(); michael@0: m_ops.append(YarrOp(OpBodyAlternativeNext)); michael@0: michael@0: YarrOp& lastOp = m_ops[lastOpIndex]; michael@0: YarrOp& thisOp = m_ops[thisOpIndex]; michael@0: michael@0: lastOp.m_alternative = alternative; michael@0: lastOp.m_nextOp = thisOpIndex; michael@0: thisOp.m_previousOp = lastOpIndex; michael@0: michael@0: ++currentAlternativeIndex; michael@0: } while (currentAlternativeIndex < alternatives.size()); michael@0: YarrOp& lastOp = m_ops.last(); michael@0: ASSERT(lastOp.m_op == OpBodyAlternativeNext); michael@0: lastOp.m_op = OpBodyAlternativeEnd; michael@0: lastOp.m_alternative = 0; michael@0: lastOp.m_nextOp = repeatLoop; michael@0: } michael@0: michael@0: void generateEnter() michael@0: { michael@0: #if WTF_CPU_X86_64 michael@0: push(X86Registers::ebp); michael@0: move(stackPointerRegister, X86Registers::ebp); michael@0: push(X86Registers::ebx); michael@0: // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves. michael@0: zeroExtend32ToPtr(index, index); michael@0: zeroExtend32ToPtr(length, length); michael@0: #elif WTF_CPU_X86 michael@0: push(X86Registers::ebp); michael@0: move(stackPointerRegister, X86Registers::ebp); michael@0: // TODO: do we need spill registers to fill the output pointer if there are no sub captures? michael@0: push(X86Registers::ebx); michael@0: push(X86Registers::edi); michael@0: push(X86Registers::esi); michael@0: // load output into edi (2 = saved ebp + return address). 
// (hg-annotate dump continues.) Tail of generateEnter(): x86 stack-argument loads
// (MSVC/SunCC pass all args on the stack; otherwise only `output` is loaded), ARM/SH4
// callee-saved pushes, SPARC register-window save, MIPS no-op.
michael@0: # if WTF_COMPILER_MSVC || WTF_COMPILER_SUNCC michael@0: loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input); michael@0: loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index); michael@0: loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length); michael@0: if (compileMode == IncludeSubpatterns) michael@0: loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output); michael@0: # else michael@0: if (compileMode == IncludeSubpatterns) michael@0: loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output); michael@0: # endif michael@0: #elif WTF_CPU_ARM michael@0: push(ARMRegisters::r4); michael@0: push(ARMRegisters::r5); michael@0: push(ARMRegisters::r6); michael@0: # if WTF_CPU_ARM_TRADITIONAL michael@0: push(ARMRegisters::r8); // scratch register michael@0: # endif michael@0: if (compileMode == IncludeSubpatterns) michael@0: move(ARMRegisters::r3, output); michael@0: #elif WTF_CPU_SH4 michael@0: push(SH4Registers::r11); michael@0: push(SH4Registers::r13); michael@0: #elif WTF_CPU_SPARC michael@0: save(Imm32(-m_pattern.m_body->m_callFrameSize * sizeof(void*))); michael@0: #elif WTF_CPU_MIPS michael@0: // Do nothing. 
// generateReturn(): mirror-image epilogue — pops in reverse order of generateEnter's
// pushes, SPARC uses ret_and_restore and returns early, all others fall through to ret().
// The YarrGenerator constructor begins at the end of this segment.
michael@0: #endif michael@0: } michael@0: michael@0: void generateReturn() michael@0: { michael@0: #if WTF_CPU_X86_64 michael@0: pop(X86Registers::ebx); michael@0: pop(X86Registers::ebp); michael@0: #elif WTF_CPU_X86 michael@0: pop(X86Registers::esi); michael@0: pop(X86Registers::edi); michael@0: pop(X86Registers::ebx); michael@0: pop(X86Registers::ebp); michael@0: #elif WTF_CPU_ARM michael@0: # if WTF_CPU_ARM_TRADITIONAL michael@0: pop(ARMRegisters::r8); // scratch register michael@0: # endif michael@0: pop(ARMRegisters::r6); michael@0: pop(ARMRegisters::r5); michael@0: pop(ARMRegisters::r4); michael@0: #elif WTF_CPU_SH4 michael@0: pop(SH4Registers::r13); michael@0: pop(SH4Registers::r11); michael@0: #elif WTF_CPU_SPARC michael@0: ret_and_restore(); michael@0: return; michael@0: #elif WTF_CPU_MIPS michael@0: // Do nothing michael@0: #endif michael@0: ret(); michael@0: } michael@0: michael@0: public: michael@0: YarrGenerator(YarrPattern& pattern, YarrCharSize charSize) michael@0: : m_pattern(pattern) michael@0: , m_charSize(charSize) michael@0: , m_charScale(m_charSize == Char8 ? 
// (hg-annotate dump continues.) Constructor tail, then compile(): emit the prologue,
// return notFound immediately on empty/exhausted input, clear capture slots to -1,
// record the match start for variable-size bodies, compile the pattern to YarrOps,
// and bail to the interpreter (setFallBack) on unsupported constructs, generation
// failure, or link-buffer OOM.
TimesOne: TimesTwo) michael@0: , m_shouldFallBack(false) michael@0: , m_checked(0) michael@0: { michael@0: } michael@0: michael@0: void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject) michael@0: { michael@0: generateEnter(); michael@0: michael@0: Jump hasInput = checkInput(); michael@0: #if WTF_CPU_X86_64 michael@0: move(TrustedImm32(int(WTF::notFound)), returnRegister); michael@0: #else michael@0: move(TrustedImmPtr((void*)WTF::notFound), returnRegister); michael@0: move(TrustedImm32(0), returnRegister2); michael@0: #endif michael@0: generateReturn(); michael@0: hasInput.link(this); michael@0: michael@0: if (compileMode == IncludeSubpatterns) { michael@0: for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i) michael@0: store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int))); michael@0: } michael@0: michael@0: if (!m_pattern.m_body->m_hasFixedSize) michael@0: setMatchStart(index); michael@0: michael@0: initCallFrame(); michael@0: michael@0: // Compile the pattern to the internal 'YarrOp' representation. michael@0: opCompileBody(m_pattern.m_body); michael@0: michael@0: // If we encountered anything we can't handle in the JIT code michael@0: // (e.g. backreferences) then return early. michael@0: if (m_shouldFallBack) { michael@0: jitObject.setFallBack(true); michael@0: return; michael@0: } michael@0: michael@0: if (!generate() || !backtrack()) { michael@0: jitObject.setFallBack(true); michael@0: return; michael@0: } michael@0: michael@0: // Link & finalize the code. michael@0: ExecutablePool *pool; michael@0: bool ok; michael@0: LinkBuffer linkBuffer(this, globalData->regexAllocator, &pool, &ok, REGEXP_CODE); michael@0: michael@0: // Attempt to detect OOM during linkBuffer creation. 
// Tail of compile(): finalize into 8-bit or 16-bit, match-only or full-match code
// slots on the jitObject. The private member declarations begin after it.
michael@0: if (linkBuffer.unsafeCode() == nullptr) { michael@0: jitObject.setFallBack(true); michael@0: return; michael@0: } michael@0: michael@0: m_backtrackingState.linkDataLabels(linkBuffer); michael@0: michael@0: if (compileMode == MatchOnly) { michael@0: #if YARR_8BIT_CHAR_SUPPORT michael@0: if (m_charSize == Char8) michael@0: jitObject.set8BitCodeMatchOnly(linkBuffer.finalizeCode()); michael@0: else michael@0: #endif michael@0: jitObject.set16BitCodeMatchOnly(linkBuffer.finalizeCode()); michael@0: } else { michael@0: #if YARR_8BIT_CHAR_SUPPORT michael@0: if (m_charSize == Char8) michael@0: jitObject.set8BitCode(linkBuffer.finalizeCode()); michael@0: else michael@0: #endif michael@0: jitObject.set16BitCode(linkBuffer.finalizeCode()); michael@0: } michael@0: jitObject.setFallBack(m_shouldFallBack); michael@0: } michael@0: michael@0: private: michael@0: YarrPattern& m_pattern; michael@0: michael@0: YarrCharSize m_charSize; michael@0: michael@0: Scale m_charScale; michael@0: michael@0: // Used to detect regular expression constructs that are not currently michael@0: // supported in the JIT; fall back to the interpreter when this is detected. michael@0: bool m_shouldFallBack; michael@0: michael@0: // The regular expression expressed as a linear sequence of operations. michael@0: Vector m_ops; michael@0: michael@0: // This records the current input offset being applied due to the current michael@0: // set of alternatives we are nested within. E.g. when matching the michael@0: // character 'b' within the regular expression /abc/, we will know that michael@0: // the minimum size for the alternative is 3, checked upon entry to the michael@0: // alternative, and that 'b' is at offset 1 from the start, and as such michael@0: // when matching 'b' we need to apply an offset of -2 to the load. michael@0: // michael@0: // FIXME: This should go away. 
// (hg-annotate dump continues.) Remaining members (m_checked, m_backtrackingState),
// then the free function jitCompile, which instantiates YarrGenerator for MatchOnly or
// full-match compile mode. NOTE(review): the template argument lists on YarrGenerator
// (presumably <MatchOnly> / <IncludeSubpatterns>) were stripped by whatever produced
// this dump — confirm against the un-annotated upstream file before any rebuild.
Rather than tracking this value throughout michael@0: // code generation, we should gather this information up front & store it michael@0: // on the YarrOp structure. michael@0: int m_checked; michael@0: michael@0: // This class records state whilst generating the backtracking path of code. michael@0: BacktrackingState m_backtrackingState; michael@0: }; michael@0: michael@0: void jitCompile(YarrPattern& pattern, YarrCharSize charSize, JSGlobalData* globalData, YarrCodeBlock& jitObject, YarrJITCompileMode mode) michael@0: { michael@0: if (mode == MatchOnly) michael@0: YarrGenerator(pattern, charSize).compile(globalData, jitObject); michael@0: else michael@0: YarrGenerator(pattern, charSize).compile(globalData, jitObject); michael@0: } michael@0: michael@0: }} michael@0: michael@0: #endif