michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- michael@0: * vim: set ts=8 sts=4 et sw=4 tw=99: michael@0: * michael@0: * Copyright (C) 2009 Apple Inc. All rights reserved. michael@0: * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged michael@0: * michael@0: * Redistribution and use in source and binary forms, with or without michael@0: * modification, are permitted provided that the following conditions michael@0: * are met: michael@0: * 1. Redistributions of source code must retain the above copyright michael@0: * notice, this list of conditions and the following disclaimer. michael@0: * 2. Redistributions in binary form must reproduce the above copyright michael@0: * notice, this list of conditions and the following disclaimer in the michael@0: * documentation and/or other materials provided with the distribution. michael@0: * michael@0: * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY michael@0: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE michael@0: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR michael@0: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR michael@0: * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, michael@0: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, michael@0: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR michael@0: * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY michael@0: * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT michael@0: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE michael@0: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. michael@0: */ michael@0: michael@0: #include "yarr/YarrInterpreter.h" michael@0: michael@0: #include "jscntxt.h" michael@0: michael@0: #include "yarr/Yarr.h" michael@0: #include "yarr/YarrCanonicalizeUCS2.h" michael@0: #include "yarr/BumpPointerAllocator.h" michael@0: michael@0: using namespace WTF; michael@0: michael@0: namespace JSC { namespace Yarr { michael@0: michael@0: template michael@0: class Interpreter { michael@0: public: michael@0: struct ParenthesesDisjunctionContext; michael@0: michael@0: struct BackTrackInfoPatternCharacter { michael@0: uintptr_t matchAmount; michael@0: }; michael@0: struct BackTrackInfoCharacterClass { michael@0: uintptr_t matchAmount; michael@0: }; michael@0: struct BackTrackInfoBackReference { michael@0: uintptr_t begin; // Not really needed for greedy quantifiers. michael@0: uintptr_t matchAmount; // Not really needed for fixed quantifiers. michael@0: }; michael@0: struct BackTrackInfoAlternative { michael@0: uintptr_t offset; michael@0: }; michael@0: struct BackTrackInfoParentheticalAssertion { michael@0: uintptr_t begin; michael@0: }; michael@0: struct BackTrackInfoParenthesesOnce { michael@0: uintptr_t begin; michael@0: }; michael@0: struct BackTrackInfoParenthesesTerminal { michael@0: uintptr_t begin; michael@0: }; michael@0: struct BackTrackInfoParentheses { michael@0: uintptr_t matchAmount; michael@0: ParenthesesDisjunctionContext* lastContext; michael@0: }; michael@0: michael@0: static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context) michael@0: { michael@0: context->next = backTrack->lastContext; michael@0: backTrack->lastContext = context; michael@0: ++backTrack->matchAmount; michael@0: } michael@0: michael@0: static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack) michael@0: { michael@0: ASSERT(backTrack->matchAmount); michael@0: ASSERT(backTrack->lastContext); michael@0: backTrack->lastContext = backTrack->lastContext->next; michael@0: --backTrack->matchAmount; michael@0: } michael@0: michael@0: struct DisjunctionContext michael@0: { michael@0: DisjunctionContext() michael@0: : term(0) michael@0: { michael@0: } michael@0: michael@0: void* operator new(size_t, void* where) michael@0: { michael@0: return where; michael@0: } michael@0: michael@0: int term; michael@0: unsigned matchBegin; michael@0: unsigned matchEnd; michael@0: uintptr_t frame[1]; michael@0: }; michael@0: michael@0: DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction) michael@0: { michael@0: size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t); michael@0: allocatorPool = allocatorPool->ensureCapacity(size); michael@0: if (!allocatorPool) michael@0: CRASH(); michael@0: return new (allocatorPool->alloc(size)) DisjunctionContext(); michael@0: } michael@0: michael@0: void freeDisjunctionContext(DisjunctionContext* context) michael@0: { michael@0: allocatorPool = allocatorPool->dealloc(context); michael@0: } michael@0: michael@0: struct ParenthesesDisjunctionContext michael@0: { michael@0: ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term) michael@0: : next(0) michael@0: { michael@0: unsigned firstSubpatternId = term.atom.subpatternId; michael@0: unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns; michael@0: michael@0: for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) { michael@0: subpatternBackup[i] = output[(firstSubpatternId << 1) + i]; michael@0: output[(firstSubpatternId << 1) + i] = offsetNoMatch; michael@0: } michael@0: michael@0: new (getDisjunctionContext(term)) DisjunctionContext(); michael@0: } michael@0: michael@0: void* operator new(size_t, void* where) michael@0: { michael@0: return where; michael@0: } michael@0: michael@0: void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns) michael@0: { michael@0: for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) michael@0: output[(firstSubpatternId << 1) + i] = subpatternBackup[i]; michael@0: } michael@0: michael@0: DisjunctionContext* getDisjunctionContext(ByteTerm& term) michael@0: { michael@0: return reinterpret_cast(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1])); michael@0: } michael@0: michael@0: ParenthesesDisjunctionContext* next; michael@0: unsigned subpatternBackup[1]; michael@0: }; michael@0: michael@0: ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term) michael@0: { michael@0: size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t); michael@0: size = JS_ROUNDUP(size, JS_ALIGNMENT_OF(ParenthesesDisjunctionContext)); michael@0: allocatorPool = allocatorPool->ensureCapacity(size); michael@0: if (!allocatorPool) michael@0: CRASH(); michael@0: return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term); michael@0: } michael@0: michael@0: void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context) michael@0: { michael@0: allocatorPool = allocatorPool->dealloc(context); michael@0: } michael@0: michael@0: class InputStream { michael@0: public: michael@0: InputStream(const CharType* input, unsigned start, unsigned length) michael@0: : input(input) michael@0: , pos(start) michael@0: , length(length) michael@0: { michael@0: } michael@0: michael@0: void next() michael@0: { michael@0: ++pos; michael@0: } michael@0: michael@0: void rewind(unsigned amount) michael@0: { michael@0: ASSERT(pos >= amount); michael@0: pos -= amount; michael@0: } michael@0: michael@0: int read() michael@0: { michael@0: ASSERT(pos < length); michael@0: if (pos < length) michael@0: return input[pos]; michael@0: return -1; michael@0: } michael@0: michael@0: int readPair() michael@0: { michael@0: ASSERT(pos + 1 < length); michael@0: return input[pos] | input[pos + 1] << 16; michael@0: } michael@0: michael@0: int readChecked(unsigned negativePositionOffest) michael@0: { michael@0: if (pos < negativePositionOffest) michael@0: CRASH(); michael@0: unsigned p = pos - negativePositionOffest; michael@0: ASSERT(p < length); michael@0: return input[p]; michael@0: } michael@0: michael@0: int reread(unsigned from) michael@0: { michael@0: ASSERT(from < length); michael@0: return input[from]; michael@0: } michael@0: michael@0: int prev() michael@0: { michael@0: ASSERT(!(pos > length)); michael@0: if (pos && length) michael@0: return input[pos - 1]; michael@0: return -1; michael@0: } michael@0: michael@0: unsigned getPos() michael@0: { michael@0: return pos; michael@0: } michael@0: michael@0: void setPos(unsigned p) michael@0: { michael@0: pos = p; michael@0: } michael@0: michael@0: bool atStart() michael@0: { michael@0: return pos == 0; michael@0: } michael@0: michael@0: bool atEnd() michael@0: { michael@0: return pos == length; michael@0: } michael@0: michael@0: unsigned end() michael@0: { michael@0: return length; michael@0: } michael@0: michael@0: bool checkInput(unsigned count) michael@0: { michael@0: if (((pos + count) <= length) && ((pos + count) >= pos)) { michael@0: pos += count; michael@0: return true; michael@0: } michael@0: return false; michael@0: } michael@0: michael@0: void uncheckInput(unsigned count) michael@0: { michael@0: if (pos < count) michael@0: CRASH(); michael@0: pos -= count; michael@0: } michael@0: michael@0: bool atStart(unsigned negativePositionOffest) michael@0: { michael@0: return pos == negativePositionOffest; michael@0: } michael@0: michael@0: bool atEnd(unsigned negativePositionOffest) michael@0: { michael@0: if (pos < negativePositionOffest) michael@0: CRASH(); michael@0: return (pos - negativePositionOffest) == length; michael@0: } michael@0: michael@0: bool isAvailableInput(unsigned offset) michael@0: { michael@0: return (((pos + offset) <= length) && ((pos + offset) >= pos)); michael@0: } michael@0: michael@0: private: michael@0: const CharType* input; michael@0: unsigned pos; michael@0: unsigned length; michael@0: }; michael@0: michael@0: bool testCharacterClass(CharacterClass* characterClass, int ch) michael@0: { michael@0: if (ch & 0xFF80) { michael@0: for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i) michael@0: if (ch == characterClass->m_matchesUnicode[i]) michael@0: return true; michael@0: for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i) michael@0: if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end)) michael@0: return true; michael@0: } else { michael@0: for (unsigned i = 0; i < characterClass->m_matches.size(); ++i) michael@0: if (ch == characterClass->m_matches[i]) michael@0: return true; michael@0: for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i) michael@0: if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end)) michael@0: return true; michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: michael@0: bool checkCharacter(int testChar, unsigned negativeInputOffset) michael@0: { michael@0: return testChar == input.readChecked(negativeInputOffset); michael@0: } michael@0: michael@0: bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset) michael@0: { michael@0: int ch = input.readChecked(negativeInputOffset); michael@0: return (loChar == ch) || (hiChar == ch); michael@0: } michael@0: michael@0: bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset) michael@0: { michael@0: bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset)); michael@0: return invert ? !match : match; michael@0: } michael@0: michael@0: bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset) michael@0: { michael@0: unsigned matchSize = (unsigned)(matchEnd - matchBegin); michael@0: michael@0: if (!input.checkInput(matchSize)) michael@0: return false; michael@0: michael@0: if (pattern->m_ignoreCase) { michael@0: for (unsigned i = 0; i < matchSize; ++i) { michael@0: int oldCh = input.reread(matchBegin + i); michael@0: int ch = input.readChecked(negativeInputOffset + matchSize - i); michael@0: michael@0: if (oldCh == ch) michael@0: continue; michael@0: michael@0: // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that michael@0: // unicode values are never allowed to match against ascii ones. michael@0: if (isASCII(oldCh) || isASCII(ch)) { michael@0: if (toASCIIUpper(oldCh) == toASCIIUpper(ch)) michael@0: continue; michael@0: } else if (areCanonicallyEquivalent(oldCh, ch)) michael@0: continue; michael@0: michael@0: input.uncheckInput(matchSize); michael@0: return false; michael@0: } michael@0: } else { michael@0: for (unsigned i = 0; i < matchSize; ++i) { michael@0: if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) { michael@0: input.uncheckInput(matchSize); michael@0: return false; michael@0: } michael@0: } michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: bool matchAssertionBOL(ByteTerm& term) michael@0: { michael@0: return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1))); michael@0: } michael@0: michael@0: bool matchAssertionEOL(ByteTerm& term) michael@0: { michael@0: if (term.inputPosition) michael@0: return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition))); michael@0: michael@0: return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read())); michael@0: } michael@0: michael@0: bool matchAssertionWordBoundary(ByteTerm& term) michael@0: { michael@0: bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1)); michael@0: bool readIsWordchar; michael@0: if (term.inputPosition) michael@0: readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition)); michael@0: else michael@0: readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read()); michael@0: michael@0: bool wordBoundary = prevIsWordchar != readIsWordchar; michael@0: return term.invert() ? !wordBoundary : wordBoundary; michael@0: } michael@0: michael@0: bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: BackTrackInfoPatternCharacter* backTrack = reinterpret_cast(context->frame + term.frameLocation); michael@0: michael@0: switch (term.atom.quantityType) { michael@0: case QuantifierFixedCount: michael@0: break; michael@0: michael@0: case QuantifierGreedy: michael@0: if (backTrack->matchAmount) { michael@0: --backTrack->matchAmount; michael@0: input.uncheckInput(1); michael@0: return true; michael@0: } michael@0: break; michael@0: michael@0: case QuantifierNonGreedy: michael@0: if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { michael@0: ++backTrack->matchAmount; michael@0: if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1)) michael@0: return true; michael@0: } michael@0: input.uncheckInput(backTrack->matchAmount); michael@0: break; michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: michael@0: bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: BackTrackInfoPatternCharacter* backTrack = reinterpret_cast(context->frame + term.frameLocation); michael@0: michael@0: switch (term.atom.quantityType) { michael@0: case QuantifierFixedCount: michael@0: break; michael@0: michael@0: case QuantifierGreedy: michael@0: if (backTrack->matchAmount) { michael@0: --backTrack->matchAmount; michael@0: input.uncheckInput(1); michael@0: return true; michael@0: } michael@0: break; michael@0: michael@0: case QuantifierNonGreedy: michael@0: if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { michael@0: ++backTrack->matchAmount; michael@0: if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1)) michael@0: return true; michael@0: } michael@0: input.uncheckInput(backTrack->matchAmount); michael@0: break; michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: michael@0: bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: ASSERT(term.type == ByteTerm::TypeCharacterClass); michael@0: BackTrackInfoPatternCharacter* backTrack = reinterpret_cast(context->frame + term.frameLocation); michael@0: michael@0: switch (term.atom.quantityType) { michael@0: case QuantifierFixedCount: { michael@0: for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { michael@0: if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount)) michael@0: return false; michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: case QuantifierGreedy: { michael@0: unsigned matchAmount = 0; michael@0: while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) { michael@0: if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) { michael@0: input.uncheckInput(1); michael@0: break; michael@0: } michael@0: ++matchAmount; michael@0: } michael@0: backTrack->matchAmount = matchAmount; michael@0: michael@0: return true; michael@0: } michael@0: michael@0: case QuantifierNonGreedy: michael@0: backTrack->matchAmount = 0; michael@0: return true; michael@0: } michael@0: michael@0: ASSERT_NOT_REACHED(); michael@0: return false; michael@0: } michael@0: michael@0: bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: ASSERT(term.type == ByteTerm::TypeCharacterClass); michael@0: BackTrackInfoPatternCharacter* backTrack = reinterpret_cast(context->frame + term.frameLocation); michael@0: michael@0: switch (term.atom.quantityType) { michael@0: case QuantifierFixedCount: michael@0: break; michael@0: michael@0: case QuantifierGreedy: michael@0: if (backTrack->matchAmount) { michael@0: --backTrack->matchAmount; michael@0: input.uncheckInput(1); michael@0: return true; michael@0: } michael@0: break; michael@0: michael@0: case QuantifierNonGreedy: michael@0: if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { michael@0: ++backTrack->matchAmount; michael@0: if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) michael@0: return true; michael@0: } michael@0: input.uncheckInput(backTrack->matchAmount); michael@0: break; michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: michael@0: bool matchBackReference(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: ASSERT(term.type == ByteTerm::TypeBackReference); michael@0: BackTrackInfoBackReference* backTrack = reinterpret_cast(context->frame + term.frameLocation); michael@0: michael@0: unsigned matchBegin = output[(term.atom.subpatternId << 1)]; michael@0: unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1]; michael@0: michael@0: // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that. michael@0: // In this case the result of match is empty string like when it references to a parentheses with zero-width match. michael@0: // Eg.: /(a\1)/ michael@0: if (matchEnd == offsetNoMatch) michael@0: return true; michael@0: michael@0: if (matchBegin == offsetNoMatch) michael@0: return true; michael@0: michael@0: ASSERT(matchBegin <= matchEnd); michael@0: michael@0: if (matchBegin == matchEnd) michael@0: return true; michael@0: michael@0: switch (term.atom.quantityType) { michael@0: case QuantifierFixedCount: { michael@0: backTrack->begin = input.getPos(); michael@0: for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { michael@0: if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { michael@0: input.setPos(backTrack->begin); michael@0: return false; michael@0: } michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: case QuantifierGreedy: { michael@0: unsigned matchAmount = 0; michael@0: while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) michael@0: ++matchAmount; michael@0: backTrack->matchAmount = matchAmount; michael@0: return true; michael@0: } michael@0: michael@0: case QuantifierNonGreedy: michael@0: backTrack->begin = input.getPos(); michael@0: backTrack->matchAmount = 0; michael@0: return true; michael@0: } michael@0: michael@0: ASSERT_NOT_REACHED(); michael@0: return false; michael@0: } michael@0: michael@0: bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: ASSERT(term.type == ByteTerm::TypeBackReference); michael@0: BackTrackInfoBackReference* backTrack = reinterpret_cast(context->frame + term.frameLocation); michael@0: michael@0: unsigned matchBegin = output[(term.atom.subpatternId << 1)]; michael@0: unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1]; michael@0: michael@0: if (matchBegin == offsetNoMatch) michael@0: return false; michael@0: michael@0: ASSERT(matchBegin <= matchEnd); michael@0: michael@0: if (matchBegin == matchEnd) michael@0: return false; michael@0: michael@0: switch (term.atom.quantityType) { michael@0: case QuantifierFixedCount: michael@0: // for quantityCount == 1, could rewind. michael@0: input.setPos(backTrack->begin); michael@0: break; michael@0: michael@0: case QuantifierGreedy: michael@0: if (backTrack->matchAmount) { michael@0: --backTrack->matchAmount; michael@0: input.rewind(matchEnd - matchBegin); michael@0: return true; michael@0: } michael@0: break; michael@0: michael@0: case QuantifierNonGreedy: michael@0: if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { michael@0: ++backTrack->matchAmount; michael@0: return true; michael@0: } michael@0: input.setPos(backTrack->begin); michael@0: break; michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: michael@0: void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context) michael@0: { michael@0: if (term.capture()) { michael@0: unsigned subpatternId = term.atom.subpatternId; michael@0: output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition; michael@0: output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition; michael@0: } michael@0: } michael@0: void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context) michael@0: { michael@0: unsigned firstSubpatternId = term.atom.subpatternId; michael@0: unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns; michael@0: context->restoreOutput(output, firstSubpatternId, count); michael@0: } michael@0: JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack) michael@0: { michael@0: while (backTrack->matchAmount) { michael@0: ParenthesesDisjunctionContext* context = backTrack->lastContext; michael@0: michael@0: JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true); michael@0: if (result == JSRegExpMatch) michael@0: return JSRegExpMatch; michael@0: michael@0: resetMatches(term, context); michael@0: popParenthesesDisjunctionContext(backTrack); michael@0: freeParenthesesDisjunctionContext(context); michael@0: michael@0: if (result != JSRegExpNoMatch) michael@0: return result; michael@0: } michael@0: michael@0: return JSRegExpNoMatch; michael@0: } michael@0: michael@0: bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); michael@0: ASSERT(term.atom.quantityCount == 1); michael@0: michael@0: BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast(context->frame + term.frameLocation); michael@0: michael@0: switch (term.atom.quantityType) { michael@0: case QuantifierGreedy: { michael@0: // set this speculatively; if we get to the parens end this will be true. michael@0: backTrack->begin = input.getPos(); michael@0: break; michael@0: } michael@0: case QuantifierNonGreedy: { michael@0: backTrack->begin = notFound; michael@0: context->term += term.atom.parenthesesWidth; michael@0: return true; michael@0: } michael@0: case QuantifierFixedCount: michael@0: break; michael@0: } michael@0: michael@0: if (term.capture()) { michael@0: unsigned subpatternId = term.atom.subpatternId; michael@0: output[(subpatternId << 1)] = input.getPos() - term.inputPosition; michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); michael@0: ASSERT(term.atom.quantityCount == 1); michael@0: michael@0: if (term.capture()) { michael@0: unsigned subpatternId = term.atom.subpatternId; michael@0: output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition; michael@0: } michael@0: michael@0: if (term.atom.quantityType == QuantifierFixedCount) michael@0: return true; michael@0: michael@0: BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast(context->frame + term.frameLocation); michael@0: return backTrack->begin != input.getPos(); michael@0: } michael@0: michael@0: bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); michael@0: ASSERT(term.atom.quantityCount == 1); michael@0: michael@0: BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast(context->frame + term.frameLocation); michael@0: michael@0: if (term.capture()) { michael@0: unsigned subpatternId = term.atom.subpatternId; michael@0: output[(subpatternId << 1)] = offsetNoMatch; michael@0: output[(subpatternId << 1) + 1] = offsetNoMatch; michael@0: } michael@0: michael@0: switch (term.atom.quantityType) { michael@0: case QuantifierGreedy: michael@0: // if we backtrack to this point, there is another chance - try matching nothing. michael@0: ASSERT(backTrack->begin != notFound); michael@0: backTrack->begin = notFound; michael@0: context->term += term.atom.parenthesesWidth; michael@0: return true; michael@0: case QuantifierNonGreedy: michael@0: ASSERT(backTrack->begin != notFound); michael@0: case QuantifierFixedCount: michael@0: break; michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: michael@0: bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); michael@0: ASSERT(term.atom.quantityCount == 1); michael@0: michael@0: BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast(context->frame + term.frameLocation); michael@0: michael@0: switch (term.atom.quantityType) { michael@0: case QuantifierGreedy: michael@0: if (backTrack->begin == notFound) { michael@0: context->term -= term.atom.parenthesesWidth; michael@0: return false; michael@0: } michael@0: case QuantifierNonGreedy: michael@0: if (backTrack->begin == notFound) { michael@0: backTrack->begin = input.getPos(); michael@0: if (term.capture()) { michael@0: // Technically this access to inputPosition should be accessing the begin term's michael@0: // inputPosition, but for repeats other than fixed these values should be michael@0: // the same anyway! (We don't pre-check for greedy or non-greedy matches.) michael@0: ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin); michael@0: michael@0: // Disabled, see bug 808478 michael@0: #if 0 michael@0: ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition); michael@0: #endif michael@0: unsigned subpatternId = term.atom.subpatternId; michael@0: output[subpatternId << 1] = input.getPos() + term.inputPosition; michael@0: } michael@0: context->term -= term.atom.parenthesesWidth; michael@0: return true; michael@0: } michael@0: case QuantifierFixedCount: michael@0: break; michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: michael@0: bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); michael@0: ASSERT(term.atom.quantityType == QuantifierGreedy); michael@0: ASSERT(term.atom.quantityCount == quantifyInfinite); michael@0: ASSERT(!term.capture()); michael@0: michael@0: BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast(context->frame + term.frameLocation); michael@0: backTrack->begin = input.getPos(); michael@0: return true; michael@0: } michael@0: michael@0: bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd); michael@0: michael@0: BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast(context->frame + term.frameLocation); michael@0: // Empty match is a failed match. michael@0: if (backTrack->begin == input.getPos()) michael@0: return false; michael@0: michael@0: // Successful match! Okay, what's next? - loop around and try to match moar! michael@0: context->term -= (term.atom.parenthesesWidth + 1); michael@0: return true; michael@0: } michael@0: michael@0: bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); michael@0: ASSERT(term.atom.quantityType == QuantifierGreedy); michael@0: ASSERT(term.atom.quantityCount == quantifyInfinite); michael@0: ASSERT(!term.capture()); michael@0: michael@0: // If we backtrack to this point, we have failed to match this iteration of the parens. michael@0: // Since this is greedy / zero minimum a failed is also accepted as a match! michael@0: context->term += term.atom.parenthesesWidth; michael@0: return true; michael@0: } michael@0: michael@0: bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*) michael@0: { michael@0: // 'Terminal' parentheses are at the end of the regex, and as such a match past end michael@0: // should always be returned as a successful match - we should never backtrack to here. michael@0: ASSERT_NOT_REACHED(); michael@0: return false; michael@0: } michael@0: michael@0: bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); michael@0: ASSERT(term.atom.quantityCount == 1); michael@0: michael@0: BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast(context->frame + term.frameLocation); michael@0: michael@0: backTrack->begin = input.getPos(); michael@0: return true; michael@0: } michael@0: michael@0: bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); michael@0: ASSERT(term.atom.quantityCount == 1); michael@0: michael@0: BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast(context->frame + term.frameLocation); michael@0: michael@0: input.setPos(backTrack->begin); michael@0: michael@0: // We've reached the end of the parens; if they are inverted, this is failure. michael@0: if (term.invert()) { michael@0: context->term -= term.atom.parenthesesWidth; michael@0: return false; michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); michael@0: ASSERT(term.atom.quantityCount == 1); michael@0: michael@0: // We've failed to match parens; if they are inverted, this is win! michael@0: if (term.invert()) { michael@0: context->term += term.atom.parenthesesWidth; michael@0: return true; michael@0: } michael@0: michael@0: return false; michael@0: } michael@0: michael@0: bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); michael@0: ASSERT(term.atom.quantityCount == 1); michael@0: michael@0: BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast(context->frame + term.frameLocation); michael@0: michael@0: input.setPos(backTrack->begin); michael@0: michael@0: context->term -= term.atom.parenthesesWidth; michael@0: return false; michael@0: } michael@0: michael@0: JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); michael@0: michael@0: BackTrackInfoParentheses* backTrack = reinterpret_cast(context->frame + term.frameLocation); michael@0: ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction; michael@0: michael@0: backTrack->matchAmount = 0; michael@0: backTrack->lastContext = 0; michael@0: michael@0: switch (term.atom.quantityType) { michael@0: case QuantifierFixedCount: { michael@0: // While we haven't yet reached our fixed limit, michael@0: while (backTrack->matchAmount < term.atom.quantityCount) { michael@0: // Try to do a match, and it it succeeds, add it to the list. michael@0: ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); michael@0: JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); michael@0: if (result == JSRegExpMatch) michael@0: appendParenthesesDisjunctionContext(backTrack, context); michael@0: else { michael@0: // The match failed; try to find an alternate point to carry on from. michael@0: resetMatches(term, context); michael@0: freeParenthesesDisjunctionContext(context); michael@0: michael@0: if (result != JSRegExpNoMatch) michael@0: return result; michael@0: JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack); michael@0: if (backtrackResult != JSRegExpMatch) michael@0: return backtrackResult; michael@0: } michael@0: } michael@0: michael@0: ASSERT(backTrack->matchAmount == term.atom.quantityCount); michael@0: ParenthesesDisjunctionContext* context = backTrack->lastContext; michael@0: recordParenthesesMatch(term, context); michael@0: return JSRegExpMatch; michael@0: } michael@0: michael@0: case QuantifierGreedy: { michael@0: while (backTrack->matchAmount < term.atom.quantityCount) { michael@0: ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); michael@0: JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); michael@0: if (result == JSRegExpMatch) michael@0: appendParenthesesDisjunctionContext(backTrack, context); michael@0: else { michael@0: resetMatches(term, context); michael@0: freeParenthesesDisjunctionContext(context); michael@0: michael@0: if (result != JSRegExpNoMatch) michael@0: return result; michael@0: michael@0: break; michael@0: } michael@0: } michael@0: michael@0: if (backTrack->matchAmount) { michael@0: ParenthesesDisjunctionContext* context = backTrack->lastContext; michael@0: recordParenthesesMatch(term, context); michael@0: } michael@0: return JSRegExpMatch; michael@0: } michael@0: michael@0: case QuantifierNonGreedy: michael@0: return JSRegExpMatch; michael@0: } michael@0: michael@0: ASSERT_NOT_REACHED(); michael@0: return JSRegExpErrorNoMatch; michael@0: } michael@0: michael@0: // Rules for backtracking differ depending on whether this is greedy or non-greedy. michael@0: // michael@0: // Greedy matches never should try just adding more - you should already have done michael@0: // the 'more' cases. Always backtrack, at least a leetle bit. However cases where michael@0: // you backtrack an item off the list needs checking, since we'll never have matched michael@0: // the one less case. Tracking forwards, still add as much as possible. michael@0: // michael@0: // Non-greedy, we've already done the one less case, so don't match on popping. michael@0: // We haven't done the one more case, so always try to add that. michael@0: // michael@0: JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); michael@0: michael@0: BackTrackInfoParentheses* backTrack = reinterpret_cast(context->frame + term.frameLocation); michael@0: ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction; michael@0: michael@0: switch (term.atom.quantityType) { michael@0: case QuantifierFixedCount: { michael@0: ASSERT(backTrack->matchAmount == term.atom.quantityCount); michael@0: michael@0: ParenthesesDisjunctionContext* context = 0; michael@0: JSRegExpResult result = parenthesesDoBacktrack(term, backTrack); michael@0: michael@0: if (result != JSRegExpMatch) michael@0: return result; michael@0: michael@0: // While we haven't yet reached our fixed limit, michael@0: while (backTrack->matchAmount < term.atom.quantityCount) { michael@0: // Try to do a match, and it it succeeds, add it to the list. michael@0: context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); michael@0: result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); michael@0: michael@0: if (result == JSRegExpMatch) michael@0: appendParenthesesDisjunctionContext(backTrack, context); michael@0: else { michael@0: // The match failed; try to find an alternate point to carry on from. michael@0: resetMatches(term, context); michael@0: freeParenthesesDisjunctionContext(context); michael@0: michael@0: if (result != JSRegExpNoMatch) michael@0: return result; michael@0: JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack); michael@0: if (backtrackResult != JSRegExpMatch) michael@0: return backtrackResult; michael@0: } michael@0: } michael@0: michael@0: ASSERT(backTrack->matchAmount == term.atom.quantityCount); michael@0: context = backTrack->lastContext; michael@0: recordParenthesesMatch(term, context); michael@0: return JSRegExpMatch; michael@0: } michael@0: michael@0: case QuantifierGreedy: { michael@0: if (!backTrack->matchAmount) michael@0: return JSRegExpNoMatch; michael@0: michael@0: ParenthesesDisjunctionContext* context = backTrack->lastContext; michael@0: JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true); michael@0: if (result == JSRegExpMatch) { michael@0: while (backTrack->matchAmount < term.atom.quantityCount) { michael@0: ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); michael@0: JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); michael@0: if (parenthesesResult == JSRegExpMatch) michael@0: appendParenthesesDisjunctionContext(backTrack, context); michael@0: else { michael@0: resetMatches(term, context); michael@0: freeParenthesesDisjunctionContext(context); michael@0: michael@0: if (parenthesesResult != JSRegExpNoMatch) michael@0: return parenthesesResult; michael@0: michael@0: break; michael@0: } michael@0: } michael@0: } else { michael@0: // Avoid a topcrash before it occurs. michael@0: if (!backTrack->lastContext) { michael@0: ASSERT(!"Tripped Bug 856796!"); michael@0: return JSRegExpErrorInternal; michael@0: } michael@0: michael@0: resetMatches(term, context); michael@0: popParenthesesDisjunctionContext(backTrack); michael@0: freeParenthesesDisjunctionContext(context); michael@0: michael@0: if (result != JSRegExpNoMatch) michael@0: return result; michael@0: } michael@0: michael@0: if (backTrack->matchAmount) { michael@0: ParenthesesDisjunctionContext* context = backTrack->lastContext; michael@0: recordParenthesesMatch(term, context); michael@0: } michael@0: return JSRegExpMatch; michael@0: } michael@0: michael@0: case QuantifierNonGreedy: { michael@0: // If we've not reached the limit, try to add one more match. michael@0: if (backTrack->matchAmount < term.atom.quantityCount) { michael@0: ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); michael@0: JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); michael@0: if (result == JSRegExpMatch) { michael@0: appendParenthesesDisjunctionContext(backTrack, context); michael@0: recordParenthesesMatch(term, context); michael@0: return JSRegExpMatch; michael@0: } michael@0: michael@0: resetMatches(term, context); michael@0: freeParenthesesDisjunctionContext(context); michael@0: michael@0: if (result != JSRegExpNoMatch) michael@0: return result; michael@0: } michael@0: michael@0: // Nope - okay backtrack looking for an alternative. michael@0: while (backTrack->matchAmount) { michael@0: ParenthesesDisjunctionContext* context = backTrack->lastContext; michael@0: JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true); michael@0: if (result == JSRegExpMatch) { michael@0: // successful backtrack! we're back in the game! michael@0: if (backTrack->matchAmount) { michael@0: context = backTrack->lastContext; michael@0: recordParenthesesMatch(term, context); michael@0: } michael@0: return JSRegExpMatch; michael@0: } michael@0: michael@0: // Avoid a topcrash before it occurs. michael@0: if (!backTrack->lastContext) { michael@0: ASSERT(!"Tripped Bug 856796!"); michael@0: return JSRegExpErrorInternal; michael@0: } michael@0: michael@0: // pop a match off the stack michael@0: resetMatches(term, context); michael@0: popParenthesesDisjunctionContext(backTrack); michael@0: freeParenthesesDisjunctionContext(context); michael@0: michael@0: if (result != JSRegExpNoMatch) michael@0: return result; michael@0: } michael@0: michael@0: return JSRegExpNoMatch; michael@0: } michael@0: } michael@0: michael@0: ASSERT_NOT_REACHED(); michael@0: return JSRegExpErrorNoMatch; michael@0: } michael@0: michael@0: bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context) michael@0: { michael@0: UNUSED_PARAM(term); michael@0: unsigned matchBegin = context->matchBegin; michael@0: michael@0: if (matchBegin) { michael@0: for (matchBegin--; true; matchBegin--) { michael@0: if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) { michael@0: ++matchBegin; michael@0: break; michael@0: } michael@0: michael@0: if (!matchBegin) michael@0: break; michael@0: } michael@0: } michael@0: michael@0: unsigned matchEnd = input.getPos(); michael@0: michael@0: for (; (matchEnd != input.end()) michael@0: && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { } michael@0: michael@0: if (((matchBegin && term.anchors.m_bol) michael@0: || ((matchEnd != input.end()) && term.anchors.m_eol)) michael@0: && !pattern->m_multiline) michael@0: return false; michael@0: michael@0: context->matchBegin = matchBegin; michael@0: context->matchEnd = matchEnd; michael@0: return true; michael@0: } michael@0: michael@0: #define MATCH_NEXT() { ++context->term; goto matchAgain; } michael@0: #define BACKTRACK() { --context->term; goto backtrack; } michael@0: #define currentTerm() (disjunction->terms[context->term]) michael@0: JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false) michael@0: { michael@0: if (!--remainingMatchCount) michael@0: return JSRegExpErrorHitLimit; michael@0: michael@0: if (btrack) michael@0: BACKTRACK(); michael@0: michael@0: context->matchBegin = input.getPos(); michael@0: context->term = 0; michael@0: michael@0: matchAgain: michael@0: ASSERT(context->term < static_cast(disjunction->terms.size())); michael@0: michael@0: // Prevent jank resulting from getting stuck in Yarr for a long time. michael@0: if (!CheckForInterrupt(this->cx)) michael@0: return JSRegExpErrorInternal; michael@0: michael@0: switch (currentTerm().type) { michael@0: case ByteTerm::TypeSubpatternBegin: michael@0: MATCH_NEXT(); michael@0: case ByteTerm::TypeSubpatternEnd: michael@0: context->matchEnd = input.getPos(); michael@0: return JSRegExpMatch; michael@0: michael@0: case ByteTerm::TypeBodyAlternativeBegin: michael@0: MATCH_NEXT(); michael@0: case ByteTerm::TypeBodyAlternativeDisjunction: michael@0: case ByteTerm::TypeBodyAlternativeEnd: michael@0: context->matchEnd = input.getPos(); michael@0: return JSRegExpMatch; michael@0: michael@0: case ByteTerm::TypeAlternativeBegin: michael@0: MATCH_NEXT(); michael@0: case ByteTerm::TypeAlternativeDisjunction: michael@0: case ByteTerm::TypeAlternativeEnd: { michael@0: int offset = currentTerm().alternative.end; michael@0: BackTrackInfoAlternative* backTrack = reinterpret_cast(context->frame + currentTerm().frameLocation); michael@0: backTrack->offset = offset; michael@0: context->term += offset; michael@0: MATCH_NEXT(); michael@0: } michael@0: michael@0: case ByteTerm::TypeAssertionBOL: michael@0: if (matchAssertionBOL(currentTerm())) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypeAssertionEOL: michael@0: if (matchAssertionEOL(currentTerm())) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypeAssertionWordBoundary: michael@0: if (matchAssertionWordBoundary(currentTerm())) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: michael@0: case ByteTerm::TypePatternCharacterOnce: michael@0: case ByteTerm::TypePatternCharacterFixed: { michael@0: for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { michael@0: if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) michael@0: BACKTRACK(); michael@0: } michael@0: MATCH_NEXT(); michael@0: } michael@0: case ByteTerm::TypePatternCharacterGreedy: { michael@0: BackTrackInfoPatternCharacter* backTrack = reinterpret_cast(context->frame + currentTerm().frameLocation); michael@0: unsigned matchAmount = 0; michael@0: while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) { michael@0: if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) { michael@0: input.uncheckInput(1); michael@0: break; michael@0: } michael@0: ++matchAmount; michael@0: } michael@0: backTrack->matchAmount = matchAmount; michael@0: michael@0: MATCH_NEXT(); michael@0: } michael@0: case ByteTerm::TypePatternCharacterNonGreedy: { michael@0: BackTrackInfoPatternCharacter* backTrack = reinterpret_cast(context->frame + currentTerm().frameLocation); michael@0: backTrack->matchAmount = 0; michael@0: MATCH_NEXT(); michael@0: } michael@0: michael@0: case ByteTerm::TypePatternCasedCharacterOnce: michael@0: case ByteTerm::TypePatternCasedCharacterFixed: { michael@0: for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { michael@0: if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) michael@0: BACKTRACK(); michael@0: } michael@0: MATCH_NEXT(); michael@0: } michael@0: case ByteTerm::TypePatternCasedCharacterGreedy: { michael@0: BackTrackInfoPatternCharacter* backTrack = reinterpret_cast(context->frame + currentTerm().frameLocation); michael@0: unsigned matchAmount = 0; michael@0: while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) { michael@0: if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) { michael@0: input.uncheckInput(1); michael@0: break; michael@0: } michael@0: ++matchAmount; michael@0: } michael@0: backTrack->matchAmount = matchAmount; michael@0: michael@0: MATCH_NEXT(); michael@0: } michael@0: case ByteTerm::TypePatternCasedCharacterNonGreedy: { michael@0: BackTrackInfoPatternCharacter* backTrack = reinterpret_cast(context->frame + currentTerm().frameLocation); michael@0: backTrack->matchAmount = 0; michael@0: MATCH_NEXT(); michael@0: } michael@0: michael@0: case ByteTerm::TypeCharacterClass: michael@0: if (matchCharacterClass(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypeBackReference: michael@0: if (matchBackReference(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypeParenthesesSubpattern: { michael@0: JSRegExpResult result = matchParentheses(currentTerm(), context); michael@0: michael@0: if (result == JSRegExpMatch) { michael@0: MATCH_NEXT(); michael@0: } else if (result != JSRegExpNoMatch) michael@0: return result; michael@0: michael@0: BACKTRACK(); michael@0: } michael@0: case ByteTerm::TypeParenthesesSubpatternOnceBegin: michael@0: if (matchParenthesesOnceBegin(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypeParenthesesSubpatternOnceEnd: michael@0: if (matchParenthesesOnceEnd(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypeParenthesesSubpatternTerminalBegin: michael@0: if (matchParenthesesTerminalBegin(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypeParenthesesSubpatternTerminalEnd: michael@0: if (matchParenthesesTerminalEnd(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypeParentheticalAssertionBegin: michael@0: if (matchParentheticalAssertionBegin(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypeParentheticalAssertionEnd: michael@0: if (matchParentheticalAssertionEnd(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: michael@0: case ByteTerm::TypeCheckInput: michael@0: if (input.checkInput(currentTerm().checkInputCount)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: michael@0: case ByteTerm::TypeUncheckInput: michael@0: input.uncheckInput(currentTerm().checkInputCount); michael@0: MATCH_NEXT(); michael@0: michael@0: case ByteTerm::TypeDotStarEnclosure: michael@0: if (matchDotStarEnclosure(currentTerm(), context)) michael@0: return JSRegExpMatch; michael@0: BACKTRACK(); michael@0: } michael@0: michael@0: // We should never fall-through to here. michael@0: ASSERT_NOT_REACHED(); michael@0: michael@0: backtrack: michael@0: ASSERT(context->term < static_cast(disjunction->terms.size())); michael@0: michael@0: // Prevent jank resulting from getting stuck in Yarr for a long time. michael@0: if (!CheckForInterrupt(this->cx)) michael@0: return JSRegExpErrorInternal; michael@0: michael@0: switch (currentTerm().type) { michael@0: case ByteTerm::TypeSubpatternBegin: michael@0: return JSRegExpNoMatch; michael@0: case ByteTerm::TypeSubpatternEnd: michael@0: ASSERT_NOT_REACHED(); michael@0: michael@0: case ByteTerm::TypeBodyAlternativeBegin: michael@0: case ByteTerm::TypeBodyAlternativeDisjunction: { michael@0: int offset = currentTerm().alternative.next; michael@0: context->term += offset; michael@0: if (offset > 0) michael@0: MATCH_NEXT(); michael@0: michael@0: if (input.atEnd()) michael@0: return JSRegExpNoMatch; michael@0: michael@0: input.next(); michael@0: michael@0: context->matchBegin = input.getPos(); michael@0: michael@0: if (currentTerm().alternative.onceThrough) michael@0: context->term += currentTerm().alternative.next; michael@0: michael@0: MATCH_NEXT(); michael@0: } michael@0: case ByteTerm::TypeBodyAlternativeEnd: michael@0: ASSERT_NOT_REACHED(); michael@0: michael@0: case ByteTerm::TypeAlternativeBegin: michael@0: case ByteTerm::TypeAlternativeDisjunction: { michael@0: int offset = currentTerm().alternative.next; michael@0: context->term += offset; michael@0: if (offset > 0) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: } michael@0: case ByteTerm::TypeAlternativeEnd: { michael@0: // We should never backtrack back into an alternative of the main body of the regex. michael@0: BackTrackInfoAlternative* backTrack = reinterpret_cast(context->frame + currentTerm().frameLocation); michael@0: unsigned offset = backTrack->offset; michael@0: context->term -= offset; michael@0: BACKTRACK(); michael@0: } michael@0: michael@0: case ByteTerm::TypeAssertionBOL: michael@0: case ByteTerm::TypeAssertionEOL: michael@0: case ByteTerm::TypeAssertionWordBoundary: michael@0: BACKTRACK(); michael@0: michael@0: case ByteTerm::TypePatternCharacterOnce: michael@0: case ByteTerm::TypePatternCharacterFixed: michael@0: case ByteTerm::TypePatternCharacterGreedy: michael@0: case ByteTerm::TypePatternCharacterNonGreedy: michael@0: if (backtrackPatternCharacter(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypePatternCasedCharacterOnce: michael@0: case ByteTerm::TypePatternCasedCharacterFixed: michael@0: case ByteTerm::TypePatternCasedCharacterGreedy: michael@0: case ByteTerm::TypePatternCasedCharacterNonGreedy: michael@0: if (backtrackPatternCasedCharacter(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypeCharacterClass: michael@0: if (backtrackCharacterClass(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypeBackReference: michael@0: if (backtrackBackReference(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypeParenthesesSubpattern: { michael@0: JSRegExpResult result = backtrackParentheses(currentTerm(), context); michael@0: michael@0: if (result == JSRegExpMatch) { michael@0: MATCH_NEXT(); michael@0: } else if (result != JSRegExpNoMatch) michael@0: return result; michael@0: michael@0: BACKTRACK(); michael@0: } michael@0: case ByteTerm::TypeParenthesesSubpatternOnceBegin: michael@0: if (backtrackParenthesesOnceBegin(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypeParenthesesSubpatternOnceEnd: michael@0: if (backtrackParenthesesOnceEnd(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypeParenthesesSubpatternTerminalBegin: michael@0: if (backtrackParenthesesTerminalBegin(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypeParenthesesSubpatternTerminalEnd: michael@0: if (backtrackParenthesesTerminalEnd(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypeParentheticalAssertionBegin: michael@0: if (backtrackParentheticalAssertionBegin(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: case ByteTerm::TypeParentheticalAssertionEnd: michael@0: if (backtrackParentheticalAssertionEnd(currentTerm(), context)) michael@0: MATCH_NEXT(); michael@0: BACKTRACK(); michael@0: michael@0: case ByteTerm::TypeCheckInput: michael@0: input.uncheckInput(currentTerm().checkInputCount); michael@0: BACKTRACK(); michael@0: michael@0: case ByteTerm::TypeUncheckInput: michael@0: input.checkInput(currentTerm().checkInputCount); michael@0: BACKTRACK(); michael@0: michael@0: case ByteTerm::TypeDotStarEnclosure: michael@0: ASSERT_NOT_REACHED(); michael@0: } michael@0: michael@0: ASSERT_NOT_REACHED(); michael@0: return JSRegExpErrorNoMatch; michael@0: } michael@0: michael@0: JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false) michael@0: { michael@0: JSRegExpResult result = matchDisjunction(disjunction, context, btrack); michael@0: michael@0: if (result == JSRegExpMatch) { michael@0: while (context->matchBegin == context->matchEnd) { michael@0: result = matchDisjunction(disjunction, context, true); michael@0: if (result != JSRegExpMatch) michael@0: return result; michael@0: } michael@0: return JSRegExpMatch; michael@0: } michael@0: michael@0: return result; michael@0: } michael@0: michael@0: unsigned interpret() michael@0: { michael@0: if (!input.isAvailableInput(0)) michael@0: return offsetNoMatch; michael@0: michael@0: for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i) michael@0: output[i << 1] = offsetNoMatch; michael@0: michael@0: allocatorPool = pattern->m_allocator->startAllocator(); michael@0: if (!allocatorPool) michael@0: CRASH(); michael@0: michael@0: DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get()); michael@0: michael@0: JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false); michael@0: if (result == JSRegExpMatch) { michael@0: output[0] = context->matchBegin; michael@0: output[1] = context->matchEnd; michael@0: } michael@0: michael@0: freeDisjunctionContext(context); michael@0: michael@0: pattern->m_allocator->stopAllocator(); michael@0: michael@0: ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch)); michael@0: return output[0]; michael@0: } michael@0: michael@0: Interpreter(JSContext *cx, BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start) michael@0: : cx(cx) michael@0: , pattern(pattern) michael@0: , output(output) michael@0: , input(input, start, length) michael@0: , allocatorPool(0) michael@0: , remainingMatchCount(matchLimit) michael@0: { michael@0: } michael@0: michael@0: private: michael@0: JSContext *cx; michael@0: BytecodePattern* pattern; michael@0: unsigned* output; michael@0: InputStream input; michael@0: BumpPointerPool* allocatorPool; michael@0: unsigned remainingMatchCount; michael@0: }; michael@0: michael@0: michael@0: michael@0: class ByteCompiler { michael@0: struct ParenthesesStackEntry { michael@0: unsigned beginTerm; michael@0: unsigned savedAlternativeIndex; michael@0: michael@0: // For js::Vector. Does not create a valid object. michael@0: ParenthesesStackEntry() { } michael@0: michael@0: ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/) michael@0: : beginTerm(beginTerm) michael@0: , savedAlternativeIndex(savedAlternativeIndex) michael@0: { michael@0: } michael@0: }; michael@0: michael@0: public: michael@0: ByteCompiler(YarrPattern& pattern) michael@0: : m_pattern(pattern) michael@0: { michael@0: m_currentAlternativeIndex = 0; michael@0: } michael@0: michael@0: PassOwnPtr compile(BumpPointerAllocator* allocator) michael@0: { michael@0: regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough()); michael@0: emitDisjunction(m_pattern.m_body); michael@0: regexEnd(); michael@0: michael@0: return adoptPtr(newOrCrash(m_bodyDisjunction.release(), m_allParenthesesInfo, Ref(m_pattern), allocator)); michael@0: } michael@0: michael@0: void checkInput(unsigned count) michael@0: { michael@0: m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count)); michael@0: } michael@0: michael@0: void uncheckInput(unsigned count) michael@0: { michael@0: m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count)); michael@0: } michael@0: michael@0: void assertionBOL(unsigned inputPosition) michael@0: { michael@0: m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition)); michael@0: } michael@0: michael@0: void assertionEOL(unsigned inputPosition) michael@0: { michael@0: m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition)); michael@0: } michael@0: michael@0: void assertionWordBoundary(bool invert, unsigned inputPosition) michael@0: { michael@0: m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition)); michael@0: } michael@0: michael@0: void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked quantityCount, QuantifierType quantityType) michael@0: { michael@0: if (m_pattern.m_ignoreCase) { michael@0: UChar lo = Unicode::toLower(ch); michael@0: UChar hi = Unicode::toUpper(ch); michael@0: michael@0: if (lo != hi) { michael@0: m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType)); michael@0: return; michael@0: } michael@0: } michael@0: michael@0: m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType)); michael@0: } michael@0: michael@0: void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked quantityCount, QuantifierType quantityType) michael@0: { michael@0: m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition)); michael@0: michael@0: m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet(); michael@0: m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; michael@0: m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; michael@0: } michael@0: michael@0: void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked quantityCount, QuantifierType quantityType) michael@0: { michael@0: ASSERT(subpatternId); michael@0: michael@0: m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition)); michael@0: michael@0: m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet(); michael@0: m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; michael@0: m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; michael@0: } michael@0: michael@0: void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) michael@0: { michael@0: int beginTerm = m_bodyDisjunction->terms.size(); michael@0: michael@0: m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition)); michael@0: m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; michael@0: m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); michael@0: m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; michael@0: michael@0: m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); michael@0: m_currentAlternativeIndex = beginTerm + 1; michael@0: } michael@0: michael@0: void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) michael@0: { michael@0: int beginTerm = m_bodyDisjunction->terms.size(); michael@0: michael@0: m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition)); michael@0: m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; michael@0: m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); michael@0: m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; michael@0: michael@0: m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); michael@0: m_currentAlternativeIndex = beginTerm + 1; michael@0: } michael@0: michael@0: void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) michael@0: { michael@0: // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin, michael@0: // then fix this up at the end! - simplifying this should make it much clearer. michael@0: // https://bugs.webkit.org/show_bug.cgi?id=50136 michael@0: michael@0: int beginTerm = m_bodyDisjunction->terms.size(); michael@0: michael@0: m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition)); michael@0: m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; michael@0: m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); michael@0: m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; michael@0: michael@0: m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); michael@0: m_currentAlternativeIndex = beginTerm + 1; michael@0: } michael@0: michael@0: void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation) michael@0: { michael@0: int beginTerm = m_bodyDisjunction->terms.size(); michael@0: michael@0: m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0)); michael@0: m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; michael@0: m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); michael@0: m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; michael@0: michael@0: m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); michael@0: m_currentAlternativeIndex = beginTerm + 1; michael@0: } michael@0: michael@0: void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked quantityCount, QuantifierType quantityType) michael@0: { michael@0: unsigned beginTerm = popParenthesesStack(); michael@0: closeAlternative(beginTerm + 1); michael@0: unsigned endTerm = m_bodyDisjunction->terms.size(); michael@0: michael@0: ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin); michael@0: michael@0: bool invert = m_bodyDisjunction->terms[beginTerm].invert(); michael@0: unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; michael@0: michael@0: m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition)); michael@0: m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; michael@0: m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; michael@0: m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; michael@0: michael@0: m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); michael@0: m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; michael@0: m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); michael@0: m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; michael@0: } michael@0: michael@0: void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored) michael@0: { michael@0: m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored)); michael@0: } michael@0: michael@0: unsigned popParenthesesStack() michael@0: { michael@0: ASSERT(m_parenthesesStack.size()); michael@0: int stackEnd = m_parenthesesStack.size() - 1; michael@0: unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm; michael@0: m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex; michael@0: m_parenthesesStack.shrink(stackEnd); michael@0: michael@0: ASSERT(beginTerm < m_bodyDisjunction->terms.size()); michael@0: ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size()); michael@0: michael@0: return beginTerm; michael@0: } michael@0: michael@0: #ifndef NDEBUG michael@0: void dumpDisjunction(ByteDisjunction* disjunction) michael@0: { michael@0: dataLogF("ByteDisjunction(%p):\n\t", (void *)disjunction); michael@0: for (unsigned i = 0; i < disjunction->terms.size(); ++i) michael@0: dataLogF("{ %d } ", disjunction->terms[i].type); michael@0: dataLogF("\n"); michael@0: } michael@0: #endif michael@0: michael@0: void closeAlternative(int beginTerm) michael@0: { michael@0: int origBeginTerm = beginTerm; michael@0: ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin); michael@0: int endIndex = m_bodyDisjunction->terms.size(); michael@0: michael@0: unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation; michael@0: michael@0: if (!m_bodyDisjunction->terms[beginTerm].alternative.next) michael@0: m_bodyDisjunction->terms.remove(beginTerm); michael@0: else { michael@0: while (m_bodyDisjunction->terms[beginTerm].alternative.next) { michael@0: beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next; michael@0: ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction); michael@0: m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm; michael@0: m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; michael@0: } michael@0: michael@0: m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm; michael@0: michael@0: m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd()); michael@0: m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation; michael@0: } michael@0: } michael@0: michael@0: void closeBodyAlternative() michael@0: { michael@0: int beginTerm = 0; michael@0: int origBeginTerm = 0; michael@0: ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin); michael@0: int endIndex = m_bodyDisjunction->terms.size(); michael@0: michael@0: unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation; michael@0: michael@0: while (m_bodyDisjunction->terms[beginTerm].alternative.next) { michael@0: beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next; michael@0: ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction); michael@0: m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm; michael@0: m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; michael@0: } michael@0: michael@0: m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm; michael@0: michael@0: m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd()); michael@0: m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation; michael@0: } michael@0: michael@0: void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0) michael@0: { michael@0: unsigned beginTerm = popParenthesesStack(); michael@0: closeAlternative(beginTerm + 1); michael@0: unsigned endTerm = m_bodyDisjunction->terms.size(); michael@0: michael@0: ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin); michael@0: michael@0: ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm]; michael@0: michael@0: bool capture = parenthesesBegin.capture(); michael@0: unsigned subpatternId = parenthesesBegin.atom.subpatternId; michael@0: michael@0: unsigned numSubpatterns = lastSubpatternId - subpatternId + 1; michael@0: ByteDisjunction* parenthesesDisjunction = newOrCrash(numSubpatterns, callFrameSize); michael@0: michael@0: parenthesesDisjunction->terms.reserve(endTerm - beginTerm + 1); michael@0: parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin()); michael@0: for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses) michael@0: parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]); michael@0: parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd()); michael@0: michael@0: m_bodyDisjunction->terms.shrink(beginTerm); michael@0: michael@0: m_allParenthesesInfo.append(parenthesesDisjunction); michael@0: m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition)); michael@0: michael@0: m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); michael@0: m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; michael@0: m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; michael@0: } michael@0: michael@0: void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked quantityCount, QuantifierType quantityType) michael@0: { michael@0: unsigned beginTerm = popParenthesesStack(); michael@0: closeAlternative(beginTerm + 1); michael@0: unsigned endTerm = m_bodyDisjunction->terms.size(); michael@0: michael@0: ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin); michael@0: michael@0: bool capture = m_bodyDisjunction->terms[beginTerm].capture(); michael@0: unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; michael@0: michael@0: m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition)); michael@0: m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; michael@0: m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; michael@0: m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; michael@0: michael@0: m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); michael@0: m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; michael@0: m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); michael@0: m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; michael@0: } michael@0: michael@0: void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked quantityCount, QuantifierType quantityType) michael@0: { michael@0: unsigned beginTerm = popParenthesesStack(); michael@0: closeAlternative(beginTerm + 1); michael@0: unsigned endTerm = m_bodyDisjunction->terms.size(); michael@0: michael@0: ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); michael@0: michael@0: bool capture = m_bodyDisjunction->terms[beginTerm].capture(); michael@0: unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; michael@0: michael@0: m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition)); michael@0: m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; michael@0: m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; michael@0: m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; michael@0: michael@0: m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); michael@0: m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; michael@0: m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); michael@0: m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; michael@0: } michael@0: michael@0: void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough) michael@0: { michael@0: m_bodyDisjunction = adoptPtr(newOrCrash(numSubpatterns, callFrameSize)); michael@0: m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough)); michael@0: m_bodyDisjunction->terms[0].frameLocation = 0; michael@0: m_currentAlternativeIndex = 0; michael@0: } michael@0: michael@0: void regexEnd() michael@0: { michael@0: closeBodyAlternative(); michael@0: } michael@0: michael@0: void alternativeBodyDisjunction(bool onceThrough) michael@0: { michael@0: int newAlternativeIndex = m_bodyDisjunction->terms.size(); michael@0: m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex; michael@0: m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough)); michael@0: michael@0: m_currentAlternativeIndex = newAlternativeIndex; michael@0: } michael@0: michael@0: void alternativeDisjunction() michael@0: { michael@0: int newAlternativeIndex = m_bodyDisjunction->terms.size(); michael@0: m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex; michael@0: m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction()); michael@0: michael@0: m_currentAlternativeIndex = newAlternativeIndex; michael@0: } michael@0: michael@0: void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0) michael@0: { michael@0: for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) { michael@0: unsigned currentCountAlreadyChecked = inputCountAlreadyChecked; michael@0: michael@0: PatternAlternative* alternative = disjunction->m_alternatives[alt]; michael@0: michael@0: if (alt) { michael@0: if (disjunction == m_pattern.m_body) michael@0: alternativeBodyDisjunction(alternative->onceThrough()); michael@0: else michael@0: alternativeDisjunction(); michael@0: } michael@0: michael@0: unsigned minimumSize = alternative->m_minimumSize; michael@0: ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked); michael@0: unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked; michael@0: michael@0: if (countToCheck) { michael@0: checkInput(countToCheck); michael@0: currentCountAlreadyChecked += countToCheck; michael@0: } michael@0: michael@0: for (unsigned i = 0; i < alternative->m_terms.size(); ++i) { michael@0: PatternTerm& term = alternative->m_terms[i]; michael@0: michael@0: switch (term.type) { michael@0: case PatternTerm::TypeAssertionBOL: michael@0: assertionBOL(currentCountAlreadyChecked - term.inputPosition); michael@0: break; michael@0: michael@0: case PatternTerm::TypeAssertionEOL: michael@0: assertionEOL(currentCountAlreadyChecked - term.inputPosition); michael@0: break; michael@0: michael@0: case PatternTerm::TypeAssertionWordBoundary: michael@0: assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition); michael@0: break; michael@0: michael@0: case PatternTerm::TypePatternCharacter: michael@0: atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); michael@0: break; michael@0: michael@0: case PatternTerm::TypeCharacterClass: michael@0: atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); michael@0: break; michael@0: michael@0: case PatternTerm::TypeBackReference: michael@0: atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); michael@0: break; michael@0: michael@0: case PatternTerm::TypeForwardReference: michael@0: break; michael@0: michael@0: case PatternTerm::TypeParenthesesSubpattern: { michael@0: unsigned disjunctionAlreadyCheckedCount = 0; michael@0: if (term.quantityCount == 1 && !term.parentheses.isCopy) { michael@0: unsigned alternativeFrameLocation = term.frameLocation; michael@0: // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame. michael@0: if (term.quantityType == QuantifierFixedCount) michael@0: disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize; michael@0: else michael@0: alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; michael@0: unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; michael@0: atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation); michael@0: emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); michael@0: atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); michael@0: } else if (term.parentheses.isTerminal) { michael@0: unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; michael@0: atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce); michael@0: emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); michael@0: atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); michael@0: } else { michael@0: unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; michael@0: atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0); michael@0: emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0); michael@0: atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize); michael@0: } michael@0: break; michael@0: } michael@0: michael@0: case PatternTerm::TypeParentheticalAssertion: { michael@0: unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion; michael@0: michael@0: ASSERT(currentCountAlreadyChecked >= static_cast(term.inputPosition)); michael@0: unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast(term.inputPosition); michael@0: unsigned uncheckAmount = 0; michael@0: if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) { michael@0: uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize; michael@0: uncheckInput(uncheckAmount); michael@0: currentCountAlreadyChecked -= uncheckAmount; michael@0: } michael@0: michael@0: atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation); michael@0: emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount); michael@0: atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType); michael@0: if (uncheckAmount) { michael@0: checkInput(uncheckAmount); michael@0: currentCountAlreadyChecked += uncheckAmount; michael@0: } michael@0: break; michael@0: } michael@0: michael@0: case PatternTerm::TypeDotStarEnclosure: michael@0: assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor); michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: private: michael@0: YarrPattern& m_pattern; michael@0: OwnPtr m_bodyDisjunction; michael@0: unsigned m_currentAlternativeIndex; michael@0: Vector m_parenthesesStack; michael@0: Vector m_allParenthesesInfo; michael@0: }; michael@0: michael@0: PassOwnPtr byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator) michael@0: { michael@0: return ByteCompiler(pattern).compile(allocator); michael@0: } michael@0: michael@0: unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output) michael@0: { michael@0: #if YARR_8BIT_CHAR_SUPPORT michael@0: if (input.is8Bit()) michael@0: return Interpreter(cx, bytecode, output, input.characters8(), input.length(), start).interpret(); michael@0: #endif michael@0: return Interpreter(cx, bytecode, output, input.chars(), input.length(), start).interpret(); michael@0: } michael@0: michael@0: unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output) michael@0: { michael@0: return Interpreter(cx, bytecode, output, input, length, start).interpret(); michael@0: } michael@0: michael@0: unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output) michael@0: { michael@0: return Interpreter(cx, bytecode, output, input, length, start).interpret(); michael@0: } michael@0: michael@0: // These should be the same for both UChar & LChar. michael@0: COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter); michael@0: COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass); michael@0: COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference); michael@0: COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative); michael@0: COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion); michael@0: COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce); michael@0: COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses); michael@0: michael@0: michael@0: } }