michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- michael@0: * vim: set ts=8 sts=4 et sw=4 tw=99: michael@0: * michael@0: * Copyright (C) 2009, 2010 Apple Inc. All rights reserved. michael@0: * michael@0: * Redistribution and use in source and binary forms, with or without michael@0: * modification, are permitted provided that the following conditions michael@0: * are met: michael@0: * 1. Redistributions of source code must retain the above copyright michael@0: * notice, this list of conditions and the following disclaimer. michael@0: * 2. Redistributions in binary form must reproduce the above copyright michael@0: * notice, this list of conditions and the following disclaimer in the michael@0: * documentation and/or other materials provided with the distribution. michael@0: * michael@0: * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY michael@0: * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE michael@0: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR michael@0: * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR michael@0: * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, michael@0: * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, michael@0: * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR michael@0: * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY michael@0: * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT michael@0: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE michael@0: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. michael@0: */ michael@0: michael@0: #ifndef yarr_YarrInterpreter_h michael@0: #define yarr_YarrInterpreter_h michael@0: michael@0: #include "yarr/YarrPattern.h" michael@0: michael@0: namespace WTF { michael@0: class BumpPointerAllocator; michael@0: } michael@0: using WTF::BumpPointerAllocator; michael@0: michael@0: namespace JSC { namespace Yarr { michael@0: michael@0: class ByteDisjunction; michael@0: michael@0: struct ByteTerm { michael@0: enum Type { michael@0: TypeBodyAlternativeBegin, michael@0: TypeBodyAlternativeDisjunction, michael@0: TypeBodyAlternativeEnd, michael@0: TypeAlternativeBegin, michael@0: TypeAlternativeDisjunction, michael@0: TypeAlternativeEnd, michael@0: TypeSubpatternBegin, michael@0: TypeSubpatternEnd, michael@0: TypeAssertionBOL, michael@0: TypeAssertionEOL, michael@0: TypeAssertionWordBoundary, michael@0: TypePatternCharacterOnce, michael@0: TypePatternCharacterFixed, michael@0: TypePatternCharacterGreedy, michael@0: TypePatternCharacterNonGreedy, michael@0: TypePatternCasedCharacterOnce, michael@0: TypePatternCasedCharacterFixed, michael@0: TypePatternCasedCharacterGreedy, michael@0: TypePatternCasedCharacterNonGreedy, michael@0: TypeCharacterClass, michael@0: TypeBackReference, michael@0: TypeParenthesesSubpattern, michael@0: TypeParenthesesSubpatternOnceBegin, michael@0: TypeParenthesesSubpatternOnceEnd, michael@0: TypeParenthesesSubpatternTerminalBegin, michael@0: TypeParenthesesSubpatternTerminalEnd, michael@0: TypeParentheticalAssertionBegin, michael@0: TypeParentheticalAssertionEnd, michael@0: TypeCheckInput, michael@0: TypeUncheckInput, michael@0: TypeDotStarEnclosure michael@0: } type; michael@0: union { michael@0: struct { michael@0: union { michael@0: UChar patternCharacter; michael@0: struct { michael@0: UChar lo; michael@0: UChar hi; michael@0: } casedCharacter; michael@0: CharacterClass* characterClass; michael@0: unsigned subpatternId; michael@0: }; michael@0: union { michael@0: ByteDisjunction* parenthesesDisjunction; michael@0: unsigned parenthesesWidth; michael@0: }; michael@0: QuantifierType quantityType; michael@0: unsigned quantityCount; michael@0: } atom; michael@0: struct { michael@0: int next; michael@0: int end; michael@0: bool onceThrough; michael@0: } alternative; michael@0: struct { michael@0: bool m_bol : 1; michael@0: bool m_eol : 1; michael@0: } anchors; michael@0: unsigned checkInputCount; michael@0: }; michael@0: unsigned frameLocation; michael@0: bool m_capture : 1; michael@0: bool m_invert : 1; michael@0: unsigned inputPosition; michael@0: michael@0: ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked quantityCount, QuantifierType quantityType) michael@0: : frameLocation(frameLocation) michael@0: , m_capture(false) michael@0: , m_invert(false) michael@0: { michael@0: switch (quantityType) { michael@0: case QuantifierFixedCount: michael@0: type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed; michael@0: break; michael@0: case QuantifierGreedy: michael@0: type = ByteTerm::TypePatternCharacterGreedy; michael@0: break; michael@0: case QuantifierNonGreedy: michael@0: type = ByteTerm::TypePatternCharacterNonGreedy; michael@0: break; michael@0: } michael@0: michael@0: atom.patternCharacter = ch; michael@0: atom.quantityType = quantityType; michael@0: atom.quantityCount = quantityCount.unsafeGet(); michael@0: inputPosition = inputPos; michael@0: } michael@0: michael@0: ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, Checked quantityCount, QuantifierType quantityType) michael@0: : frameLocation(frameLocation) michael@0: , m_capture(false) michael@0: , m_invert(false) michael@0: { michael@0: switch (quantityType) { michael@0: case QuantifierFixedCount: michael@0: type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed; michael@0: break; michael@0: case QuantifierGreedy: michael@0: type = ByteTerm::TypePatternCasedCharacterGreedy; michael@0: break; michael@0: case QuantifierNonGreedy: michael@0: type = ByteTerm::TypePatternCasedCharacterNonGreedy; michael@0: break; michael@0: } michael@0: michael@0: atom.casedCharacter.lo = lo; michael@0: atom.casedCharacter.hi = hi; michael@0: atom.quantityType = quantityType; michael@0: atom.quantityCount = quantityCount.unsafeGet(); michael@0: inputPosition = inputPos; michael@0: } michael@0: michael@0: ByteTerm(CharacterClass* characterClass, bool invert, int inputPos) michael@0: : type(ByteTerm::TypeCharacterClass) michael@0: , m_capture(false) michael@0: , m_invert(invert) michael@0: { michael@0: atom.characterClass = characterClass; michael@0: atom.quantityType = QuantifierFixedCount; michael@0: atom.quantityCount = 1; michael@0: inputPosition = inputPos; michael@0: } michael@0: michael@0: ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos) michael@0: : type(type) michael@0: , m_capture(capture) michael@0: , m_invert(false) michael@0: { michael@0: atom.subpatternId = subpatternId; michael@0: atom.parenthesesDisjunction = parenthesesInfo; michael@0: atom.quantityType = QuantifierFixedCount; michael@0: atom.quantityCount = 1; michael@0: inputPosition = inputPos; michael@0: } michael@0: michael@0: ByteTerm(Type type, bool invert = false) michael@0: : type(type) michael@0: , m_capture(false) michael@0: , m_invert(invert) michael@0: { michael@0: atom.quantityType = QuantifierFixedCount; michael@0: atom.quantityCount = 1; michael@0: } michael@0: michael@0: ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos) michael@0: : type(type) michael@0: , m_capture(capture) michael@0: , m_invert(invert) michael@0: { michael@0: atom.subpatternId = subpatternId; michael@0: atom.quantityType = QuantifierFixedCount; michael@0: atom.quantityCount = 1; michael@0: inputPosition = inputPos; michael@0: } michael@0: michael@0: // For js::Vector. Does not create a valid object. michael@0: ByteTerm() michael@0: { michael@0: } michael@0: michael@0: static ByteTerm BOL(int inputPos) michael@0: { michael@0: ByteTerm term(TypeAssertionBOL); michael@0: term.inputPosition = inputPos; michael@0: return term; michael@0: } michael@0: michael@0: static ByteTerm CheckInput(Checked count) michael@0: { michael@0: ByteTerm term(TypeCheckInput); michael@0: term.checkInputCount = count.unsafeGet(); michael@0: return term; michael@0: } michael@0: michael@0: static ByteTerm UncheckInput(Checked count) michael@0: { michael@0: ByteTerm term(TypeUncheckInput); michael@0: term.checkInputCount = count.unsafeGet(); michael@0: return term; michael@0: } michael@0: michael@0: static ByteTerm EOL(int inputPos) michael@0: { michael@0: ByteTerm term(TypeAssertionEOL); michael@0: term.inputPosition = inputPos; michael@0: return term; michael@0: } michael@0: michael@0: static ByteTerm WordBoundary(bool invert, int inputPos) michael@0: { michael@0: ByteTerm term(TypeAssertionWordBoundary, invert); michael@0: term.inputPosition = inputPos; michael@0: return term; michael@0: } michael@0: michael@0: static ByteTerm BackReference(unsigned subpatternId, int inputPos) michael@0: { michael@0: return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos); michael@0: } michael@0: michael@0: static ByteTerm BodyAlternativeBegin(bool onceThrough) michael@0: { michael@0: ByteTerm term(TypeBodyAlternativeBegin); michael@0: term.alternative.next = 0; michael@0: term.alternative.end = 0; michael@0: term.alternative.onceThrough = onceThrough; michael@0: return term; michael@0: } michael@0: michael@0: static ByteTerm BodyAlternativeDisjunction(bool onceThrough) michael@0: { michael@0: ByteTerm term(TypeBodyAlternativeDisjunction); michael@0: term.alternative.next = 0; michael@0: term.alternative.end = 0; michael@0: term.alternative.onceThrough = onceThrough; michael@0: return term; michael@0: } michael@0: michael@0: static ByteTerm BodyAlternativeEnd() michael@0: { michael@0: ByteTerm term(TypeBodyAlternativeEnd); michael@0: term.alternative.next = 0; michael@0: term.alternative.end = 0; michael@0: term.alternative.onceThrough = false; michael@0: return term; michael@0: } michael@0: michael@0: static ByteTerm AlternativeBegin() michael@0: { michael@0: ByteTerm term(TypeAlternativeBegin); michael@0: term.alternative.next = 0; michael@0: term.alternative.end = 0; michael@0: term.alternative.onceThrough = false; michael@0: return term; michael@0: } michael@0: michael@0: static ByteTerm AlternativeDisjunction() michael@0: { michael@0: ByteTerm term(TypeAlternativeDisjunction); michael@0: term.alternative.next = 0; michael@0: term.alternative.end = 0; michael@0: term.alternative.onceThrough = false; michael@0: return term; michael@0: } michael@0: michael@0: static ByteTerm AlternativeEnd() michael@0: { michael@0: ByteTerm term(TypeAlternativeEnd); michael@0: term.alternative.next = 0; michael@0: term.alternative.end = 0; michael@0: term.alternative.onceThrough = false; michael@0: return term; michael@0: } michael@0: michael@0: static ByteTerm SubpatternBegin() michael@0: { michael@0: return ByteTerm(TypeSubpatternBegin); michael@0: } michael@0: michael@0: static ByteTerm SubpatternEnd() michael@0: { michael@0: return ByteTerm(TypeSubpatternEnd); michael@0: } michael@0: michael@0: static ByteTerm DotStarEnclosure(bool bolAnchor, bool eolAnchor) michael@0: { michael@0: ByteTerm term(TypeDotStarEnclosure); michael@0: term.anchors.m_bol = bolAnchor; michael@0: term.anchors.m_eol = eolAnchor; michael@0: return term; michael@0: } michael@0: michael@0: bool invert() michael@0: { michael@0: return m_invert; michael@0: } michael@0: michael@0: bool capture() michael@0: { michael@0: return m_capture; michael@0: } michael@0: }; michael@0: michael@0: class ByteDisjunction { michael@0: WTF_MAKE_FAST_ALLOCATED; michael@0: public: michael@0: ByteDisjunction(unsigned numSubpatterns, unsigned frameSize) michael@0: : m_numSubpatterns(numSubpatterns) michael@0: , m_frameSize(frameSize) michael@0: { michael@0: } michael@0: michael@0: Vector terms; michael@0: unsigned m_numSubpatterns; michael@0: unsigned m_frameSize; michael@0: }; michael@0: michael@0: struct BytecodePattern { michael@0: WTF_MAKE_FAST_ALLOCATED; michael@0: public: michael@0: BytecodePattern(PassOwnPtr body, Vector &allParenthesesInfo, YarrPattern& pattern, BumpPointerAllocator* allocator) michael@0: : m_body(body) michael@0: , m_ignoreCase(pattern.m_ignoreCase) michael@0: , m_multiline(pattern.m_multiline) michael@0: , m_allocator(allocator) michael@0: { michael@0: newlineCharacterClass = pattern.newlineCharacterClass(); michael@0: wordcharCharacterClass = pattern.wordcharCharacterClass(); michael@0: michael@0: // Trick: 'Steal' the YarrPattern's ParenthesesInfo! michael@0: // The input vector isn't used afterwards anymore, michael@0: // that way we don't have to copy the input. michael@0: JS_ASSERT(m_allParenthesesInfo.size() == 0); michael@0: m_allParenthesesInfo.swap(allParenthesesInfo); michael@0: michael@0: // Trick: 'Steal' the YarrPattern's CharacterClasses! michael@0: // The input vector isn't used afterwards anymore, michael@0: // that way we don't have to copy the input. michael@0: JS_ASSERT(m_userCharacterClasses.size() == 0); michael@0: m_userCharacterClasses.swap(pattern.m_userCharacterClasses); michael@0: } michael@0: michael@0: ~BytecodePattern() michael@0: { michael@0: deleteAllValues(m_allParenthesesInfo); michael@0: deleteAllValues(m_userCharacterClasses); michael@0: } michael@0: michael@0: OwnPtr m_body; michael@0: bool m_ignoreCase; michael@0: bool m_multiline; michael@0: // Each BytecodePattern is associated with a RegExp, each RegExp is associated michael@0: // with a JSGlobalData. Cache a pointer to out JSGlobalData's m_regExpAllocator. michael@0: BumpPointerAllocator* m_allocator; michael@0: michael@0: CharacterClass* newlineCharacterClass; michael@0: CharacterClass* wordcharCharacterClass; michael@0: michael@0: private: michael@0: Vector m_allParenthesesInfo; michael@0: Vector m_userCharacterClasses; michael@0: }; michael@0: michael@0: JS_EXPORT_PRIVATE PassOwnPtr byteCompile(YarrPattern&, BumpPointerAllocator*); michael@0: JS_EXPORT_PRIVATE unsigned interpret(JSContext *cx, BytecodePattern*, const String& input, unsigned start, unsigned* output); michael@0: unsigned interpret(JSContext *cx, BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output); michael@0: unsigned interpret(JSContext *cx, BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output); michael@0: michael@0: } } // namespace JSC::Yarr michael@0: michael@0: #endif /* yarr_YarrInterpreter_h */