js/src/yarr/YarrInterpreter.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/js/src/yarr/YarrInterpreter.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,395 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
     1.5 + * vim: set ts=8 sts=4 et sw=4 tw=99:
     1.6 + *
     1.7 + * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
     1.8 + *
     1.9 + * Redistribution and use in source and binary forms, with or without
    1.10 + * modification, are permitted provided that the following conditions
    1.11 + * are met:
    1.12 + * 1. Redistributions of source code must retain the above copyright
    1.13 + *    notice, this list of conditions and the following disclaimer.
    1.14 + * 2. Redistributions in binary form must reproduce the above copyright
    1.15 + *    notice, this list of conditions and the following disclaimer in the
    1.16 + *    documentation and/or other materials provided with the distribution.
    1.17 + *
    1.18 + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
    1.19 + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    1.20 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    1.21 + * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
    1.22 + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    1.23 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    1.24 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    1.25 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
    1.26 + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    1.27 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    1.28 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    1.29 + */
    1.30 +
    1.31 +#ifndef yarr_YarrInterpreter_h
    1.32 +#define yarr_YarrInterpreter_h
    1.33 +
    1.34 +#include "yarr/YarrPattern.h"
    1.35 +
    1.36 +namespace WTF {
    1.37 +class BumpPointerAllocator;
    1.38 +}
    1.39 +using WTF::BumpPointerAllocator;
    1.40 +
    1.41 +namespace JSC { namespace Yarr {
    1.42 +
    1.43 +class ByteDisjunction;
    1.44 +
    1.45 +struct ByteTerm {
    1.46 +    enum Type {
    1.47 +        TypeBodyAlternativeBegin,
    1.48 +        TypeBodyAlternativeDisjunction,
    1.49 +        TypeBodyAlternativeEnd,
    1.50 +        TypeAlternativeBegin,
    1.51 +        TypeAlternativeDisjunction,
    1.52 +        TypeAlternativeEnd,
    1.53 +        TypeSubpatternBegin,
    1.54 +        TypeSubpatternEnd,
    1.55 +        TypeAssertionBOL,
    1.56 +        TypeAssertionEOL,
    1.57 +        TypeAssertionWordBoundary,
    1.58 +        TypePatternCharacterOnce,
    1.59 +        TypePatternCharacterFixed,
    1.60 +        TypePatternCharacterGreedy,
    1.61 +        TypePatternCharacterNonGreedy,
    1.62 +        TypePatternCasedCharacterOnce,
    1.63 +        TypePatternCasedCharacterFixed,
    1.64 +        TypePatternCasedCharacterGreedy,
    1.65 +        TypePatternCasedCharacterNonGreedy,
    1.66 +        TypeCharacterClass,
    1.67 +        TypeBackReference,
    1.68 +        TypeParenthesesSubpattern,
    1.69 +        TypeParenthesesSubpatternOnceBegin,
    1.70 +        TypeParenthesesSubpatternOnceEnd,
    1.71 +        TypeParenthesesSubpatternTerminalBegin,
    1.72 +        TypeParenthesesSubpatternTerminalEnd,
    1.73 +        TypeParentheticalAssertionBegin,
    1.74 +        TypeParentheticalAssertionEnd,
    1.75 +        TypeCheckInput,
    1.76 +        TypeUncheckInput,
    1.77 +        TypeDotStarEnclosure
    1.78 +    } type;
    1.79 +    union {
    1.80 +        struct {
    1.81 +            union {
    1.82 +                UChar patternCharacter;
    1.83 +                struct {
    1.84 +                    UChar lo;
    1.85 +                    UChar hi;
    1.86 +                } casedCharacter;
    1.87 +                CharacterClass* characterClass;
    1.88 +                unsigned subpatternId;
    1.89 +            };
    1.90 +            union {
    1.91 +                ByteDisjunction* parenthesesDisjunction;
    1.92 +                unsigned parenthesesWidth;
    1.93 +            };
    1.94 +            QuantifierType quantityType;
    1.95 +            unsigned quantityCount;
    1.96 +        } atom;
    1.97 +        struct {
    1.98 +            int next;
    1.99 +            int end;
   1.100 +            bool onceThrough;
   1.101 +        } alternative;
   1.102 +        struct {
   1.103 +            bool m_bol : 1;
   1.104 +            bool m_eol : 1;
   1.105 +        } anchors;
   1.106 +        unsigned checkInputCount;
   1.107 +    };
   1.108 +    unsigned frameLocation;
   1.109 +    bool m_capture : 1;
   1.110 +    bool m_invert : 1;
   1.111 +    unsigned inputPosition;
   1.112 +
   1.113 +    ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
   1.114 +        : frameLocation(frameLocation)
   1.115 +        , m_capture(false)
   1.116 +        , m_invert(false)
   1.117 +    {
   1.118 +        switch (quantityType) {
   1.119 +        case QuantifierFixedCount:
   1.120 +            type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
   1.121 +            break;
   1.122 +        case QuantifierGreedy:
   1.123 +            type = ByteTerm::TypePatternCharacterGreedy;
   1.124 +            break;
   1.125 +        case QuantifierNonGreedy:
   1.126 +            type = ByteTerm::TypePatternCharacterNonGreedy;
   1.127 +            break;
   1.128 +        }
   1.129 +
   1.130 +        atom.patternCharacter = ch;
   1.131 +        atom.quantityType = quantityType;
   1.132 +        atom.quantityCount = quantityCount.unsafeGet();
   1.133 +        inputPosition = inputPos;
   1.134 +    }
   1.135 +
   1.136 +    ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
   1.137 +        : frameLocation(frameLocation)
   1.138 +        , m_capture(false)
   1.139 +        , m_invert(false)
   1.140 +    {
   1.141 +        switch (quantityType) {
   1.142 +        case QuantifierFixedCount:
   1.143 +            type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
   1.144 +            break;
   1.145 +        case QuantifierGreedy:
   1.146 +            type = ByteTerm::TypePatternCasedCharacterGreedy;
   1.147 +            break;
   1.148 +        case QuantifierNonGreedy:
   1.149 +            type = ByteTerm::TypePatternCasedCharacterNonGreedy;
   1.150 +            break;
   1.151 +        }
   1.152 +
   1.153 +        atom.casedCharacter.lo = lo;
   1.154 +        atom.casedCharacter.hi = hi;
   1.155 +        atom.quantityType = quantityType;
   1.156 +        atom.quantityCount = quantityCount.unsafeGet();
   1.157 +        inputPosition = inputPos;
   1.158 +    }
   1.159 +
   1.160 +    ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
   1.161 +        : type(ByteTerm::TypeCharacterClass)
   1.162 +        , m_capture(false)
   1.163 +        , m_invert(invert)
   1.164 +    {
   1.165 +        atom.characterClass = characterClass;
   1.166 +        atom.quantityType = QuantifierFixedCount;
   1.167 +        atom.quantityCount = 1;
   1.168 +        inputPosition = inputPos;
   1.169 +    }
   1.170 +
   1.171 +    ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos)
   1.172 +        : type(type)
   1.173 +        , m_capture(capture)
   1.174 +        , m_invert(false)
   1.175 +    {
   1.176 +        atom.subpatternId = subpatternId;
   1.177 +        atom.parenthesesDisjunction = parenthesesInfo;
   1.178 +        atom.quantityType = QuantifierFixedCount;
   1.179 +        atom.quantityCount = 1;
   1.180 +        inputPosition = inputPos;
   1.181 +    }
   1.182 +
   1.183 +    ByteTerm(Type type, bool invert = false)
   1.184 +        : type(type)
   1.185 +        , m_capture(false)
   1.186 +        , m_invert(invert)
   1.187 +    {
   1.188 +        atom.quantityType = QuantifierFixedCount;
   1.189 +        atom.quantityCount = 1;
   1.190 +    }
   1.191 +
   1.192 +    ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos)
   1.193 +        : type(type)
   1.194 +        , m_capture(capture)
   1.195 +        , m_invert(invert)
   1.196 +    {
   1.197 +        atom.subpatternId = subpatternId;
   1.198 +        atom.quantityType = QuantifierFixedCount;
   1.199 +        atom.quantityCount = 1;
   1.200 +        inputPosition = inputPos;
   1.201 +    }
   1.202 +
   1.203 +    // For js::Vector. Does not create a valid object.
   1.204 +    ByteTerm()
   1.205 +    {
   1.206 +    }
   1.207 +
   1.208 +    static ByteTerm BOL(int inputPos)
   1.209 +    {
   1.210 +        ByteTerm term(TypeAssertionBOL);
   1.211 +        term.inputPosition = inputPos;
   1.212 +        return term;
   1.213 +    }
   1.214 +
   1.215 +    static ByteTerm CheckInput(Checked<unsigned> count)
   1.216 +    {
   1.217 +        ByteTerm term(TypeCheckInput);
   1.218 +        term.checkInputCount = count.unsafeGet();
   1.219 +        return term;
   1.220 +    }
   1.221 +
   1.222 +    static ByteTerm UncheckInput(Checked<unsigned> count)
   1.223 +    {
   1.224 +        ByteTerm term(TypeUncheckInput);
   1.225 +        term.checkInputCount = count.unsafeGet();
   1.226 +        return term;
   1.227 +    }
   1.228 +
   1.229 +    static ByteTerm EOL(int inputPos)
   1.230 +    {
   1.231 +        ByteTerm term(TypeAssertionEOL);
   1.232 +        term.inputPosition = inputPos;
   1.233 +        return term;
   1.234 +    }
   1.235 +
   1.236 +    static ByteTerm WordBoundary(bool invert, int inputPos)
   1.237 +    {
   1.238 +        ByteTerm term(TypeAssertionWordBoundary, invert);
   1.239 +        term.inputPosition = inputPos;
   1.240 +        return term;
   1.241 +    }
   1.242 +
   1.243 +    static ByteTerm BackReference(unsigned subpatternId, int inputPos)
   1.244 +    {
   1.245 +        return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
   1.246 +    }
   1.247 +
   1.248 +    static ByteTerm BodyAlternativeBegin(bool onceThrough)
   1.249 +    {
   1.250 +        ByteTerm term(TypeBodyAlternativeBegin);
   1.251 +        term.alternative.next = 0;
   1.252 +        term.alternative.end = 0;
   1.253 +        term.alternative.onceThrough = onceThrough;
   1.254 +        return term;
   1.255 +    }
   1.256 +
   1.257 +    static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
   1.258 +    {
   1.259 +        ByteTerm term(TypeBodyAlternativeDisjunction);
   1.260 +        term.alternative.next = 0;
   1.261 +        term.alternative.end = 0;
   1.262 +        term.alternative.onceThrough = onceThrough;
   1.263 +        return term;
   1.264 +    }
   1.265 +
   1.266 +    static ByteTerm BodyAlternativeEnd()
   1.267 +    {
   1.268 +        ByteTerm term(TypeBodyAlternativeEnd);
   1.269 +        term.alternative.next = 0;
   1.270 +        term.alternative.end = 0;
   1.271 +        term.alternative.onceThrough = false;
   1.272 +        return term;
   1.273 +    }
   1.274 +
   1.275 +    static ByteTerm AlternativeBegin()
   1.276 +    {
   1.277 +        ByteTerm term(TypeAlternativeBegin);
   1.278 +        term.alternative.next = 0;
   1.279 +        term.alternative.end = 0;
   1.280 +        term.alternative.onceThrough = false;
   1.281 +        return term;
   1.282 +    }
   1.283 +
   1.284 +    static ByteTerm AlternativeDisjunction()
   1.285 +    {
   1.286 +        ByteTerm term(TypeAlternativeDisjunction);
   1.287 +        term.alternative.next = 0;
   1.288 +        term.alternative.end = 0;
   1.289 +        term.alternative.onceThrough = false;
   1.290 +        return term;
   1.291 +    }
   1.292 +
   1.293 +    static ByteTerm AlternativeEnd()
   1.294 +    {
   1.295 +        ByteTerm term(TypeAlternativeEnd);
   1.296 +        term.alternative.next = 0;
   1.297 +        term.alternative.end = 0;
   1.298 +        term.alternative.onceThrough = false;
   1.299 +        return term;
   1.300 +    }
   1.301 +
   1.302 +    static ByteTerm SubpatternBegin()
   1.303 +    {
   1.304 +        return ByteTerm(TypeSubpatternBegin);
   1.305 +    }
   1.306 +
   1.307 +    static ByteTerm SubpatternEnd()
   1.308 +    {
   1.309 +        return ByteTerm(TypeSubpatternEnd);
   1.310 +    }
   1.311 +
   1.312 +    static ByteTerm DotStarEnclosure(bool bolAnchor, bool eolAnchor)
   1.313 +    {
   1.314 +        ByteTerm term(TypeDotStarEnclosure);
   1.315 +        term.anchors.m_bol = bolAnchor;
   1.316 +        term.anchors.m_eol = eolAnchor;
   1.317 +        return term;
   1.318 +    }
   1.319 +
   1.320 +    bool invert()
   1.321 +    {
   1.322 +        return m_invert;
   1.323 +    }
   1.324 +
   1.325 +    bool capture()
   1.326 +    {
   1.327 +        return m_capture;
   1.328 +    }
   1.329 +};
   1.330 +
   1.331 +class ByteDisjunction {
   1.332 +    WTF_MAKE_FAST_ALLOCATED;
   1.333 +public:
   1.334 +    ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
   1.335 +        : m_numSubpatterns(numSubpatterns)
   1.336 +        , m_frameSize(frameSize)
   1.337 +    {
   1.338 +    }
   1.339 +
   1.340 +    Vector<ByteTerm> terms;
   1.341 +    unsigned m_numSubpatterns;
   1.342 +    unsigned m_frameSize;
   1.343 +};
   1.344 +
   1.345 +struct BytecodePattern {
   1.346 +    WTF_MAKE_FAST_ALLOCATED;
   1.347 +public:
   1.348 +    BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<ByteDisjunction*> &allParenthesesInfo, YarrPattern& pattern, BumpPointerAllocator* allocator)
   1.349 +        : m_body(body)
   1.350 +        , m_ignoreCase(pattern.m_ignoreCase)
   1.351 +        , m_multiline(pattern.m_multiline)
   1.352 +        , m_allocator(allocator)
   1.353 +    {
   1.354 +        newlineCharacterClass = pattern.newlineCharacterClass();
   1.355 +        wordcharCharacterClass = pattern.wordcharCharacterClass();
   1.356 +
   1.357 +        // Trick: 'Steal' the YarrPattern's ParenthesesInfo!
   1.358 +        // The input vector isn't used afterwards anymore,
   1.359 +        // that way we don't have to copy the input.
   1.360 +        JS_ASSERT(m_allParenthesesInfo.size() == 0);
   1.361 +        m_allParenthesesInfo.swap(allParenthesesInfo);
   1.362 +
   1.363 +        // Trick: 'Steal' the YarrPattern's CharacterClasses!
   1.364 +        // The input vector isn't used afterwards anymore,
   1.365 +        // that way we don't have to copy the input.
   1.366 +        JS_ASSERT(m_userCharacterClasses.size() == 0);
   1.367 +        m_userCharacterClasses.swap(pattern.m_userCharacterClasses);
   1.368 +    }
   1.369 +
   1.370 +    ~BytecodePattern()
   1.371 +    {
   1.372 +        deleteAllValues(m_allParenthesesInfo);
   1.373 +        deleteAllValues(m_userCharacterClasses);
   1.374 +    }
   1.375 +
   1.376 +    OwnPtr<ByteDisjunction> m_body;
   1.377 +    bool m_ignoreCase;
   1.378 +    bool m_multiline;
   1.379 +    // Each BytecodePattern is associated with a RegExp, each RegExp is associated
   1.380 +    // with a JSGlobalData.  Cache a pointer to out JSGlobalData's m_regExpAllocator.
   1.381 +    BumpPointerAllocator* m_allocator;
   1.382 +
   1.383 +    CharacterClass* newlineCharacterClass;
   1.384 +    CharacterClass* wordcharCharacterClass;
   1.385 +
   1.386 +private:
   1.387 +    Vector<ByteDisjunction*> m_allParenthesesInfo;
   1.388 +    Vector<CharacterClass*> m_userCharacterClasses;
   1.389 +};
   1.390 +
   1.391 +JS_EXPORT_PRIVATE PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
   1.392 +JS_EXPORT_PRIVATE unsigned interpret(JSContext *cx, BytecodePattern*, const String& input, unsigned start, unsigned* output);
   1.393 +unsigned interpret(JSContext *cx, BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output);
   1.394 +unsigned interpret(JSContext *cx, BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output);
   1.395 +
   1.396 +} } // namespace JSC::Yarr
   1.397 +
   1.398 +#endif /* yarr_YarrInterpreter_h */

mercurial