js/src/yarr/YarrInterpreter.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/js/src/yarr/YarrInterpreter.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1994 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
     1.5 + * vim: set ts=8 sts=4 et sw=4 tw=99:
     1.6 + *
     1.7 + * Copyright (C) 2009 Apple Inc. All rights reserved.
     1.8 + * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
     1.9 + *
    1.10 + * Redistribution and use in source and binary forms, with or without
    1.11 + * modification, are permitted provided that the following conditions
    1.12 + * are met:
    1.13 + * 1. Redistributions of source code must retain the above copyright
    1.14 + *    notice, this list of conditions and the following disclaimer.
    1.15 + * 2. Redistributions in binary form must reproduce the above copyright
    1.16 + *    notice, this list of conditions and the following disclaimer in the
    1.17 + *    documentation and/or other materials provided with the distribution.
    1.18 + *
    1.19 + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
    1.20 + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    1.21 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    1.22 + * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
    1.23 + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    1.24 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    1.25 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    1.26 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
    1.27 + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    1.28 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    1.29 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    1.30 + */
    1.31 +
    1.32 +#include "yarr/YarrInterpreter.h"
    1.33 +
    1.34 +#include "jscntxt.h"
    1.35 +
    1.36 +#include "yarr/Yarr.h"
    1.37 +#include "yarr/YarrCanonicalizeUCS2.h"
    1.38 +#include "yarr/BumpPointerAllocator.h"
    1.39 +
    1.40 +using namespace WTF;
    1.41 +
    1.42 +namespace JSC { namespace Yarr {
    1.43 +
    1.44 +template<typename CharType>
    1.45 +class Interpreter {
    1.46 +public:
    1.47 +    struct ParenthesesDisjunctionContext;
    1.48 +
    1.49 +    struct BackTrackInfoPatternCharacter {
    1.50 +        uintptr_t matchAmount;
    1.51 +    };
    1.52 +    struct BackTrackInfoCharacterClass {
    1.53 +        uintptr_t matchAmount;
    1.54 +    };
    1.55 +    struct BackTrackInfoBackReference {
    1.56 +        uintptr_t begin; // Not really needed for greedy quantifiers.
    1.57 +        uintptr_t matchAmount; // Not really needed for fixed quantifiers.
    1.58 +    };
    1.59 +    struct BackTrackInfoAlternative {
    1.60 +        uintptr_t offset;
    1.61 +    };
    1.62 +    struct BackTrackInfoParentheticalAssertion {
    1.63 +        uintptr_t begin;
    1.64 +    };
    1.65 +    struct BackTrackInfoParenthesesOnce {
    1.66 +        uintptr_t begin;
    1.67 +    };
    1.68 +    struct BackTrackInfoParenthesesTerminal {
    1.69 +        uintptr_t begin;
    1.70 +    };
    1.71 +    struct BackTrackInfoParentheses {
    1.72 +        uintptr_t matchAmount;
    1.73 +        ParenthesesDisjunctionContext* lastContext;
    1.74 +    };
    1.75 +
    1.76 +    static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
    1.77 +    {
    1.78 +        context->next = backTrack->lastContext;
    1.79 +        backTrack->lastContext = context;
    1.80 +        ++backTrack->matchAmount;
    1.81 +    }
    1.82 +
    1.83 +    static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
    1.84 +    {
    1.85 +        ASSERT(backTrack->matchAmount);
    1.86 +        ASSERT(backTrack->lastContext);
    1.87 +        backTrack->lastContext = backTrack->lastContext->next;
    1.88 +        --backTrack->matchAmount;
    1.89 +    }
    1.90 +
    1.91 +    struct DisjunctionContext
    1.92 +    {
    1.93 +        DisjunctionContext()
    1.94 +            : term(0)
    1.95 +        {
    1.96 +        }
    1.97 +
    1.98 +        void* operator new(size_t, void* where)
    1.99 +        {
   1.100 +            return where;
   1.101 +        }
   1.102 +
   1.103 +        int term;
   1.104 +        unsigned matchBegin;
   1.105 +        unsigned matchEnd;
   1.106 +        uintptr_t frame[1];
   1.107 +    };
   1.108 +
   1.109 +    DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
   1.110 +    {
   1.111 +        size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
   1.112 +        allocatorPool = allocatorPool->ensureCapacity(size);
   1.113 +        if (!allocatorPool)
   1.114 +            CRASH();
   1.115 +        return new (allocatorPool->alloc(size)) DisjunctionContext();
   1.116 +    }
   1.117 +
   1.118 +    void freeDisjunctionContext(DisjunctionContext* context)
   1.119 +    {
   1.120 +        allocatorPool = allocatorPool->dealloc(context);
   1.121 +    }
   1.122 +
   1.123 +    struct ParenthesesDisjunctionContext
   1.124 +    {
   1.125 +        ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
   1.126 +            : next(0)
   1.127 +        {
   1.128 +            unsigned firstSubpatternId = term.atom.subpatternId;
   1.129 +            unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
   1.130 +
   1.131 +            for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
   1.132 +                subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
   1.133 +                output[(firstSubpatternId << 1) + i] = offsetNoMatch;
   1.134 +            }
   1.135 +
   1.136 +            new (getDisjunctionContext(term)) DisjunctionContext();
   1.137 +        }
   1.138 +
   1.139 +        void* operator new(size_t, void* where)
   1.140 +        {
   1.141 +            return where;
   1.142 +        }
   1.143 +
   1.144 +        void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
   1.145 +        {
   1.146 +            for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
   1.147 +                output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
   1.148 +        }
   1.149 +
   1.150 +        DisjunctionContext* getDisjunctionContext(ByteTerm& term)
   1.151 +        {
   1.152 +            return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
   1.153 +        }
   1.154 +
   1.155 +        ParenthesesDisjunctionContext* next;
   1.156 +        unsigned subpatternBackup[1];
   1.157 +    };
   1.158 +
   1.159 +    ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
   1.160 +    {
   1.161 +        size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
   1.162 +        size = JS_ROUNDUP(size, JS_ALIGNMENT_OF(ParenthesesDisjunctionContext));
   1.163 +        allocatorPool = allocatorPool->ensureCapacity(size);
   1.164 +        if (!allocatorPool)
   1.165 +            CRASH();
   1.166 +        return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
   1.167 +    }
   1.168 +
   1.169 +    void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
   1.170 +    {
   1.171 +        allocatorPool = allocatorPool->dealloc(context);
   1.172 +    }
   1.173 +
   1.174 +    class InputStream {
   1.175 +    public:
   1.176 +        InputStream(const CharType* input, unsigned start, unsigned length)
   1.177 +            : input(input)
   1.178 +            , pos(start)
   1.179 +            , length(length)
   1.180 +        {
   1.181 +        }
   1.182 +
   1.183 +        void next()
   1.184 +        {
   1.185 +            ++pos;
   1.186 +        }
   1.187 +
   1.188 +        void rewind(unsigned amount)
   1.189 +        {
   1.190 +            ASSERT(pos >= amount);
   1.191 +            pos -= amount;
   1.192 +        }
   1.193 +
   1.194 +        int read()
   1.195 +        {
   1.196 +            ASSERT(pos < length);
   1.197 +            if (pos < length)
   1.198 +                return input[pos];
   1.199 +            return -1;
   1.200 +        }
   1.201 +
   1.202 +        int readPair()
   1.203 +        {
   1.204 +            ASSERT(pos + 1 < length);
   1.205 +            return input[pos] | input[pos + 1] << 16;
   1.206 +        }
   1.207 +
   1.208 +        int readChecked(unsigned negativePositionOffest)
   1.209 +        {
   1.210 +            if (pos < negativePositionOffest)
   1.211 +                CRASH();
   1.212 +            unsigned p = pos - negativePositionOffest;
   1.213 +            ASSERT(p < length);
   1.214 +            return input[p];
   1.215 +        }
   1.216 +
   1.217 +        int reread(unsigned from)
   1.218 +        {
   1.219 +            ASSERT(from < length);
   1.220 +            return input[from];
   1.221 +        }
   1.222 +
   1.223 +        int prev()
   1.224 +        {
   1.225 +            ASSERT(!(pos > length));
   1.226 +            if (pos && length)
   1.227 +                return input[pos - 1];
   1.228 +            return -1;
   1.229 +        }
   1.230 +
   1.231 +        unsigned getPos()
   1.232 +        {
   1.233 +            return pos;
   1.234 +        }
   1.235 +
   1.236 +        void setPos(unsigned p)
   1.237 +        {
   1.238 +            pos = p;
   1.239 +        }
   1.240 +
   1.241 +        bool atStart()
   1.242 +        {
   1.243 +            return pos == 0;
   1.244 +        }
   1.245 +
   1.246 +        bool atEnd()
   1.247 +        {
   1.248 +            return pos == length;
   1.249 +        }
   1.250 +
   1.251 +        unsigned end()
   1.252 +        {
   1.253 +            return length;
   1.254 +        }
   1.255 +
   1.256 +        bool checkInput(unsigned count)
   1.257 +        {
   1.258 +            if (((pos + count) <= length) && ((pos + count) >= pos)) {
   1.259 +                pos += count;
   1.260 +                return true;
   1.261 +            }
   1.262 +            return false;
   1.263 +        }
   1.264 +
   1.265 +        void uncheckInput(unsigned count)
   1.266 +        {
   1.267 +            if (pos < count)
   1.268 +                CRASH();
   1.269 +            pos -= count;
   1.270 +        }
   1.271 +
   1.272 +        bool atStart(unsigned negativePositionOffest)
   1.273 +        {
   1.274 +            return pos == negativePositionOffest;
   1.275 +        }
   1.276 +
   1.277 +        bool atEnd(unsigned negativePositionOffest)
   1.278 +        {
   1.279 +            if (pos < negativePositionOffest)
   1.280 +                CRASH();
   1.281 +            return (pos - negativePositionOffest) == length;
   1.282 +        }
   1.283 +
   1.284 +        bool isAvailableInput(unsigned offset)
   1.285 +        {
   1.286 +            return (((pos + offset) <= length) && ((pos + offset) >= pos));
   1.287 +        }
   1.288 +
   1.289 +    private:
   1.290 +        const CharType* input;
   1.291 +        unsigned pos;
   1.292 +        unsigned length;
   1.293 +    };
   1.294 +
   1.295 +    bool testCharacterClass(CharacterClass* characterClass, int ch)
   1.296 +    {
   1.297 +        if (ch & 0xFF80) {
   1.298 +            for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
   1.299 +                if (ch == characterClass->m_matchesUnicode[i])
   1.300 +                    return true;
   1.301 +            for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
   1.302 +                if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
   1.303 +                    return true;
   1.304 +        } else {
   1.305 +            for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
   1.306 +                if (ch == characterClass->m_matches[i])
   1.307 +                    return true;
   1.308 +            for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
   1.309 +                if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
   1.310 +                    return true;
   1.311 +        }
   1.312 +
   1.313 +        return false;
   1.314 +    }
   1.315 +
   1.316 +    bool checkCharacter(int testChar, unsigned negativeInputOffset)
   1.317 +    {
   1.318 +        return testChar == input.readChecked(negativeInputOffset);
   1.319 +    }
   1.320 +
   1.321 +    bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
   1.322 +    {
   1.323 +        int ch = input.readChecked(negativeInputOffset);
   1.324 +        return (loChar == ch) || (hiChar == ch);
   1.325 +    }
   1.326 +
   1.327 +    bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
   1.328 +    {
   1.329 +        bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
   1.330 +        return invert ? !match : match;
   1.331 +    }
   1.332 +
   1.333 +    bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
   1.334 +    {
   1.335 +        unsigned matchSize = (unsigned)(matchEnd - matchBegin);
   1.336 +
   1.337 +        if (!input.checkInput(matchSize))
   1.338 +            return false;
   1.339 +
   1.340 +        if (pattern->m_ignoreCase) {
   1.341 +            for (unsigned i = 0; i < matchSize; ++i) {
   1.342 +                int oldCh = input.reread(matchBegin + i);
   1.343 +                int ch = input.readChecked(negativeInputOffset + matchSize - i);
   1.344 +
   1.345 +                if (oldCh == ch)
   1.346 +                    continue;
   1.347 +
   1.348 +                // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that
   1.349 +                // unicode values are never allowed to match against ascii ones.
   1.350 +                if (isASCII(oldCh) || isASCII(ch)) {
   1.351 +                    if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
   1.352 +                        continue;
   1.353 +                } else if (areCanonicallyEquivalent(oldCh, ch))
   1.354 +                    continue;
   1.355 +
   1.356 +                input.uncheckInput(matchSize);
   1.357 +                return false;
   1.358 +            }
   1.359 +        } else {
   1.360 +            for (unsigned i = 0; i < matchSize; ++i) {
   1.361 +                if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) {
   1.362 +                    input.uncheckInput(matchSize);
   1.363 +                    return false;
   1.364 +                }
   1.365 +            }
   1.366 +        }
   1.367 +
   1.368 +        return true;
   1.369 +    }
   1.370 +
   1.371 +    bool matchAssertionBOL(ByteTerm& term)
   1.372 +    {
   1.373 +        return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
   1.374 +    }
   1.375 +
   1.376 +    bool matchAssertionEOL(ByteTerm& term)
   1.377 +    {
   1.378 +        if (term.inputPosition)
   1.379 +            return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
   1.380 +
   1.381 +        return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
   1.382 +    }
   1.383 +
   1.384 +    bool matchAssertionWordBoundary(ByteTerm& term)
   1.385 +    {
   1.386 +        bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1));
   1.387 +        bool readIsWordchar;
   1.388 +        if (term.inputPosition)
   1.389 +            readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
   1.390 +        else
   1.391 +            readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
   1.392 +
   1.393 +        bool wordBoundary = prevIsWordchar != readIsWordchar;
   1.394 +        return term.invert() ? !wordBoundary : wordBoundary;
   1.395 +    }
   1.396 +
   1.397 +    bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
   1.398 +    {
   1.399 +        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
   1.400 +
   1.401 +        switch (term.atom.quantityType) {
   1.402 +        case QuantifierFixedCount:
   1.403 +            break;
   1.404 +
   1.405 +        case QuantifierGreedy:
   1.406 +            if (backTrack->matchAmount) {
   1.407 +                --backTrack->matchAmount;
   1.408 +                input.uncheckInput(1);
   1.409 +                return true;
   1.410 +            }
   1.411 +            break;
   1.412 +
   1.413 +        case QuantifierNonGreedy:
   1.414 +            if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
   1.415 +                ++backTrack->matchAmount;
   1.416 +                if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))
   1.417 +                    return true;
   1.418 +            }
   1.419 +            input.uncheckInput(backTrack->matchAmount);
   1.420 +            break;
   1.421 +        }
   1.422 +
   1.423 +        return false;
   1.424 +    }
   1.425 +
   1.426 +    bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
   1.427 +    {
   1.428 +        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
   1.429 +
   1.430 +        switch (term.atom.quantityType) {
   1.431 +        case QuantifierFixedCount:
   1.432 +            break;
   1.433 +
   1.434 +        case QuantifierGreedy:
   1.435 +            if (backTrack->matchAmount) {
   1.436 +                --backTrack->matchAmount;
   1.437 +                input.uncheckInput(1);
   1.438 +                return true;
   1.439 +            }
   1.440 +            break;
   1.441 +
   1.442 +        case QuantifierNonGreedy:
   1.443 +            if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
   1.444 +                ++backTrack->matchAmount;
   1.445 +                if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))
   1.446 +                    return true;
   1.447 +            }
   1.448 +            input.uncheckInput(backTrack->matchAmount);
   1.449 +            break;
   1.450 +        }
   1.451 +
   1.452 +        return false;
   1.453 +    }
   1.454 +
   1.455 +    bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
   1.456 +    {
   1.457 +        ASSERT(term.type == ByteTerm::TypeCharacterClass);
   1.458 +        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
   1.459 +
   1.460 +        switch (term.atom.quantityType) {
   1.461 +        case QuantifierFixedCount: {
   1.462 +            for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
   1.463 +                if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
   1.464 +                    return false;
   1.465 +            }
   1.466 +            return true;
   1.467 +        }
   1.468 +
   1.469 +        case QuantifierGreedy: {
   1.470 +            unsigned matchAmount = 0;
   1.471 +            while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
   1.472 +                if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
   1.473 +                    input.uncheckInput(1);
   1.474 +                    break;
   1.475 +                }
   1.476 +                ++matchAmount;
   1.477 +            }
   1.478 +            backTrack->matchAmount = matchAmount;
   1.479 +
   1.480 +            return true;
   1.481 +        }
   1.482 +
   1.483 +        case QuantifierNonGreedy:
   1.484 +            backTrack->matchAmount = 0;
   1.485 +            return true;
   1.486 +        }
   1.487 +
   1.488 +        ASSERT_NOT_REACHED();
   1.489 +        return false;
   1.490 +    }
   1.491 +
   1.492 +    bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
   1.493 +    {
   1.494 +        ASSERT(term.type == ByteTerm::TypeCharacterClass);
   1.495 +        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
   1.496 +
   1.497 +        switch (term.atom.quantityType) {
   1.498 +        case QuantifierFixedCount:
   1.499 +            break;
   1.500 +
   1.501 +        case QuantifierGreedy:
   1.502 +            if (backTrack->matchAmount) {
   1.503 +                --backTrack->matchAmount;
   1.504 +                input.uncheckInput(1);
   1.505 +                return true;
   1.506 +            }
   1.507 +            break;
   1.508 +
   1.509 +        case QuantifierNonGreedy:
   1.510 +            if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
   1.511 +                ++backTrack->matchAmount;
   1.512 +                if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))
   1.513 +                    return true;
   1.514 +            }
   1.515 +            input.uncheckInput(backTrack->matchAmount);
   1.516 +            break;
   1.517 +        }
   1.518 +
   1.519 +        return false;
   1.520 +    }
   1.521 +
   1.522 +    bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
   1.523 +    {
   1.524 +        ASSERT(term.type == ByteTerm::TypeBackReference);
   1.525 +        BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
   1.526 +
   1.527 +        unsigned matchBegin = output[(term.atom.subpatternId << 1)];
   1.528 +        unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
   1.529 +
   1.530 +        // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
   1.531 +        // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
   1.532 +        // Eg.: /(a\1)/
   1.533 +        if (matchEnd == offsetNoMatch)
   1.534 +            return true;
   1.535 +
   1.536 +        if (matchBegin == offsetNoMatch)
   1.537 +            return true;
   1.538 +
   1.539 +        ASSERT(matchBegin <= matchEnd);
   1.540 +
   1.541 +        if (matchBegin == matchEnd)
   1.542 +            return true;
   1.543 +
   1.544 +        switch (term.atom.quantityType) {
   1.545 +        case QuantifierFixedCount: {
   1.546 +            backTrack->begin = input.getPos();
   1.547 +            for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
   1.548 +                if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
   1.549 +                    input.setPos(backTrack->begin);
   1.550 +                    return false;
   1.551 +                }
   1.552 +            }
   1.553 +            return true;
   1.554 +        }
   1.555 +
   1.556 +        case QuantifierGreedy: {
   1.557 +            unsigned matchAmount = 0;
   1.558 +            while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
   1.559 +                ++matchAmount;
   1.560 +            backTrack->matchAmount = matchAmount;
   1.561 +            return true;
   1.562 +        }
   1.563 +
   1.564 +        case QuantifierNonGreedy:
   1.565 +            backTrack->begin = input.getPos();
   1.566 +            backTrack->matchAmount = 0;
   1.567 +            return true;
   1.568 +        }
   1.569 +
   1.570 +        ASSERT_NOT_REACHED();
   1.571 +        return false;
   1.572 +    }
   1.573 +
   1.574 +    bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
   1.575 +    {
   1.576 +        ASSERT(term.type == ByteTerm::TypeBackReference);
   1.577 +        BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
   1.578 +
   1.579 +        unsigned matchBegin = output[(term.atom.subpatternId << 1)];
   1.580 +        unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
   1.581 +
   1.582 +        if (matchBegin == offsetNoMatch)
   1.583 +            return false;
   1.584 +
   1.585 +        ASSERT(matchBegin <= matchEnd);
   1.586 +
   1.587 +        if (matchBegin == matchEnd)
   1.588 +            return false;
   1.589 +
   1.590 +        switch (term.atom.quantityType) {
   1.591 +        case QuantifierFixedCount:
   1.592 +            // for quantityCount == 1, could rewind.
   1.593 +            input.setPos(backTrack->begin);
   1.594 +            break;
   1.595 +
   1.596 +        case QuantifierGreedy:
   1.597 +            if (backTrack->matchAmount) {
   1.598 +                --backTrack->matchAmount;
   1.599 +                input.rewind(matchEnd - matchBegin);
   1.600 +                return true;
   1.601 +            }
   1.602 +            break;
   1.603 +
   1.604 +        case QuantifierNonGreedy:
   1.605 +            if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
   1.606 +                ++backTrack->matchAmount;
   1.607 +                return true;
   1.608 +            }
   1.609 +            input.setPos(backTrack->begin);
   1.610 +            break;
   1.611 +        }
   1.612 +
   1.613 +        return false;
   1.614 +    }
   1.615 +
   1.616 +    void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
   1.617 +    {
   1.618 +        if (term.capture()) {
   1.619 +            unsigned subpatternId = term.atom.subpatternId;
   1.620 +            output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
   1.621 +            output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
   1.622 +        }
   1.623 +    }
   1.624 +    void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
   1.625 +    {
   1.626 +        unsigned firstSubpatternId = term.atom.subpatternId;
   1.627 +        unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
   1.628 +        context->restoreOutput(output, firstSubpatternId, count);
   1.629 +    }
   1.630 +    JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
   1.631 +    {
   1.632 +        while (backTrack->matchAmount) {
   1.633 +            ParenthesesDisjunctionContext* context = backTrack->lastContext;
   1.634 +
   1.635 +            JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
   1.636 +            if (result == JSRegExpMatch)
   1.637 +                return JSRegExpMatch;
   1.638 +
   1.639 +            resetMatches(term, context);
   1.640 +            popParenthesesDisjunctionContext(backTrack);
   1.641 +            freeParenthesesDisjunctionContext(context);
   1.642 +
   1.643 +            if (result != JSRegExpNoMatch)
   1.644 +                return result;
   1.645 +        }
   1.646 +
   1.647 +        return JSRegExpNoMatch;
   1.648 +    }
   1.649 +
   1.650 +    bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
   1.651 +    {
   1.652 +        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
   1.653 +        ASSERT(term.atom.quantityCount == 1);
   1.654 +
   1.655 +        BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
   1.656 +
   1.657 +        switch (term.atom.quantityType) {
   1.658 +        case QuantifierGreedy: {
   1.659 +            // set this speculatively; if we get to the parens end this will be true.
   1.660 +            backTrack->begin = input.getPos();
   1.661 +            break;
   1.662 +        }
   1.663 +        case QuantifierNonGreedy: {
   1.664 +            backTrack->begin = notFound;
   1.665 +            context->term += term.atom.parenthesesWidth;
   1.666 +            return true;
   1.667 +        }
   1.668 +        case QuantifierFixedCount:
   1.669 +            break;
   1.670 +        }
   1.671 +
   1.672 +        if (term.capture()) {
   1.673 +            unsigned subpatternId = term.atom.subpatternId;
   1.674 +            output[(subpatternId << 1)] = input.getPos() - term.inputPosition;
   1.675 +        }
   1.676 +
   1.677 +        return true;
   1.678 +    }
   1.679 +
   1.680 +    bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
   1.681 +    {
   1.682 +        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
   1.683 +        ASSERT(term.atom.quantityCount == 1);
   1.684 +
   1.685 +        if (term.capture()) {
   1.686 +            unsigned subpatternId = term.atom.subpatternId;
   1.687 +            output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
   1.688 +        }
   1.689 +
   1.690 +        if (term.atom.quantityType == QuantifierFixedCount)
   1.691 +            return true;
   1.692 +
   1.693 +        BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
   1.694 +        return backTrack->begin != input.getPos();
   1.695 +    }
   1.696 +
   1.697 +    bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
   1.698 +    {
   1.699 +        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
   1.700 +        ASSERT(term.atom.quantityCount == 1);
   1.701 +
   1.702 +        BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
   1.703 +
   1.704 +        if (term.capture()) {
   1.705 +            unsigned subpatternId = term.atom.subpatternId;
   1.706 +            output[(subpatternId << 1)] = offsetNoMatch;
   1.707 +            output[(subpatternId << 1) + 1] = offsetNoMatch;
   1.708 +        }
   1.709 +
   1.710 +        switch (term.atom.quantityType) {
   1.711 +        case QuantifierGreedy:
   1.712 +            // if we backtrack to this point, there is another chance - try matching nothing.
   1.713 +            ASSERT(backTrack->begin != notFound);
   1.714 +            backTrack->begin = notFound;
   1.715 +            context->term += term.atom.parenthesesWidth;
   1.716 +            return true;
   1.717 +        case QuantifierNonGreedy:
   1.718 +            ASSERT(backTrack->begin != notFound);
   1.719 +        case QuantifierFixedCount:
   1.720 +            break;
   1.721 +        }
   1.722 +
   1.723 +        return false;
   1.724 +    }
   1.725 +
   1.726 +    bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
   1.727 +    {
   1.728 +        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
   1.729 +        ASSERT(term.atom.quantityCount == 1);
   1.730 +
   1.731 +        BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
   1.732 +
   1.733 +        switch (term.atom.quantityType) {
   1.734 +        case QuantifierGreedy:
   1.735 +            if (backTrack->begin == notFound) {
   1.736 +                context->term -= term.atom.parenthesesWidth;
   1.737 +                return false;
   1.738 +            }
   1.739 +        case QuantifierNonGreedy:
   1.740 +            if (backTrack->begin == notFound) {
   1.741 +                backTrack->begin = input.getPos();
   1.742 +                if (term.capture()) {
   1.743 +                    // Technically this access to inputPosition should be accessing the begin term's
   1.744 +                    // inputPosition, but for repeats other than fixed these values should be
   1.745 +                    // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
   1.746 +                    ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
   1.747 +
   1.748 +		    // Disabled, see bug 808478
   1.749 +#if 0
   1.750 +                    ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
   1.751 +#endif
   1.752 +                    unsigned subpatternId = term.atom.subpatternId;
   1.753 +                    output[subpatternId << 1] = input.getPos() + term.inputPosition;
   1.754 +                }
   1.755 +                context->term -= term.atom.parenthesesWidth;
   1.756 +                return true;
   1.757 +            }
   1.758 +        case QuantifierFixedCount:
   1.759 +            break;
   1.760 +        }
   1.761 +
   1.762 +        return false;
   1.763 +    }
   1.764 +
   1.765 +    bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
   1.766 +    {
   1.767 +        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
   1.768 +        ASSERT(term.atom.quantityType == QuantifierGreedy);
   1.769 +        ASSERT(term.atom.quantityCount == quantifyInfinite);
   1.770 +        ASSERT(!term.capture());
   1.771 +
   1.772 +        BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
   1.773 +        backTrack->begin = input.getPos();
   1.774 +        return true;
   1.775 +    }
   1.776 +
   1.777 +    bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
   1.778 +    {
   1.779 +        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
   1.780 +
   1.781 +        BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
   1.782 +        // Empty match is a failed match.
   1.783 +        if (backTrack->begin == input.getPos())
   1.784 +            return false;
   1.785 +
   1.786 +        // Successful match! Okay, what's next? - loop around and try to match moar!
   1.787 +        context->term -= (term.atom.parenthesesWidth + 1);
   1.788 +        return true;
   1.789 +    }
   1.790 +
   1.791 +    bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
   1.792 +    {
   1.793 +        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
   1.794 +        ASSERT(term.atom.quantityType == QuantifierGreedy);
   1.795 +        ASSERT(term.atom.quantityCount == quantifyInfinite);
   1.796 +        ASSERT(!term.capture());
   1.797 +
   1.798 +        // If we backtrack to this point, we have failed to match this iteration of the parens.
   1.799 +        // Since this is greedy / zero minimum a failed is also accepted as a match!
   1.800 +        context->term += term.atom.parenthesesWidth;
   1.801 +        return true;
   1.802 +    }
   1.803 +
   1.804 +    bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
   1.805 +    {
   1.806 +        // 'Terminal' parentheses are at the end of the regex, and as such a match past end
   1.807 +        // should always be returned as a successful match - we should never backtrack to here.
   1.808 +        ASSERT_NOT_REACHED();
   1.809 +        return false;
   1.810 +    }
   1.811 +
   1.812 +    bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
   1.813 +    {
   1.814 +        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
   1.815 +        ASSERT(term.atom.quantityCount == 1);
   1.816 +
   1.817 +        BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
   1.818 +
   1.819 +        backTrack->begin = input.getPos();
   1.820 +        return true;
   1.821 +    }
   1.822 +
   1.823 +    bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
   1.824 +    {
   1.825 +        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
   1.826 +        ASSERT(term.atom.quantityCount == 1);
   1.827 +
   1.828 +        BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
   1.829 +
   1.830 +        input.setPos(backTrack->begin);
   1.831 +
   1.832 +        // We've reached the end of the parens; if they are inverted, this is failure.
   1.833 +        if (term.invert()) {
   1.834 +            context->term -= term.atom.parenthesesWidth;
   1.835 +            return false;
   1.836 +        }
   1.837 +
   1.838 +        return true;
   1.839 +    }
   1.840 +
   1.841 +    bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
   1.842 +    {
   1.843 +        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
   1.844 +        ASSERT(term.atom.quantityCount == 1);
   1.845 +
   1.846 +        // We've failed to match parens; if they are inverted, this is win!
   1.847 +        if (term.invert()) {
   1.848 +            context->term += term.atom.parenthesesWidth;
   1.849 +            return true;
   1.850 +        }
   1.851 +
   1.852 +        return false;
   1.853 +    }
   1.854 +
   1.855 +    bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
   1.856 +    {
   1.857 +        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
   1.858 +        ASSERT(term.atom.quantityCount == 1);
   1.859 +
   1.860 +        BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
   1.861 +
   1.862 +        input.setPos(backTrack->begin);
   1.863 +
   1.864 +        context->term -= term.atom.parenthesesWidth;
   1.865 +        return false;
   1.866 +    }
   1.867 +
   1.868 +    JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
   1.869 +    {
   1.870 +        ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
   1.871 +
   1.872 +        BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
   1.873 +        ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
   1.874 +
   1.875 +        backTrack->matchAmount = 0;
   1.876 +        backTrack->lastContext = 0;
   1.877 +
   1.878 +        switch (term.atom.quantityType) {
   1.879 +        case QuantifierFixedCount: {
   1.880 +            // While we haven't yet reached our fixed limit,
   1.881 +            while (backTrack->matchAmount < term.atom.quantityCount) {
   1.882 +                // Try to do a match, and it it succeeds, add it to the list.
   1.883 +                ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
   1.884 +                JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
   1.885 +                if (result == JSRegExpMatch)
   1.886 +                    appendParenthesesDisjunctionContext(backTrack, context);
   1.887 +                else {
   1.888 +                    // The match failed; try to find an alternate point to carry on from.
   1.889 +                    resetMatches(term, context);
   1.890 +                    freeParenthesesDisjunctionContext(context);
   1.891 +
   1.892 +                    if (result != JSRegExpNoMatch)
   1.893 +                        return result;
   1.894 +                    JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
   1.895 +                    if (backtrackResult != JSRegExpMatch)
   1.896 +                        return backtrackResult;
   1.897 +                }
   1.898 +            }
   1.899 +
   1.900 +            ASSERT(backTrack->matchAmount == term.atom.quantityCount);
   1.901 +            ParenthesesDisjunctionContext* context = backTrack->lastContext;
   1.902 +            recordParenthesesMatch(term, context);
   1.903 +            return JSRegExpMatch;
   1.904 +        }
   1.905 +
   1.906 +        case QuantifierGreedy: {
   1.907 +            while (backTrack->matchAmount < term.atom.quantityCount) {
   1.908 +                ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
   1.909 +                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
   1.910 +                if (result == JSRegExpMatch)
   1.911 +                    appendParenthesesDisjunctionContext(backTrack, context);
   1.912 +                else {
   1.913 +                    resetMatches(term, context);
   1.914 +                    freeParenthesesDisjunctionContext(context);
   1.915 +
   1.916 +                    if (result != JSRegExpNoMatch)
   1.917 +                        return result;
   1.918 +
   1.919 +                    break;
   1.920 +                }
   1.921 +            }
   1.922 +
   1.923 +            if (backTrack->matchAmount) {
   1.924 +                ParenthesesDisjunctionContext* context = backTrack->lastContext;
   1.925 +                recordParenthesesMatch(term, context);
   1.926 +            }
   1.927 +            return JSRegExpMatch;
   1.928 +        }
   1.929 +
   1.930 +        case QuantifierNonGreedy:
   1.931 +            return JSRegExpMatch;
   1.932 +        }
   1.933 +
   1.934 +        ASSERT_NOT_REACHED();
   1.935 +        return JSRegExpErrorNoMatch;
   1.936 +    }
   1.937 +
   1.938 +    // Rules for backtracking differ depending on whether this is greedy or non-greedy.
   1.939 +    //
   1.940 +    // Greedy matches never should try just adding more - you should already have done
   1.941 +    // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where
   1.942 +    // you backtrack an item off the list needs checking, since we'll never have matched
   1.943 +    // the one less case.  Tracking forwards, still add as much as possible.
   1.944 +    //
   1.945 +    // Non-greedy, we've already done the one less case, so don't match on popping.
   1.946 +    // We haven't done the one more case, so always try to add that.
   1.947 +    //
   1.948 +    JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
   1.949 +    {
   1.950 +        ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
   1.951 +
   1.952 +        BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
   1.953 +        ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
   1.954 +
   1.955 +        switch (term.atom.quantityType) {
   1.956 +        case QuantifierFixedCount: {
   1.957 +            ASSERT(backTrack->matchAmount == term.atom.quantityCount);
   1.958 +
   1.959 +            ParenthesesDisjunctionContext* context = 0;
   1.960 +            JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
   1.961 +
   1.962 +            if (result != JSRegExpMatch)
   1.963 +                return result;
   1.964 +
   1.965 +            // While we haven't yet reached our fixed limit,
   1.966 +            while (backTrack->matchAmount < term.atom.quantityCount) {
   1.967 +                // Try to do a match, and it it succeeds, add it to the list.
   1.968 +                context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
   1.969 +                result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
   1.970 +
   1.971 +                if (result == JSRegExpMatch)
   1.972 +                    appendParenthesesDisjunctionContext(backTrack, context);
   1.973 +                else {
   1.974 +                    // The match failed; try to find an alternate point to carry on from.
   1.975 +                    resetMatches(term, context);
   1.976 +                    freeParenthesesDisjunctionContext(context);
   1.977 +
   1.978 +                    if (result != JSRegExpNoMatch)
   1.979 +                        return result;
   1.980 +                    JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
   1.981 +                    if (backtrackResult != JSRegExpMatch)
   1.982 +                        return backtrackResult;
   1.983 +                }
   1.984 +            }
   1.985 +
   1.986 +            ASSERT(backTrack->matchAmount == term.atom.quantityCount);
   1.987 +            context = backTrack->lastContext;
   1.988 +            recordParenthesesMatch(term, context);
   1.989 +            return JSRegExpMatch;
   1.990 +        }
   1.991 +
   1.992 +        case QuantifierGreedy: {
   1.993 +            if (!backTrack->matchAmount)
   1.994 +                return JSRegExpNoMatch;
   1.995 +
   1.996 +            ParenthesesDisjunctionContext* context = backTrack->lastContext;
   1.997 +            JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
   1.998 +            if (result == JSRegExpMatch) {
   1.999 +                while (backTrack->matchAmount < term.atom.quantityCount) {
  1.1000 +                    ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
  1.1001 +                    JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
  1.1002 +                    if (parenthesesResult == JSRegExpMatch)
  1.1003 +                        appendParenthesesDisjunctionContext(backTrack, context);
  1.1004 +                    else {
  1.1005 +                        resetMatches(term, context);
  1.1006 +                        freeParenthesesDisjunctionContext(context);
  1.1007 +
  1.1008 +                        if (parenthesesResult != JSRegExpNoMatch)
  1.1009 +                            return parenthesesResult;
  1.1010 +
  1.1011 +                        break;
  1.1012 +                    }
  1.1013 +                }
  1.1014 +            } else {
  1.1015 +                // Avoid a topcrash before it occurs.
  1.1016 +                if (!backTrack->lastContext) {
  1.1017 +                    ASSERT(!"Tripped Bug 856796!");
  1.1018 +                    return JSRegExpErrorInternal;
  1.1019 +                }
  1.1020 +
  1.1021 +                resetMatches(term, context);
  1.1022 +                popParenthesesDisjunctionContext(backTrack);
  1.1023 +                freeParenthesesDisjunctionContext(context);
  1.1024 +
  1.1025 +                if (result != JSRegExpNoMatch)
  1.1026 +                    return result;
  1.1027 +            }
  1.1028 +
  1.1029 +            if (backTrack->matchAmount) {
  1.1030 +                ParenthesesDisjunctionContext* context = backTrack->lastContext;
  1.1031 +                recordParenthesesMatch(term, context);
  1.1032 +            }
  1.1033 +            return JSRegExpMatch;
  1.1034 +        }
  1.1035 +
  1.1036 +        case QuantifierNonGreedy: {
  1.1037 +            // If we've not reached the limit, try to add one more match.
  1.1038 +            if (backTrack->matchAmount < term.atom.quantityCount) {
  1.1039 +                ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
  1.1040 +                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
  1.1041 +                if (result == JSRegExpMatch) {
  1.1042 +                    appendParenthesesDisjunctionContext(backTrack, context);
  1.1043 +                    recordParenthesesMatch(term, context);
  1.1044 +                    return JSRegExpMatch;
  1.1045 +                }
  1.1046 +
  1.1047 +                resetMatches(term, context);
  1.1048 +                freeParenthesesDisjunctionContext(context);
  1.1049 +
  1.1050 +                if (result != JSRegExpNoMatch)
  1.1051 +                    return result;
  1.1052 +            }
  1.1053 +
  1.1054 +            // Nope - okay backtrack looking for an alternative.
  1.1055 +            while (backTrack->matchAmount) {
  1.1056 +                ParenthesesDisjunctionContext* context = backTrack->lastContext;
  1.1057 +                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
  1.1058 +                if (result == JSRegExpMatch) {
  1.1059 +                    // successful backtrack! we're back in the game!
  1.1060 +                    if (backTrack->matchAmount) {
  1.1061 +                        context = backTrack->lastContext;
  1.1062 +                        recordParenthesesMatch(term, context);
  1.1063 +                    }
  1.1064 +                    return JSRegExpMatch;
  1.1065 +                }
  1.1066 +
  1.1067 +                // Avoid a topcrash before it occurs.
  1.1068 +                if (!backTrack->lastContext) {
  1.1069 +                    ASSERT(!"Tripped Bug 856796!");
  1.1070 +                    return JSRegExpErrorInternal;
  1.1071 +                }
  1.1072 +
  1.1073 +                // pop a match off the stack
  1.1074 +                resetMatches(term, context);
  1.1075 +                popParenthesesDisjunctionContext(backTrack);
  1.1076 +                freeParenthesesDisjunctionContext(context);
  1.1077 +
  1.1078 +                if (result != JSRegExpNoMatch)
  1.1079 +                    return result;
  1.1080 +            }
  1.1081 +
  1.1082 +            return JSRegExpNoMatch;
  1.1083 +        }
  1.1084 +        }
  1.1085 +
  1.1086 +        ASSERT_NOT_REACHED();
  1.1087 +        return JSRegExpErrorNoMatch;
  1.1088 +    }
  1.1089 +
  1.1090 +    bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
  1.1091 +    {
  1.1092 +        UNUSED_PARAM(term);
  1.1093 +        unsigned matchBegin = context->matchBegin;
  1.1094 +
  1.1095 +        if (matchBegin) {
  1.1096 +            for (matchBegin--; true; matchBegin--) {
  1.1097 +                if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
  1.1098 +                    ++matchBegin;
  1.1099 +                    break;
  1.1100 +                }
  1.1101 +
  1.1102 +                if (!matchBegin)
  1.1103 +                    break;
  1.1104 +            }
  1.1105 +        }
  1.1106 +
  1.1107 +        unsigned matchEnd = input.getPos();
  1.1108 +
  1.1109 +        for (; (matchEnd != input.end())
  1.1110 +             && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
  1.1111 +
  1.1112 +        if (((matchBegin && term.anchors.m_bol)
  1.1113 +             || ((matchEnd != input.end()) && term.anchors.m_eol))
  1.1114 +            && !pattern->m_multiline)
  1.1115 +            return false;
  1.1116 +
  1.1117 +        context->matchBegin = matchBegin;
  1.1118 +        context->matchEnd = matchEnd;
  1.1119 +        return true;
  1.1120 +    }
  1.1121 +
  1.1122 +#define MATCH_NEXT() { ++context->term; goto matchAgain; }
  1.1123 +#define BACKTRACK() { --context->term; goto backtrack; }
  1.1124 +#define currentTerm() (disjunction->terms[context->term])
  1.1125 +    JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
  1.1126 +    {
  1.1127 +        if (!--remainingMatchCount)
  1.1128 +            return JSRegExpErrorHitLimit;
  1.1129 +
  1.1130 +        if (btrack)
  1.1131 +            BACKTRACK();
  1.1132 +
  1.1133 +        context->matchBegin = input.getPos();
  1.1134 +        context->term = 0;
  1.1135 +
  1.1136 +    matchAgain:
  1.1137 +        ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
  1.1138 +
  1.1139 +        // Prevent jank resulting from getting stuck in Yarr for a long time.
  1.1140 +        if (!CheckForInterrupt(this->cx))
  1.1141 +            return JSRegExpErrorInternal;
  1.1142 +
  1.1143 +        switch (currentTerm().type) {
  1.1144 +        case ByteTerm::TypeSubpatternBegin:
  1.1145 +            MATCH_NEXT();
  1.1146 +        case ByteTerm::TypeSubpatternEnd:
  1.1147 +            context->matchEnd = input.getPos();
  1.1148 +            return JSRegExpMatch;
  1.1149 +
  1.1150 +        case ByteTerm::TypeBodyAlternativeBegin:
  1.1151 +            MATCH_NEXT();
  1.1152 +        case ByteTerm::TypeBodyAlternativeDisjunction:
  1.1153 +        case ByteTerm::TypeBodyAlternativeEnd:
  1.1154 +            context->matchEnd = input.getPos();
  1.1155 +            return JSRegExpMatch;
  1.1156 +
  1.1157 +        case ByteTerm::TypeAlternativeBegin:
  1.1158 +            MATCH_NEXT();
  1.1159 +        case ByteTerm::TypeAlternativeDisjunction:
  1.1160 +        case ByteTerm::TypeAlternativeEnd: {
  1.1161 +            int offset = currentTerm().alternative.end;
  1.1162 +            BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
  1.1163 +            backTrack->offset = offset;
  1.1164 +            context->term += offset;
  1.1165 +            MATCH_NEXT();
  1.1166 +        }
  1.1167 +
  1.1168 +        case ByteTerm::TypeAssertionBOL:
  1.1169 +            if (matchAssertionBOL(currentTerm()))
  1.1170 +                MATCH_NEXT();
  1.1171 +            BACKTRACK();
  1.1172 +        case ByteTerm::TypeAssertionEOL:
  1.1173 +            if (matchAssertionEOL(currentTerm()))
  1.1174 +                MATCH_NEXT();
  1.1175 +            BACKTRACK();
  1.1176 +        case ByteTerm::TypeAssertionWordBoundary:
  1.1177 +            if (matchAssertionWordBoundary(currentTerm()))
  1.1178 +                MATCH_NEXT();
  1.1179 +            BACKTRACK();
  1.1180 +
  1.1181 +        case ByteTerm::TypePatternCharacterOnce:
  1.1182 +        case ByteTerm::TypePatternCharacterFixed: {
  1.1183 +            for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
  1.1184 +                if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount))
  1.1185 +                    BACKTRACK();
  1.1186 +            }
  1.1187 +            MATCH_NEXT();
  1.1188 +        }
  1.1189 +        case ByteTerm::TypePatternCharacterGreedy: {
  1.1190 +            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
  1.1191 +            unsigned matchAmount = 0;
  1.1192 +            while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
  1.1193 +                if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {
  1.1194 +                    input.uncheckInput(1);
  1.1195 +                    break;
  1.1196 +                }
  1.1197 +                ++matchAmount;
  1.1198 +            }
  1.1199 +            backTrack->matchAmount = matchAmount;
  1.1200 +
  1.1201 +            MATCH_NEXT();
  1.1202 +        }
  1.1203 +        case ByteTerm::TypePatternCharacterNonGreedy: {
  1.1204 +            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
  1.1205 +            backTrack->matchAmount = 0;
  1.1206 +            MATCH_NEXT();
  1.1207 +        }
  1.1208 +
  1.1209 +        case ByteTerm::TypePatternCasedCharacterOnce:
  1.1210 +        case ByteTerm::TypePatternCasedCharacterFixed: {
  1.1211 +            for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
  1.1212 +                if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
  1.1213 +                    BACKTRACK();
  1.1214 +            }
  1.1215 +            MATCH_NEXT();
  1.1216 +        }
  1.1217 +        case ByteTerm::TypePatternCasedCharacterGreedy: {
  1.1218 +            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
  1.1219 +            unsigned matchAmount = 0;
  1.1220 +            while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
  1.1221 +                if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {
  1.1222 +                    input.uncheckInput(1);
  1.1223 +                    break;
  1.1224 +                }
  1.1225 +                ++matchAmount;
  1.1226 +            }
  1.1227 +            backTrack->matchAmount = matchAmount;
  1.1228 +
  1.1229 +            MATCH_NEXT();
  1.1230 +        }
  1.1231 +        case ByteTerm::TypePatternCasedCharacterNonGreedy: {
  1.1232 +            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
  1.1233 +            backTrack->matchAmount = 0;
  1.1234 +            MATCH_NEXT();
  1.1235 +        }
  1.1236 +
  1.1237 +        case ByteTerm::TypeCharacterClass:
  1.1238 +            if (matchCharacterClass(currentTerm(), context))
  1.1239 +                MATCH_NEXT();
  1.1240 +            BACKTRACK();
  1.1241 +        case ByteTerm::TypeBackReference:
  1.1242 +            if (matchBackReference(currentTerm(), context))
  1.1243 +                MATCH_NEXT();
  1.1244 +            BACKTRACK();
  1.1245 +        case ByteTerm::TypeParenthesesSubpattern: {
  1.1246 +            JSRegExpResult result = matchParentheses(currentTerm(), context);
  1.1247 +
  1.1248 +            if (result == JSRegExpMatch) {
  1.1249 +                MATCH_NEXT();
  1.1250 +            }  else if (result != JSRegExpNoMatch)
  1.1251 +                return result;
  1.1252 +
  1.1253 +            BACKTRACK();
  1.1254 +        }
  1.1255 +        case ByteTerm::TypeParenthesesSubpatternOnceBegin:
  1.1256 +            if (matchParenthesesOnceBegin(currentTerm(), context))
  1.1257 +                MATCH_NEXT();
  1.1258 +            BACKTRACK();
  1.1259 +        case ByteTerm::TypeParenthesesSubpatternOnceEnd:
  1.1260 +            if (matchParenthesesOnceEnd(currentTerm(), context))
  1.1261 +                MATCH_NEXT();
  1.1262 +            BACKTRACK();
  1.1263 +        case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
  1.1264 +            if (matchParenthesesTerminalBegin(currentTerm(), context))
  1.1265 +                MATCH_NEXT();
  1.1266 +            BACKTRACK();
  1.1267 +        case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
  1.1268 +            if (matchParenthesesTerminalEnd(currentTerm(), context))
  1.1269 +                MATCH_NEXT();
  1.1270 +            BACKTRACK();
  1.1271 +        case ByteTerm::TypeParentheticalAssertionBegin:
  1.1272 +            if (matchParentheticalAssertionBegin(currentTerm(), context))
  1.1273 +                MATCH_NEXT();
  1.1274 +            BACKTRACK();
  1.1275 +        case ByteTerm::TypeParentheticalAssertionEnd:
  1.1276 +            if (matchParentheticalAssertionEnd(currentTerm(), context))
  1.1277 +                MATCH_NEXT();
  1.1278 +            BACKTRACK();
  1.1279 +
  1.1280 +        case ByteTerm::TypeCheckInput:
  1.1281 +            if (input.checkInput(currentTerm().checkInputCount))
  1.1282 +                MATCH_NEXT();
  1.1283 +            BACKTRACK();
  1.1284 +
  1.1285 +        case ByteTerm::TypeUncheckInput:
  1.1286 +            input.uncheckInput(currentTerm().checkInputCount);
  1.1287 +            MATCH_NEXT();
  1.1288 +
  1.1289 +        case ByteTerm::TypeDotStarEnclosure:
  1.1290 +            if (matchDotStarEnclosure(currentTerm(), context))
  1.1291 +                return JSRegExpMatch;
  1.1292 +            BACKTRACK();
  1.1293 +        }
  1.1294 +
  1.1295 +        // We should never fall-through to here.
  1.1296 +        ASSERT_NOT_REACHED();
  1.1297 +
  1.1298 +    backtrack:
  1.1299 +        ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
  1.1300 +
  1.1301 +        // Prevent jank resulting from getting stuck in Yarr for a long time.
  1.1302 +        if (!CheckForInterrupt(this->cx))
  1.1303 +            return JSRegExpErrorInternal;
  1.1304 +
  1.1305 +        switch (currentTerm().type) {
  1.1306 +        case ByteTerm::TypeSubpatternBegin:
  1.1307 +            return JSRegExpNoMatch;
  1.1308 +        case ByteTerm::TypeSubpatternEnd:
  1.1309 +            ASSERT_NOT_REACHED();
  1.1310 +
  1.1311 +        case ByteTerm::TypeBodyAlternativeBegin:
  1.1312 +        case ByteTerm::TypeBodyAlternativeDisjunction: {
  1.1313 +            int offset = currentTerm().alternative.next;
  1.1314 +            context->term += offset;
  1.1315 +            if (offset > 0)
  1.1316 +                MATCH_NEXT();
  1.1317 +
  1.1318 +            if (input.atEnd())
  1.1319 +                return JSRegExpNoMatch;
  1.1320 +
  1.1321 +            input.next();
  1.1322 +
  1.1323 +            context->matchBegin = input.getPos();
  1.1324 +
  1.1325 +            if (currentTerm().alternative.onceThrough)
  1.1326 +                context->term += currentTerm().alternative.next;
  1.1327 +
  1.1328 +            MATCH_NEXT();
  1.1329 +        }
  1.1330 +        case ByteTerm::TypeBodyAlternativeEnd:
  1.1331 +            ASSERT_NOT_REACHED();
  1.1332 +
  1.1333 +        case ByteTerm::TypeAlternativeBegin:
  1.1334 +        case ByteTerm::TypeAlternativeDisjunction: {
  1.1335 +            int offset = currentTerm().alternative.next;
  1.1336 +            context->term += offset;
  1.1337 +            if (offset > 0)
  1.1338 +                MATCH_NEXT();
  1.1339 +            BACKTRACK();
  1.1340 +        }
  1.1341 +        case ByteTerm::TypeAlternativeEnd: {
  1.1342 +            // We should never backtrack back into an alternative of the main body of the regex.
  1.1343 +            BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
  1.1344 +            unsigned offset = backTrack->offset;
  1.1345 +            context->term -= offset;
  1.1346 +            BACKTRACK();
  1.1347 +        }
  1.1348 +
  1.1349 +        case ByteTerm::TypeAssertionBOL:
  1.1350 +        case ByteTerm::TypeAssertionEOL:
  1.1351 +        case ByteTerm::TypeAssertionWordBoundary:
  1.1352 +            BACKTRACK();
  1.1353 +
  1.1354 +        case ByteTerm::TypePatternCharacterOnce:
  1.1355 +        case ByteTerm::TypePatternCharacterFixed:
  1.1356 +        case ByteTerm::TypePatternCharacterGreedy:
  1.1357 +        case ByteTerm::TypePatternCharacterNonGreedy:
  1.1358 +            if (backtrackPatternCharacter(currentTerm(), context))
  1.1359 +                MATCH_NEXT();
  1.1360 +            BACKTRACK();
  1.1361 +        case ByteTerm::TypePatternCasedCharacterOnce:
  1.1362 +        case ByteTerm::TypePatternCasedCharacterFixed:
  1.1363 +        case ByteTerm::TypePatternCasedCharacterGreedy:
  1.1364 +        case ByteTerm::TypePatternCasedCharacterNonGreedy:
  1.1365 +            if (backtrackPatternCasedCharacter(currentTerm(), context))
  1.1366 +                MATCH_NEXT();
  1.1367 +            BACKTRACK();
  1.1368 +        case ByteTerm::TypeCharacterClass:
  1.1369 +            if (backtrackCharacterClass(currentTerm(), context))
  1.1370 +                MATCH_NEXT();
  1.1371 +            BACKTRACK();
  1.1372 +        case ByteTerm::TypeBackReference:
  1.1373 +            if (backtrackBackReference(currentTerm(), context))
  1.1374 +                MATCH_NEXT();
  1.1375 +            BACKTRACK();
  1.1376 +        case ByteTerm::TypeParenthesesSubpattern: {
  1.1377 +            JSRegExpResult result = backtrackParentheses(currentTerm(), context);
  1.1378 +
  1.1379 +            if (result == JSRegExpMatch) {
  1.1380 +                MATCH_NEXT();
  1.1381 +            } else if (result != JSRegExpNoMatch)
  1.1382 +                return result;
  1.1383 +
  1.1384 +            BACKTRACK();
  1.1385 +        }
  1.1386 +        case ByteTerm::TypeParenthesesSubpatternOnceBegin:
  1.1387 +            if (backtrackParenthesesOnceBegin(currentTerm(), context))
  1.1388 +                MATCH_NEXT();
  1.1389 +            BACKTRACK();
  1.1390 +        case ByteTerm::TypeParenthesesSubpatternOnceEnd:
  1.1391 +            if (backtrackParenthesesOnceEnd(currentTerm(), context))
  1.1392 +                MATCH_NEXT();
  1.1393 +            BACKTRACK();
  1.1394 +        case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
  1.1395 +            if (backtrackParenthesesTerminalBegin(currentTerm(), context))
  1.1396 +                MATCH_NEXT();
  1.1397 +            BACKTRACK();
  1.1398 +        case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
  1.1399 +            if (backtrackParenthesesTerminalEnd(currentTerm(), context))
  1.1400 +                MATCH_NEXT();
  1.1401 +            BACKTRACK();
  1.1402 +        case ByteTerm::TypeParentheticalAssertionBegin:
  1.1403 +            if (backtrackParentheticalAssertionBegin(currentTerm(), context))
  1.1404 +                MATCH_NEXT();
  1.1405 +            BACKTRACK();
  1.1406 +        case ByteTerm::TypeParentheticalAssertionEnd:
  1.1407 +            if (backtrackParentheticalAssertionEnd(currentTerm(), context))
  1.1408 +                MATCH_NEXT();
  1.1409 +            BACKTRACK();
  1.1410 +
  1.1411 +        case ByteTerm::TypeCheckInput:
  1.1412 +            input.uncheckInput(currentTerm().checkInputCount);
  1.1413 +            BACKTRACK();
  1.1414 +
  1.1415 +        case ByteTerm::TypeUncheckInput:
  1.1416 +            input.checkInput(currentTerm().checkInputCount);
  1.1417 +            BACKTRACK();
  1.1418 +
  1.1419 +        case ByteTerm::TypeDotStarEnclosure:
  1.1420 +            ASSERT_NOT_REACHED();
  1.1421 +        }
  1.1422 +
  1.1423 +        ASSERT_NOT_REACHED();
  1.1424 +        return JSRegExpErrorNoMatch;
  1.1425 +    }
  1.1426 +
  1.1427 +    JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
  1.1428 +    {
  1.1429 +        JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
  1.1430 +
  1.1431 +        if (result == JSRegExpMatch) {
  1.1432 +            while (context->matchBegin == context->matchEnd) {
  1.1433 +                result = matchDisjunction(disjunction, context, true);
  1.1434 +                if (result != JSRegExpMatch)
  1.1435 +                    return result;
  1.1436 +            }
  1.1437 +            return JSRegExpMatch;
  1.1438 +        }
  1.1439 +
  1.1440 +        return result;
  1.1441 +    }
  1.1442 +
  1.1443 +    unsigned interpret()
  1.1444 +    {
  1.1445 +        if (!input.isAvailableInput(0))
  1.1446 +            return offsetNoMatch;
  1.1447 +
  1.1448 +        for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)
  1.1449 +            output[i << 1] = offsetNoMatch;
  1.1450 +
  1.1451 +        allocatorPool = pattern->m_allocator->startAllocator();
  1.1452 +        if (!allocatorPool)
  1.1453 +            CRASH();
  1.1454 +
  1.1455 +        DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
  1.1456 +
  1.1457 +        JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
  1.1458 +        if (result == JSRegExpMatch) {
  1.1459 +            output[0] = context->matchBegin;
  1.1460 +            output[1] = context->matchEnd;
  1.1461 +        }
  1.1462 +
  1.1463 +        freeDisjunctionContext(context);
  1.1464 +
  1.1465 +        pattern->m_allocator->stopAllocator();
  1.1466 +
  1.1467 +        ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));
  1.1468 +        return output[0];
  1.1469 +    }
  1.1470 +
  1.1471 +    Interpreter(JSContext *cx, BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
  1.1472 +        : cx(cx)
  1.1473 +        , pattern(pattern)
  1.1474 +        , output(output)
  1.1475 +        , input(input, start, length)
  1.1476 +        , allocatorPool(0)
  1.1477 +        , remainingMatchCount(matchLimit)
  1.1478 +    {
  1.1479 +    }
  1.1480 +
  1.1481 +private:
  1.1482 +    JSContext *cx;
  1.1483 +    BytecodePattern* pattern;
  1.1484 +    unsigned* output;
  1.1485 +    InputStream input;
  1.1486 +    BumpPointerPool* allocatorPool;
  1.1487 +    unsigned remainingMatchCount;
  1.1488 +};
  1.1489 +
  1.1490 +
  1.1491 +
  1.1492 +class ByteCompiler {
  1.1493 +    struct ParenthesesStackEntry {
  1.1494 +        unsigned beginTerm;
  1.1495 +        unsigned savedAlternativeIndex;
  1.1496 +		
  1.1497 +        // For js::Vector. Does not create a valid object.
  1.1498 +        ParenthesesStackEntry() { }
  1.1499 +
  1.1500 +        ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
  1.1501 +            : beginTerm(beginTerm)
  1.1502 +            , savedAlternativeIndex(savedAlternativeIndex)
  1.1503 +        {
  1.1504 +        }
  1.1505 +    };
  1.1506 +
  1.1507 +public:
  1.1508 +    ByteCompiler(YarrPattern& pattern)
  1.1509 +        : m_pattern(pattern)
  1.1510 +    {
  1.1511 +        m_currentAlternativeIndex = 0;
  1.1512 +    }
  1.1513 +
  1.1514 +    PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
  1.1515 +    {
  1.1516 +        regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
  1.1517 +        emitDisjunction(m_pattern.m_body);
  1.1518 +        regexEnd();
  1.1519 +
  1.1520 +        return adoptPtr(newOrCrash<BytecodePattern>(m_bodyDisjunction.release(), m_allParenthesesInfo, Ref<YarrPattern>(m_pattern), allocator));
  1.1521 +    }
  1.1522 +
  1.1523 +    void checkInput(unsigned count)
  1.1524 +    {
  1.1525 +        m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
  1.1526 +    }
  1.1527 +
  1.1528 +    void uncheckInput(unsigned count)
  1.1529 +    {
  1.1530 +        m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
  1.1531 +    }
  1.1532 +
  1.1533 +    void assertionBOL(unsigned inputPosition)
  1.1534 +    {
  1.1535 +        m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
  1.1536 +    }
  1.1537 +
  1.1538 +    void assertionEOL(unsigned inputPosition)
  1.1539 +    {
  1.1540 +        m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
  1.1541 +    }
  1.1542 +
  1.1543 +    void assertionWordBoundary(bool invert, unsigned inputPosition)
  1.1544 +    {
  1.1545 +        m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
  1.1546 +    }
  1.1547 +
  1.1548 +    void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
  1.1549 +    {
  1.1550 +        if (m_pattern.m_ignoreCase) {
  1.1551 +            UChar lo = Unicode::toLower(ch);
  1.1552 +            UChar hi = Unicode::toUpper(ch);
  1.1553 +
  1.1554 +            if (lo != hi) {
  1.1555 +                m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
  1.1556 +                return;
  1.1557 +            }
  1.1558 +        }
  1.1559 +
  1.1560 +        m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
  1.1561 +    }
  1.1562 +
  1.1563 +    void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
  1.1564 +    {
  1.1565 +        m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
  1.1566 +
  1.1567 +        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
  1.1568 +        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
  1.1569 +        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
  1.1570 +    }
  1.1571 +
  1.1572 +    void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
  1.1573 +    {
  1.1574 +        ASSERT(subpatternId);
  1.1575 +
  1.1576 +        m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
  1.1577 +
  1.1578 +        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
  1.1579 +        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
  1.1580 +        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
  1.1581 +    }
  1.1582 +
  1.1583 +    void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
  1.1584 +    {
  1.1585 +        int beginTerm = m_bodyDisjunction->terms.size();
  1.1586 +
  1.1587 +        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
  1.1588 +        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
  1.1589 +        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
  1.1590 +        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
  1.1591 +
  1.1592 +        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
  1.1593 +        m_currentAlternativeIndex = beginTerm + 1;
  1.1594 +    }
  1.1595 +
  1.1596 +    void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
  1.1597 +    {
  1.1598 +        int beginTerm = m_bodyDisjunction->terms.size();
  1.1599 +
  1.1600 +        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
  1.1601 +        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
  1.1602 +        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
  1.1603 +        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
  1.1604 +
  1.1605 +        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
  1.1606 +        m_currentAlternativeIndex = beginTerm + 1;
  1.1607 +    }
  1.1608 +
  1.1609 +    void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
  1.1610 +    {
  1.1611 +        // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
  1.1612 +        // then fix this up at the end! - simplifying this should make it much clearer.
  1.1613 +        // https://bugs.webkit.org/show_bug.cgi?id=50136
  1.1614 +
  1.1615 +        int beginTerm = m_bodyDisjunction->terms.size();
  1.1616 +
  1.1617 +        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
  1.1618 +        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
  1.1619 +        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
  1.1620 +        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
  1.1621 +
  1.1622 +        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
  1.1623 +        m_currentAlternativeIndex = beginTerm + 1;
  1.1624 +    }
  1.1625 +
  1.1626 +    void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
  1.1627 +    {
  1.1628 +        int beginTerm = m_bodyDisjunction->terms.size();
  1.1629 +
  1.1630 +        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
  1.1631 +        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
  1.1632 +        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
  1.1633 +        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
  1.1634 +
  1.1635 +        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
  1.1636 +        m_currentAlternativeIndex = beginTerm + 1;
  1.1637 +    }
  1.1638 +
  1.1639 +    void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
  1.1640 +    {
  1.1641 +        unsigned beginTerm = popParenthesesStack();
  1.1642 +        closeAlternative(beginTerm + 1);
  1.1643 +        unsigned endTerm = m_bodyDisjunction->terms.size();
  1.1644 +
  1.1645 +        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
  1.1646 +
  1.1647 +        bool invert = m_bodyDisjunction->terms[beginTerm].invert();
  1.1648 +        unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
  1.1649 +
  1.1650 +        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
  1.1651 +        m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
  1.1652 +        m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
  1.1653 +        m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
  1.1654 +
  1.1655 +        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
  1.1656 +        m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
  1.1657 +        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
  1.1658 +        m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
  1.1659 +    }
  1.1660 +
  1.1661 +    void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
  1.1662 +    {
  1.1663 +        m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
  1.1664 +    }
  1.1665 +
  1.1666 +    unsigned popParenthesesStack()
  1.1667 +    {
  1.1668 +        ASSERT(m_parenthesesStack.size());
  1.1669 +        int stackEnd = m_parenthesesStack.size() - 1;
  1.1670 +        unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
  1.1671 +        m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
  1.1672 +        m_parenthesesStack.shrink(stackEnd);
  1.1673 +
  1.1674 +        ASSERT(beginTerm < m_bodyDisjunction->terms.size());
  1.1675 +        ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
  1.1676 +
  1.1677 +        return beginTerm;
  1.1678 +    }
  1.1679 +
  1.1680 +#ifndef NDEBUG
  1.1681 +    void dumpDisjunction(ByteDisjunction* disjunction)
  1.1682 +    {
  1.1683 +        dataLogF("ByteDisjunction(%p):\n\t", (void *)disjunction);
  1.1684 +        for (unsigned i = 0; i < disjunction->terms.size(); ++i)
  1.1685 +            dataLogF("{ %d } ", disjunction->terms[i].type);
  1.1686 +        dataLogF("\n");
  1.1687 +    }
  1.1688 +#endif
  1.1689 +
  1.1690 +    void closeAlternative(int beginTerm)
  1.1691 +    {
  1.1692 +        int origBeginTerm = beginTerm;
  1.1693 +        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
  1.1694 +        int endIndex = m_bodyDisjunction->terms.size();
  1.1695 +
  1.1696 +        unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
  1.1697 +
  1.1698 +        if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
  1.1699 +            m_bodyDisjunction->terms.remove(beginTerm);
  1.1700 +        else {
  1.1701 +            while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
  1.1702 +                beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
  1.1703 +                ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
  1.1704 +                m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
  1.1705 +                m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
  1.1706 +            }
  1.1707 +
  1.1708 +            m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
  1.1709 +
  1.1710 +            m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
  1.1711 +            m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
  1.1712 +        }
  1.1713 +    }
  1.1714 +
  1.1715 +    void closeBodyAlternative()
  1.1716 +    {
  1.1717 +        int beginTerm = 0;
  1.1718 +        int origBeginTerm = 0;
  1.1719 +        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
  1.1720 +        int endIndex = m_bodyDisjunction->terms.size();
  1.1721 +
  1.1722 +        unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
  1.1723 +
  1.1724 +        while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
  1.1725 +            beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
  1.1726 +            ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
  1.1727 +            m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
  1.1728 +            m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
  1.1729 +        }
  1.1730 +
  1.1731 +        m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
  1.1732 +
  1.1733 +        m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
  1.1734 +        m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
  1.1735 +    }
  1.1736 +
  1.1737 +    void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
  1.1738 +    {
  1.1739 +        unsigned beginTerm = popParenthesesStack();
  1.1740 +        closeAlternative(beginTerm + 1);
  1.1741 +        unsigned endTerm = m_bodyDisjunction->terms.size();
  1.1742 +
  1.1743 +        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
  1.1744 +
  1.1745 +        ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
  1.1746 +
  1.1747 +        bool capture = parenthesesBegin.capture();
  1.1748 +        unsigned subpatternId = parenthesesBegin.atom.subpatternId;
  1.1749 +
  1.1750 +        unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
  1.1751 +        ByteDisjunction* parenthesesDisjunction = newOrCrash<ByteDisjunction>(numSubpatterns, callFrameSize);
  1.1752 +
  1.1753 +        parenthesesDisjunction->terms.reserve(endTerm - beginTerm + 1);
  1.1754 +        parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
  1.1755 +        for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
  1.1756 +            parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
  1.1757 +        parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
  1.1758 +
  1.1759 +        m_bodyDisjunction->terms.shrink(beginTerm);
  1.1760 +
  1.1761 +        m_allParenthesesInfo.append(parenthesesDisjunction);
  1.1762 +        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition));
  1.1763 +
  1.1764 +        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
  1.1765 +        m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
  1.1766 +        m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
  1.1767 +    }
  1.1768 +
  1.1769 +    void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
  1.1770 +    {
  1.1771 +        unsigned beginTerm = popParenthesesStack();
  1.1772 +        closeAlternative(beginTerm + 1);
  1.1773 +        unsigned endTerm = m_bodyDisjunction->terms.size();
  1.1774 +
  1.1775 +        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
  1.1776 +
  1.1777 +        bool capture = m_bodyDisjunction->terms[beginTerm].capture();
  1.1778 +        unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
  1.1779 +
  1.1780 +        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
  1.1781 +        m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
  1.1782 +        m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
  1.1783 +        m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
  1.1784 +
  1.1785 +        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
  1.1786 +        m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
  1.1787 +        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
  1.1788 +        m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
  1.1789 +    }
  1.1790 +
  1.1791 +    void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
  1.1792 +    {
  1.1793 +        unsigned beginTerm = popParenthesesStack();
  1.1794 +        closeAlternative(beginTerm + 1);
  1.1795 +        unsigned endTerm = m_bodyDisjunction->terms.size();
  1.1796 +
  1.1797 +        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
  1.1798 +
  1.1799 +        bool capture = m_bodyDisjunction->terms[beginTerm].capture();
  1.1800 +        unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
  1.1801 +
  1.1802 +        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
  1.1803 +        m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
  1.1804 +        m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
  1.1805 +        m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
  1.1806 +
  1.1807 +        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
  1.1808 +        m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
  1.1809 +        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
  1.1810 +        m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
  1.1811 +    }
  1.1812 +
  1.1813 +    void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
  1.1814 +    {
  1.1815 +        m_bodyDisjunction = adoptPtr(newOrCrash<ByteDisjunction>(numSubpatterns, callFrameSize));
  1.1816 +        m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
  1.1817 +        m_bodyDisjunction->terms[0].frameLocation = 0;
  1.1818 +        m_currentAlternativeIndex = 0;
  1.1819 +    }
  1.1820 +
  1.1821 +    void regexEnd()
  1.1822 +    {
  1.1823 +        closeBodyAlternative();
  1.1824 +    }
  1.1825 +
  1.1826 +    void alternativeBodyDisjunction(bool onceThrough)
  1.1827 +    {
  1.1828 +        int newAlternativeIndex = m_bodyDisjunction->terms.size();
  1.1829 +        m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
  1.1830 +        m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
  1.1831 +
  1.1832 +        m_currentAlternativeIndex = newAlternativeIndex;
  1.1833 +    }
  1.1834 +
  1.1835 +    void alternativeDisjunction()
  1.1836 +    {
  1.1837 +        int newAlternativeIndex = m_bodyDisjunction->terms.size();
  1.1838 +        m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
  1.1839 +        m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
  1.1840 +
  1.1841 +        m_currentAlternativeIndex = newAlternativeIndex;
  1.1842 +    }
  1.1843 +
  1.1844 +    void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
  1.1845 +    {
  1.1846 +        for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
  1.1847 +            unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
  1.1848 +
  1.1849 +            PatternAlternative* alternative = disjunction->m_alternatives[alt];
  1.1850 +
  1.1851 +            if (alt) {
  1.1852 +                if (disjunction == m_pattern.m_body)
  1.1853 +                    alternativeBodyDisjunction(alternative->onceThrough());
  1.1854 +                else
  1.1855 +                    alternativeDisjunction();
  1.1856 +            }
  1.1857 +
  1.1858 +            unsigned minimumSize = alternative->m_minimumSize;
  1.1859 +            ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
  1.1860 +            unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
  1.1861 +
  1.1862 +            if (countToCheck) {
  1.1863 +                checkInput(countToCheck);
  1.1864 +                currentCountAlreadyChecked += countToCheck;
  1.1865 +            }
  1.1866 +
  1.1867 +            for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
  1.1868 +                PatternTerm& term = alternative->m_terms[i];
  1.1869 +
  1.1870 +                switch (term.type) {
  1.1871 +                case PatternTerm::TypeAssertionBOL:
  1.1872 +                    assertionBOL(currentCountAlreadyChecked - term.inputPosition);
  1.1873 +                    break;
  1.1874 +
  1.1875 +                case PatternTerm::TypeAssertionEOL:
  1.1876 +                    assertionEOL(currentCountAlreadyChecked - term.inputPosition);
  1.1877 +                    break;
  1.1878 +
  1.1879 +                case PatternTerm::TypeAssertionWordBoundary:
  1.1880 +                    assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);
  1.1881 +                    break;
  1.1882 +
  1.1883 +                case PatternTerm::TypePatternCharacter:
  1.1884 +                    atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
  1.1885 +                    break;
  1.1886 +
  1.1887 +                case PatternTerm::TypeCharacterClass:
  1.1888 +                    atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
  1.1889 +                    break;
  1.1890 +
  1.1891 +                case PatternTerm::TypeBackReference:
  1.1892 +                    atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
  1.1893 +                        break;
  1.1894 +
  1.1895 +                case PatternTerm::TypeForwardReference:
  1.1896 +                    break;
  1.1897 +
  1.1898 +                case PatternTerm::TypeParenthesesSubpattern: {
  1.1899 +                    unsigned disjunctionAlreadyCheckedCount = 0;
  1.1900 +                    if (term.quantityCount == 1 && !term.parentheses.isCopy) {
  1.1901 +                        unsigned alternativeFrameLocation = term.frameLocation;
  1.1902 +                        // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
  1.1903 +                        if (term.quantityType == QuantifierFixedCount)
  1.1904 +                            disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
  1.1905 +                        else
  1.1906 +                            alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
  1.1907 +                        unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
  1.1908 +                        atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
  1.1909 +                        emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
  1.1910 +                        atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
  1.1911 +                    } else if (term.parentheses.isTerminal) {
  1.1912 +                        unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
  1.1913 +                        atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
  1.1914 +                        emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
  1.1915 +                        atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
  1.1916 +                    } else {
  1.1917 +                        unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
  1.1918 +                        atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0);
  1.1919 +                        emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
  1.1920 +                        atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
  1.1921 +                    }
  1.1922 +                    break;
  1.1923 +                }
  1.1924 +
  1.1925 +                case PatternTerm::TypeParentheticalAssertion: {
  1.1926 +                    unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
  1.1927 +
  1.1928 +                    ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
  1.1929 +                    unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition);
  1.1930 +                    unsigned uncheckAmount = 0;
  1.1931 +                    if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
  1.1932 +                        uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
  1.1933 +                        uncheckInput(uncheckAmount);
  1.1934 +                        currentCountAlreadyChecked -= uncheckAmount;
  1.1935 +                    }
  1.1936 +
  1.1937 +                    atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
  1.1938 +                    emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
  1.1939 +                    atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
  1.1940 +                    if (uncheckAmount) {
  1.1941 +                        checkInput(uncheckAmount);
  1.1942 +                        currentCountAlreadyChecked += uncheckAmount;
  1.1943 +                    }
  1.1944 +                    break;
  1.1945 +                }
  1.1946 +
  1.1947 +                case PatternTerm::TypeDotStarEnclosure:
  1.1948 +                    assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
  1.1949 +                    break;
  1.1950 +                }
  1.1951 +            }
  1.1952 +        }
  1.1953 +    }
  1.1954 +
  1.1955 +private:
  1.1956 +    YarrPattern& m_pattern;
  1.1957 +    OwnPtr<ByteDisjunction> m_bodyDisjunction;
  1.1958 +    unsigned m_currentAlternativeIndex;
  1.1959 +    Vector<ParenthesesStackEntry> m_parenthesesStack;
  1.1960 +    Vector<ByteDisjunction*> m_allParenthesesInfo;
  1.1961 +};
  1.1962 +
  1.1963 +PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
  1.1964 +{
  1.1965 +    return ByteCompiler(pattern).compile(allocator);
  1.1966 +}
  1.1967 +
  1.1968 +unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
  1.1969 +{
  1.1970 +#if YARR_8BIT_CHAR_SUPPORT
  1.1971 +    if (input.is8Bit())
  1.1972 +        return Interpreter<LChar>(cx, bytecode, output, input.characters8(), input.length(), start).interpret();
  1.1973 +#endif
  1.1974 +    return Interpreter<UChar>(cx, bytecode, output, input.chars(), input.length(), start).interpret();
  1.1975 +}
  1.1976 +
  1.1977 +unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
  1.1978 +{
  1.1979 +    return Interpreter<LChar>(cx, bytecode, output, input, length, start).interpret();
  1.1980 +}
  1.1981 +
  1.1982 +unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
  1.1983 +{
  1.1984 +    return Interpreter<UChar>(cx, bytecode, output, input, length, start).interpret();
  1.1985 +}
  1.1986 +
  1.1987 +// These should be the same for both UChar & LChar.
  1.1988 +COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
  1.1989 +COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
  1.1990 +COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
  1.1991 +COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
  1.1992 +COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
  1.1993 +COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
  1.1994 +COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
  1.1995 +
  1.1996 +
  1.1997 +} }

mercurial