js/src/yarr/YarrPattern.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/js/src/yarr/YarrPattern.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,933 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
     1.5 + * vim: set ts=8 sts=4 et sw=4 tw=99:
     1.6 + *
     1.7 + * Copyright (C) 2009 Apple Inc. All rights reserved.
     1.8 + * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
     1.9 + *
    1.10 + * Redistribution and use in source and binary forms, with or without
    1.11 + * modification, are permitted provided that the following conditions
    1.12 + * are met:
    1.13 + * 1. Redistributions of source code must retain the above copyright
    1.14 + *    notice, this list of conditions and the following disclaimer.
    1.15 + * 2. Redistributions in binary form must reproduce the above copyright
    1.16 + *    notice, this list of conditions and the following disclaimer in the
    1.17 + *    documentation and/or other materials provided with the distribution.
    1.18 + *
    1.19 + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
    1.20 + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    1.21 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    1.22 + * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
    1.23 + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    1.24 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    1.25 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    1.26 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
    1.27 + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    1.28 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    1.29 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    1.30 + */
    1.31 +
    1.32 +#include "yarr/YarrPattern.h"
    1.33 +
    1.34 +#include "yarr/Yarr.h"
    1.35 +#include "yarr/YarrCanonicalizeUCS2.h"
    1.36 +#include "yarr/YarrParser.h"
    1.37 +
    1.38 +using namespace WTF;
    1.39 +
    1.40 +namespace JSC { namespace Yarr {
    1.41 +
    1.42 +#include "yarr/RegExpJitTables.h"
    1.43 +
    1.44 +#if WTF_CPU_SPARC
    1.45 +# define BASE_FRAME_SIZE 24
    1.46 +#else
    1.47 +# define BASE_FRAME_SIZE 0
    1.48 +#endif
    1.49 +
    1.50 +// Thanks, windows.h!
    1.51 +#undef min
    1.52 +#undef max
    1.53 +
    1.54 +class CharacterClassConstructor {
    1.55 +public:
    1.56 +    CharacterClassConstructor(bool isCaseInsensitive = false)
    1.57 +        : m_isCaseInsensitive(isCaseInsensitive)
    1.58 +    {
    1.59 +    }
    1.60 +
    1.61 +    void reset()
    1.62 +    {
    1.63 +        m_matches.clear();
    1.64 +        m_ranges.clear();
    1.65 +        m_matchesUnicode.clear();
    1.66 +        m_rangesUnicode.clear();
    1.67 +    }
    1.68 +
    1.69 +    void append(const CharacterClass* other)
    1.70 +    {
    1.71 +        for (size_t i = 0; i < other->m_matches.size(); ++i)
    1.72 +            addSorted(m_matches, other->m_matches[i]);
    1.73 +        for (size_t i = 0; i < other->m_ranges.size(); ++i)
    1.74 +            addSortedRange(m_ranges, other->m_ranges[i].begin, other->m_ranges[i].end);
    1.75 +        for (size_t i = 0; i < other->m_matchesUnicode.size(); ++i)
    1.76 +            addSorted(m_matchesUnicode, other->m_matchesUnicode[i]);
    1.77 +        for (size_t i = 0; i < other->m_rangesUnicode.size(); ++i)
    1.78 +            addSortedRange(m_rangesUnicode, other->m_rangesUnicode[i].begin, other->m_rangesUnicode[i].end);
    1.79 +    }
    1.80 +
    1.81 +    void putChar(UChar ch)
    1.82 +    {
    1.83 +        // Handle ascii cases.
    1.84 +        if (ch <= 0x7f) {
    1.85 +            if (m_isCaseInsensitive && isASCIIAlpha(ch)) {
    1.86 +                addSorted(m_matches, toASCIIUpper(ch));
    1.87 +                addSorted(m_matches, toASCIILower(ch));
    1.88 +            } else
    1.89 +                addSorted(m_matches, ch);
    1.90 +            return;
    1.91 +        }
    1.92 +
    1.93 +        // Simple case, not a case-insensitive match.
    1.94 +        if (!m_isCaseInsensitive) {
    1.95 +            addSorted(m_matchesUnicode, ch);
    1.96 +            return;
    1.97 +        }
    1.98 +
    1.99 +        // Add multiple matches, if necessary.
   1.100 +        const UCS2CanonicalizationRange* info = rangeInfoFor(ch);
   1.101 +        if (info->type == CanonicalizeUnique)
   1.102 +            addSorted(m_matchesUnicode, ch);
   1.103 +        else
   1.104 +            putUnicodeIgnoreCase(ch, info);
   1.105 +    }
   1.106 +
   1.107 +    void putUnicodeIgnoreCase(UChar ch, const UCS2CanonicalizationRange* info)
   1.108 +    {
   1.109 +        ASSERT(m_isCaseInsensitive);
   1.110 +        ASSERT(ch > 0x7f);
   1.111 +        ASSERT(ch >= info->begin && ch <= info->end);
   1.112 +        ASSERT(info->type != CanonicalizeUnique);
   1.113 +        if (info->type == CanonicalizeSet) {
   1.114 +            for (const uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
   1.115 +                addSorted(m_matchesUnicode, ch);
   1.116 +        } else {
   1.117 +            addSorted(m_matchesUnicode, ch);
   1.118 +            addSorted(m_matchesUnicode, getCanonicalPair(info, ch));
   1.119 +        }
   1.120 +    }
   1.121 +
   1.122 +    void putRange(UChar lo, UChar hi)
   1.123 +    {
   1.124 +        if (lo <= 0x7f) {
   1.125 +            char asciiLo = lo;
   1.126 +            char asciiHi = std::min(hi, (UChar)0x7f);
   1.127 +            addSortedRange(m_ranges, lo, asciiHi);
   1.128 +
   1.129 +            if (m_isCaseInsensitive) {
   1.130 +                if ((asciiLo <= 'Z') && (asciiHi >= 'A'))
   1.131 +                    addSortedRange(m_ranges, std::max(asciiLo, 'A')+('a'-'A'), std::min(asciiHi, 'Z')+('a'-'A'));
   1.132 +                if ((asciiLo <= 'z') && (asciiHi >= 'a'))
   1.133 +                    addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a'));
   1.134 +            }
   1.135 +        }
   1.136 +        if (hi <= 0x7f)
   1.137 +            return;
   1.138 +
   1.139 +        lo = std::max(lo, (UChar)0x80);
   1.140 +        addSortedRange(m_rangesUnicode, lo, hi);
   1.141 +
   1.142 +        if (!m_isCaseInsensitive)
   1.143 +            return;
   1.144 +
   1.145 +        const UCS2CanonicalizationRange* info = rangeInfoFor(lo);
   1.146 +        while (true) {
   1.147 +            // Handle the range [lo .. end]
   1.148 +            UChar end = std::min<UChar>(info->end, hi);
   1.149 +
   1.150 +            switch (info->type) {
   1.151 +            case CanonicalizeUnique:
   1.152 +                // Nothing to do - no canonical equivalents.
   1.153 +                break;
   1.154 +            case CanonicalizeSet: {
   1.155 +                UChar ch;
   1.156 +                for (const uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
   1.157 +                    addSorted(m_matchesUnicode, ch);
   1.158 +                break;
   1.159 +            }
   1.160 +            case CanonicalizeRangeLo:
   1.161 +                addSortedRange(m_rangesUnicode, lo + info->value, end + info->value);
   1.162 +                break;
   1.163 +            case CanonicalizeRangeHi:
   1.164 +                addSortedRange(m_rangesUnicode, lo - info->value, end - info->value);
   1.165 +                break;
   1.166 +            case CanonicalizeAlternatingAligned:
   1.167 +                // Use addSortedRange since there is likely an abutting range to combine with.
   1.168 +                if (lo & 1)
   1.169 +                    addSortedRange(m_rangesUnicode, lo - 1, lo - 1);
   1.170 +                if (!(end & 1))
   1.171 +                    addSortedRange(m_rangesUnicode, end + 1, end + 1);
   1.172 +                break;
   1.173 +            case CanonicalizeAlternatingUnaligned:
   1.174 +                // Use addSortedRange since there is likely an abutting range to combine with.
   1.175 +                if (!(lo & 1))
   1.176 +                    addSortedRange(m_rangesUnicode, lo - 1, lo - 1);
   1.177 +                if (end & 1)
   1.178 +                    addSortedRange(m_rangesUnicode, end + 1, end + 1);
   1.179 +                break;
   1.180 +            }
   1.181 +
   1.182 +            if (hi == end)
   1.183 +                return;
   1.184 +
   1.185 +            ++info;
   1.186 +            lo = info->begin;
   1.187 +        };
   1.188 +
   1.189 +    }
   1.190 +
   1.191 +    CharacterClass* charClass()
   1.192 +    {
   1.193 +        CharacterClass* characterClass = newOrCrash<CharacterClass>();
   1.194 +
   1.195 +        characterClass->m_matches.swap(m_matches);
   1.196 +        characterClass->m_ranges.swap(m_ranges);
   1.197 +        characterClass->m_matchesUnicode.swap(m_matchesUnicode);
   1.198 +        characterClass->m_rangesUnicode.swap(m_rangesUnicode);
   1.199 +
   1.200 +        return characterClass;
   1.201 +    }
   1.202 +
   1.203 +private:
   1.204 +    void addSorted(Vector<UChar>& matches, UChar ch)
   1.205 +    {
   1.206 +        unsigned pos = 0;
   1.207 +        unsigned range = matches.size();
   1.208 +
   1.209 +        // binary chop, find position to insert char.
   1.210 +        while (range) {
   1.211 +            unsigned index = range >> 1;
   1.212 +
   1.213 +            int val = matches[pos+index] - ch;
   1.214 +            if (!val)
   1.215 +                return;
   1.216 +            else if (val > 0)
   1.217 +                range = index;
   1.218 +            else {
   1.219 +                pos += (index+1);
   1.220 +                range -= (index+1);
   1.221 +            }
   1.222 +        }
   1.223 +
   1.224 +        if (pos == matches.size())
   1.225 +            matches.append(ch);
   1.226 +        else
   1.227 +            matches.insert(pos, ch);
   1.228 +    }
   1.229 +
   1.230 +    void addSortedRange(Vector<CharacterRange>& ranges, UChar lo, UChar hi)
   1.231 +    {
   1.232 +        unsigned end = ranges.size();
   1.233 +
   1.234 +        // Simple linear scan - I doubt there are that many ranges anyway...
   1.235 +        // feel free to fix this with something faster (eg binary chop).
   1.236 +        for (unsigned i = 0; i < end; ++i) {
   1.237 +            // does the new range fall before the current position in the array
   1.238 +            if (hi < ranges[i].begin) {
   1.239 +                // optional optimization: concatenate appending ranges? - may not be worthwhile.
   1.240 +                if (hi == (ranges[i].begin - 1)) {
   1.241 +                    ranges[i].begin = lo;
   1.242 +                    return;
   1.243 +                }
   1.244 +                ranges.insert(i, CharacterRange(lo, hi));
   1.245 +                return;
   1.246 +            }
   1.247 +            // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining
   1.248 +            // If the new range start at or before the end of the last range, then the overlap (if it starts one after the
   1.249 +            // end of the last range they concatenate, which is just as good.
   1.250 +            if (lo <= (ranges[i].end + 1)) {
   1.251 +                // found an intersect! we'll replace this entry in the array.
   1.252 +                ranges[i].begin = std::min(ranges[i].begin, lo);
   1.253 +                ranges[i].end = std::max(ranges[i].end, hi);
   1.254 +
   1.255 +                // now check if the new range can subsume any subsequent ranges.
   1.256 +                unsigned next = i+1;
   1.257 +                // each iteration of the loop we will either remove something from the list, or break the loop.
   1.258 +                while (next < ranges.size()) {
   1.259 +                    if (ranges[next].begin <= (ranges[i].end + 1)) {
   1.260 +                        // the next entry now overlaps / concatenates this one.
   1.261 +                        ranges[i].end = std::max(ranges[i].end, ranges[next].end);
   1.262 +                        ranges.remove(next);
   1.263 +                    } else
   1.264 +                        break;
   1.265 +                }
   1.266 +
   1.267 +                return;
   1.268 +            }
   1.269 +        }
   1.270 +
   1.271 +        // CharacterRange comes after all existing ranges.
   1.272 +        ranges.append(CharacterRange(lo, hi));
   1.273 +    }
   1.274 +
   1.275 +    bool m_isCaseInsensitive;
   1.276 +
   1.277 +    Vector<UChar> m_matches;
   1.278 +    Vector<CharacterRange> m_ranges;
   1.279 +    Vector<UChar> m_matchesUnicode;
   1.280 +    Vector<CharacterRange> m_rangesUnicode;
   1.281 +};
   1.282 +
   1.283 +class YarrPatternConstructor {
   1.284 +public:
   1.285 +    YarrPatternConstructor(YarrPattern& pattern)
   1.286 +        : m_pattern(pattern)
   1.287 +        , m_stackBase(nullptr)
   1.288 +        , m_characterClassConstructor(pattern.m_ignoreCase)
   1.289 +        , m_invertParentheticalAssertion(false)
   1.290 +    {
   1.291 +        m_pattern.m_body = newOrCrash<PatternDisjunction>();
   1.292 +        m_alternative = m_pattern.m_body->addNewAlternative();
   1.293 +        m_pattern.m_disjunctions.append(m_pattern.m_body);
   1.294 +    }
   1.295 +
   1.296 +    ~YarrPatternConstructor()
   1.297 +    {
   1.298 +    }
   1.299 +
   1.300 +    void reset()
   1.301 +    {
   1.302 +        m_pattern.reset();
   1.303 +        m_characterClassConstructor.reset();
   1.304 +
   1.305 +        m_pattern.m_body = newOrCrash<PatternDisjunction>();
   1.306 +        m_alternative = m_pattern.m_body->addNewAlternative();
   1.307 +        m_pattern.m_disjunctions.append(m_pattern.m_body);
   1.308 +    }
   1.309 +
   1.310 +    void assertionBOL()
   1.311 +    {
   1.312 +        if (!m_alternative->m_terms.size() & !m_invertParentheticalAssertion) {
   1.313 +            m_alternative->m_startsWithBOL = true;
   1.314 +            m_alternative->m_containsBOL = true;
   1.315 +            m_pattern.m_containsBOL = true;
   1.316 +        }
   1.317 +        m_alternative->m_terms.append(PatternTerm::BOL());
   1.318 +    }
   1.319 +    void assertionEOL()
   1.320 +    {
   1.321 +        m_alternative->m_terms.append(PatternTerm::EOL());
   1.322 +    }
   1.323 +    void assertionWordBoundary(bool invert)
   1.324 +    {
   1.325 +        m_alternative->m_terms.append(PatternTerm::WordBoundary(invert));
   1.326 +    }
   1.327 +
   1.328 +    void atomPatternCharacter(UChar ch)
   1.329 +    {
   1.330 +        // We handle case-insensitive checking of unicode characters which do have both
   1.331 +        // cases by handling them as if they were defined using a CharacterClass.
   1.332 +        if (!m_pattern.m_ignoreCase || isASCII(ch)) {
   1.333 +            m_alternative->m_terms.append(PatternTerm(ch));
   1.334 +            return;
   1.335 +        }
   1.336 +
   1.337 +        const UCS2CanonicalizationRange* info = rangeInfoFor(ch);
   1.338 +        if (info->type == CanonicalizeUnique) {
   1.339 +            m_alternative->m_terms.append(PatternTerm(ch));
   1.340 +            return;
   1.341 +        }
   1.342 +
   1.343 +        m_characterClassConstructor.putUnicodeIgnoreCase(ch, info);
   1.344 +        CharacterClass* newCharacterClass = m_characterClassConstructor.charClass();
   1.345 +        m_pattern.m_userCharacterClasses.append(newCharacterClass);
   1.346 +        m_alternative->m_terms.append(PatternTerm(newCharacterClass, false));
   1.347 +    }
   1.348 +
   1.349 +    void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
   1.350 +    {
   1.351 +        switch (classID) {
   1.352 +        case DigitClassID:
   1.353 +            m_alternative->m_terms.append(PatternTerm(m_pattern.digitsCharacterClass(), invert));
   1.354 +            break;
   1.355 +        case SpaceClassID:
   1.356 +            m_alternative->m_terms.append(PatternTerm(m_pattern.spacesCharacterClass(), invert));
   1.357 +            break;
   1.358 +        case WordClassID:
   1.359 +            m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert));
   1.360 +            break;
   1.361 +        case NewlineClassID:
   1.362 +            m_alternative->m_terms.append(PatternTerm(m_pattern.newlineCharacterClass(), invert));
   1.363 +            break;
   1.364 +        }
   1.365 +    }
   1.366 +
   1.367 +    void atomCharacterClassBegin(bool invert = false)
   1.368 +    {
   1.369 +        m_invertCharacterClass = invert;
   1.370 +    }
   1.371 +
   1.372 +    void atomCharacterClassAtom(UChar ch)
   1.373 +    {
   1.374 +        m_characterClassConstructor.putChar(ch);
   1.375 +    }
   1.376 +
   1.377 +    void atomCharacterClassRange(UChar begin, UChar end)
   1.378 +    {
   1.379 +        m_characterClassConstructor.putRange(begin, end);
   1.380 +    }
   1.381 +
   1.382 +    void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
   1.383 +    {
   1.384 +        ASSERT(classID != NewlineClassID);
   1.385 +
   1.386 +        switch (classID) {
   1.387 +        case DigitClassID:
   1.388 +            m_characterClassConstructor.append(invert ? m_pattern.nondigitsCharacterClass() : m_pattern.digitsCharacterClass());
   1.389 +            break;
   1.390 +
   1.391 +        case SpaceClassID:
   1.392 +            m_characterClassConstructor.append(invert ? m_pattern.nonspacesCharacterClass() : m_pattern.spacesCharacterClass());
   1.393 +            break;
   1.394 +
   1.395 +        case WordClassID:
   1.396 +            m_characterClassConstructor.append(invert ? m_pattern.nonwordcharCharacterClass() : m_pattern.wordcharCharacterClass());
   1.397 +            break;
   1.398 +
   1.399 +        default:
   1.400 +            ASSERT_NOT_REACHED();
   1.401 +        }
   1.402 +    }
   1.403 +
   1.404 +    void atomCharacterClassEnd()
   1.405 +    {
   1.406 +        CharacterClass* newCharacterClass = m_characterClassConstructor.charClass();
   1.407 +        m_pattern.m_userCharacterClasses.append(newCharacterClass);
   1.408 +        m_alternative->m_terms.append(PatternTerm(newCharacterClass, m_invertCharacterClass));
   1.409 +    }
   1.410 +
   1.411 +    void atomParenthesesSubpatternBegin(bool capture = true)
   1.412 +    {
   1.413 +        unsigned subpatternId = m_pattern.m_numSubpatterns + 1;
   1.414 +        if (capture)
   1.415 +            m_pattern.m_numSubpatterns++;
   1.416 +
   1.417 +        PatternDisjunction* parenthesesDisjunction = newOrCrash<PatternDisjunction>(m_alternative);
   1.418 +        m_pattern.m_disjunctions.append(parenthesesDisjunction);
   1.419 +        m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, false));
   1.420 +        m_alternative = parenthesesDisjunction->addNewAlternative();
   1.421 +    }
   1.422 +
   1.423 +    void atomParentheticalAssertionBegin(bool invert = false)
   1.424 +    {
   1.425 +        PatternDisjunction* parenthesesDisjunction = newOrCrash<PatternDisjunction>(m_alternative);
   1.426 +        m_pattern.m_disjunctions.append(parenthesesDisjunction);
   1.427 +        m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, false, invert));
   1.428 +        m_alternative = parenthesesDisjunction->addNewAlternative();
   1.429 +        m_invertParentheticalAssertion = invert;
   1.430 +    }
   1.431 +
   1.432 +    void atomParenthesesEnd()
   1.433 +    {
   1.434 +        ASSERT(m_alternative->m_parent);
   1.435 +        ASSERT(m_alternative->m_parent->m_parent);
   1.436 +
   1.437 +        PatternDisjunction* parenthesesDisjunction = m_alternative->m_parent;
   1.438 +        m_alternative = m_alternative->m_parent->m_parent;
   1.439 +
   1.440 +        PatternTerm& lastTerm = m_alternative->lastTerm();
   1.441 +
   1.442 +        unsigned numParenAlternatives = parenthesesDisjunction->m_alternatives.size();
   1.443 +        unsigned numBOLAnchoredAlts = 0;
   1.444 +
   1.445 +        for (unsigned i = 0; i < numParenAlternatives; i++) {
   1.446 +            // Bubble up BOL flags
   1.447 +            if (parenthesesDisjunction->m_alternatives[i]->m_startsWithBOL)
   1.448 +                numBOLAnchoredAlts++;
   1.449 +        }
   1.450 +
   1.451 +        if (numBOLAnchoredAlts) {
   1.452 +            m_alternative->m_containsBOL = true;
   1.453 +            // If all the alternatives in parens start with BOL, then so does this one
   1.454 +            if (numBOLAnchoredAlts == numParenAlternatives)
   1.455 +                m_alternative->m_startsWithBOL = true;
   1.456 +        }
   1.457 +
   1.458 +        lastTerm.parentheses.lastSubpatternId = m_pattern.m_numSubpatterns;
   1.459 +        m_invertParentheticalAssertion = false;
   1.460 +    }
   1.461 +
   1.462 +    void atomBackReference(unsigned subpatternId)
   1.463 +    {
   1.464 +        ASSERT(subpatternId);
   1.465 +        m_pattern.m_containsBackreferences = true;
   1.466 +        m_pattern.m_maxBackReference = std::max(m_pattern.m_maxBackReference, subpatternId);
   1.467 +
   1.468 +        if (subpatternId > m_pattern.m_numSubpatterns) {
   1.469 +            m_alternative->m_terms.append(PatternTerm::ForwardReference());
   1.470 +            return;
   1.471 +        }
   1.472 +
   1.473 +        PatternAlternative* currentAlternative = m_alternative;
   1.474 +        ASSERT(currentAlternative);
   1.475 +
   1.476 +        // Note to self: if we waited until the AST was baked, we could also remove forwards refs
   1.477 +        while ((currentAlternative = currentAlternative->m_parent->m_parent)) {
   1.478 +            PatternTerm& term = currentAlternative->lastTerm();
   1.479 +            ASSERT((term.type == PatternTerm::TypeParenthesesSubpattern) || (term.type == PatternTerm::TypeParentheticalAssertion));
   1.480 +
   1.481 +            if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.capture() && (subpatternId == term.parentheses.subpatternId)) {
   1.482 +                m_alternative->m_terms.append(PatternTerm::ForwardReference());
   1.483 +                return;
   1.484 +            }
   1.485 +        }
   1.486 +
   1.487 +        m_alternative->m_terms.append(PatternTerm(subpatternId));
   1.488 +    }
   1.489 +
   1.490 +    // deep copy the argument disjunction.  If filterStartsWithBOL is true,
   1.491 +    // skip alternatives with m_startsWithBOL set true.
   1.492 +    PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false)
   1.493 +    {
   1.494 +        PatternDisjunction* newDisjunction = 0;
   1.495 +        for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
   1.496 +            PatternAlternative* alternative = disjunction->m_alternatives[alt];
   1.497 +            if (!filterStartsWithBOL || !alternative->m_startsWithBOL) {
   1.498 +                if (!newDisjunction) {
   1.499 +                    newDisjunction = newOrCrash<PatternDisjunction>();
   1.500 +                    newDisjunction->m_parent = disjunction->m_parent;
   1.501 +                }
   1.502 +                PatternAlternative* newAlternative = newDisjunction->addNewAlternative();
   1.503 +                newAlternative->m_terms.reserve(alternative->m_terms.size());
   1.504 +                for (unsigned i = 0; i < alternative->m_terms.size(); ++i)
   1.505 +                    newAlternative->m_terms.append(copyTerm(alternative->m_terms[i], filterStartsWithBOL));
   1.506 +            }
   1.507 +        }
   1.508 +
   1.509 +        if (newDisjunction)
   1.510 +            m_pattern.m_disjunctions.append(newDisjunction);
   1.511 +        return newDisjunction;
   1.512 +    }
   1.513 +
   1.514 +    PatternTerm copyTerm(PatternTerm& term, bool filterStartsWithBOL = false)
   1.515 +    {
   1.516 +        if ((term.type != PatternTerm::TypeParenthesesSubpattern) && (term.type != PatternTerm::TypeParentheticalAssertion))
   1.517 +            return PatternTerm(term);
   1.518 +
   1.519 +        PatternTerm termCopy = term;
   1.520 +        termCopy.parentheses.disjunction = copyDisjunction(termCopy.parentheses.disjunction, filterStartsWithBOL);
   1.521 +        return termCopy;
   1.522 +    }
   1.523 +
   1.524 +    void quantifyAtom(unsigned min, unsigned max, bool greedy)
   1.525 +    {
   1.526 +        ASSERT(min <= max);
   1.527 +        ASSERT(m_alternative->m_terms.size());
   1.528 +
   1.529 +        if (!max) {
   1.530 +            m_alternative->removeLastTerm();
   1.531 +            return;
   1.532 +        }
   1.533 +
   1.534 +        PatternTerm& term = m_alternative->lastTerm();
   1.535 +        ASSERT(term.type > PatternTerm::TypeAssertionWordBoundary);
   1.536 +        ASSERT((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount));
   1.537 +
   1.538 +        if (term.type == PatternTerm::TypeParentheticalAssertion) {
   1.539 +            // If an assertion is quantified with a minimum count of zero, it can simply be removed.
   1.540 +            // This arises from the RepeatMatcher behaviour in the spec. Matching an assertion never
   1.541 +            // results in any input being consumed, however the continuation passed to the assertion
   1.542 +            // (called in steps, 8c and 9 of the RepeatMatcher definition, ES5.1 15.10.2.5) will
   1.543 +            // reject all zero length matches (see step 2.1). A match from the continuation of the
   1.544 +            // expression will still be accepted regardless (via steps 8a and 11) - the upshot of all
   1.545 +            // this is that matches from the assertion are not required, and won't be accepted anyway,
   1.546 +            // so no need to ever run it.
   1.547 +            if (!min)
   1.548 +                m_alternative->removeLastTerm();
   1.549 +            // We never need to run an assertion more than once. Subsequent interations will be run
   1.550 +            // with the same start index (since assertions are non-capturing) and the same captures
   1.551 +            // (per step 4 of RepeatMatcher in ES5.1 15.10.2.5), and as such will always produce the
   1.552 +            // same result and captures. If the first match succeeds then the subsequent (min - 1)
   1.553 +            // matches will too. Any additional optional matches will fail (on the same basis as the
   1.554 +            // minimum zero quantified assertions, above), but this will still result in a match.
   1.555 +            return;
   1.556 +        }
   1.557 +
   1.558 +        if (min == 0)
   1.559 +            term.quantify(max, greedy   ? QuantifierGreedy : QuantifierNonGreedy);
   1.560 +        else if (min == max)
   1.561 +            term.quantify(min, QuantifierFixedCount);
   1.562 +        else {
   1.563 +            term.quantify(min, QuantifierFixedCount);
   1.564 +            m_alternative->m_terms.append(copyTerm(term));
   1.565 +            // NOTE: this term is interesting from an analysis perspective, in that it can be ignored.....
   1.566 +            m_alternative->lastTerm().quantify((max == quantifyInfinite) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy);
   1.567 +            if (m_alternative->lastTerm().type == PatternTerm::TypeParenthesesSubpattern)
   1.568 +                m_alternative->lastTerm().parentheses.isCopy = true;
   1.569 +        }
   1.570 +    }
   1.571 +
   1.572 +    void disjunction()
   1.573 +    {
   1.574 +        m_alternative = m_alternative->m_parent->addNewAlternative();
   1.575 +    }
   1.576 +
   1.577 +    ErrorCode setupAlternativeOffsets(PatternAlternative* alternative, unsigned currentCallFrameSize, unsigned initialInputPosition,
   1.578 +                                      unsigned *callFrameSizeOut)
   1.579 +    {
   1.580 +        /*
   1.581 +         * Attempt detection of over-recursion:
   1.582 +         * "1MB should be enough stack for anyone."
   1.583 +         */
   1.584 +        uint8_t stackDummy_;
   1.585 +        if (m_stackBase - &stackDummy_ > 1024*1024)
   1.586 +            return PatternTooLarge;
   1.587 +
   1.588 +        alternative->m_hasFixedSize = true;
   1.589 +        Checked<unsigned> currentInputPosition = initialInputPosition;
   1.590 +
   1.591 +        for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
   1.592 +            PatternTerm& term = alternative->m_terms[i];
   1.593 +
   1.594 +            switch (term.type) {
   1.595 +            case PatternTerm::TypeAssertionBOL:
   1.596 +            case PatternTerm::TypeAssertionEOL:
   1.597 +            case PatternTerm::TypeAssertionWordBoundary:
   1.598 +                if (Checked<int>(currentInputPosition).safeGet(term.inputPosition))
   1.599 +                    return RuntimeError;
   1.600 +                break;
   1.601 +
   1.602 +            case PatternTerm::TypeBackReference:
   1.603 +                if (Checked<int>(currentInputPosition).safeGet(term.inputPosition))
   1.604 +                    return RuntimeError;
   1.605 +                term.frameLocation = currentCallFrameSize;
   1.606 +                currentCallFrameSize += YarrStackSpaceForBackTrackInfoBackReference;
   1.607 +                alternative->m_hasFixedSize = false;
   1.608 +                break;
   1.609 +
   1.610 +            case PatternTerm::TypeForwardReference:
   1.611 +                break;
   1.612 +
   1.613 +            case PatternTerm::TypePatternCharacter:
   1.614 +                if (Checked<int>(currentInputPosition).safeGet(term.inputPosition))
   1.615 +                    return RuntimeError;
   1.616 +                if (term.quantityType != QuantifierFixedCount) {
   1.617 +                    term.frameLocation = currentCallFrameSize;
   1.618 +                    currentCallFrameSize += YarrStackSpaceForBackTrackInfoPatternCharacter;
   1.619 +                    alternative->m_hasFixedSize = false;
   1.620 +                } else
   1.621 +                    currentInputPosition += term.quantityCount;
   1.622 +                break;
   1.623 +
   1.624 +            case PatternTerm::TypeCharacterClass:
   1.625 +                if (Checked<int>(currentInputPosition).safeGet(term.inputPosition))
   1.626 +                    return RuntimeError;
   1.627 +                if (term.quantityType != QuantifierFixedCount) {
   1.628 +                    term.frameLocation = currentCallFrameSize;
   1.629 +                    currentCallFrameSize += YarrStackSpaceForBackTrackInfoCharacterClass;
   1.630 +                    alternative->m_hasFixedSize = false;
   1.631 +                } else
   1.632 +                    currentInputPosition += term.quantityCount;
   1.633 +                break;
   1.634 +
   1.635 +            case PatternTerm::TypeParenthesesSubpattern:
   1.636 +                // Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own.
   1.637 +                term.frameLocation = currentCallFrameSize;
   1.638 +                unsigned position;
   1.639 +                if (currentInputPosition.safeGet(position))
   1.640 +                    return RuntimeError;
   1.641 +                if (term.quantityCount == 1 && !term.parentheses.isCopy) {
   1.642 +                    if (term.quantityType != QuantifierFixedCount)
   1.643 +                        currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesOnce;
   1.644 +                    if (ErrorCode error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, position, &currentCallFrameSize))
   1.645 +                        return error;
   1.646 +                    // If quantity is fixed, then pre-check its minimum size.
   1.647 +                    if (term.quantityType == QuantifierFixedCount)
   1.648 +                        currentInputPosition += term.parentheses.disjunction->m_minimumSize;
   1.649 +                    if (Checked<int>(currentInputPosition).safeGet(term.inputPosition))
   1.650 +                        return RuntimeError;
   1.651 +                } else if (term.parentheses.isTerminal) {
   1.652 +                    currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesTerminal;
   1.653 +                    if (ErrorCode error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, position, &currentCallFrameSize))
   1.654 +                        return error;
   1.655 +                    if (Checked<int>(currentInputPosition).safeGet(term.inputPosition))
   1.656 +                        return RuntimeError;
   1.657 +                } else {
   1.658 +                    if (Checked<int>(currentInputPosition).safeGet(term.inputPosition))
   1.659 +                        return RuntimeError;
   1.660 +                    unsigned dummy;
   1.661 +                    if (ErrorCode error = setupDisjunctionOffsets(term.parentheses.disjunction, BASE_FRAME_SIZE, position, &dummy))
   1.662 +                        return error;
   1.663 +                    currentCallFrameSize += YarrStackSpaceForBackTrackInfoParentheses;
   1.664 +                }
   1.665 +                // Fixed count of 1 could be accepted, if they have a fixed size *AND* if all alternatives are of the same length.
   1.666 +                alternative->m_hasFixedSize = false;
   1.667 +                break;
   1.668 +
   1.669 +            case PatternTerm::TypeParentheticalAssertion:
   1.670 +                if (Checked<int>(currentInputPosition).safeGet(term.inputPosition))
   1.671 +                    return RuntimeError;
   1.672 +                term.frameLocation = currentCallFrameSize;
   1.673 +                if (ErrorCode error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + YarrStackSpaceForBackTrackInfoParentheticalAssertion, term.inputPosition, &currentCallFrameSize))
   1.674 +                    return error;
   1.675 +                break;
   1.676 +
   1.677 +            case PatternTerm::TypeDotStarEnclosure:
   1.678 +                alternative->m_hasFixedSize = false;
   1.679 +                term.inputPosition = initialInputPosition;
   1.680 +                break;
   1.681 +            }
   1.682 +        }
   1.683 +
   1.684 +        if ((currentInputPosition - initialInputPosition).safeGet(alternative->m_minimumSize))
   1.685 +            return RuntimeError;
   1.686 +        *callFrameSizeOut = currentCallFrameSize;
   1.687 +        return NoError;
   1.688 +    }
   1.689 +
   1.690 +    ErrorCode setupDisjunctionOffsets(PatternDisjunction* disjunction, unsigned initialCallFrameSize, unsigned initialInputPosition, unsigned *maximumCallFrameSizeOut)
   1.691 +    {
   1.692 +        if ((disjunction != m_pattern.m_body) && (disjunction->m_alternatives.size() > 1))
   1.693 +            initialCallFrameSize += YarrStackSpaceForBackTrackInfoAlternative;
   1.694 +
   1.695 +        unsigned minimumInputSize = UINT_MAX;
   1.696 +        unsigned maximumCallFrameSize = 0;
   1.697 +        bool hasFixedSize = true;
   1.698 +
   1.699 +        for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
   1.700 +            PatternAlternative* alternative = disjunction->m_alternatives[alt];
   1.701 +            unsigned currentAlternativeCallFrameSize;
   1.702 +            if (ErrorCode error = setupAlternativeOffsets(alternative, initialCallFrameSize, initialInputPosition, &currentAlternativeCallFrameSize))
   1.703 +                return error;
   1.704 +            minimumInputSize = std::min(minimumInputSize, alternative->m_minimumSize);
   1.705 +            maximumCallFrameSize = std::max(maximumCallFrameSize, currentAlternativeCallFrameSize);
   1.706 +            hasFixedSize &= alternative->m_hasFixedSize;
   1.707 +        }
   1.708 +
   1.709 +        ASSERT(minimumInputSize != UINT_MAX);
   1.710 +        if (minimumInputSize == UINT_MAX)
   1.711 +            return PatternTooLarge;
   1.712 +
   1.713 +        ASSERT(minimumInputSize != UINT_MAX);
   1.714 +        ASSERT(maximumCallFrameSize >= initialCallFrameSize);
   1.715 +
   1.716 +        disjunction->m_hasFixedSize = hasFixedSize;
   1.717 +        disjunction->m_minimumSize = minimumInputSize;
   1.718 +        disjunction->m_callFrameSize = maximumCallFrameSize;
   1.719 +        *maximumCallFrameSizeOut = maximumCallFrameSize;
   1.720 +        return NoError;
   1.721 +    }
   1.722 +
   1.723 +    ErrorCode setupOffsets()
   1.724 +    {
   1.725 +        unsigned dummy;
   1.726 +        return setupDisjunctionOffsets(m_pattern.m_body, BASE_FRAME_SIZE, 0, &dummy);
   1.727 +    }
   1.728 +
   1.729 +    // This optimization identifies sets of parentheses that we will never need to backtrack.
   1.730 +    // In these cases we do not need to store state from prior iterations.
   1.731 +    // We can presently avoid backtracking for:
   1.732 +    //   * where the parens are at the end of the regular expression (last term in any of the
   1.733 +    //     alternatives of the main body disjunction).
   1.734 +    //   * where the parens are non-capturing, and quantified unbounded greedy (*).
   1.735 +    //   * where the parens do not contain any capturing subpatterns.
   1.736 +    void checkForTerminalParentheses()
   1.737 +    {
   1.738 +        // This check is much too crude; should be just checking whether the candidate
   1.739 +        // node contains nested capturing subpatterns, not the whole expression!
   1.740 +        if (m_pattern.m_numSubpatterns)
   1.741 +            return;
   1.742 +
   1.743 +        Vector<PatternAlternative*>& alternatives = m_pattern.m_body->m_alternatives;
   1.744 +        for (size_t i = 0; i < alternatives.size(); ++i) {
   1.745 +            Vector<PatternTerm>& terms = alternatives[i]->m_terms;
   1.746 +            if (terms.size()) {
   1.747 +                PatternTerm& term = terms.last();
   1.748 +                if (term.type == PatternTerm::TypeParenthesesSubpattern
   1.749 +                    && term.quantityType == QuantifierGreedy
   1.750 +                    && term.quantityCount == quantifyInfinite
   1.751 +                    && !term.capture())
   1.752 +                    term.parentheses.isTerminal = true;
   1.753 +            }
   1.754 +        }
   1.755 +    }
   1.756 +
   1.757 +    void optimizeBOL()
   1.758 +    {
   1.759 +        // Look for expressions containing beginning of line (^) anchoring and unroll them.
   1.760 +        // e.g. /^a|^b|c/ becomes /^a|^b|c/ which is executed once followed by /c/ which loops
   1.761 +        // This code relies on the parsing code tagging alternatives with m_containsBOL and
   1.762 +        // m_startsWithBOL and rolling those up to containing alternatives.
   1.763 +        // At this point, this is only valid for non-multiline expressions.
   1.764 +        PatternDisjunction* disjunction = m_pattern.m_body;
   1.765 +
   1.766 +        if (!m_pattern.m_containsBOL || m_pattern.m_multiline)
   1.767 +            return;
   1.768 +
   1.769 +        PatternDisjunction* loopDisjunction = copyDisjunction(disjunction, true);
   1.770 +
   1.771 +        // Set alternatives in disjunction to "onceThrough"
   1.772 +        for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt)
   1.773 +            disjunction->m_alternatives[alt]->setOnceThrough();
   1.774 +
   1.775 +        if (loopDisjunction) {
   1.776 +            // Move alternatives from loopDisjunction to disjunction
   1.777 +            for (unsigned alt = 0; alt < loopDisjunction->m_alternatives.size(); ++alt)
   1.778 +                disjunction->m_alternatives.append(loopDisjunction->m_alternatives[alt]);
   1.779 +
   1.780 +            loopDisjunction->m_alternatives.clear();
   1.781 +        }
   1.782 +    }
   1.783 +
   1.784 +    bool containsCapturingTerms(PatternAlternative* alternative, size_t firstTermIndex, size_t lastTermIndex)
   1.785 +    {
   1.786 +        Vector<PatternTerm>& terms = alternative->m_terms;
   1.787 +
   1.788 +        for (size_t termIndex = firstTermIndex; termIndex <= lastTermIndex; ++termIndex) {
   1.789 +            PatternTerm& term = terms[termIndex];
   1.790 +
   1.791 +            if (term.m_capture)
   1.792 +                return true;
   1.793 +
   1.794 +            if (term.type == PatternTerm::TypeParenthesesSubpattern) {
   1.795 +                PatternDisjunction* nestedDisjunction = term.parentheses.disjunction;
   1.796 +                for (unsigned alt = 0; alt < nestedDisjunction->m_alternatives.size(); ++alt) {
   1.797 +                    PatternAlternative *pattern = nestedDisjunction->m_alternatives[alt];
   1.798 +                    if (pattern->m_terms.size() == 0)
   1.799 +                        continue;
   1.800 +                    if (containsCapturingTerms(pattern, 0, pattern->m_terms.size() - 1))
   1.801 +                        return true;
   1.802 +                }
   1.803 +            }
   1.804 +        }
   1.805 +
   1.806 +        return false;
   1.807 +    }
   1.808 +
   1.809 +    // This optimization identifies alternatives in the form of
   1.810 +    // [^].*[?]<expression>.*[$] for expressions that don't have any
   1.811 +    // capturing terms. The alternative is changed to <expression>
   1.812 +    // followed by processing of the dot stars to find and adjust the
   1.813 +    // beginning and the end of the match.
   1.814 +    void optimizeDotStarWrappedExpressions()
   1.815 +    {
   1.816 +        Vector<PatternAlternative*>& alternatives = m_pattern.m_body->m_alternatives;
   1.817 +        if (alternatives.size() != 1)
   1.818 +            return;
   1.819 +
   1.820 +        PatternAlternative* alternative = alternatives[0];
   1.821 +        Vector<PatternTerm>& terms = alternative->m_terms;
   1.822 +        if (terms.size() >= 3) {
   1.823 +            bool startsWithBOL = false;
   1.824 +            bool endsWithEOL = false;
   1.825 +            size_t termIndex, firstExpressionTerm, lastExpressionTerm;
   1.826 +
   1.827 +            termIndex = 0;
   1.828 +            if (terms[termIndex].type == PatternTerm::TypeAssertionBOL) {
   1.829 +                startsWithBOL = true;
   1.830 +                ++termIndex;
   1.831 +            }
   1.832 +
   1.833 +            PatternTerm& firstNonAnchorTerm = terms[termIndex];
   1.834 +            if ((firstNonAnchorTerm.type != PatternTerm::TypeCharacterClass) || (firstNonAnchorTerm.characterClass != m_pattern.newlineCharacterClass()) || !((firstNonAnchorTerm.quantityType == QuantifierGreedy) || (firstNonAnchorTerm.quantityType == QuantifierNonGreedy)))
   1.835 +                return;
   1.836 +
   1.837 +            firstExpressionTerm = termIndex + 1;
   1.838 +
   1.839 +            termIndex = terms.size() - 1;
   1.840 +            if (terms[termIndex].type == PatternTerm::TypeAssertionEOL) {
   1.841 +                endsWithEOL = true;
   1.842 +                --termIndex;
   1.843 +            }
   1.844 +
   1.845 +            PatternTerm& lastNonAnchorTerm = terms[termIndex];
   1.846 +            if ((lastNonAnchorTerm.type != PatternTerm::TypeCharacterClass) || (lastNonAnchorTerm.characterClass != m_pattern.newlineCharacterClass()) || (lastNonAnchorTerm.quantityType != QuantifierGreedy))
   1.847 +                return;
   1.848 +
   1.849 +            lastExpressionTerm = termIndex - 1;
   1.850 +
   1.851 +            if (firstExpressionTerm > lastExpressionTerm)
   1.852 +                return;
   1.853 +
   1.854 +            if (!containsCapturingTerms(alternative, firstExpressionTerm, lastExpressionTerm)) {
   1.855 +                for (termIndex = terms.size() - 1; termIndex > lastExpressionTerm; --termIndex)
   1.856 +                    terms.remove(termIndex);
   1.857 +
   1.858 +                for (termIndex = firstExpressionTerm; termIndex > 0; --termIndex)
   1.859 +                    terms.remove(termIndex - 1);
   1.860 +
   1.861 +                terms.append(PatternTerm(startsWithBOL, endsWithEOL));
   1.862 +
   1.863 +                m_pattern.m_containsBOL = false;
   1.864 +            }
   1.865 +        }
   1.866 +    }
   1.867 +
   1.868 +    void setStackBase(uint8_t *stackBase) {
   1.869 +        m_stackBase = stackBase;
   1.870 +    }
   1.871 +
   1.872 +private:
   1.873 +    YarrPattern& m_pattern;
   1.874 +    uint8_t * m_stackBase;
   1.875 +    PatternAlternative* m_alternative;
   1.876 +    CharacterClassConstructor m_characterClassConstructor;
   1.877 +    bool m_invertCharacterClass;
   1.878 +    bool m_invertParentheticalAssertion;
   1.879 +};
   1.880 +
   1.881 +ErrorCode YarrPattern::compile(const String& patternString)
   1.882 +{
   1.883 +    YarrPatternConstructor constructor(*this);
   1.884 +
   1.885 +    if (ErrorCode error = parse(constructor, patternString))
   1.886 +        return error;
   1.887 +
   1.888 +    // If the pattern contains illegal backreferences reset & reparse.
   1.889 +    // Quoting Netscape's "What's new in JavaScript 1.2",
   1.890 +    //      "Note: if the number of left parentheses is less than the number specified
   1.891 +    //       in \#, the \# is taken as an octal escape as described in the next row."
   1.892 +    if (containsIllegalBackReference()) {
   1.893 +        unsigned numSubpatterns = m_numSubpatterns;
   1.894 +
   1.895 +        constructor.reset();
   1.896 +#if !ASSERT_DISABLED
   1.897 +        ErrorCode error =
   1.898 +#endif
   1.899 +            parse(constructor, patternString, numSubpatterns);
   1.900 +
   1.901 +        ASSERT(!error);
   1.902 +        ASSERT(numSubpatterns == m_numSubpatterns);
   1.903 +    }
   1.904 +
   1.905 +    uint8_t stackDummy_;
   1.906 +    constructor.setStackBase(&stackDummy_);
   1.907 +
   1.908 +    constructor.checkForTerminalParentheses();
   1.909 +    constructor.optimizeDotStarWrappedExpressions();
   1.910 +    constructor.optimizeBOL();
   1.911 +
   1.912 +    if (ErrorCode error = constructor.setupOffsets())
   1.913 +        return error;
   1.914 +
   1.915 +    return NoError;
   1.916 +}
   1.917 +
   1.918 +YarrPattern::YarrPattern(const String& pattern, bool ignoreCase, bool multiline, ErrorCode* error)
   1.919 +    : m_ignoreCase(ignoreCase)
   1.920 +    , m_multiline(multiline)
   1.921 +    , m_containsBackreferences(false)
   1.922 +    , m_containsBOL(false)
   1.923 +    , m_numSubpatterns(0)
   1.924 +    , m_maxBackReference(0)
   1.925 +    , newlineCached(0)
   1.926 +    , digitsCached(0)
   1.927 +    , spacesCached(0)
   1.928 +    , wordcharCached(0)
   1.929 +    , nondigitsCached(0)
   1.930 +    , nonspacesCached(0)
   1.931 +    , nonwordcharCached(0)
   1.932 +{
   1.933 +    *error = compile(pattern);
   1.934 +}
   1.935 +
   1.936 +} }

mercurial