1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/js/src/yarr/YarrInterpreter.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1994 @@ 1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- 1.5 + * vim: set ts=8 sts=4 et sw=4 tw=99: 1.6 + * 1.7 + * Copyright (C) 2009 Apple Inc. All rights reserved. 1.8 + * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged 1.9 + * 1.10 + * Redistribution and use in source and binary forms, with or without 1.11 + * modification, are permitted provided that the following conditions 1.12 + * are met: 1.13 + * 1. Redistributions of source code must retain the above copyright 1.14 + * notice, this list of conditions and the following disclaimer. 1.15 + * 2. Redistributions in binary form must reproduce the above copyright 1.16 + * notice, this list of conditions and the following disclaimer in the 1.17 + * documentation and/or other materials provided with the distribution. 1.18 + * 1.19 + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 1.20 + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1.21 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 1.22 + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 1.23 + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 1.24 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 1.25 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 1.26 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 1.27 + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 1.28 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1.29 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1.30 + */ 1.31 + 1.32 +#include "yarr/YarrInterpreter.h" 1.33 + 1.34 +#include "jscntxt.h" 1.35 + 1.36 +#include "yarr/Yarr.h" 1.37 +#include "yarr/YarrCanonicalizeUCS2.h" 1.38 +#include "yarr/BumpPointerAllocator.h" 1.39 + 1.40 +using namespace WTF; 1.41 + 1.42 +namespace JSC { namespace Yarr { 1.43 + 1.44 +template<typename CharType> 1.45 +class Interpreter { 1.46 +public: 1.47 + struct ParenthesesDisjunctionContext; 1.48 + 1.49 + struct BackTrackInfoPatternCharacter { 1.50 + uintptr_t matchAmount; 1.51 + }; 1.52 + struct BackTrackInfoCharacterClass { 1.53 + uintptr_t matchAmount; 1.54 + }; 1.55 + struct BackTrackInfoBackReference { 1.56 + uintptr_t begin; // Not really needed for greedy quantifiers. 1.57 + uintptr_t matchAmount; // Not really needed for fixed quantifiers. 1.58 + }; 1.59 + struct BackTrackInfoAlternative { 1.60 + uintptr_t offset; 1.61 + }; 1.62 + struct BackTrackInfoParentheticalAssertion { 1.63 + uintptr_t begin; 1.64 + }; 1.65 + struct BackTrackInfoParenthesesOnce { 1.66 + uintptr_t begin; 1.67 + }; 1.68 + struct BackTrackInfoParenthesesTerminal { 1.69 + uintptr_t begin; 1.70 + }; 1.71 + struct BackTrackInfoParentheses { 1.72 + uintptr_t matchAmount; 1.73 + ParenthesesDisjunctionContext* lastContext; 1.74 + }; 1.75 + 1.76 + static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context) 1.77 + { 1.78 + context->next = backTrack->lastContext; 1.79 + backTrack->lastContext = context; 1.80 + ++backTrack->matchAmount; 1.81 + } 1.82 + 1.83 + static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack) 1.84 + { 1.85 + ASSERT(backTrack->matchAmount); 1.86 + ASSERT(backTrack->lastContext); 1.87 + backTrack->lastContext = backTrack->lastContext->next; 1.88 + --backTrack->matchAmount; 1.89 + } 1.90 + 1.91 + struct DisjunctionContext 1.92 + { 1.93 + DisjunctionContext() 1.94 + : term(0) 1.95 + { 1.96 + } 1.97 + 1.98 + void* operator new(size_t, void* where) 1.99 + { 1.100 + return where; 1.101 + } 1.102 + 1.103 + int term; 1.104 + unsigned matchBegin; 1.105 + unsigned matchEnd; 1.106 + uintptr_t frame[1]; 1.107 + }; 1.108 + 1.109 + DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction) 1.110 + { 1.111 + size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t); 1.112 + allocatorPool = allocatorPool->ensureCapacity(size); 1.113 + if (!allocatorPool) 1.114 + CRASH(); 1.115 + return new (allocatorPool->alloc(size)) DisjunctionContext(); 1.116 + } 1.117 + 1.118 + void freeDisjunctionContext(DisjunctionContext* context) 1.119 + { 1.120 + allocatorPool = allocatorPool->dealloc(context); 1.121 + } 1.122 + 1.123 + struct ParenthesesDisjunctionContext 1.124 + { 1.125 + ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term) 1.126 + : next(0) 1.127 + { 1.128 + unsigned firstSubpatternId = term.atom.subpatternId; 1.129 + unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns; 1.130 + 1.131 + for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) { 1.132 + subpatternBackup[i] = output[(firstSubpatternId << 1) + i]; 1.133 + output[(firstSubpatternId << 1) + i] = offsetNoMatch; 1.134 + } 1.135 + 1.136 + new (getDisjunctionContext(term)) DisjunctionContext(); 1.137 + } 1.138 + 1.139 + void* operator new(size_t, void* where) 1.140 + { 1.141 + return where; 1.142 + } 1.143 + 1.144 + void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns) 1.145 + { 1.146 + for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) 1.147 + output[(firstSubpatternId << 1) + i] = subpatternBackup[i]; 1.148 + } 1.149 + 1.150 + DisjunctionContext* getDisjunctionContext(ByteTerm& term) 1.151 + { 1.152 + return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1])); 1.153 + } 1.154 + 1.155 + ParenthesesDisjunctionContext* next; 1.156 + unsigned subpatternBackup[1]; 1.157 + }; 1.158 + 1.159 + ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term) 1.160 + { 1.161 + size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t); 1.162 + size = JS_ROUNDUP(size, JS_ALIGNMENT_OF(ParenthesesDisjunctionContext)); 1.163 + allocatorPool = allocatorPool->ensureCapacity(size); 1.164 + if (!allocatorPool) 1.165 + CRASH(); 1.166 + return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term); 1.167 + } 1.168 + 1.169 + void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context) 1.170 + { 1.171 + allocatorPool = allocatorPool->dealloc(context); 1.172 + } 1.173 + 1.174 + class InputStream { 1.175 + public: 1.176 + InputStream(const CharType* input, unsigned start, unsigned length) 1.177 + : input(input) 1.178 + , pos(start) 1.179 + , length(length) 1.180 + { 1.181 + } 1.182 + 1.183 + void next() 1.184 + { 1.185 + ++pos; 1.186 + } 1.187 + 1.188 + void rewind(unsigned amount) 1.189 + { 1.190 + ASSERT(pos >= amount); 1.191 + pos -= amount; 1.192 + } 1.193 + 1.194 + int read() 1.195 + { 1.196 + ASSERT(pos < length); 1.197 + if (pos < length) 1.198 + return input[pos]; 1.199 + return -1; 1.200 + } 1.201 + 1.202 + int readPair() 1.203 + { 1.204 + ASSERT(pos + 1 < length); 1.205 + return input[pos] | input[pos + 1] << 16; 1.206 + } 1.207 + 1.208 + int readChecked(unsigned negativePositionOffest) 1.209 + { 1.210 + if (pos < negativePositionOffest) 1.211 + CRASH(); 1.212 + unsigned p = pos - negativePositionOffest; 1.213 + ASSERT(p < length); 1.214 + return input[p]; 1.215 + } 1.216 + 1.217 + int reread(unsigned from) 1.218 + { 1.219 + ASSERT(from < length); 1.220 + return input[from]; 1.221 + } 1.222 + 1.223 + int prev() 1.224 + { 1.225 + ASSERT(!(pos > length)); 1.226 + if (pos && length) 1.227 + return input[pos - 1]; 1.228 + return -1; 1.229 + } 1.230 + 1.231 + unsigned getPos() 1.232 + { 1.233 + return pos; 1.234 + } 1.235 + 1.236 + void setPos(unsigned p) 1.237 + { 1.238 + pos = p; 1.239 + } 1.240 + 1.241 + bool atStart() 1.242 + { 1.243 + return pos == 0; 1.244 + } 1.245 + 1.246 + bool atEnd() 1.247 + { 1.248 + return pos == length; 1.249 + } 1.250 + 1.251 + unsigned end() 1.252 + { 1.253 + return length; 1.254 + } 1.255 + 1.256 + bool checkInput(unsigned count) 1.257 + { 1.258 + if (((pos + count) <= length) && ((pos + count) >= pos)) { 1.259 + pos += count; 1.260 + return true; 1.261 + } 1.262 + return false; 1.263 + } 1.264 + 1.265 + void uncheckInput(unsigned count) 1.266 + { 1.267 + if (pos < count) 1.268 + CRASH(); 1.269 + pos -= count; 1.270 + } 1.271 + 1.272 + bool atStart(unsigned negativePositionOffest) 1.273 + { 1.274 + return pos == negativePositionOffest; 1.275 + } 1.276 + 1.277 + bool atEnd(unsigned negativePositionOffest) 1.278 + { 1.279 + if (pos < negativePositionOffest) 1.280 + CRASH(); 1.281 + return (pos - negativePositionOffest) == length; 1.282 + } 1.283 + 1.284 + bool isAvailableInput(unsigned offset) 1.285 + { 1.286 + return (((pos + offset) <= length) && ((pos + offset) >= pos)); 1.287 + } 1.288 + 1.289 + private: 1.290 + const CharType* input; 1.291 + unsigned pos; 1.292 + unsigned length; 1.293 + }; 1.294 + 1.295 + bool testCharacterClass(CharacterClass* characterClass, int ch) 1.296 + { 1.297 + if (ch & 0xFF80) { 1.298 + for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i) 1.299 + if (ch == characterClass->m_matchesUnicode[i]) 1.300 + return true; 1.301 + for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i) 1.302 + if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end)) 1.303 + return true; 1.304 + } else { 1.305 + for (unsigned i = 0; i < characterClass->m_matches.size(); ++i) 1.306 + if (ch == characterClass->m_matches[i]) 1.307 + return true; 1.308 + for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i) 1.309 + if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end)) 1.310 + return true; 1.311 + } 1.312 + 1.313 + return false; 1.314 + } 1.315 + 1.316 + bool checkCharacter(int testChar, unsigned negativeInputOffset) 1.317 + { 1.318 + return testChar == input.readChecked(negativeInputOffset); 1.319 + } 1.320 + 1.321 + bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset) 1.322 + { 1.323 + int ch = input.readChecked(negativeInputOffset); 1.324 + return (loChar == ch) || (hiChar == ch); 1.325 + } 1.326 + 1.327 + bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset) 1.328 + { 1.329 + bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset)); 1.330 + return invert ? !match : match; 1.331 + } 1.332 + 1.333 + bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset) 1.334 + { 1.335 + unsigned matchSize = (unsigned)(matchEnd - matchBegin); 1.336 + 1.337 + if (!input.checkInput(matchSize)) 1.338 + return false; 1.339 + 1.340 + if (pattern->m_ignoreCase) { 1.341 + for (unsigned i = 0; i < matchSize; ++i) { 1.342 + int oldCh = input.reread(matchBegin + i); 1.343 + int ch = input.readChecked(negativeInputOffset + matchSize - i); 1.344 + 1.345 + if (oldCh == ch) 1.346 + continue; 1.347 + 1.348 + // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that 1.349 + // unicode values are never allowed to match against ascii ones. 1.350 + if (isASCII(oldCh) || isASCII(ch)) { 1.351 + if (toASCIIUpper(oldCh) == toASCIIUpper(ch)) 1.352 + continue; 1.353 + } else if (areCanonicallyEquivalent(oldCh, ch)) 1.354 + continue; 1.355 + 1.356 + input.uncheckInput(matchSize); 1.357 + return false; 1.358 + } 1.359 + } else { 1.360 + for (unsigned i = 0; i < matchSize; ++i) { 1.361 + if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) { 1.362 + input.uncheckInput(matchSize); 1.363 + return false; 1.364 + } 1.365 + } 1.366 + } 1.367 + 1.368 + return true; 1.369 + } 1.370 + 1.371 + bool matchAssertionBOL(ByteTerm& term) 1.372 + { 1.373 + return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1))); 1.374 + } 1.375 + 1.376 + bool matchAssertionEOL(ByteTerm& term) 1.377 + { 1.378 + if (term.inputPosition) 1.379 + return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition))); 1.380 + 1.381 + return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read())); 1.382 + } 1.383 + 1.384 + bool matchAssertionWordBoundary(ByteTerm& term) 1.385 + { 1.386 + bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1)); 1.387 + bool readIsWordchar; 1.388 + if (term.inputPosition) 1.389 + readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition)); 1.390 + else 1.391 + readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read()); 1.392 + 1.393 + bool wordBoundary = prevIsWordchar != readIsWordchar; 1.394 + return term.invert() ? !wordBoundary : wordBoundary; 1.395 + } 1.396 + 1.397 + bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context) 1.398 + { 1.399 + BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); 1.400 + 1.401 + switch (term.atom.quantityType) { 1.402 + case QuantifierFixedCount: 1.403 + break; 1.404 + 1.405 + case QuantifierGreedy: 1.406 + if (backTrack->matchAmount) { 1.407 + --backTrack->matchAmount; 1.408 + input.uncheckInput(1); 1.409 + return true; 1.410 + } 1.411 + break; 1.412 + 1.413 + case QuantifierNonGreedy: 1.414 + if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { 1.415 + ++backTrack->matchAmount; 1.416 + if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1)) 1.417 + return true; 1.418 + } 1.419 + input.uncheckInput(backTrack->matchAmount); 1.420 + break; 1.421 + } 1.422 + 1.423 + return false; 1.424 + } 1.425 + 1.426 + bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context) 1.427 + { 1.428 + BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); 1.429 + 1.430 + switch (term.atom.quantityType) { 1.431 + case QuantifierFixedCount: 1.432 + break; 1.433 + 1.434 + case QuantifierGreedy: 1.435 + if (backTrack->matchAmount) { 1.436 + --backTrack->matchAmount; 1.437 + input.uncheckInput(1); 1.438 + return true; 1.439 + } 1.440 + break; 1.441 + 1.442 + case QuantifierNonGreedy: 1.443 + if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { 1.444 + ++backTrack->matchAmount; 1.445 + if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1)) 1.446 + return true; 1.447 + } 1.448 + input.uncheckInput(backTrack->matchAmount); 1.449 + break; 1.450 + } 1.451 + 1.452 + return false; 1.453 + } 1.454 + 1.455 + bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context) 1.456 + { 1.457 + ASSERT(term.type == ByteTerm::TypeCharacterClass); 1.458 + BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); 1.459 + 1.460 + switch (term.atom.quantityType) { 1.461 + case QuantifierFixedCount: { 1.462 + for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { 1.463 + if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount)) 1.464 + return false; 1.465 + } 1.466 + return true; 1.467 + } 1.468 + 1.469 + case QuantifierGreedy: { 1.470 + unsigned matchAmount = 0; 1.471 + while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) { 1.472 + if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) { 1.473 + input.uncheckInput(1); 1.474 + break; 1.475 + } 1.476 + ++matchAmount; 1.477 + } 1.478 + backTrack->matchAmount = matchAmount; 1.479 + 1.480 + return true; 1.481 + } 1.482 + 1.483 + case QuantifierNonGreedy: 1.484 + backTrack->matchAmount = 0; 1.485 + return true; 1.486 + } 1.487 + 1.488 + ASSERT_NOT_REACHED(); 1.489 + return false; 1.490 + } 1.491 + 1.492 + bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context) 1.493 + { 1.494 + ASSERT(term.type == ByteTerm::TypeCharacterClass); 1.495 + BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); 1.496 + 1.497 + switch (term.atom.quantityType) { 1.498 + case QuantifierFixedCount: 1.499 + break; 1.500 + 1.501 + case QuantifierGreedy: 1.502 + if (backTrack->matchAmount) { 1.503 + --backTrack->matchAmount; 1.504 + input.uncheckInput(1); 1.505 + return true; 1.506 + } 1.507 + break; 1.508 + 1.509 + case QuantifierNonGreedy: 1.510 + if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { 1.511 + ++backTrack->matchAmount; 1.512 + if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) 1.513 + return true; 1.514 + } 1.515 + input.uncheckInput(backTrack->matchAmount); 1.516 + break; 1.517 + } 1.518 + 1.519 + return false; 1.520 + } 1.521 + 1.522 + bool matchBackReference(ByteTerm& term, DisjunctionContext* context) 1.523 + { 1.524 + ASSERT(term.type == ByteTerm::TypeBackReference); 1.525 + BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation); 1.526 + 1.527 + unsigned matchBegin = output[(term.atom.subpatternId << 1)]; 1.528 + unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1]; 1.529 + 1.530 + // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that. 1.531 + // In this case the result of match is empty string like when it references to a parentheses with zero-width match. 1.532 + // Eg.: /(a\1)/ 1.533 + if (matchEnd == offsetNoMatch) 1.534 + return true; 1.535 + 1.536 + if (matchBegin == offsetNoMatch) 1.537 + return true; 1.538 + 1.539 + ASSERT(matchBegin <= matchEnd); 1.540 + 1.541 + if (matchBegin == matchEnd) 1.542 + return true; 1.543 + 1.544 + switch (term.atom.quantityType) { 1.545 + case QuantifierFixedCount: { 1.546 + backTrack->begin = input.getPos(); 1.547 + for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { 1.548 + if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { 1.549 + input.setPos(backTrack->begin); 1.550 + return false; 1.551 + } 1.552 + } 1.553 + return true; 1.554 + } 1.555 + 1.556 + case QuantifierGreedy: { 1.557 + unsigned matchAmount = 0; 1.558 + while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) 1.559 + ++matchAmount; 1.560 + backTrack->matchAmount = matchAmount; 1.561 + return true; 1.562 + } 1.563 + 1.564 + case QuantifierNonGreedy: 1.565 + backTrack->begin = input.getPos(); 1.566 + backTrack->matchAmount = 0; 1.567 + return true; 1.568 + } 1.569 + 1.570 + ASSERT_NOT_REACHED(); 1.571 + return false; 1.572 + } 1.573 + 1.574 + bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context) 1.575 + { 1.576 + ASSERT(term.type == ByteTerm::TypeBackReference); 1.577 + BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation); 1.578 + 1.579 + unsigned matchBegin = output[(term.atom.subpatternId << 1)]; 1.580 + unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1]; 1.581 + 1.582 + if (matchBegin == offsetNoMatch) 1.583 + return false; 1.584 + 1.585 + ASSERT(matchBegin <= matchEnd); 1.586 + 1.587 + if (matchBegin == matchEnd) 1.588 + return false; 1.589 + 1.590 + switch (term.atom.quantityType) { 1.591 + case QuantifierFixedCount: 1.592 + // for quantityCount == 1, could rewind. 1.593 + input.setPos(backTrack->begin); 1.594 + break; 1.595 + 1.596 + case QuantifierGreedy: 1.597 + if (backTrack->matchAmount) { 1.598 + --backTrack->matchAmount; 1.599 + input.rewind(matchEnd - matchBegin); 1.600 + return true; 1.601 + } 1.602 + break; 1.603 + 1.604 + case QuantifierNonGreedy: 1.605 + if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { 1.606 + ++backTrack->matchAmount; 1.607 + return true; 1.608 + } 1.609 + input.setPos(backTrack->begin); 1.610 + break; 1.611 + } 1.612 + 1.613 + return false; 1.614 + } 1.615 + 1.616 + void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context) 1.617 + { 1.618 + if (term.capture()) { 1.619 + unsigned subpatternId = term.atom.subpatternId; 1.620 + output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition; 1.621 + output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition; 1.622 + } 1.623 + } 1.624 + void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context) 1.625 + { 1.626 + unsigned firstSubpatternId = term.atom.subpatternId; 1.627 + unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns; 1.628 + context->restoreOutput(output, firstSubpatternId, count); 1.629 + } 1.630 + JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack) 1.631 + { 1.632 + while (backTrack->matchAmount) { 1.633 + ParenthesesDisjunctionContext* context = backTrack->lastContext; 1.634 + 1.635 + JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true); 1.636 + if (result == JSRegExpMatch) 1.637 + return JSRegExpMatch; 1.638 + 1.639 + resetMatches(term, context); 1.640 + popParenthesesDisjunctionContext(backTrack); 1.641 + freeParenthesesDisjunctionContext(context); 1.642 + 1.643 + if (result != JSRegExpNoMatch) 1.644 + return result; 1.645 + } 1.646 + 1.647 + return JSRegExpNoMatch; 1.648 + } 1.649 + 1.650 + bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) 1.651 + { 1.652 + ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); 1.653 + ASSERT(term.atom.quantityCount == 1); 1.654 + 1.655 + BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); 1.656 + 1.657 + switch (term.atom.quantityType) { 1.658 + case QuantifierGreedy: { 1.659 + // set this speculatively; if we get to the parens end this will be true. 1.660 + backTrack->begin = input.getPos(); 1.661 + break; 1.662 + } 1.663 + case QuantifierNonGreedy: { 1.664 + backTrack->begin = notFound; 1.665 + context->term += term.atom.parenthesesWidth; 1.666 + return true; 1.667 + } 1.668 + case QuantifierFixedCount: 1.669 + break; 1.670 + } 1.671 + 1.672 + if (term.capture()) { 1.673 + unsigned subpatternId = term.atom.subpatternId; 1.674 + output[(subpatternId << 1)] = input.getPos() - term.inputPosition; 1.675 + } 1.676 + 1.677 + return true; 1.678 + } 1.679 + 1.680 + bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context) 1.681 + { 1.682 + ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); 1.683 + ASSERT(term.atom.quantityCount == 1); 1.684 + 1.685 + if (term.capture()) { 1.686 + unsigned subpatternId = term.atom.subpatternId; 1.687 + output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition; 1.688 + } 1.689 + 1.690 + if (term.atom.quantityType == QuantifierFixedCount) 1.691 + return true; 1.692 + 1.693 + BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); 1.694 + return backTrack->begin != input.getPos(); 1.695 + } 1.696 + 1.697 + bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) 1.698 + { 1.699 + ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); 1.700 + ASSERT(term.atom.quantityCount == 1); 1.701 + 1.702 + BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); 1.703 + 1.704 + if (term.capture()) { 1.705 + unsigned subpatternId = term.atom.subpatternId; 1.706 + output[(subpatternId << 1)] = offsetNoMatch; 1.707 + output[(subpatternId << 1) + 1] = offsetNoMatch; 1.708 + } 1.709 + 1.710 + switch (term.atom.quantityType) { 1.711 + case QuantifierGreedy: 1.712 + // if we backtrack to this point, there is another chance - try matching nothing. 1.713 + ASSERT(backTrack->begin != notFound); 1.714 + backTrack->begin = notFound; 1.715 + context->term += term.atom.parenthesesWidth; 1.716 + return true; 1.717 + case QuantifierNonGreedy: 1.718 + ASSERT(backTrack->begin != notFound); 1.719 + case QuantifierFixedCount: 1.720 + break; 1.721 + } 1.722 + 1.723 + return false; 1.724 + } 1.725 + 1.726 + bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context) 1.727 + { 1.728 + ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); 1.729 + ASSERT(term.atom.quantityCount == 1); 1.730 + 1.731 + BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); 1.732 + 1.733 + switch (term.atom.quantityType) { 1.734 + case QuantifierGreedy: 1.735 + if (backTrack->begin == notFound) { 1.736 + context->term -= term.atom.parenthesesWidth; 1.737 + return false; 1.738 + } 1.739 + case QuantifierNonGreedy: 1.740 + if (backTrack->begin == notFound) { 1.741 + backTrack->begin = input.getPos(); 1.742 + if (term.capture()) { 1.743 + // Technically this access to inputPosition should be accessing the begin term's 1.744 + // inputPosition, but for repeats other than fixed these values should be 1.745 + // the same anyway! (We don't pre-check for greedy or non-greedy matches.) 1.746 + ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin); 1.747 + 1.748 + // Disabled, see bug 808478 1.749 +#if 0 1.750 + ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition); 1.751 +#endif 1.752 + unsigned subpatternId = term.atom.subpatternId; 1.753 + output[subpatternId << 1] = input.getPos() + term.inputPosition; 1.754 + } 1.755 + context->term -= term.atom.parenthesesWidth; 1.756 + return true; 1.757 + } 1.758 + case QuantifierFixedCount: 1.759 + break; 1.760 + } 1.761 + 1.762 + return false; 1.763 + } 1.764 + 1.765 + bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context) 1.766 + { 1.767 + ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); 1.768 + ASSERT(term.atom.quantityType == QuantifierGreedy); 1.769 + ASSERT(term.atom.quantityCount == quantifyInfinite); 1.770 + ASSERT(!term.capture()); 1.771 + 1.772 + BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation); 1.773 + backTrack->begin = input.getPos(); 1.774 + return true; 1.775 + } 1.776 + 1.777 + bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context) 1.778 + { 1.779 + ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd); 1.780 + 1.781 + BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation); 1.782 + // Empty match is a failed match. 1.783 + if (backTrack->begin == input.getPos()) 1.784 + return false; 1.785 + 1.786 + // Successful match! Okay, what's next? - loop around and try to match moar! 1.787 + context->term -= (term.atom.parenthesesWidth + 1); 1.788 + return true; 1.789 + } 1.790 + 1.791 + bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context) 1.792 + { 1.793 + ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); 1.794 + ASSERT(term.atom.quantityType == QuantifierGreedy); 1.795 + ASSERT(term.atom.quantityCount == quantifyInfinite); 1.796 + ASSERT(!term.capture()); 1.797 + 1.798 + // If we backtrack to this point, we have failed to match this iteration of the parens. 1.799 + // Since this is greedy / zero minimum a failed is also accepted as a match! 1.800 + context->term += term.atom.parenthesesWidth; 1.801 + return true; 1.802 + } 1.803 + 1.804 + bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*) 1.805 + { 1.806 + // 'Terminal' parentheses are at the end of the regex, and as such a match past end 1.807 + // should always be returned as a successful match - we should never backtrack to here. 1.808 + ASSERT_NOT_REACHED(); 1.809 + return false; 1.810 + } 1.811 + 1.812 + bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) 1.813 + { 1.814 + ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); 1.815 + ASSERT(term.atom.quantityCount == 1); 1.816 + 1.817 + BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); 1.818 + 1.819 + backTrack->begin = input.getPos(); 1.820 + return true; 1.821 + } 1.822 + 1.823 + bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) 1.824 + { 1.825 + ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); 1.826 + ASSERT(term.atom.quantityCount == 1); 1.827 + 1.828 + BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); 1.829 + 1.830 + input.setPos(backTrack->begin); 1.831 + 1.832 + // We've reached the end of the parens; if they are inverted, this is failure. 1.833 + if (term.invert()) { 1.834 + context->term -= term.atom.parenthesesWidth; 1.835 + return false; 1.836 + } 1.837 + 1.838 + return true; 1.839 + } 1.840 + 1.841 + bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) 1.842 + { 1.843 + ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); 1.844 + ASSERT(term.atom.quantityCount == 1); 1.845 + 1.846 + // We've failed to match parens; if they are inverted, this is win! 1.847 + if (term.invert()) { 1.848 + context->term += term.atom.parenthesesWidth; 1.849 + return true; 1.850 + } 1.851 + 1.852 + return false; 1.853 + } 1.854 + 1.855 + bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) 1.856 + { 1.857 + ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); 1.858 + ASSERT(term.atom.quantityCount == 1); 1.859 + 1.860 + BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); 1.861 + 1.862 + input.setPos(backTrack->begin); 1.863 + 1.864 + context->term -= term.atom.parenthesesWidth; 1.865 + return false; 1.866 + } 1.867 + 1.868 + JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context) 1.869 + { 1.870 + ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); 1.871 + 1.872 + BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation); 1.873 + ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction; 1.874 + 1.875 + backTrack->matchAmount = 0; 1.876 + backTrack->lastContext = 0; 1.877 + 1.878 + switch (term.atom.quantityType) { 1.879 + case QuantifierFixedCount: { 1.880 + // While we haven't yet reached our fixed limit, 1.881 + while (backTrack->matchAmount < term.atom.quantityCount) { 1.882 + // Try to do a match, and it it succeeds, add it to the list. 1.883 + ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); 1.884 + JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); 1.885 + if (result == JSRegExpMatch) 1.886 + appendParenthesesDisjunctionContext(backTrack, context); 1.887 + else { 1.888 + // The match failed; try to find an alternate point to carry on from. 1.889 + resetMatches(term, context); 1.890 + freeParenthesesDisjunctionContext(context); 1.891 + 1.892 + if (result != JSRegExpNoMatch) 1.893 + return result; 1.894 + JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack); 1.895 + if (backtrackResult != JSRegExpMatch) 1.896 + return backtrackResult; 1.897 + } 1.898 + } 1.899 + 1.900 + ASSERT(backTrack->matchAmount == term.atom.quantityCount); 1.901 + ParenthesesDisjunctionContext* context = backTrack->lastContext; 1.902 + recordParenthesesMatch(term, context); 1.903 + return JSRegExpMatch; 1.904 + } 1.905 + 1.906 + case QuantifierGreedy: { 1.907 + while (backTrack->matchAmount < term.atom.quantityCount) { 1.908 + ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); 1.909 + JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); 1.910 + if (result == JSRegExpMatch) 1.911 + appendParenthesesDisjunctionContext(backTrack, context); 1.912 + else { 1.913 + resetMatches(term, context); 1.914 + freeParenthesesDisjunctionContext(context); 1.915 + 1.916 + if (result != JSRegExpNoMatch) 1.917 + return result; 1.918 + 1.919 + break; 1.920 + } 1.921 + } 1.922 + 1.923 + if (backTrack->matchAmount) { 1.924 + ParenthesesDisjunctionContext* context = backTrack->lastContext; 1.925 + recordParenthesesMatch(term, context); 1.926 + } 1.927 + return JSRegExpMatch; 1.928 + } 1.929 + 1.930 + case QuantifierNonGreedy: 1.931 + return JSRegExpMatch; 1.932 + } 1.933 + 1.934 + ASSERT_NOT_REACHED(); 1.935 + return JSRegExpErrorNoMatch; 1.936 + } 1.937 + 1.938 + // Rules for backtracking differ depending on whether this is greedy or non-greedy. 1.939 + // 1.940 + // Greedy matches never should try just adding more - you should already have done 1.941 + // the 'more' cases. Always backtrack, at least a leetle bit. However cases where 1.942 + // you backtrack an item off the list needs checking, since we'll never have matched 1.943 + // the one less case. Tracking forwards, still add as much as possible. 1.944 + // 1.945 + // Non-greedy, we've already done the one less case, so don't match on popping. 1.946 + // We haven't done the one more case, so always try to add that. 1.947 + // 1.948 + JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context) 1.949 + { 1.950 + ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); 1.951 + 1.952 + BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation); 1.953 + ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction; 1.954 + 1.955 + switch (term.atom.quantityType) { 1.956 + case QuantifierFixedCount: { 1.957 + ASSERT(backTrack->matchAmount == term.atom.quantityCount); 1.958 + 1.959 + ParenthesesDisjunctionContext* context = 0; 1.960 + JSRegExpResult result = parenthesesDoBacktrack(term, backTrack); 1.961 + 1.962 + if (result != JSRegExpMatch) 1.963 + return result; 1.964 + 1.965 + // While we haven't yet reached our fixed limit, 1.966 + while (backTrack->matchAmount < term.atom.quantityCount) { 1.967 + // Try to do a match, and it it succeeds, add it to the list. 1.968 + context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); 1.969 + result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); 1.970 + 1.971 + if (result == JSRegExpMatch) 1.972 + appendParenthesesDisjunctionContext(backTrack, context); 1.973 + else { 1.974 + // The match failed; try to find an alternate point to carry on from. 1.975 + resetMatches(term, context); 1.976 + freeParenthesesDisjunctionContext(context); 1.977 + 1.978 + if (result != JSRegExpNoMatch) 1.979 + return result; 1.980 + JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack); 1.981 + if (backtrackResult != JSRegExpMatch) 1.982 + return backtrackResult; 1.983 + } 1.984 + } 1.985 + 1.986 + ASSERT(backTrack->matchAmount == term.atom.quantityCount); 1.987 + context = backTrack->lastContext; 1.988 + recordParenthesesMatch(term, context); 1.989 + return JSRegExpMatch; 1.990 + } 1.991 + 1.992 + case QuantifierGreedy: { 1.993 + if (!backTrack->matchAmount) 1.994 + return JSRegExpNoMatch; 1.995 + 1.996 + ParenthesesDisjunctionContext* context = backTrack->lastContext; 1.997 + JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true); 1.998 + if (result == JSRegExpMatch) { 1.999 + while (backTrack->matchAmount < term.atom.quantityCount) { 1.1000 + ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); 1.1001 + JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); 1.1002 + if (parenthesesResult == JSRegExpMatch) 1.1003 + appendParenthesesDisjunctionContext(backTrack, context); 1.1004 + else { 1.1005 + resetMatches(term, context); 1.1006 + freeParenthesesDisjunctionContext(context); 1.1007 + 1.1008 + if (parenthesesResult != JSRegExpNoMatch) 1.1009 + return parenthesesResult; 1.1010 + 1.1011 + break; 1.1012 + } 1.1013 + } 1.1014 + } else { 1.1015 + // Avoid a topcrash before it occurs. 1.1016 + if (!backTrack->lastContext) { 1.1017 + ASSERT(!"Tripped Bug 856796!"); 1.1018 + return JSRegExpErrorInternal; 1.1019 + } 1.1020 + 1.1021 + resetMatches(term, context); 1.1022 + popParenthesesDisjunctionContext(backTrack); 1.1023 + freeParenthesesDisjunctionContext(context); 1.1024 + 1.1025 + if (result != JSRegExpNoMatch) 1.1026 + return result; 1.1027 + } 1.1028 + 1.1029 + if (backTrack->matchAmount) { 1.1030 + ParenthesesDisjunctionContext* context = backTrack->lastContext; 1.1031 + recordParenthesesMatch(term, context); 1.1032 + } 1.1033 + return JSRegExpMatch; 1.1034 + } 1.1035 + 1.1036 + case QuantifierNonGreedy: { 1.1037 + // If we've not reached the limit, try to add one more match. 1.1038 + if (backTrack->matchAmount < term.atom.quantityCount) { 1.1039 + ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); 1.1040 + JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); 1.1041 + if (result == JSRegExpMatch) { 1.1042 + appendParenthesesDisjunctionContext(backTrack, context); 1.1043 + recordParenthesesMatch(term, context); 1.1044 + return JSRegExpMatch; 1.1045 + } 1.1046 + 1.1047 + resetMatches(term, context); 1.1048 + freeParenthesesDisjunctionContext(context); 1.1049 + 1.1050 + if (result != JSRegExpNoMatch) 1.1051 + return result; 1.1052 + } 1.1053 + 1.1054 + // Nope - okay backtrack looking for an alternative. 1.1055 + while (backTrack->matchAmount) { 1.1056 + ParenthesesDisjunctionContext* context = backTrack->lastContext; 1.1057 + JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true); 1.1058 + if (result == JSRegExpMatch) { 1.1059 + // successful backtrack! we're back in the game! 1.1060 + if (backTrack->matchAmount) { 1.1061 + context = backTrack->lastContext; 1.1062 + recordParenthesesMatch(term, context); 1.1063 + } 1.1064 + return JSRegExpMatch; 1.1065 + } 1.1066 + 1.1067 + // Avoid a topcrash before it occurs. 1.1068 + if (!backTrack->lastContext) { 1.1069 + ASSERT(!"Tripped Bug 856796!"); 1.1070 + return JSRegExpErrorInternal; 1.1071 + } 1.1072 + 1.1073 + // pop a match off the stack 1.1074 + resetMatches(term, context); 1.1075 + popParenthesesDisjunctionContext(backTrack); 1.1076 + freeParenthesesDisjunctionContext(context); 1.1077 + 1.1078 + if (result != JSRegExpNoMatch) 1.1079 + return result; 1.1080 + } 1.1081 + 1.1082 + return JSRegExpNoMatch; 1.1083 + } 1.1084 + } 1.1085 + 1.1086 + ASSERT_NOT_REACHED(); 1.1087 + return JSRegExpErrorNoMatch; 1.1088 + } 1.1089 + 1.1090 + bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context) 1.1091 + { 1.1092 + UNUSED_PARAM(term); 1.1093 + unsigned matchBegin = context->matchBegin; 1.1094 + 1.1095 + if (matchBegin) { 1.1096 + for (matchBegin--; true; matchBegin--) { 1.1097 + if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) { 1.1098 + ++matchBegin; 1.1099 + break; 1.1100 + } 1.1101 + 1.1102 + if (!matchBegin) 1.1103 + break; 1.1104 + } 1.1105 + } 1.1106 + 1.1107 + unsigned matchEnd = input.getPos(); 1.1108 + 1.1109 + for (; (matchEnd != input.end()) 1.1110 + && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { } 1.1111 + 1.1112 + if (((matchBegin && term.anchors.m_bol) 1.1113 + || ((matchEnd != input.end()) && term.anchors.m_eol)) 1.1114 + && !pattern->m_multiline) 1.1115 + return false; 1.1116 + 1.1117 + context->matchBegin = matchBegin; 1.1118 + context->matchEnd = matchEnd; 1.1119 + return true; 1.1120 + } 1.1121 + 1.1122 +#define MATCH_NEXT() { ++context->term; goto matchAgain; } 1.1123 +#define BACKTRACK() { --context->term; goto backtrack; } 1.1124 +#define currentTerm() (disjunction->terms[context->term]) 1.1125 + JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false) 1.1126 + { 1.1127 + if (!--remainingMatchCount) 1.1128 + return JSRegExpErrorHitLimit; 1.1129 + 1.1130 + if (btrack) 1.1131 + BACKTRACK(); 1.1132 + 1.1133 + context->matchBegin = input.getPos(); 1.1134 + context->term = 0; 1.1135 + 1.1136 + matchAgain: 1.1137 + ASSERT(context->term < static_cast<int>(disjunction->terms.size())); 1.1138 + 1.1139 + // Prevent jank resulting from getting stuck in Yarr for a long time. 1.1140 + if (!CheckForInterrupt(this->cx)) 1.1141 + return JSRegExpErrorInternal; 1.1142 + 1.1143 + switch (currentTerm().type) { 1.1144 + case ByteTerm::TypeSubpatternBegin: 1.1145 + MATCH_NEXT(); 1.1146 + case ByteTerm::TypeSubpatternEnd: 1.1147 + context->matchEnd = input.getPos(); 1.1148 + return JSRegExpMatch; 1.1149 + 1.1150 + case ByteTerm::TypeBodyAlternativeBegin: 1.1151 + MATCH_NEXT(); 1.1152 + case ByteTerm::TypeBodyAlternativeDisjunction: 1.1153 + case ByteTerm::TypeBodyAlternativeEnd: 1.1154 + context->matchEnd = input.getPos(); 1.1155 + return JSRegExpMatch; 1.1156 + 1.1157 + case ByteTerm::TypeAlternativeBegin: 1.1158 + MATCH_NEXT(); 1.1159 + case ByteTerm::TypeAlternativeDisjunction: 1.1160 + case ByteTerm::TypeAlternativeEnd: { 1.1161 + int offset = currentTerm().alternative.end; 1.1162 + BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation); 1.1163 + backTrack->offset = offset; 1.1164 + context->term += offset; 1.1165 + MATCH_NEXT(); 1.1166 + } 1.1167 + 1.1168 + case ByteTerm::TypeAssertionBOL: 1.1169 + if (matchAssertionBOL(currentTerm())) 1.1170 + MATCH_NEXT(); 1.1171 + BACKTRACK(); 1.1172 + case ByteTerm::TypeAssertionEOL: 1.1173 + if (matchAssertionEOL(currentTerm())) 1.1174 + MATCH_NEXT(); 1.1175 + BACKTRACK(); 1.1176 + case ByteTerm::TypeAssertionWordBoundary: 1.1177 + if (matchAssertionWordBoundary(currentTerm())) 1.1178 + MATCH_NEXT(); 1.1179 + BACKTRACK(); 1.1180 + 1.1181 + case ByteTerm::TypePatternCharacterOnce: 1.1182 + case ByteTerm::TypePatternCharacterFixed: { 1.1183 + for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { 1.1184 + if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) 1.1185 + BACKTRACK(); 1.1186 + } 1.1187 + MATCH_NEXT(); 1.1188 + } 1.1189 + case ByteTerm::TypePatternCharacterGreedy: { 1.1190 + BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); 1.1191 + unsigned matchAmount = 0; 1.1192 + while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) { 1.1193 + if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) { 1.1194 + input.uncheckInput(1); 1.1195 + break; 1.1196 + } 1.1197 + ++matchAmount; 1.1198 + } 1.1199 + backTrack->matchAmount = matchAmount; 1.1200 + 1.1201 + MATCH_NEXT(); 1.1202 + } 1.1203 + case ByteTerm::TypePatternCharacterNonGreedy: { 1.1204 + BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); 1.1205 + backTrack->matchAmount = 0; 1.1206 + MATCH_NEXT(); 1.1207 + } 1.1208 + 1.1209 + case ByteTerm::TypePatternCasedCharacterOnce: 1.1210 + case ByteTerm::TypePatternCasedCharacterFixed: { 1.1211 + for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { 1.1212 + if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) 1.1213 + BACKTRACK(); 1.1214 + } 1.1215 + MATCH_NEXT(); 1.1216 + } 1.1217 + case ByteTerm::TypePatternCasedCharacterGreedy: { 1.1218 + BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); 1.1219 + unsigned matchAmount = 0; 1.1220 + while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) { 1.1221 + if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) { 1.1222 + input.uncheckInput(1); 1.1223 + break; 1.1224 + } 1.1225 + ++matchAmount; 1.1226 + } 1.1227 + backTrack->matchAmount = matchAmount; 1.1228 + 1.1229 + MATCH_NEXT(); 1.1230 + } 1.1231 + case ByteTerm::TypePatternCasedCharacterNonGreedy: { 1.1232 + BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); 1.1233 + backTrack->matchAmount = 0; 1.1234 + MATCH_NEXT(); 1.1235 + } 1.1236 + 1.1237 + case ByteTerm::TypeCharacterClass: 1.1238 + if (matchCharacterClass(currentTerm(), context)) 1.1239 + MATCH_NEXT(); 1.1240 + BACKTRACK(); 1.1241 + case ByteTerm::TypeBackReference: 1.1242 + if (matchBackReference(currentTerm(), context)) 1.1243 + MATCH_NEXT(); 1.1244 + BACKTRACK(); 1.1245 + case ByteTerm::TypeParenthesesSubpattern: { 1.1246 + JSRegExpResult result = matchParentheses(currentTerm(), context); 1.1247 + 1.1248 + if (result == JSRegExpMatch) { 1.1249 + MATCH_NEXT(); 1.1250 + } else if (result != JSRegExpNoMatch) 1.1251 + return result; 1.1252 + 1.1253 + BACKTRACK(); 1.1254 + } 1.1255 + case ByteTerm::TypeParenthesesSubpatternOnceBegin: 1.1256 + if (matchParenthesesOnceBegin(currentTerm(), context)) 1.1257 + MATCH_NEXT(); 1.1258 + BACKTRACK(); 1.1259 + case ByteTerm::TypeParenthesesSubpatternOnceEnd: 1.1260 + if (matchParenthesesOnceEnd(currentTerm(), context)) 1.1261 + MATCH_NEXT(); 1.1262 + BACKTRACK(); 1.1263 + case ByteTerm::TypeParenthesesSubpatternTerminalBegin: 1.1264 + if (matchParenthesesTerminalBegin(currentTerm(), context)) 1.1265 + MATCH_NEXT(); 1.1266 + BACKTRACK(); 1.1267 + case ByteTerm::TypeParenthesesSubpatternTerminalEnd: 1.1268 + if (matchParenthesesTerminalEnd(currentTerm(), context)) 1.1269 + MATCH_NEXT(); 1.1270 + BACKTRACK(); 1.1271 + case ByteTerm::TypeParentheticalAssertionBegin: 1.1272 + if (matchParentheticalAssertionBegin(currentTerm(), context)) 1.1273 + MATCH_NEXT(); 1.1274 + BACKTRACK(); 1.1275 + case ByteTerm::TypeParentheticalAssertionEnd: 1.1276 + if (matchParentheticalAssertionEnd(currentTerm(), context)) 1.1277 + MATCH_NEXT(); 1.1278 + BACKTRACK(); 1.1279 + 1.1280 + case ByteTerm::TypeCheckInput: 1.1281 + if (input.checkInput(currentTerm().checkInputCount)) 1.1282 + MATCH_NEXT(); 1.1283 + BACKTRACK(); 1.1284 + 1.1285 + case ByteTerm::TypeUncheckInput: 1.1286 + input.uncheckInput(currentTerm().checkInputCount); 1.1287 + MATCH_NEXT(); 1.1288 + 1.1289 + case ByteTerm::TypeDotStarEnclosure: 1.1290 + if (matchDotStarEnclosure(currentTerm(), context)) 1.1291 + return JSRegExpMatch; 1.1292 + BACKTRACK(); 1.1293 + } 1.1294 + 1.1295 + // We should never fall-through to here. 1.1296 + ASSERT_NOT_REACHED(); 1.1297 + 1.1298 + backtrack: 1.1299 + ASSERT(context->term < static_cast<int>(disjunction->terms.size())); 1.1300 + 1.1301 + // Prevent jank resulting from getting stuck in Yarr for a long time. 1.1302 + if (!CheckForInterrupt(this->cx)) 1.1303 + return JSRegExpErrorInternal; 1.1304 + 1.1305 + switch (currentTerm().type) { 1.1306 + case ByteTerm::TypeSubpatternBegin: 1.1307 + return JSRegExpNoMatch; 1.1308 + case ByteTerm::TypeSubpatternEnd: 1.1309 + ASSERT_NOT_REACHED(); 1.1310 + 1.1311 + case ByteTerm::TypeBodyAlternativeBegin: 1.1312 + case ByteTerm::TypeBodyAlternativeDisjunction: { 1.1313 + int offset = currentTerm().alternative.next; 1.1314 + context->term += offset; 1.1315 + if (offset > 0) 1.1316 + MATCH_NEXT(); 1.1317 + 1.1318 + if (input.atEnd()) 1.1319 + return JSRegExpNoMatch; 1.1320 + 1.1321 + input.next(); 1.1322 + 1.1323 + context->matchBegin = input.getPos(); 1.1324 + 1.1325 + if (currentTerm().alternative.onceThrough) 1.1326 + context->term += currentTerm().alternative.next; 1.1327 + 1.1328 + MATCH_NEXT(); 1.1329 + } 1.1330 + case ByteTerm::TypeBodyAlternativeEnd: 1.1331 + ASSERT_NOT_REACHED(); 1.1332 + 1.1333 + case ByteTerm::TypeAlternativeBegin: 1.1334 + case ByteTerm::TypeAlternativeDisjunction: { 1.1335 + int offset = currentTerm().alternative.next; 1.1336 + context->term += offset; 1.1337 + if (offset > 0) 1.1338 + MATCH_NEXT(); 1.1339 + BACKTRACK(); 1.1340 + } 1.1341 + case ByteTerm::TypeAlternativeEnd: { 1.1342 + // We should never backtrack back into an alternative of the main body of the regex. 1.1343 + BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation); 1.1344 + unsigned offset = backTrack->offset; 1.1345 + context->term -= offset; 1.1346 + BACKTRACK(); 1.1347 + } 1.1348 + 1.1349 + case ByteTerm::TypeAssertionBOL: 1.1350 + case ByteTerm::TypeAssertionEOL: 1.1351 + case ByteTerm::TypeAssertionWordBoundary: 1.1352 + BACKTRACK(); 1.1353 + 1.1354 + case ByteTerm::TypePatternCharacterOnce: 1.1355 + case ByteTerm::TypePatternCharacterFixed: 1.1356 + case ByteTerm::TypePatternCharacterGreedy: 1.1357 + case ByteTerm::TypePatternCharacterNonGreedy: 1.1358 + if (backtrackPatternCharacter(currentTerm(), context)) 1.1359 + MATCH_NEXT(); 1.1360 + BACKTRACK(); 1.1361 + case ByteTerm::TypePatternCasedCharacterOnce: 1.1362 + case ByteTerm::TypePatternCasedCharacterFixed: 1.1363 + case ByteTerm::TypePatternCasedCharacterGreedy: 1.1364 + case ByteTerm::TypePatternCasedCharacterNonGreedy: 1.1365 + if (backtrackPatternCasedCharacter(currentTerm(), context)) 1.1366 + MATCH_NEXT(); 1.1367 + BACKTRACK(); 1.1368 + case ByteTerm::TypeCharacterClass: 1.1369 + if (backtrackCharacterClass(currentTerm(), context)) 1.1370 + MATCH_NEXT(); 1.1371 + BACKTRACK(); 1.1372 + case ByteTerm::TypeBackReference: 1.1373 + if (backtrackBackReference(currentTerm(), context)) 1.1374 + MATCH_NEXT(); 1.1375 + BACKTRACK(); 1.1376 + case ByteTerm::TypeParenthesesSubpattern: { 1.1377 + JSRegExpResult result = backtrackParentheses(currentTerm(), context); 1.1378 + 1.1379 + if (result == JSRegExpMatch) { 1.1380 + MATCH_NEXT(); 1.1381 + } else if (result != JSRegExpNoMatch) 1.1382 + return result; 1.1383 + 1.1384 + BACKTRACK(); 1.1385 + } 1.1386 + case ByteTerm::TypeParenthesesSubpatternOnceBegin: 1.1387 + if (backtrackParenthesesOnceBegin(currentTerm(), context)) 1.1388 + MATCH_NEXT(); 1.1389 + BACKTRACK(); 1.1390 + case ByteTerm::TypeParenthesesSubpatternOnceEnd: 1.1391 + if (backtrackParenthesesOnceEnd(currentTerm(), context)) 1.1392 + MATCH_NEXT(); 1.1393 + BACKTRACK(); 1.1394 + case ByteTerm::TypeParenthesesSubpatternTerminalBegin: 1.1395 + if (backtrackParenthesesTerminalBegin(currentTerm(), context)) 1.1396 + MATCH_NEXT(); 1.1397 + BACKTRACK(); 1.1398 + case ByteTerm::TypeParenthesesSubpatternTerminalEnd: 1.1399 + if (backtrackParenthesesTerminalEnd(currentTerm(), context)) 1.1400 + MATCH_NEXT(); 1.1401 + BACKTRACK(); 1.1402 + case ByteTerm::TypeParentheticalAssertionBegin: 1.1403 + if (backtrackParentheticalAssertionBegin(currentTerm(), context)) 1.1404 + MATCH_NEXT(); 1.1405 + BACKTRACK(); 1.1406 + case ByteTerm::TypeParentheticalAssertionEnd: 1.1407 + if (backtrackParentheticalAssertionEnd(currentTerm(), context)) 1.1408 + MATCH_NEXT(); 1.1409 + BACKTRACK(); 1.1410 + 1.1411 + case ByteTerm::TypeCheckInput: 1.1412 + input.uncheckInput(currentTerm().checkInputCount); 1.1413 + BACKTRACK(); 1.1414 + 1.1415 + case ByteTerm::TypeUncheckInput: 1.1416 + input.checkInput(currentTerm().checkInputCount); 1.1417 + BACKTRACK(); 1.1418 + 1.1419 + case ByteTerm::TypeDotStarEnclosure: 1.1420 + ASSERT_NOT_REACHED(); 1.1421 + } 1.1422 + 1.1423 + ASSERT_NOT_REACHED(); 1.1424 + return JSRegExpErrorNoMatch; 1.1425 + } 1.1426 + 1.1427 + JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false) 1.1428 + { 1.1429 + JSRegExpResult result = matchDisjunction(disjunction, context, btrack); 1.1430 + 1.1431 + if (result == JSRegExpMatch) { 1.1432 + while (context->matchBegin == context->matchEnd) { 1.1433 + result = matchDisjunction(disjunction, context, true); 1.1434 + if (result != JSRegExpMatch) 1.1435 + return result; 1.1436 + } 1.1437 + return JSRegExpMatch; 1.1438 + } 1.1439 + 1.1440 + return result; 1.1441 + } 1.1442 + 1.1443 + unsigned interpret() 1.1444 + { 1.1445 + if (!input.isAvailableInput(0)) 1.1446 + return offsetNoMatch; 1.1447 + 1.1448 + for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i) 1.1449 + output[i << 1] = offsetNoMatch; 1.1450 + 1.1451 + allocatorPool = pattern->m_allocator->startAllocator(); 1.1452 + if (!allocatorPool) 1.1453 + CRASH(); 1.1454 + 1.1455 + DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get()); 1.1456 + 1.1457 + JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false); 1.1458 + if (result == JSRegExpMatch) { 1.1459 + output[0] = context->matchBegin; 1.1460 + output[1] = context->matchEnd; 1.1461 + } 1.1462 + 1.1463 + freeDisjunctionContext(context); 1.1464 + 1.1465 + pattern->m_allocator->stopAllocator(); 1.1466 + 1.1467 + ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch)); 1.1468 + return output[0]; 1.1469 + } 1.1470 + 1.1471 + Interpreter(JSContext *cx, BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start) 1.1472 + : cx(cx) 1.1473 + , pattern(pattern) 1.1474 + , output(output) 1.1475 + , input(input, start, length) 1.1476 + , allocatorPool(0) 1.1477 + , remainingMatchCount(matchLimit) 1.1478 + { 1.1479 + } 1.1480 + 1.1481 +private: 1.1482 + JSContext *cx; 1.1483 + BytecodePattern* pattern; 1.1484 + unsigned* output; 1.1485 + InputStream input; 1.1486 + BumpPointerPool* allocatorPool; 1.1487 + unsigned remainingMatchCount; 1.1488 +}; 1.1489 + 1.1490 + 1.1491 + 1.1492 +class ByteCompiler { 1.1493 + struct ParenthesesStackEntry { 1.1494 + unsigned beginTerm; 1.1495 + unsigned savedAlternativeIndex; 1.1496 + 1.1497 + // For js::Vector. Does not create a valid object. 1.1498 + ParenthesesStackEntry() { } 1.1499 + 1.1500 + ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/) 1.1501 + : beginTerm(beginTerm) 1.1502 + , savedAlternativeIndex(savedAlternativeIndex) 1.1503 + { 1.1504 + } 1.1505 + }; 1.1506 + 1.1507 +public: 1.1508 + ByteCompiler(YarrPattern& pattern) 1.1509 + : m_pattern(pattern) 1.1510 + { 1.1511 + m_currentAlternativeIndex = 0; 1.1512 + } 1.1513 + 1.1514 + PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator) 1.1515 + { 1.1516 + regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough()); 1.1517 + emitDisjunction(m_pattern.m_body); 1.1518 + regexEnd(); 1.1519 + 1.1520 + return adoptPtr(newOrCrash<BytecodePattern>(m_bodyDisjunction.release(), m_allParenthesesInfo, Ref<YarrPattern>(m_pattern), allocator)); 1.1521 + } 1.1522 + 1.1523 + void checkInput(unsigned count) 1.1524 + { 1.1525 + m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count)); 1.1526 + } 1.1527 + 1.1528 + void uncheckInput(unsigned count) 1.1529 + { 1.1530 + m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count)); 1.1531 + } 1.1532 + 1.1533 + void assertionBOL(unsigned inputPosition) 1.1534 + { 1.1535 + m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition)); 1.1536 + } 1.1537 + 1.1538 + void assertionEOL(unsigned inputPosition) 1.1539 + { 1.1540 + m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition)); 1.1541 + } 1.1542 + 1.1543 + void assertionWordBoundary(bool invert, unsigned inputPosition) 1.1544 + { 1.1545 + m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition)); 1.1546 + } 1.1547 + 1.1548 + void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) 1.1549 + { 1.1550 + if (m_pattern.m_ignoreCase) { 1.1551 + UChar lo = Unicode::toLower(ch); 1.1552 + UChar hi = Unicode::toUpper(ch); 1.1553 + 1.1554 + if (lo != hi) { 1.1555 + m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType)); 1.1556 + return; 1.1557 + } 1.1558 + } 1.1559 + 1.1560 + m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType)); 1.1561 + } 1.1562 + 1.1563 + void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) 1.1564 + { 1.1565 + m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition)); 1.1566 + 1.1567 + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet(); 1.1568 + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; 1.1569 + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; 1.1570 + } 1.1571 + 1.1572 + void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) 1.1573 + { 1.1574 + ASSERT(subpatternId); 1.1575 + 1.1576 + m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition)); 1.1577 + 1.1578 + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet(); 1.1579 + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; 1.1580 + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; 1.1581 + } 1.1582 + 1.1583 + void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) 1.1584 + { 1.1585 + int beginTerm = m_bodyDisjunction->terms.size(); 1.1586 + 1.1587 + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition)); 1.1588 + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; 1.1589 + m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); 1.1590 + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; 1.1591 + 1.1592 + m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); 1.1593 + m_currentAlternativeIndex = beginTerm + 1; 1.1594 + } 1.1595 + 1.1596 + void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) 1.1597 + { 1.1598 + int beginTerm = m_bodyDisjunction->terms.size(); 1.1599 + 1.1600 + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition)); 1.1601 + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; 1.1602 + m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); 1.1603 + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; 1.1604 + 1.1605 + m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); 1.1606 + m_currentAlternativeIndex = beginTerm + 1; 1.1607 + } 1.1608 + 1.1609 + void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) 1.1610 + { 1.1611 + // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin, 1.1612 + // then fix this up at the end! - simplifying this should make it much clearer. 1.1613 + // https://bugs.webkit.org/show_bug.cgi?id=50136 1.1614 + 1.1615 + int beginTerm = m_bodyDisjunction->terms.size(); 1.1616 + 1.1617 + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition)); 1.1618 + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; 1.1619 + m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); 1.1620 + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; 1.1621 + 1.1622 + m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); 1.1623 + m_currentAlternativeIndex = beginTerm + 1; 1.1624 + } 1.1625 + 1.1626 + void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation) 1.1627 + { 1.1628 + int beginTerm = m_bodyDisjunction->terms.size(); 1.1629 + 1.1630 + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0)); 1.1631 + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; 1.1632 + m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); 1.1633 + m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; 1.1634 + 1.1635 + m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); 1.1636 + m_currentAlternativeIndex = beginTerm + 1; 1.1637 + } 1.1638 + 1.1639 + void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) 1.1640 + { 1.1641 + unsigned beginTerm = popParenthesesStack(); 1.1642 + closeAlternative(beginTerm + 1); 1.1643 + unsigned endTerm = m_bodyDisjunction->terms.size(); 1.1644 + 1.1645 + ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin); 1.1646 + 1.1647 + bool invert = m_bodyDisjunction->terms[beginTerm].invert(); 1.1648 + unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; 1.1649 + 1.1650 + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition)); 1.1651 + m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; 1.1652 + m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; 1.1653 + m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; 1.1654 + 1.1655 + m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); 1.1656 + m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; 1.1657 + m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); 1.1658 + m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; 1.1659 + } 1.1660 + 1.1661 + void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored) 1.1662 + { 1.1663 + m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored)); 1.1664 + } 1.1665 + 1.1666 + unsigned popParenthesesStack() 1.1667 + { 1.1668 + ASSERT(m_parenthesesStack.size()); 1.1669 + int stackEnd = m_parenthesesStack.size() - 1; 1.1670 + unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm; 1.1671 + m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex; 1.1672 + m_parenthesesStack.shrink(stackEnd); 1.1673 + 1.1674 + ASSERT(beginTerm < m_bodyDisjunction->terms.size()); 1.1675 + ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size()); 1.1676 + 1.1677 + return beginTerm; 1.1678 + } 1.1679 + 1.1680 +#ifndef NDEBUG 1.1681 + void dumpDisjunction(ByteDisjunction* disjunction) 1.1682 + { 1.1683 + dataLogF("ByteDisjunction(%p):\n\t", (void *)disjunction); 1.1684 + for (unsigned i = 0; i < disjunction->terms.size(); ++i) 1.1685 + dataLogF("{ %d } ", disjunction->terms[i].type); 1.1686 + dataLogF("\n"); 1.1687 + } 1.1688 +#endif 1.1689 + 1.1690 + void closeAlternative(int beginTerm) 1.1691 + { 1.1692 + int origBeginTerm = beginTerm; 1.1693 + ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin); 1.1694 + int endIndex = m_bodyDisjunction->terms.size(); 1.1695 + 1.1696 + unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation; 1.1697 + 1.1698 + if (!m_bodyDisjunction->terms[beginTerm].alternative.next) 1.1699 + m_bodyDisjunction->terms.remove(beginTerm); 1.1700 + else { 1.1701 + while (m_bodyDisjunction->terms[beginTerm].alternative.next) { 1.1702 + beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next; 1.1703 + ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction); 1.1704 + m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm; 1.1705 + m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; 1.1706 + } 1.1707 + 1.1708 + m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm; 1.1709 + 1.1710 + m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd()); 1.1711 + m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation; 1.1712 + } 1.1713 + } 1.1714 + 1.1715 + void closeBodyAlternative() 1.1716 + { 1.1717 + int beginTerm = 0; 1.1718 + int origBeginTerm = 0; 1.1719 + ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin); 1.1720 + int endIndex = m_bodyDisjunction->terms.size(); 1.1721 + 1.1722 + unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation; 1.1723 + 1.1724 + while (m_bodyDisjunction->terms[beginTerm].alternative.next) { 1.1725 + beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next; 1.1726 + ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction); 1.1727 + m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm; 1.1728 + m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; 1.1729 + } 1.1730 + 1.1731 + m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm; 1.1732 + 1.1733 + m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd()); 1.1734 + m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation; 1.1735 + } 1.1736 + 1.1737 + void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0) 1.1738 + { 1.1739 + unsigned beginTerm = popParenthesesStack(); 1.1740 + closeAlternative(beginTerm + 1); 1.1741 + unsigned endTerm = m_bodyDisjunction->terms.size(); 1.1742 + 1.1743 + ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin); 1.1744 + 1.1745 + ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm]; 1.1746 + 1.1747 + bool capture = parenthesesBegin.capture(); 1.1748 + unsigned subpatternId = parenthesesBegin.atom.subpatternId; 1.1749 + 1.1750 + unsigned numSubpatterns = lastSubpatternId - subpatternId + 1; 1.1751 + ByteDisjunction* parenthesesDisjunction = newOrCrash<ByteDisjunction>(numSubpatterns, callFrameSize); 1.1752 + 1.1753 + parenthesesDisjunction->terms.reserve(endTerm - beginTerm + 1); 1.1754 + parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin()); 1.1755 + for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses) 1.1756 + parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]); 1.1757 + parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd()); 1.1758 + 1.1759 + m_bodyDisjunction->terms.shrink(beginTerm); 1.1760 + 1.1761 + m_allParenthesesInfo.append(parenthesesDisjunction); 1.1762 + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition)); 1.1763 + 1.1764 + m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); 1.1765 + m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; 1.1766 + m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; 1.1767 + } 1.1768 + 1.1769 + void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) 1.1770 + { 1.1771 + unsigned beginTerm = popParenthesesStack(); 1.1772 + closeAlternative(beginTerm + 1); 1.1773 + unsigned endTerm = m_bodyDisjunction->terms.size(); 1.1774 + 1.1775 + ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin); 1.1776 + 1.1777 + bool capture = m_bodyDisjunction->terms[beginTerm].capture(); 1.1778 + unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; 1.1779 + 1.1780 + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition)); 1.1781 + m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; 1.1782 + m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; 1.1783 + m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; 1.1784 + 1.1785 + m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); 1.1786 + m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; 1.1787 + m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); 1.1788 + m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; 1.1789 + } 1.1790 + 1.1791 + void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) 1.1792 + { 1.1793 + unsigned beginTerm = popParenthesesStack(); 1.1794 + closeAlternative(beginTerm + 1); 1.1795 + unsigned endTerm = m_bodyDisjunction->terms.size(); 1.1796 + 1.1797 + ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); 1.1798 + 1.1799 + bool capture = m_bodyDisjunction->terms[beginTerm].capture(); 1.1800 + unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; 1.1801 + 1.1802 + m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition)); 1.1803 + m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; 1.1804 + m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; 1.1805 + m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; 1.1806 + 1.1807 + m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); 1.1808 + m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; 1.1809 + m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); 1.1810 + m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; 1.1811 + } 1.1812 + 1.1813 + void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough) 1.1814 + { 1.1815 + m_bodyDisjunction = adoptPtr(newOrCrash<ByteDisjunction>(numSubpatterns, callFrameSize)); 1.1816 + m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough)); 1.1817 + m_bodyDisjunction->terms[0].frameLocation = 0; 1.1818 + m_currentAlternativeIndex = 0; 1.1819 + } 1.1820 + 1.1821 + void regexEnd() 1.1822 + { 1.1823 + closeBodyAlternative(); 1.1824 + } 1.1825 + 1.1826 + void alternativeBodyDisjunction(bool onceThrough) 1.1827 + { 1.1828 + int newAlternativeIndex = m_bodyDisjunction->terms.size(); 1.1829 + m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex; 1.1830 + m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough)); 1.1831 + 1.1832 + m_currentAlternativeIndex = newAlternativeIndex; 1.1833 + } 1.1834 + 1.1835 + void alternativeDisjunction() 1.1836 + { 1.1837 + int newAlternativeIndex = m_bodyDisjunction->terms.size(); 1.1838 + m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex; 1.1839 + m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction()); 1.1840 + 1.1841 + m_currentAlternativeIndex = newAlternativeIndex; 1.1842 + } 1.1843 + 1.1844 + void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0) 1.1845 + { 1.1846 + for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) { 1.1847 + unsigned currentCountAlreadyChecked = inputCountAlreadyChecked; 1.1848 + 1.1849 + PatternAlternative* alternative = disjunction->m_alternatives[alt]; 1.1850 + 1.1851 + if (alt) { 1.1852 + if (disjunction == m_pattern.m_body) 1.1853 + alternativeBodyDisjunction(alternative->onceThrough()); 1.1854 + else 1.1855 + alternativeDisjunction(); 1.1856 + } 1.1857 + 1.1858 + unsigned minimumSize = alternative->m_minimumSize; 1.1859 + ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked); 1.1860 + unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked; 1.1861 + 1.1862 + if (countToCheck) { 1.1863 + checkInput(countToCheck); 1.1864 + currentCountAlreadyChecked += countToCheck; 1.1865 + } 1.1866 + 1.1867 + for (unsigned i = 0; i < alternative->m_terms.size(); ++i) { 1.1868 + PatternTerm& term = alternative->m_terms[i]; 1.1869 + 1.1870 + switch (term.type) { 1.1871 + case PatternTerm::TypeAssertionBOL: 1.1872 + assertionBOL(currentCountAlreadyChecked - term.inputPosition); 1.1873 + break; 1.1874 + 1.1875 + case PatternTerm::TypeAssertionEOL: 1.1876 + assertionEOL(currentCountAlreadyChecked - term.inputPosition); 1.1877 + break; 1.1878 + 1.1879 + case PatternTerm::TypeAssertionWordBoundary: 1.1880 + assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition); 1.1881 + break; 1.1882 + 1.1883 + case PatternTerm::TypePatternCharacter: 1.1884 + atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); 1.1885 + break; 1.1886 + 1.1887 + case PatternTerm::TypeCharacterClass: 1.1888 + atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); 1.1889 + break; 1.1890 + 1.1891 + case PatternTerm::TypeBackReference: 1.1892 + atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); 1.1893 + break; 1.1894 + 1.1895 + case PatternTerm::TypeForwardReference: 1.1896 + break; 1.1897 + 1.1898 + case PatternTerm::TypeParenthesesSubpattern: { 1.1899 + unsigned disjunctionAlreadyCheckedCount = 0; 1.1900 + if (term.quantityCount == 1 && !term.parentheses.isCopy) { 1.1901 + unsigned alternativeFrameLocation = term.frameLocation; 1.1902 + // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame. 1.1903 + if (term.quantityType == QuantifierFixedCount) 1.1904 + disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize; 1.1905 + else 1.1906 + alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; 1.1907 + unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; 1.1908 + atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation); 1.1909 + emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); 1.1910 + atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); 1.1911 + } else if (term.parentheses.isTerminal) { 1.1912 + unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; 1.1913 + atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce); 1.1914 + emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); 1.1915 + atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); 1.1916 + } else { 1.1917 + unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; 1.1918 + atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0); 1.1919 + emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0); 1.1920 + atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize); 1.1921 + } 1.1922 + break; 1.1923 + } 1.1924 + 1.1925 + case PatternTerm::TypeParentheticalAssertion: { 1.1926 + unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion; 1.1927 + 1.1928 + ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition)); 1.1929 + unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition); 1.1930 + unsigned uncheckAmount = 0; 1.1931 + if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) { 1.1932 + uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize; 1.1933 + uncheckInput(uncheckAmount); 1.1934 + currentCountAlreadyChecked -= uncheckAmount; 1.1935 + } 1.1936 + 1.1937 + atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation); 1.1938 + emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount); 1.1939 + atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType); 1.1940 + if (uncheckAmount) { 1.1941 + checkInput(uncheckAmount); 1.1942 + currentCountAlreadyChecked += uncheckAmount; 1.1943 + } 1.1944 + break; 1.1945 + } 1.1946 + 1.1947 + case PatternTerm::TypeDotStarEnclosure: 1.1948 + assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor); 1.1949 + break; 1.1950 + } 1.1951 + } 1.1952 + } 1.1953 + } 1.1954 + 1.1955 +private: 1.1956 + YarrPattern& m_pattern; 1.1957 + OwnPtr<ByteDisjunction> m_bodyDisjunction; 1.1958 + unsigned m_currentAlternativeIndex; 1.1959 + Vector<ParenthesesStackEntry> m_parenthesesStack; 1.1960 + Vector<ByteDisjunction*> m_allParenthesesInfo; 1.1961 +}; 1.1962 + 1.1963 +PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator) 1.1964 +{ 1.1965 + return ByteCompiler(pattern).compile(allocator); 1.1966 +} 1.1967 + 1.1968 +unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output) 1.1969 +{ 1.1970 +#if YARR_8BIT_CHAR_SUPPORT 1.1971 + if (input.is8Bit()) 1.1972 + return Interpreter<LChar>(cx, bytecode, output, input.characters8(), input.length(), start).interpret(); 1.1973 +#endif 1.1974 + return Interpreter<UChar>(cx, bytecode, output, input.chars(), input.length(), start).interpret(); 1.1975 +} 1.1976 + 1.1977 +unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output) 1.1978 +{ 1.1979 + return Interpreter<LChar>(cx, bytecode, output, input, length, start).interpret(); 1.1980 +} 1.1981 + 1.1982 +unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output) 1.1983 +{ 1.1984 + return Interpreter<UChar>(cx, bytecode, output, input, length, start).interpret(); 1.1985 +} 1.1986 + 1.1987 +// These should be the same for both UChar & LChar. 1.1988 +COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter); 1.1989 +COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass); 1.1990 +COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference); 1.1991 +COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative); 1.1992 +COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion); 1.1993 +COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce); 1.1994 +COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses); 1.1995 + 1.1996 + 1.1997 +} }