js/src/yarr/YarrInterpreter.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
michael@0 2 * vim: set ts=8 sts=4 et sw=4 tw=99:
michael@0 3 *
michael@0 4 * Copyright (C) 2009 Apple Inc. All rights reserved.
michael@0 5 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
michael@0 6 *
michael@0 7 * Redistribution and use in source and binary forms, with or without
michael@0 8 * modification, are permitted provided that the following conditions
michael@0 9 * are met:
michael@0 10 * 1. Redistributions of source code must retain the above copyright
michael@0 11 * notice, this list of conditions and the following disclaimer.
michael@0 12 * 2. Redistributions in binary form must reproduce the above copyright
michael@0 13 * notice, this list of conditions and the following disclaimer in the
michael@0 14 * documentation and/or other materials provided with the distribution.
michael@0 15 *
michael@0 16 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
michael@0 17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
michael@0 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
michael@0 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
michael@0 20 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
michael@0 21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
michael@0 22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
michael@0 23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
michael@0 24 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
michael@0 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
michael@0 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0 27 */
michael@0 28
michael@0 29 #include "yarr/YarrInterpreter.h"
michael@0 30
michael@0 31 #include "jscntxt.h"
michael@0 32
michael@0 33 #include "yarr/Yarr.h"
michael@0 34 #include "yarr/YarrCanonicalizeUCS2.h"
michael@0 35 #include "yarr/BumpPointerAllocator.h"
michael@0 36
michael@0 37 using namespace WTF;
michael@0 38
michael@0 39 namespace JSC { namespace Yarr {
michael@0 40
michael@0 41 template<typename CharType>
michael@0 42 class Interpreter {
michael@0 43 public:
michael@0 44 struct ParenthesesDisjunctionContext;
michael@0 45
michael@0 46 struct BackTrackInfoPatternCharacter {
michael@0 47 uintptr_t matchAmount;
michael@0 48 };
michael@0 49 struct BackTrackInfoCharacterClass {
michael@0 50 uintptr_t matchAmount;
michael@0 51 };
michael@0 52 struct BackTrackInfoBackReference {
michael@0 53 uintptr_t begin; // Not really needed for greedy quantifiers.
michael@0 54 uintptr_t matchAmount; // Not really needed for fixed quantifiers.
michael@0 55 };
michael@0 56 struct BackTrackInfoAlternative {
michael@0 57 uintptr_t offset;
michael@0 58 };
michael@0 59 struct BackTrackInfoParentheticalAssertion {
michael@0 60 uintptr_t begin;
michael@0 61 };
michael@0 62 struct BackTrackInfoParenthesesOnce {
michael@0 63 uintptr_t begin;
michael@0 64 };
michael@0 65 struct BackTrackInfoParenthesesTerminal {
michael@0 66 uintptr_t begin;
michael@0 67 };
michael@0 68 struct BackTrackInfoParentheses {
michael@0 69 uintptr_t matchAmount;
michael@0 70 ParenthesesDisjunctionContext* lastContext;
michael@0 71 };
michael@0 72
michael@0 73 static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
michael@0 74 {
michael@0 75 context->next = backTrack->lastContext;
michael@0 76 backTrack->lastContext = context;
michael@0 77 ++backTrack->matchAmount;
michael@0 78 }
michael@0 79
michael@0 80 static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
michael@0 81 {
michael@0 82 ASSERT(backTrack->matchAmount);
michael@0 83 ASSERT(backTrack->lastContext);
michael@0 84 backTrack->lastContext = backTrack->lastContext->next;
michael@0 85 --backTrack->matchAmount;
michael@0 86 }
michael@0 87
michael@0 88 struct DisjunctionContext
michael@0 89 {
michael@0 90 DisjunctionContext()
michael@0 91 : term(0)
michael@0 92 {
michael@0 93 }
michael@0 94
michael@0 95 void* operator new(size_t, void* where)
michael@0 96 {
michael@0 97 return where;
michael@0 98 }
michael@0 99
michael@0 100 int term;
michael@0 101 unsigned matchBegin;
michael@0 102 unsigned matchEnd;
michael@0 103 uintptr_t frame[1];
michael@0 104 };
michael@0 105
michael@0 106 DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
michael@0 107 {
michael@0 108 size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
michael@0 109 allocatorPool = allocatorPool->ensureCapacity(size);
michael@0 110 if (!allocatorPool)
michael@0 111 CRASH();
michael@0 112 return new (allocatorPool->alloc(size)) DisjunctionContext();
michael@0 113 }
michael@0 114
michael@0 115 void freeDisjunctionContext(DisjunctionContext* context)
michael@0 116 {
michael@0 117 allocatorPool = allocatorPool->dealloc(context);
michael@0 118 }
michael@0 119
michael@0 120 struct ParenthesesDisjunctionContext
michael@0 121 {
michael@0 122 ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
michael@0 123 : next(0)
michael@0 124 {
michael@0 125 unsigned firstSubpatternId = term.atom.subpatternId;
michael@0 126 unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
michael@0 127
michael@0 128 for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
michael@0 129 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
michael@0 130 output[(firstSubpatternId << 1) + i] = offsetNoMatch;
michael@0 131 }
michael@0 132
michael@0 133 new (getDisjunctionContext(term)) DisjunctionContext();
michael@0 134 }
michael@0 135
michael@0 136 void* operator new(size_t, void* where)
michael@0 137 {
michael@0 138 return where;
michael@0 139 }
michael@0 140
michael@0 141 void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
michael@0 142 {
michael@0 143 for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
michael@0 144 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
michael@0 145 }
michael@0 146
michael@0 147 DisjunctionContext* getDisjunctionContext(ByteTerm& term)
michael@0 148 {
michael@0 149 return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
michael@0 150 }
michael@0 151
michael@0 152 ParenthesesDisjunctionContext* next;
michael@0 153 unsigned subpatternBackup[1];
michael@0 154 };
michael@0 155
michael@0 156 ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
michael@0 157 {
michael@0 158 size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
michael@0 159 size = JS_ROUNDUP(size, JS_ALIGNMENT_OF(ParenthesesDisjunctionContext));
michael@0 160 allocatorPool = allocatorPool->ensureCapacity(size);
michael@0 161 if (!allocatorPool)
michael@0 162 CRASH();
michael@0 163 return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
michael@0 164 }
michael@0 165
michael@0 166 void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
michael@0 167 {
michael@0 168 allocatorPool = allocatorPool->dealloc(context);
michael@0 169 }
michael@0 170
michael@0 171 class InputStream {
michael@0 172 public:
michael@0 173 InputStream(const CharType* input, unsigned start, unsigned length)
michael@0 174 : input(input)
michael@0 175 , pos(start)
michael@0 176 , length(length)
michael@0 177 {
michael@0 178 }
michael@0 179
michael@0 180 void next()
michael@0 181 {
michael@0 182 ++pos;
michael@0 183 }
michael@0 184
michael@0 185 void rewind(unsigned amount)
michael@0 186 {
michael@0 187 ASSERT(pos >= amount);
michael@0 188 pos -= amount;
michael@0 189 }
michael@0 190
michael@0 191 int read()
michael@0 192 {
michael@0 193 ASSERT(pos < length);
michael@0 194 if (pos < length)
michael@0 195 return input[pos];
michael@0 196 return -1;
michael@0 197 }
michael@0 198
michael@0 199 int readPair()
michael@0 200 {
michael@0 201 ASSERT(pos + 1 < length);
michael@0 202 return input[pos] | input[pos + 1] << 16;
michael@0 203 }
michael@0 204
michael@0 205 int readChecked(unsigned negativePositionOffest)
michael@0 206 {
michael@0 207 if (pos < negativePositionOffest)
michael@0 208 CRASH();
michael@0 209 unsigned p = pos - negativePositionOffest;
michael@0 210 ASSERT(p < length);
michael@0 211 return input[p];
michael@0 212 }
michael@0 213
michael@0 214 int reread(unsigned from)
michael@0 215 {
michael@0 216 ASSERT(from < length);
michael@0 217 return input[from];
michael@0 218 }
michael@0 219
michael@0 220 int prev()
michael@0 221 {
michael@0 222 ASSERT(!(pos > length));
michael@0 223 if (pos && length)
michael@0 224 return input[pos - 1];
michael@0 225 return -1;
michael@0 226 }
michael@0 227
michael@0 228 unsigned getPos()
michael@0 229 {
michael@0 230 return pos;
michael@0 231 }
michael@0 232
michael@0 233 void setPos(unsigned p)
michael@0 234 {
michael@0 235 pos = p;
michael@0 236 }
michael@0 237
michael@0 238 bool atStart()
michael@0 239 {
michael@0 240 return pos == 0;
michael@0 241 }
michael@0 242
michael@0 243 bool atEnd()
michael@0 244 {
michael@0 245 return pos == length;
michael@0 246 }
michael@0 247
michael@0 248 unsigned end()
michael@0 249 {
michael@0 250 return length;
michael@0 251 }
michael@0 252
michael@0 253 bool checkInput(unsigned count)
michael@0 254 {
michael@0 255 if (((pos + count) <= length) && ((pos + count) >= pos)) {
michael@0 256 pos += count;
michael@0 257 return true;
michael@0 258 }
michael@0 259 return false;
michael@0 260 }
michael@0 261
michael@0 262 void uncheckInput(unsigned count)
michael@0 263 {
michael@0 264 if (pos < count)
michael@0 265 CRASH();
michael@0 266 pos -= count;
michael@0 267 }
michael@0 268
michael@0 269 bool atStart(unsigned negativePositionOffest)
michael@0 270 {
michael@0 271 return pos == negativePositionOffest;
michael@0 272 }
michael@0 273
michael@0 274 bool atEnd(unsigned negativePositionOffest)
michael@0 275 {
michael@0 276 if (pos < negativePositionOffest)
michael@0 277 CRASH();
michael@0 278 return (pos - negativePositionOffest) == length;
michael@0 279 }
michael@0 280
michael@0 281 bool isAvailableInput(unsigned offset)
michael@0 282 {
michael@0 283 return (((pos + offset) <= length) && ((pos + offset) >= pos));
michael@0 284 }
michael@0 285
michael@0 286 private:
michael@0 287 const CharType* input;
michael@0 288 unsigned pos;
michael@0 289 unsigned length;
michael@0 290 };
michael@0 291
michael@0 292 bool testCharacterClass(CharacterClass* characterClass, int ch)
michael@0 293 {
michael@0 294 if (ch & 0xFF80) {
michael@0 295 for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
michael@0 296 if (ch == characterClass->m_matchesUnicode[i])
michael@0 297 return true;
michael@0 298 for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
michael@0 299 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
michael@0 300 return true;
michael@0 301 } else {
michael@0 302 for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
michael@0 303 if (ch == characterClass->m_matches[i])
michael@0 304 return true;
michael@0 305 for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
michael@0 306 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
michael@0 307 return true;
michael@0 308 }
michael@0 309
michael@0 310 return false;
michael@0 311 }
michael@0 312
michael@0 313 bool checkCharacter(int testChar, unsigned negativeInputOffset)
michael@0 314 {
michael@0 315 return testChar == input.readChecked(negativeInputOffset);
michael@0 316 }
michael@0 317
michael@0 318 bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
michael@0 319 {
michael@0 320 int ch = input.readChecked(negativeInputOffset);
michael@0 321 return (loChar == ch) || (hiChar == ch);
michael@0 322 }
michael@0 323
michael@0 324 bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
michael@0 325 {
michael@0 326 bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
michael@0 327 return invert ? !match : match;
michael@0 328 }
michael@0 329
michael@0 330 bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
michael@0 331 {
michael@0 332 unsigned matchSize = (unsigned)(matchEnd - matchBegin);
michael@0 333
michael@0 334 if (!input.checkInput(matchSize))
michael@0 335 return false;
michael@0 336
michael@0 337 if (pattern->m_ignoreCase) {
michael@0 338 for (unsigned i = 0; i < matchSize; ++i) {
michael@0 339 int oldCh = input.reread(matchBegin + i);
michael@0 340 int ch = input.readChecked(negativeInputOffset + matchSize - i);
michael@0 341
michael@0 342 if (oldCh == ch)
michael@0 343 continue;
michael@0 344
michael@0 345 // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that
michael@0 346 // unicode values are never allowed to match against ascii ones.
michael@0 347 if (isASCII(oldCh) || isASCII(ch)) {
michael@0 348 if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
michael@0 349 continue;
michael@0 350 } else if (areCanonicallyEquivalent(oldCh, ch))
michael@0 351 continue;
michael@0 352
michael@0 353 input.uncheckInput(matchSize);
michael@0 354 return false;
michael@0 355 }
michael@0 356 } else {
michael@0 357 for (unsigned i = 0; i < matchSize; ++i) {
michael@0 358 if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) {
michael@0 359 input.uncheckInput(matchSize);
michael@0 360 return false;
michael@0 361 }
michael@0 362 }
michael@0 363 }
michael@0 364
michael@0 365 return true;
michael@0 366 }
michael@0 367
michael@0 368 bool matchAssertionBOL(ByteTerm& term)
michael@0 369 {
michael@0 370 return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
michael@0 371 }
michael@0 372
michael@0 373 bool matchAssertionEOL(ByteTerm& term)
michael@0 374 {
michael@0 375 if (term.inputPosition)
michael@0 376 return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
michael@0 377
michael@0 378 return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
michael@0 379 }
michael@0 380
michael@0 381 bool matchAssertionWordBoundary(ByteTerm& term)
michael@0 382 {
michael@0 383 bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1));
michael@0 384 bool readIsWordchar;
michael@0 385 if (term.inputPosition)
michael@0 386 readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
michael@0 387 else
michael@0 388 readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
michael@0 389
michael@0 390 bool wordBoundary = prevIsWordchar != readIsWordchar;
michael@0 391 return term.invert() ? !wordBoundary : wordBoundary;
michael@0 392 }
michael@0 393
michael@0 394 bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
michael@0 395 {
michael@0 396 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
michael@0 397
michael@0 398 switch (term.atom.quantityType) {
michael@0 399 case QuantifierFixedCount:
michael@0 400 break;
michael@0 401
michael@0 402 case QuantifierGreedy:
michael@0 403 if (backTrack->matchAmount) {
michael@0 404 --backTrack->matchAmount;
michael@0 405 input.uncheckInput(1);
michael@0 406 return true;
michael@0 407 }
michael@0 408 break;
michael@0 409
michael@0 410 case QuantifierNonGreedy:
michael@0 411 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
michael@0 412 ++backTrack->matchAmount;
michael@0 413 if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))
michael@0 414 return true;
michael@0 415 }
michael@0 416 input.uncheckInput(backTrack->matchAmount);
michael@0 417 break;
michael@0 418 }
michael@0 419
michael@0 420 return false;
michael@0 421 }
michael@0 422
michael@0 423 bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
michael@0 424 {
michael@0 425 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
michael@0 426
michael@0 427 switch (term.atom.quantityType) {
michael@0 428 case QuantifierFixedCount:
michael@0 429 break;
michael@0 430
michael@0 431 case QuantifierGreedy:
michael@0 432 if (backTrack->matchAmount) {
michael@0 433 --backTrack->matchAmount;
michael@0 434 input.uncheckInput(1);
michael@0 435 return true;
michael@0 436 }
michael@0 437 break;
michael@0 438
michael@0 439 case QuantifierNonGreedy:
michael@0 440 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
michael@0 441 ++backTrack->matchAmount;
michael@0 442 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))
michael@0 443 return true;
michael@0 444 }
michael@0 445 input.uncheckInput(backTrack->matchAmount);
michael@0 446 break;
michael@0 447 }
michael@0 448
michael@0 449 return false;
michael@0 450 }
michael@0 451
michael@0 452 bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
michael@0 453 {
michael@0 454 ASSERT(term.type == ByteTerm::TypeCharacterClass);
michael@0 455 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
michael@0 456
michael@0 457 switch (term.atom.quantityType) {
michael@0 458 case QuantifierFixedCount: {
michael@0 459 for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
michael@0 460 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
michael@0 461 return false;
michael@0 462 }
michael@0 463 return true;
michael@0 464 }
michael@0 465
michael@0 466 case QuantifierGreedy: {
michael@0 467 unsigned matchAmount = 0;
michael@0 468 while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
michael@0 469 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
michael@0 470 input.uncheckInput(1);
michael@0 471 break;
michael@0 472 }
michael@0 473 ++matchAmount;
michael@0 474 }
michael@0 475 backTrack->matchAmount = matchAmount;
michael@0 476
michael@0 477 return true;
michael@0 478 }
michael@0 479
michael@0 480 case QuantifierNonGreedy:
michael@0 481 backTrack->matchAmount = 0;
michael@0 482 return true;
michael@0 483 }
michael@0 484
michael@0 485 ASSERT_NOT_REACHED();
michael@0 486 return false;
michael@0 487 }
michael@0 488
michael@0 489 bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
michael@0 490 {
michael@0 491 ASSERT(term.type == ByteTerm::TypeCharacterClass);
michael@0 492 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
michael@0 493
michael@0 494 switch (term.atom.quantityType) {
michael@0 495 case QuantifierFixedCount:
michael@0 496 break;
michael@0 497
michael@0 498 case QuantifierGreedy:
michael@0 499 if (backTrack->matchAmount) {
michael@0 500 --backTrack->matchAmount;
michael@0 501 input.uncheckInput(1);
michael@0 502 return true;
michael@0 503 }
michael@0 504 break;
michael@0 505
michael@0 506 case QuantifierNonGreedy:
michael@0 507 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
michael@0 508 ++backTrack->matchAmount;
michael@0 509 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))
michael@0 510 return true;
michael@0 511 }
michael@0 512 input.uncheckInput(backTrack->matchAmount);
michael@0 513 break;
michael@0 514 }
michael@0 515
michael@0 516 return false;
michael@0 517 }
michael@0 518
michael@0 519 bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
michael@0 520 {
michael@0 521 ASSERT(term.type == ByteTerm::TypeBackReference);
michael@0 522 BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
michael@0 523
michael@0 524 unsigned matchBegin = output[(term.atom.subpatternId << 1)];
michael@0 525 unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
michael@0 526
michael@0 527 // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
michael@0 528 // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
michael@0 529 // Eg.: /(a\1)/
michael@0 530 if (matchEnd == offsetNoMatch)
michael@0 531 return true;
michael@0 532
michael@0 533 if (matchBegin == offsetNoMatch)
michael@0 534 return true;
michael@0 535
michael@0 536 ASSERT(matchBegin <= matchEnd);
michael@0 537
michael@0 538 if (matchBegin == matchEnd)
michael@0 539 return true;
michael@0 540
michael@0 541 switch (term.atom.quantityType) {
michael@0 542 case QuantifierFixedCount: {
michael@0 543 backTrack->begin = input.getPos();
michael@0 544 for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
michael@0 545 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
michael@0 546 input.setPos(backTrack->begin);
michael@0 547 return false;
michael@0 548 }
michael@0 549 }
michael@0 550 return true;
michael@0 551 }
michael@0 552
michael@0 553 case QuantifierGreedy: {
michael@0 554 unsigned matchAmount = 0;
michael@0 555 while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
michael@0 556 ++matchAmount;
michael@0 557 backTrack->matchAmount = matchAmount;
michael@0 558 return true;
michael@0 559 }
michael@0 560
michael@0 561 case QuantifierNonGreedy:
michael@0 562 backTrack->begin = input.getPos();
michael@0 563 backTrack->matchAmount = 0;
michael@0 564 return true;
michael@0 565 }
michael@0 566
michael@0 567 ASSERT_NOT_REACHED();
michael@0 568 return false;
michael@0 569 }
michael@0 570
michael@0 571 bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
michael@0 572 {
michael@0 573 ASSERT(term.type == ByteTerm::TypeBackReference);
michael@0 574 BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
michael@0 575
michael@0 576 unsigned matchBegin = output[(term.atom.subpatternId << 1)];
michael@0 577 unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
michael@0 578
michael@0 579 if (matchBegin == offsetNoMatch)
michael@0 580 return false;
michael@0 581
michael@0 582 ASSERT(matchBegin <= matchEnd);
michael@0 583
michael@0 584 if (matchBegin == matchEnd)
michael@0 585 return false;
michael@0 586
michael@0 587 switch (term.atom.quantityType) {
michael@0 588 case QuantifierFixedCount:
michael@0 589 // for quantityCount == 1, could rewind.
michael@0 590 input.setPos(backTrack->begin);
michael@0 591 break;
michael@0 592
michael@0 593 case QuantifierGreedy:
michael@0 594 if (backTrack->matchAmount) {
michael@0 595 --backTrack->matchAmount;
michael@0 596 input.rewind(matchEnd - matchBegin);
michael@0 597 return true;
michael@0 598 }
michael@0 599 break;
michael@0 600
michael@0 601 case QuantifierNonGreedy:
michael@0 602 if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
michael@0 603 ++backTrack->matchAmount;
michael@0 604 return true;
michael@0 605 }
michael@0 606 input.setPos(backTrack->begin);
michael@0 607 break;
michael@0 608 }
michael@0 609
michael@0 610 return false;
michael@0 611 }
michael@0 612
michael@0 613 void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
michael@0 614 {
michael@0 615 if (term.capture()) {
michael@0 616 unsigned subpatternId = term.atom.subpatternId;
michael@0 617 output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
michael@0 618 output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
michael@0 619 }
michael@0 620 }
michael@0 621 void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
michael@0 622 {
michael@0 623 unsigned firstSubpatternId = term.atom.subpatternId;
michael@0 624 unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
michael@0 625 context->restoreOutput(output, firstSubpatternId, count);
michael@0 626 }
michael@0 627 JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
michael@0 628 {
michael@0 629 while (backTrack->matchAmount) {
michael@0 630 ParenthesesDisjunctionContext* context = backTrack->lastContext;
michael@0 631
michael@0 632 JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
michael@0 633 if (result == JSRegExpMatch)
michael@0 634 return JSRegExpMatch;
michael@0 635
michael@0 636 resetMatches(term, context);
michael@0 637 popParenthesesDisjunctionContext(backTrack);
michael@0 638 freeParenthesesDisjunctionContext(context);
michael@0 639
michael@0 640 if (result != JSRegExpNoMatch)
michael@0 641 return result;
michael@0 642 }
michael@0 643
michael@0 644 return JSRegExpNoMatch;
michael@0 645 }
michael@0 646
michael@0 647 bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
michael@0 648 {
michael@0 649 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
michael@0 650 ASSERT(term.atom.quantityCount == 1);
michael@0 651
michael@0 652 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
michael@0 653
michael@0 654 switch (term.atom.quantityType) {
michael@0 655 case QuantifierGreedy: {
michael@0 656 // set this speculatively; if we get to the parens end this will be true.
michael@0 657 backTrack->begin = input.getPos();
michael@0 658 break;
michael@0 659 }
michael@0 660 case QuantifierNonGreedy: {
michael@0 661 backTrack->begin = notFound;
michael@0 662 context->term += term.atom.parenthesesWidth;
michael@0 663 return true;
michael@0 664 }
michael@0 665 case QuantifierFixedCount:
michael@0 666 break;
michael@0 667 }
michael@0 668
michael@0 669 if (term.capture()) {
michael@0 670 unsigned subpatternId = term.atom.subpatternId;
michael@0 671 output[(subpatternId << 1)] = input.getPos() - term.inputPosition;
michael@0 672 }
michael@0 673
michael@0 674 return true;
michael@0 675 }
michael@0 676
michael@0 677 bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
michael@0 678 {
michael@0 679 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
michael@0 680 ASSERT(term.atom.quantityCount == 1);
michael@0 681
michael@0 682 if (term.capture()) {
michael@0 683 unsigned subpatternId = term.atom.subpatternId;
michael@0 684 output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
michael@0 685 }
michael@0 686
michael@0 687 if (term.atom.quantityType == QuantifierFixedCount)
michael@0 688 return true;
michael@0 689
michael@0 690 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
michael@0 691 return backTrack->begin != input.getPos();
michael@0 692 }
michael@0 693
michael@0 694 bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
michael@0 695 {
michael@0 696 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
michael@0 697 ASSERT(term.atom.quantityCount == 1);
michael@0 698
michael@0 699 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
michael@0 700
michael@0 701 if (term.capture()) {
michael@0 702 unsigned subpatternId = term.atom.subpatternId;
michael@0 703 output[(subpatternId << 1)] = offsetNoMatch;
michael@0 704 output[(subpatternId << 1) + 1] = offsetNoMatch;
michael@0 705 }
michael@0 706
michael@0 707 switch (term.atom.quantityType) {
michael@0 708 case QuantifierGreedy:
michael@0 709 // if we backtrack to this point, there is another chance - try matching nothing.
michael@0 710 ASSERT(backTrack->begin != notFound);
michael@0 711 backTrack->begin = notFound;
michael@0 712 context->term += term.atom.parenthesesWidth;
michael@0 713 return true;
michael@0 714 case QuantifierNonGreedy:
michael@0 715 ASSERT(backTrack->begin != notFound);
michael@0 716 case QuantifierFixedCount:
michael@0 717 break;
michael@0 718 }
michael@0 719
michael@0 720 return false;
michael@0 721 }
michael@0 722
michael@0 723 bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
michael@0 724 {
michael@0 725 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
michael@0 726 ASSERT(term.atom.quantityCount == 1);
michael@0 727
michael@0 728 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
michael@0 729
michael@0 730 switch (term.atom.quantityType) {
michael@0 731 case QuantifierGreedy:
michael@0 732 if (backTrack->begin == notFound) {
michael@0 733 context->term -= term.atom.parenthesesWidth;
michael@0 734 return false;
michael@0 735 }
michael@0 736 case QuantifierNonGreedy:
michael@0 737 if (backTrack->begin == notFound) {
michael@0 738 backTrack->begin = input.getPos();
michael@0 739 if (term.capture()) {
michael@0 740 // Technically this access to inputPosition should be accessing the begin term's
michael@0 741 // inputPosition, but for repeats other than fixed these values should be
michael@0 742 // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
michael@0 743 ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
michael@0 744
michael@0 745 // Disabled, see bug 808478
michael@0 746 #if 0
michael@0 747 ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
michael@0 748 #endif
michael@0 749 unsigned subpatternId = term.atom.subpatternId;
michael@0 750 output[subpatternId << 1] = input.getPos() + term.inputPosition;
michael@0 751 }
michael@0 752 context->term -= term.atom.parenthesesWidth;
michael@0 753 return true;
michael@0 754 }
michael@0 755 case QuantifierFixedCount:
michael@0 756 break;
michael@0 757 }
michael@0 758
michael@0 759 return false;
michael@0 760 }
michael@0 761
michael@0 762 bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
michael@0 763 {
michael@0 764 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
michael@0 765 ASSERT(term.atom.quantityType == QuantifierGreedy);
michael@0 766 ASSERT(term.atom.quantityCount == quantifyInfinite);
michael@0 767 ASSERT(!term.capture());
michael@0 768
michael@0 769 BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
michael@0 770 backTrack->begin = input.getPos();
michael@0 771 return true;
michael@0 772 }
michael@0 773
michael@0 774 bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
michael@0 775 {
michael@0 776 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
michael@0 777
michael@0 778 BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
michael@0 779 // Empty match is a failed match.
michael@0 780 if (backTrack->begin == input.getPos())
michael@0 781 return false;
michael@0 782
michael@0 783 // Successful match! Okay, what's next? - loop around and try to match moar!
michael@0 784 context->term -= (term.atom.parenthesesWidth + 1);
michael@0 785 return true;
michael@0 786 }
michael@0 787
michael@0 788 bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
michael@0 789 {
michael@0 790 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
michael@0 791 ASSERT(term.atom.quantityType == QuantifierGreedy);
michael@0 792 ASSERT(term.atom.quantityCount == quantifyInfinite);
michael@0 793 ASSERT(!term.capture());
michael@0 794
michael@0 795 // If we backtrack to this point, we have failed to match this iteration of the parens.
michael@0 796 // Since this is greedy / zero minimum a failed is also accepted as a match!
michael@0 797 context->term += term.atom.parenthesesWidth;
michael@0 798 return true;
michael@0 799 }
michael@0 800
michael@0 801 bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
michael@0 802 {
michael@0 803 // 'Terminal' parentheses are at the end of the regex, and as such a match past end
michael@0 804 // should always be returned as a successful match - we should never backtrack to here.
michael@0 805 ASSERT_NOT_REACHED();
michael@0 806 return false;
michael@0 807 }
michael@0 808
michael@0 809 bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
michael@0 810 {
michael@0 811 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
michael@0 812 ASSERT(term.atom.quantityCount == 1);
michael@0 813
michael@0 814 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
michael@0 815
michael@0 816 backTrack->begin = input.getPos();
michael@0 817 return true;
michael@0 818 }
michael@0 819
michael@0 820 bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
michael@0 821 {
michael@0 822 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
michael@0 823 ASSERT(term.atom.quantityCount == 1);
michael@0 824
michael@0 825 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
michael@0 826
michael@0 827 input.setPos(backTrack->begin);
michael@0 828
michael@0 829 // We've reached the end of the parens; if they are inverted, this is failure.
michael@0 830 if (term.invert()) {
michael@0 831 context->term -= term.atom.parenthesesWidth;
michael@0 832 return false;
michael@0 833 }
michael@0 834
michael@0 835 return true;
michael@0 836 }
michael@0 837
michael@0 838 bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
michael@0 839 {
michael@0 840 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
michael@0 841 ASSERT(term.atom.quantityCount == 1);
michael@0 842
michael@0 843 // We've failed to match parens; if they are inverted, this is win!
michael@0 844 if (term.invert()) {
michael@0 845 context->term += term.atom.parenthesesWidth;
michael@0 846 return true;
michael@0 847 }
michael@0 848
michael@0 849 return false;
michael@0 850 }
michael@0 851
michael@0 852 bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
michael@0 853 {
michael@0 854 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
michael@0 855 ASSERT(term.atom.quantityCount == 1);
michael@0 856
michael@0 857 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
michael@0 858
michael@0 859 input.setPos(backTrack->begin);
michael@0 860
michael@0 861 context->term -= term.atom.parenthesesWidth;
michael@0 862 return false;
michael@0 863 }
michael@0 864
michael@0 865 JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
michael@0 866 {
michael@0 867 ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
michael@0 868
michael@0 869 BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
michael@0 870 ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
michael@0 871
michael@0 872 backTrack->matchAmount = 0;
michael@0 873 backTrack->lastContext = 0;
michael@0 874
michael@0 875 switch (term.atom.quantityType) {
michael@0 876 case QuantifierFixedCount: {
michael@0 877 // While we haven't yet reached our fixed limit,
michael@0 878 while (backTrack->matchAmount < term.atom.quantityCount) {
michael@0 879 // Try to do a match, and it it succeeds, add it to the list.
michael@0 880 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
michael@0 881 JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
michael@0 882 if (result == JSRegExpMatch)
michael@0 883 appendParenthesesDisjunctionContext(backTrack, context);
michael@0 884 else {
michael@0 885 // The match failed; try to find an alternate point to carry on from.
michael@0 886 resetMatches(term, context);
michael@0 887 freeParenthesesDisjunctionContext(context);
michael@0 888
michael@0 889 if (result != JSRegExpNoMatch)
michael@0 890 return result;
michael@0 891 JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
michael@0 892 if (backtrackResult != JSRegExpMatch)
michael@0 893 return backtrackResult;
michael@0 894 }
michael@0 895 }
michael@0 896
michael@0 897 ASSERT(backTrack->matchAmount == term.atom.quantityCount);
michael@0 898 ParenthesesDisjunctionContext* context = backTrack->lastContext;
michael@0 899 recordParenthesesMatch(term, context);
michael@0 900 return JSRegExpMatch;
michael@0 901 }
michael@0 902
michael@0 903 case QuantifierGreedy: {
michael@0 904 while (backTrack->matchAmount < term.atom.quantityCount) {
michael@0 905 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
michael@0 906 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
michael@0 907 if (result == JSRegExpMatch)
michael@0 908 appendParenthesesDisjunctionContext(backTrack, context);
michael@0 909 else {
michael@0 910 resetMatches(term, context);
michael@0 911 freeParenthesesDisjunctionContext(context);
michael@0 912
michael@0 913 if (result != JSRegExpNoMatch)
michael@0 914 return result;
michael@0 915
michael@0 916 break;
michael@0 917 }
michael@0 918 }
michael@0 919
michael@0 920 if (backTrack->matchAmount) {
michael@0 921 ParenthesesDisjunctionContext* context = backTrack->lastContext;
michael@0 922 recordParenthesesMatch(term, context);
michael@0 923 }
michael@0 924 return JSRegExpMatch;
michael@0 925 }
michael@0 926
michael@0 927 case QuantifierNonGreedy:
michael@0 928 return JSRegExpMatch;
michael@0 929 }
michael@0 930
michael@0 931 ASSERT_NOT_REACHED();
michael@0 932 return JSRegExpErrorNoMatch;
michael@0 933 }
michael@0 934
michael@0 935 // Rules for backtracking differ depending on whether this is greedy or non-greedy.
michael@0 936 //
michael@0 937 // Greedy matches never should try just adding more - you should already have done
michael@0 938 // the 'more' cases. Always backtrack, at least a leetle bit. However cases where
michael@0 939 // you backtrack an item off the list needs checking, since we'll never have matched
michael@0 940 // the one less case. Tracking forwards, still add as much as possible.
michael@0 941 //
michael@0 942 // Non-greedy, we've already done the one less case, so don't match on popping.
michael@0 943 // We haven't done the one more case, so always try to add that.
michael@0 944 //
michael@0 945 JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
michael@0 946 {
michael@0 947 ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
michael@0 948
michael@0 949 BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
michael@0 950 ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
michael@0 951
michael@0 952 switch (term.atom.quantityType) {
michael@0 953 case QuantifierFixedCount: {
michael@0 954 ASSERT(backTrack->matchAmount == term.atom.quantityCount);
michael@0 955
michael@0 956 ParenthesesDisjunctionContext* context = 0;
michael@0 957 JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
michael@0 958
michael@0 959 if (result != JSRegExpMatch)
michael@0 960 return result;
michael@0 961
michael@0 962 // While we haven't yet reached our fixed limit,
michael@0 963 while (backTrack->matchAmount < term.atom.quantityCount) {
michael@0 964 // Try to do a match, and it it succeeds, add it to the list.
michael@0 965 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
michael@0 966 result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
michael@0 967
michael@0 968 if (result == JSRegExpMatch)
michael@0 969 appendParenthesesDisjunctionContext(backTrack, context);
michael@0 970 else {
michael@0 971 // The match failed; try to find an alternate point to carry on from.
michael@0 972 resetMatches(term, context);
michael@0 973 freeParenthesesDisjunctionContext(context);
michael@0 974
michael@0 975 if (result != JSRegExpNoMatch)
michael@0 976 return result;
michael@0 977 JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
michael@0 978 if (backtrackResult != JSRegExpMatch)
michael@0 979 return backtrackResult;
michael@0 980 }
michael@0 981 }
michael@0 982
michael@0 983 ASSERT(backTrack->matchAmount == term.atom.quantityCount);
michael@0 984 context = backTrack->lastContext;
michael@0 985 recordParenthesesMatch(term, context);
michael@0 986 return JSRegExpMatch;
michael@0 987 }
michael@0 988
michael@0 989 case QuantifierGreedy: {
michael@0 990 if (!backTrack->matchAmount)
michael@0 991 return JSRegExpNoMatch;
michael@0 992
michael@0 993 ParenthesesDisjunctionContext* context = backTrack->lastContext;
michael@0 994 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
michael@0 995 if (result == JSRegExpMatch) {
michael@0 996 while (backTrack->matchAmount < term.atom.quantityCount) {
michael@0 997 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
michael@0 998 JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
michael@0 999 if (parenthesesResult == JSRegExpMatch)
michael@0 1000 appendParenthesesDisjunctionContext(backTrack, context);
michael@0 1001 else {
michael@0 1002 resetMatches(term, context);
michael@0 1003 freeParenthesesDisjunctionContext(context);
michael@0 1004
michael@0 1005 if (parenthesesResult != JSRegExpNoMatch)
michael@0 1006 return parenthesesResult;
michael@0 1007
michael@0 1008 break;
michael@0 1009 }
michael@0 1010 }
michael@0 1011 } else {
michael@0 1012 // Avoid a topcrash before it occurs.
michael@0 1013 if (!backTrack->lastContext) {
michael@0 1014 ASSERT(!"Tripped Bug 856796!");
michael@0 1015 return JSRegExpErrorInternal;
michael@0 1016 }
michael@0 1017
michael@0 1018 resetMatches(term, context);
michael@0 1019 popParenthesesDisjunctionContext(backTrack);
michael@0 1020 freeParenthesesDisjunctionContext(context);
michael@0 1021
michael@0 1022 if (result != JSRegExpNoMatch)
michael@0 1023 return result;
michael@0 1024 }
michael@0 1025
michael@0 1026 if (backTrack->matchAmount) {
michael@0 1027 ParenthesesDisjunctionContext* context = backTrack->lastContext;
michael@0 1028 recordParenthesesMatch(term, context);
michael@0 1029 }
michael@0 1030 return JSRegExpMatch;
michael@0 1031 }
michael@0 1032
michael@0 1033 case QuantifierNonGreedy: {
michael@0 1034 // If we've not reached the limit, try to add one more match.
michael@0 1035 if (backTrack->matchAmount < term.atom.quantityCount) {
michael@0 1036 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
michael@0 1037 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
michael@0 1038 if (result == JSRegExpMatch) {
michael@0 1039 appendParenthesesDisjunctionContext(backTrack, context);
michael@0 1040 recordParenthesesMatch(term, context);
michael@0 1041 return JSRegExpMatch;
michael@0 1042 }
michael@0 1043
michael@0 1044 resetMatches(term, context);
michael@0 1045 freeParenthesesDisjunctionContext(context);
michael@0 1046
michael@0 1047 if (result != JSRegExpNoMatch)
michael@0 1048 return result;
michael@0 1049 }
michael@0 1050
michael@0 1051 // Nope - okay backtrack looking for an alternative.
michael@0 1052 while (backTrack->matchAmount) {
michael@0 1053 ParenthesesDisjunctionContext* context = backTrack->lastContext;
michael@0 1054 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
michael@0 1055 if (result == JSRegExpMatch) {
michael@0 1056 // successful backtrack! we're back in the game!
michael@0 1057 if (backTrack->matchAmount) {
michael@0 1058 context = backTrack->lastContext;
michael@0 1059 recordParenthesesMatch(term, context);
michael@0 1060 }
michael@0 1061 return JSRegExpMatch;
michael@0 1062 }
michael@0 1063
michael@0 1064 // Avoid a topcrash before it occurs.
michael@0 1065 if (!backTrack->lastContext) {
michael@0 1066 ASSERT(!"Tripped Bug 856796!");
michael@0 1067 return JSRegExpErrorInternal;
michael@0 1068 }
michael@0 1069
michael@0 1070 // pop a match off the stack
michael@0 1071 resetMatches(term, context);
michael@0 1072 popParenthesesDisjunctionContext(backTrack);
michael@0 1073 freeParenthesesDisjunctionContext(context);
michael@0 1074
michael@0 1075 if (result != JSRegExpNoMatch)
michael@0 1076 return result;
michael@0 1077 }
michael@0 1078
michael@0 1079 return JSRegExpNoMatch;
michael@0 1080 }
michael@0 1081 }
michael@0 1082
michael@0 1083 ASSERT_NOT_REACHED();
michael@0 1084 return JSRegExpErrorNoMatch;
michael@0 1085 }
michael@0 1086
michael@0 1087 bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
michael@0 1088 {
michael@0 1089 UNUSED_PARAM(term);
michael@0 1090 unsigned matchBegin = context->matchBegin;
michael@0 1091
michael@0 1092 if (matchBegin) {
michael@0 1093 for (matchBegin--; true; matchBegin--) {
michael@0 1094 if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
michael@0 1095 ++matchBegin;
michael@0 1096 break;
michael@0 1097 }
michael@0 1098
michael@0 1099 if (!matchBegin)
michael@0 1100 break;
michael@0 1101 }
michael@0 1102 }
michael@0 1103
michael@0 1104 unsigned matchEnd = input.getPos();
michael@0 1105
michael@0 1106 for (; (matchEnd != input.end())
michael@0 1107 && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
michael@0 1108
michael@0 1109 if (((matchBegin && term.anchors.m_bol)
michael@0 1110 || ((matchEnd != input.end()) && term.anchors.m_eol))
michael@0 1111 && !pattern->m_multiline)
michael@0 1112 return false;
michael@0 1113
michael@0 1114 context->matchBegin = matchBegin;
michael@0 1115 context->matchEnd = matchEnd;
michael@0 1116 return true;
michael@0 1117 }
michael@0 1118
michael@0 1119 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
michael@0 1120 #define BACKTRACK() { --context->term; goto backtrack; }
michael@0 1121 #define currentTerm() (disjunction->terms[context->term])
michael@0 1122 JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
michael@0 1123 {
michael@0 1124 if (!--remainingMatchCount)
michael@0 1125 return JSRegExpErrorHitLimit;
michael@0 1126
michael@0 1127 if (btrack)
michael@0 1128 BACKTRACK();
michael@0 1129
michael@0 1130 context->matchBegin = input.getPos();
michael@0 1131 context->term = 0;
michael@0 1132
michael@0 1133 matchAgain:
michael@0 1134 ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
michael@0 1135
michael@0 1136 // Prevent jank resulting from getting stuck in Yarr for a long time.
michael@0 1137 if (!CheckForInterrupt(this->cx))
michael@0 1138 return JSRegExpErrorInternal;
michael@0 1139
michael@0 1140 switch (currentTerm().type) {
michael@0 1141 case ByteTerm::TypeSubpatternBegin:
michael@0 1142 MATCH_NEXT();
michael@0 1143 case ByteTerm::TypeSubpatternEnd:
michael@0 1144 context->matchEnd = input.getPos();
michael@0 1145 return JSRegExpMatch;
michael@0 1146
michael@0 1147 case ByteTerm::TypeBodyAlternativeBegin:
michael@0 1148 MATCH_NEXT();
michael@0 1149 case ByteTerm::TypeBodyAlternativeDisjunction:
michael@0 1150 case ByteTerm::TypeBodyAlternativeEnd:
michael@0 1151 context->matchEnd = input.getPos();
michael@0 1152 return JSRegExpMatch;
michael@0 1153
michael@0 1154 case ByteTerm::TypeAlternativeBegin:
michael@0 1155 MATCH_NEXT();
michael@0 1156 case ByteTerm::TypeAlternativeDisjunction:
michael@0 1157 case ByteTerm::TypeAlternativeEnd: {
michael@0 1158 int offset = currentTerm().alternative.end;
michael@0 1159 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
michael@0 1160 backTrack->offset = offset;
michael@0 1161 context->term += offset;
michael@0 1162 MATCH_NEXT();
michael@0 1163 }
michael@0 1164
michael@0 1165 case ByteTerm::TypeAssertionBOL:
michael@0 1166 if (matchAssertionBOL(currentTerm()))
michael@0 1167 MATCH_NEXT();
michael@0 1168 BACKTRACK();
michael@0 1169 case ByteTerm::TypeAssertionEOL:
michael@0 1170 if (matchAssertionEOL(currentTerm()))
michael@0 1171 MATCH_NEXT();
michael@0 1172 BACKTRACK();
michael@0 1173 case ByteTerm::TypeAssertionWordBoundary:
michael@0 1174 if (matchAssertionWordBoundary(currentTerm()))
michael@0 1175 MATCH_NEXT();
michael@0 1176 BACKTRACK();
michael@0 1177
michael@0 1178 case ByteTerm::TypePatternCharacterOnce:
michael@0 1179 case ByteTerm::TypePatternCharacterFixed: {
michael@0 1180 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
michael@0 1181 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount))
michael@0 1182 BACKTRACK();
michael@0 1183 }
michael@0 1184 MATCH_NEXT();
michael@0 1185 }
michael@0 1186 case ByteTerm::TypePatternCharacterGreedy: {
michael@0 1187 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
michael@0 1188 unsigned matchAmount = 0;
michael@0 1189 while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
michael@0 1190 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {
michael@0 1191 input.uncheckInput(1);
michael@0 1192 break;
michael@0 1193 }
michael@0 1194 ++matchAmount;
michael@0 1195 }
michael@0 1196 backTrack->matchAmount = matchAmount;
michael@0 1197
michael@0 1198 MATCH_NEXT();
michael@0 1199 }
michael@0 1200 case ByteTerm::TypePatternCharacterNonGreedy: {
michael@0 1201 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
michael@0 1202 backTrack->matchAmount = 0;
michael@0 1203 MATCH_NEXT();
michael@0 1204 }
michael@0 1205
michael@0 1206 case ByteTerm::TypePatternCasedCharacterOnce:
michael@0 1207 case ByteTerm::TypePatternCasedCharacterFixed: {
michael@0 1208 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
michael@0 1209 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
michael@0 1210 BACKTRACK();
michael@0 1211 }
michael@0 1212 MATCH_NEXT();
michael@0 1213 }
michael@0 1214 case ByteTerm::TypePatternCasedCharacterGreedy: {
michael@0 1215 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
michael@0 1216 unsigned matchAmount = 0;
michael@0 1217 while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
michael@0 1218 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {
michael@0 1219 input.uncheckInput(1);
michael@0 1220 break;
michael@0 1221 }
michael@0 1222 ++matchAmount;
michael@0 1223 }
michael@0 1224 backTrack->matchAmount = matchAmount;
michael@0 1225
michael@0 1226 MATCH_NEXT();
michael@0 1227 }
michael@0 1228 case ByteTerm::TypePatternCasedCharacterNonGreedy: {
michael@0 1229 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
michael@0 1230 backTrack->matchAmount = 0;
michael@0 1231 MATCH_NEXT();
michael@0 1232 }
michael@0 1233
michael@0 1234 case ByteTerm::TypeCharacterClass:
michael@0 1235 if (matchCharacterClass(currentTerm(), context))
michael@0 1236 MATCH_NEXT();
michael@0 1237 BACKTRACK();
michael@0 1238 case ByteTerm::TypeBackReference:
michael@0 1239 if (matchBackReference(currentTerm(), context))
michael@0 1240 MATCH_NEXT();
michael@0 1241 BACKTRACK();
michael@0 1242 case ByteTerm::TypeParenthesesSubpattern: {
michael@0 1243 JSRegExpResult result = matchParentheses(currentTerm(), context);
michael@0 1244
michael@0 1245 if (result == JSRegExpMatch) {
michael@0 1246 MATCH_NEXT();
michael@0 1247 } else if (result != JSRegExpNoMatch)
michael@0 1248 return result;
michael@0 1249
michael@0 1250 BACKTRACK();
michael@0 1251 }
michael@0 1252 case ByteTerm::TypeParenthesesSubpatternOnceBegin:
michael@0 1253 if (matchParenthesesOnceBegin(currentTerm(), context))
michael@0 1254 MATCH_NEXT();
michael@0 1255 BACKTRACK();
michael@0 1256 case ByteTerm::TypeParenthesesSubpatternOnceEnd:
michael@0 1257 if (matchParenthesesOnceEnd(currentTerm(), context))
michael@0 1258 MATCH_NEXT();
michael@0 1259 BACKTRACK();
michael@0 1260 case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
michael@0 1261 if (matchParenthesesTerminalBegin(currentTerm(), context))
michael@0 1262 MATCH_NEXT();
michael@0 1263 BACKTRACK();
michael@0 1264 case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
michael@0 1265 if (matchParenthesesTerminalEnd(currentTerm(), context))
michael@0 1266 MATCH_NEXT();
michael@0 1267 BACKTRACK();
michael@0 1268 case ByteTerm::TypeParentheticalAssertionBegin:
michael@0 1269 if (matchParentheticalAssertionBegin(currentTerm(), context))
michael@0 1270 MATCH_NEXT();
michael@0 1271 BACKTRACK();
michael@0 1272 case ByteTerm::TypeParentheticalAssertionEnd:
michael@0 1273 if (matchParentheticalAssertionEnd(currentTerm(), context))
michael@0 1274 MATCH_NEXT();
michael@0 1275 BACKTRACK();
michael@0 1276
michael@0 1277 case ByteTerm::TypeCheckInput:
michael@0 1278 if (input.checkInput(currentTerm().checkInputCount))
michael@0 1279 MATCH_NEXT();
michael@0 1280 BACKTRACK();
michael@0 1281
michael@0 1282 case ByteTerm::TypeUncheckInput:
michael@0 1283 input.uncheckInput(currentTerm().checkInputCount);
michael@0 1284 MATCH_NEXT();
michael@0 1285
michael@0 1286 case ByteTerm::TypeDotStarEnclosure:
michael@0 1287 if (matchDotStarEnclosure(currentTerm(), context))
michael@0 1288 return JSRegExpMatch;
michael@0 1289 BACKTRACK();
michael@0 1290 }
michael@0 1291
michael@0 1292 // We should never fall-through to here.
michael@0 1293 ASSERT_NOT_REACHED();
michael@0 1294
michael@0 1295 backtrack:
michael@0 1296 ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
michael@0 1297
michael@0 1298 // Prevent jank resulting from getting stuck in Yarr for a long time.
michael@0 1299 if (!CheckForInterrupt(this->cx))
michael@0 1300 return JSRegExpErrorInternal;
michael@0 1301
michael@0 1302 switch (currentTerm().type) {
michael@0 1303 case ByteTerm::TypeSubpatternBegin:
michael@0 1304 return JSRegExpNoMatch;
michael@0 1305 case ByteTerm::TypeSubpatternEnd:
michael@0 1306 ASSERT_NOT_REACHED();
michael@0 1307
michael@0 1308 case ByteTerm::TypeBodyAlternativeBegin:
michael@0 1309 case ByteTerm::TypeBodyAlternativeDisjunction: {
michael@0 1310 int offset = currentTerm().alternative.next;
michael@0 1311 context->term += offset;
michael@0 1312 if (offset > 0)
michael@0 1313 MATCH_NEXT();
michael@0 1314
michael@0 1315 if (input.atEnd())
michael@0 1316 return JSRegExpNoMatch;
michael@0 1317
michael@0 1318 input.next();
michael@0 1319
michael@0 1320 context->matchBegin = input.getPos();
michael@0 1321
michael@0 1322 if (currentTerm().alternative.onceThrough)
michael@0 1323 context->term += currentTerm().alternative.next;
michael@0 1324
michael@0 1325 MATCH_NEXT();
michael@0 1326 }
michael@0 1327 case ByteTerm::TypeBodyAlternativeEnd:
michael@0 1328 ASSERT_NOT_REACHED();
michael@0 1329
michael@0 1330 case ByteTerm::TypeAlternativeBegin:
michael@0 1331 case ByteTerm::TypeAlternativeDisjunction: {
michael@0 1332 int offset = currentTerm().alternative.next;
michael@0 1333 context->term += offset;
michael@0 1334 if (offset > 0)
michael@0 1335 MATCH_NEXT();
michael@0 1336 BACKTRACK();
michael@0 1337 }
michael@0 1338 case ByteTerm::TypeAlternativeEnd: {
michael@0 1339 // We should never backtrack back into an alternative of the main body of the regex.
michael@0 1340 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
michael@0 1341 unsigned offset = backTrack->offset;
michael@0 1342 context->term -= offset;
michael@0 1343 BACKTRACK();
michael@0 1344 }
michael@0 1345
michael@0 1346 case ByteTerm::TypeAssertionBOL:
michael@0 1347 case ByteTerm::TypeAssertionEOL:
michael@0 1348 case ByteTerm::TypeAssertionWordBoundary:
michael@0 1349 BACKTRACK();
michael@0 1350
michael@0 1351 case ByteTerm::TypePatternCharacterOnce:
michael@0 1352 case ByteTerm::TypePatternCharacterFixed:
michael@0 1353 case ByteTerm::TypePatternCharacterGreedy:
michael@0 1354 case ByteTerm::TypePatternCharacterNonGreedy:
michael@0 1355 if (backtrackPatternCharacter(currentTerm(), context))
michael@0 1356 MATCH_NEXT();
michael@0 1357 BACKTRACK();
michael@0 1358 case ByteTerm::TypePatternCasedCharacterOnce:
michael@0 1359 case ByteTerm::TypePatternCasedCharacterFixed:
michael@0 1360 case ByteTerm::TypePatternCasedCharacterGreedy:
michael@0 1361 case ByteTerm::TypePatternCasedCharacterNonGreedy:
michael@0 1362 if (backtrackPatternCasedCharacter(currentTerm(), context))
michael@0 1363 MATCH_NEXT();
michael@0 1364 BACKTRACK();
michael@0 1365 case ByteTerm::TypeCharacterClass:
michael@0 1366 if (backtrackCharacterClass(currentTerm(), context))
michael@0 1367 MATCH_NEXT();
michael@0 1368 BACKTRACK();
michael@0 1369 case ByteTerm::TypeBackReference:
michael@0 1370 if (backtrackBackReference(currentTerm(), context))
michael@0 1371 MATCH_NEXT();
michael@0 1372 BACKTRACK();
michael@0 1373 case ByteTerm::TypeParenthesesSubpattern: {
michael@0 1374 JSRegExpResult result = backtrackParentheses(currentTerm(), context);
michael@0 1375
michael@0 1376 if (result == JSRegExpMatch) {
michael@0 1377 MATCH_NEXT();
michael@0 1378 } else if (result != JSRegExpNoMatch)
michael@0 1379 return result;
michael@0 1380
michael@0 1381 BACKTRACK();
michael@0 1382 }
michael@0 1383 case ByteTerm::TypeParenthesesSubpatternOnceBegin:
michael@0 1384 if (backtrackParenthesesOnceBegin(currentTerm(), context))
michael@0 1385 MATCH_NEXT();
michael@0 1386 BACKTRACK();
michael@0 1387 case ByteTerm::TypeParenthesesSubpatternOnceEnd:
michael@0 1388 if (backtrackParenthesesOnceEnd(currentTerm(), context))
michael@0 1389 MATCH_NEXT();
michael@0 1390 BACKTRACK();
michael@0 1391 case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
michael@0 1392 if (backtrackParenthesesTerminalBegin(currentTerm(), context))
michael@0 1393 MATCH_NEXT();
michael@0 1394 BACKTRACK();
michael@0 1395 case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
michael@0 1396 if (backtrackParenthesesTerminalEnd(currentTerm(), context))
michael@0 1397 MATCH_NEXT();
michael@0 1398 BACKTRACK();
michael@0 1399 case ByteTerm::TypeParentheticalAssertionBegin:
michael@0 1400 if (backtrackParentheticalAssertionBegin(currentTerm(), context))
michael@0 1401 MATCH_NEXT();
michael@0 1402 BACKTRACK();
michael@0 1403 case ByteTerm::TypeParentheticalAssertionEnd:
michael@0 1404 if (backtrackParentheticalAssertionEnd(currentTerm(), context))
michael@0 1405 MATCH_NEXT();
michael@0 1406 BACKTRACK();
michael@0 1407
michael@0 1408 case ByteTerm::TypeCheckInput:
michael@0 1409 input.uncheckInput(currentTerm().checkInputCount);
michael@0 1410 BACKTRACK();
michael@0 1411
michael@0 1412 case ByteTerm::TypeUncheckInput:
michael@0 1413 input.checkInput(currentTerm().checkInputCount);
michael@0 1414 BACKTRACK();
michael@0 1415
michael@0 1416 case ByteTerm::TypeDotStarEnclosure:
michael@0 1417 ASSERT_NOT_REACHED();
michael@0 1418 }
michael@0 1419
michael@0 1420 ASSERT_NOT_REACHED();
michael@0 1421 return JSRegExpErrorNoMatch;
michael@0 1422 }
michael@0 1423
michael@0 1424 JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
michael@0 1425 {
michael@0 1426 JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
michael@0 1427
michael@0 1428 if (result == JSRegExpMatch) {
michael@0 1429 while (context->matchBegin == context->matchEnd) {
michael@0 1430 result = matchDisjunction(disjunction, context, true);
michael@0 1431 if (result != JSRegExpMatch)
michael@0 1432 return result;
michael@0 1433 }
michael@0 1434 return JSRegExpMatch;
michael@0 1435 }
michael@0 1436
michael@0 1437 return result;
michael@0 1438 }
michael@0 1439
michael@0 1440 unsigned interpret()
michael@0 1441 {
michael@0 1442 if (!input.isAvailableInput(0))
michael@0 1443 return offsetNoMatch;
michael@0 1444
michael@0 1445 for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)
michael@0 1446 output[i << 1] = offsetNoMatch;
michael@0 1447
michael@0 1448 allocatorPool = pattern->m_allocator->startAllocator();
michael@0 1449 if (!allocatorPool)
michael@0 1450 CRASH();
michael@0 1451
michael@0 1452 DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
michael@0 1453
michael@0 1454 JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
michael@0 1455 if (result == JSRegExpMatch) {
michael@0 1456 output[0] = context->matchBegin;
michael@0 1457 output[1] = context->matchEnd;
michael@0 1458 }
michael@0 1459
michael@0 1460 freeDisjunctionContext(context);
michael@0 1461
michael@0 1462 pattern->m_allocator->stopAllocator();
michael@0 1463
michael@0 1464 ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));
michael@0 1465 return output[0];
michael@0 1466 }
michael@0 1467
michael@0 1468 Interpreter(JSContext *cx, BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
michael@0 1469 : cx(cx)
michael@0 1470 , pattern(pattern)
michael@0 1471 , output(output)
michael@0 1472 , input(input, start, length)
michael@0 1473 , allocatorPool(0)
michael@0 1474 , remainingMatchCount(matchLimit)
michael@0 1475 {
michael@0 1476 }
michael@0 1477
michael@0 1478 private:
michael@0 1479 JSContext *cx;
michael@0 1480 BytecodePattern* pattern;
michael@0 1481 unsigned* output;
michael@0 1482 InputStream input;
michael@0 1483 BumpPointerPool* allocatorPool;
michael@0 1484 unsigned remainingMatchCount;
michael@0 1485 };
michael@0 1486
michael@0 1487
michael@0 1488
michael@0 1489 class ByteCompiler {
michael@0 1490 struct ParenthesesStackEntry {
michael@0 1491 unsigned beginTerm;
michael@0 1492 unsigned savedAlternativeIndex;
michael@0 1493
michael@0 1494 // For js::Vector. Does not create a valid object.
michael@0 1495 ParenthesesStackEntry() { }
michael@0 1496
michael@0 1497 ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
michael@0 1498 : beginTerm(beginTerm)
michael@0 1499 , savedAlternativeIndex(savedAlternativeIndex)
michael@0 1500 {
michael@0 1501 }
michael@0 1502 };
michael@0 1503
michael@0 1504 public:
michael@0 1505 ByteCompiler(YarrPattern& pattern)
michael@0 1506 : m_pattern(pattern)
michael@0 1507 {
michael@0 1508 m_currentAlternativeIndex = 0;
michael@0 1509 }
michael@0 1510
michael@0 1511 PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
michael@0 1512 {
michael@0 1513 regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
michael@0 1514 emitDisjunction(m_pattern.m_body);
michael@0 1515 regexEnd();
michael@0 1516
michael@0 1517 return adoptPtr(newOrCrash<BytecodePattern>(m_bodyDisjunction.release(), m_allParenthesesInfo, Ref<YarrPattern>(m_pattern), allocator));
michael@0 1518 }
michael@0 1519
michael@0 1520 void checkInput(unsigned count)
michael@0 1521 {
michael@0 1522 m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
michael@0 1523 }
michael@0 1524
michael@0 1525 void uncheckInput(unsigned count)
michael@0 1526 {
michael@0 1527 m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
michael@0 1528 }
michael@0 1529
michael@0 1530 void assertionBOL(unsigned inputPosition)
michael@0 1531 {
michael@0 1532 m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
michael@0 1533 }
michael@0 1534
michael@0 1535 void assertionEOL(unsigned inputPosition)
michael@0 1536 {
michael@0 1537 m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
michael@0 1538 }
michael@0 1539
michael@0 1540 void assertionWordBoundary(bool invert, unsigned inputPosition)
michael@0 1541 {
michael@0 1542 m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
michael@0 1543 }
michael@0 1544
michael@0 1545 void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
michael@0 1546 {
michael@0 1547 if (m_pattern.m_ignoreCase) {
michael@0 1548 UChar lo = Unicode::toLower(ch);
michael@0 1549 UChar hi = Unicode::toUpper(ch);
michael@0 1550
michael@0 1551 if (lo != hi) {
michael@0 1552 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
michael@0 1553 return;
michael@0 1554 }
michael@0 1555 }
michael@0 1556
michael@0 1557 m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
michael@0 1558 }
michael@0 1559
michael@0 1560 void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
michael@0 1561 {
michael@0 1562 m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
michael@0 1563
michael@0 1564 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
michael@0 1565 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
michael@0 1566 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
michael@0 1567 }
michael@0 1568
michael@0 1569 void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
michael@0 1570 {
michael@0 1571 ASSERT(subpatternId);
michael@0 1572
michael@0 1573 m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
michael@0 1574
michael@0 1575 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
michael@0 1576 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
michael@0 1577 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
michael@0 1578 }
michael@0 1579
michael@0 1580 void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
michael@0 1581 {
michael@0 1582 int beginTerm = m_bodyDisjunction->terms.size();
michael@0 1583
michael@0 1584 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
michael@0 1585 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
michael@0 1586 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
michael@0 1587 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
michael@0 1588
michael@0 1589 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
michael@0 1590 m_currentAlternativeIndex = beginTerm + 1;
michael@0 1591 }
michael@0 1592
michael@0 1593 void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
michael@0 1594 {
michael@0 1595 int beginTerm = m_bodyDisjunction->terms.size();
michael@0 1596
michael@0 1597 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
michael@0 1598 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
michael@0 1599 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
michael@0 1600 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
michael@0 1601
michael@0 1602 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
michael@0 1603 m_currentAlternativeIndex = beginTerm + 1;
michael@0 1604 }
michael@0 1605
michael@0 1606 void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
michael@0 1607 {
michael@0 1608 // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
michael@0 1609 // then fix this up at the end! - simplifying this should make it much clearer.
michael@0 1610 // https://bugs.webkit.org/show_bug.cgi?id=50136
michael@0 1611
michael@0 1612 int beginTerm = m_bodyDisjunction->terms.size();
michael@0 1613
michael@0 1614 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
michael@0 1615 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
michael@0 1616 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
michael@0 1617 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
michael@0 1618
michael@0 1619 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
michael@0 1620 m_currentAlternativeIndex = beginTerm + 1;
michael@0 1621 }
michael@0 1622
michael@0 1623 void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
michael@0 1624 {
michael@0 1625 int beginTerm = m_bodyDisjunction->terms.size();
michael@0 1626
michael@0 1627 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
michael@0 1628 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
michael@0 1629 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
michael@0 1630 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
michael@0 1631
michael@0 1632 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
michael@0 1633 m_currentAlternativeIndex = beginTerm + 1;
michael@0 1634 }
michael@0 1635
michael@0 1636 void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
michael@0 1637 {
michael@0 1638 unsigned beginTerm = popParenthesesStack();
michael@0 1639 closeAlternative(beginTerm + 1);
michael@0 1640 unsigned endTerm = m_bodyDisjunction->terms.size();
michael@0 1641
michael@0 1642 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
michael@0 1643
michael@0 1644 bool invert = m_bodyDisjunction->terms[beginTerm].invert();
michael@0 1645 unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
michael@0 1646
michael@0 1647 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
michael@0 1648 m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
michael@0 1649 m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
michael@0 1650 m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
michael@0 1651
michael@0 1652 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
michael@0 1653 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
michael@0 1654 m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
michael@0 1655 m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
michael@0 1656 }
michael@0 1657
michael@0 1658 void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
michael@0 1659 {
michael@0 1660 m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
michael@0 1661 }
michael@0 1662
michael@0 1663 unsigned popParenthesesStack()
michael@0 1664 {
michael@0 1665 ASSERT(m_parenthesesStack.size());
michael@0 1666 int stackEnd = m_parenthesesStack.size() - 1;
michael@0 1667 unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
michael@0 1668 m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
michael@0 1669 m_parenthesesStack.shrink(stackEnd);
michael@0 1670
michael@0 1671 ASSERT(beginTerm < m_bodyDisjunction->terms.size());
michael@0 1672 ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
michael@0 1673
michael@0 1674 return beginTerm;
michael@0 1675 }
michael@0 1676
michael@0 1677 #ifndef NDEBUG
michael@0 1678 void dumpDisjunction(ByteDisjunction* disjunction)
michael@0 1679 {
michael@0 1680 dataLogF("ByteDisjunction(%p):\n\t", (void *)disjunction);
michael@0 1681 for (unsigned i = 0; i < disjunction->terms.size(); ++i)
michael@0 1682 dataLogF("{ %d } ", disjunction->terms[i].type);
michael@0 1683 dataLogF("\n");
michael@0 1684 }
michael@0 1685 #endif
michael@0 1686
michael@0 1687 void closeAlternative(int beginTerm)
michael@0 1688 {
michael@0 1689 int origBeginTerm = beginTerm;
michael@0 1690 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
michael@0 1691 int endIndex = m_bodyDisjunction->terms.size();
michael@0 1692
michael@0 1693 unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
michael@0 1694
michael@0 1695 if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
michael@0 1696 m_bodyDisjunction->terms.remove(beginTerm);
michael@0 1697 else {
michael@0 1698 while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
michael@0 1699 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
michael@0 1700 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
michael@0 1701 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
michael@0 1702 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
michael@0 1703 }
michael@0 1704
michael@0 1705 m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
michael@0 1706
michael@0 1707 m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
michael@0 1708 m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
michael@0 1709 }
michael@0 1710 }
michael@0 1711
michael@0 1712 void closeBodyAlternative()
michael@0 1713 {
michael@0 1714 int beginTerm = 0;
michael@0 1715 int origBeginTerm = 0;
michael@0 1716 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
michael@0 1717 int endIndex = m_bodyDisjunction->terms.size();
michael@0 1718
michael@0 1719 unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
michael@0 1720
michael@0 1721 while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
michael@0 1722 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
michael@0 1723 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
michael@0 1724 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
michael@0 1725 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
michael@0 1726 }
michael@0 1727
michael@0 1728 m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
michael@0 1729
michael@0 1730 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
michael@0 1731 m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
michael@0 1732 }
michael@0 1733
michael@0 1734 void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
michael@0 1735 {
michael@0 1736 unsigned beginTerm = popParenthesesStack();
michael@0 1737 closeAlternative(beginTerm + 1);
michael@0 1738 unsigned endTerm = m_bodyDisjunction->terms.size();
michael@0 1739
michael@0 1740 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
michael@0 1741
michael@0 1742 ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
michael@0 1743
michael@0 1744 bool capture = parenthesesBegin.capture();
michael@0 1745 unsigned subpatternId = parenthesesBegin.atom.subpatternId;
michael@0 1746
michael@0 1747 unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
michael@0 1748 ByteDisjunction* parenthesesDisjunction = newOrCrash<ByteDisjunction>(numSubpatterns, callFrameSize);
michael@0 1749
michael@0 1750 parenthesesDisjunction->terms.reserve(endTerm - beginTerm + 1);
michael@0 1751 parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
michael@0 1752 for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
michael@0 1753 parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
michael@0 1754 parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
michael@0 1755
michael@0 1756 m_bodyDisjunction->terms.shrink(beginTerm);
michael@0 1757
michael@0 1758 m_allParenthesesInfo.append(parenthesesDisjunction);
michael@0 1759 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition));
michael@0 1760
michael@0 1761 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
michael@0 1762 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
michael@0 1763 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
michael@0 1764 }
michael@0 1765
michael@0 1766 void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
michael@0 1767 {
michael@0 1768 unsigned beginTerm = popParenthesesStack();
michael@0 1769 closeAlternative(beginTerm + 1);
michael@0 1770 unsigned endTerm = m_bodyDisjunction->terms.size();
michael@0 1771
michael@0 1772 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
michael@0 1773
michael@0 1774 bool capture = m_bodyDisjunction->terms[beginTerm].capture();
michael@0 1775 unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
michael@0 1776
michael@0 1777 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
michael@0 1778 m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
michael@0 1779 m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
michael@0 1780 m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
michael@0 1781
michael@0 1782 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
michael@0 1783 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
michael@0 1784 m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
michael@0 1785 m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
michael@0 1786 }
michael@0 1787
michael@0 1788 void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
michael@0 1789 {
michael@0 1790 unsigned beginTerm = popParenthesesStack();
michael@0 1791 closeAlternative(beginTerm + 1);
michael@0 1792 unsigned endTerm = m_bodyDisjunction->terms.size();
michael@0 1793
michael@0 1794 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
michael@0 1795
michael@0 1796 bool capture = m_bodyDisjunction->terms[beginTerm].capture();
michael@0 1797 unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
michael@0 1798
michael@0 1799 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
michael@0 1800 m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
michael@0 1801 m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
michael@0 1802 m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
michael@0 1803
michael@0 1804 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
michael@0 1805 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
michael@0 1806 m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
michael@0 1807 m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
michael@0 1808 }
michael@0 1809
michael@0 1810 void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
michael@0 1811 {
michael@0 1812 m_bodyDisjunction = adoptPtr(newOrCrash<ByteDisjunction>(numSubpatterns, callFrameSize));
michael@0 1813 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
michael@0 1814 m_bodyDisjunction->terms[0].frameLocation = 0;
michael@0 1815 m_currentAlternativeIndex = 0;
michael@0 1816 }
michael@0 1817
michael@0 1818 void regexEnd()
michael@0 1819 {
michael@0 1820 closeBodyAlternative();
michael@0 1821 }
michael@0 1822
michael@0 1823 void alternativeBodyDisjunction(bool onceThrough)
michael@0 1824 {
michael@0 1825 int newAlternativeIndex = m_bodyDisjunction->terms.size();
michael@0 1826 m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
michael@0 1827 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
michael@0 1828
michael@0 1829 m_currentAlternativeIndex = newAlternativeIndex;
michael@0 1830 }
michael@0 1831
michael@0 1832 void alternativeDisjunction()
michael@0 1833 {
michael@0 1834 int newAlternativeIndex = m_bodyDisjunction->terms.size();
michael@0 1835 m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
michael@0 1836 m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
michael@0 1837
michael@0 1838 m_currentAlternativeIndex = newAlternativeIndex;
michael@0 1839 }
michael@0 1840
michael@0 1841 void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
michael@0 1842 {
michael@0 1843 for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
michael@0 1844 unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
michael@0 1845
michael@0 1846 PatternAlternative* alternative = disjunction->m_alternatives[alt];
michael@0 1847
michael@0 1848 if (alt) {
michael@0 1849 if (disjunction == m_pattern.m_body)
michael@0 1850 alternativeBodyDisjunction(alternative->onceThrough());
michael@0 1851 else
michael@0 1852 alternativeDisjunction();
michael@0 1853 }
michael@0 1854
michael@0 1855 unsigned minimumSize = alternative->m_minimumSize;
michael@0 1856 ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
michael@0 1857 unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
michael@0 1858
michael@0 1859 if (countToCheck) {
michael@0 1860 checkInput(countToCheck);
michael@0 1861 currentCountAlreadyChecked += countToCheck;
michael@0 1862 }
michael@0 1863
michael@0 1864 for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
michael@0 1865 PatternTerm& term = alternative->m_terms[i];
michael@0 1866
michael@0 1867 switch (term.type) {
michael@0 1868 case PatternTerm::TypeAssertionBOL:
michael@0 1869 assertionBOL(currentCountAlreadyChecked - term.inputPosition);
michael@0 1870 break;
michael@0 1871
michael@0 1872 case PatternTerm::TypeAssertionEOL:
michael@0 1873 assertionEOL(currentCountAlreadyChecked - term.inputPosition);
michael@0 1874 break;
michael@0 1875
michael@0 1876 case PatternTerm::TypeAssertionWordBoundary:
michael@0 1877 assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);
michael@0 1878 break;
michael@0 1879
michael@0 1880 case PatternTerm::TypePatternCharacter:
michael@0 1881 atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
michael@0 1882 break;
michael@0 1883
michael@0 1884 case PatternTerm::TypeCharacterClass:
michael@0 1885 atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
michael@0 1886 break;
michael@0 1887
michael@0 1888 case PatternTerm::TypeBackReference:
michael@0 1889 atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
michael@0 1890 break;
michael@0 1891
michael@0 1892 case PatternTerm::TypeForwardReference:
michael@0 1893 break;
michael@0 1894
michael@0 1895 case PatternTerm::TypeParenthesesSubpattern: {
michael@0 1896 unsigned disjunctionAlreadyCheckedCount = 0;
michael@0 1897 if (term.quantityCount == 1 && !term.parentheses.isCopy) {
michael@0 1898 unsigned alternativeFrameLocation = term.frameLocation;
michael@0 1899 // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
michael@0 1900 if (term.quantityType == QuantifierFixedCount)
michael@0 1901 disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
michael@0 1902 else
michael@0 1903 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
michael@0 1904 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
michael@0 1905 atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
michael@0 1906 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
michael@0 1907 atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
michael@0 1908 } else if (term.parentheses.isTerminal) {
michael@0 1909 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
michael@0 1910 atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
michael@0 1911 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
michael@0 1912 atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
michael@0 1913 } else {
michael@0 1914 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
michael@0 1915 atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0);
michael@0 1916 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
michael@0 1917 atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
michael@0 1918 }
michael@0 1919 break;
michael@0 1920 }
michael@0 1921
michael@0 1922 case PatternTerm::TypeParentheticalAssertion: {
michael@0 1923 unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
michael@0 1924
michael@0 1925 ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
michael@0 1926 unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition);
michael@0 1927 unsigned uncheckAmount = 0;
michael@0 1928 if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
michael@0 1929 uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
michael@0 1930 uncheckInput(uncheckAmount);
michael@0 1931 currentCountAlreadyChecked -= uncheckAmount;
michael@0 1932 }
michael@0 1933
michael@0 1934 atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
michael@0 1935 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
michael@0 1936 atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
michael@0 1937 if (uncheckAmount) {
michael@0 1938 checkInput(uncheckAmount);
michael@0 1939 currentCountAlreadyChecked += uncheckAmount;
michael@0 1940 }
michael@0 1941 break;
michael@0 1942 }
michael@0 1943
michael@0 1944 case PatternTerm::TypeDotStarEnclosure:
michael@0 1945 assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
michael@0 1946 break;
michael@0 1947 }
michael@0 1948 }
michael@0 1949 }
michael@0 1950 }
michael@0 1951
michael@0 1952 private:
michael@0 1953 YarrPattern& m_pattern;
michael@0 1954 OwnPtr<ByteDisjunction> m_bodyDisjunction;
michael@0 1955 unsigned m_currentAlternativeIndex;
michael@0 1956 Vector<ParenthesesStackEntry> m_parenthesesStack;
michael@0 1957 Vector<ByteDisjunction*> m_allParenthesesInfo;
michael@0 1958 };
michael@0 1959
michael@0 1960 PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
michael@0 1961 {
michael@0 1962 return ByteCompiler(pattern).compile(allocator);
michael@0 1963 }
michael@0 1964
michael@0 1965 unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
michael@0 1966 {
michael@0 1967 #if YARR_8BIT_CHAR_SUPPORT
michael@0 1968 if (input.is8Bit())
michael@0 1969 return Interpreter<LChar>(cx, bytecode, output, input.characters8(), input.length(), start).interpret();
michael@0 1970 #endif
michael@0 1971 return Interpreter<UChar>(cx, bytecode, output, input.chars(), input.length(), start).interpret();
michael@0 1972 }
michael@0 1973
michael@0 1974 unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
michael@0 1975 {
michael@0 1976 return Interpreter<LChar>(cx, bytecode, output, input, length, start).interpret();
michael@0 1977 }
michael@0 1978
michael@0 1979 unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
michael@0 1980 {
michael@0 1981 return Interpreter<UChar>(cx, bytecode, output, input, length, start).interpret();
michael@0 1982 }
michael@0 1983
michael@0 1984 // These should be the same for both UChar & LChar.
michael@0 1985 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
michael@0 1986 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
michael@0 1987 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
michael@0 1988 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
michael@0 1989 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
michael@0 1990 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
michael@0 1991 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
michael@0 1992
michael@0 1993
michael@0 1994 } }

mercurial