Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 *
4 * Copyright (C) 2009 Apple Inc. All rights reserved.
5 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
20 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include "yarr/YarrInterpreter.h"
31 #include "jscntxt.h"
33 #include "yarr/Yarr.h"
34 #include "yarr/YarrCanonicalizeUCS2.h"
35 #include "yarr/BumpPointerAllocator.h"
37 using namespace WTF;
39 namespace JSC { namespace Yarr {
41 template<typename CharType>
42 class Interpreter {
43 public:
44 struct ParenthesesDisjunctionContext;
46 struct BackTrackInfoPatternCharacter {
47 uintptr_t matchAmount;
48 };
49 struct BackTrackInfoCharacterClass {
50 uintptr_t matchAmount;
51 };
52 struct BackTrackInfoBackReference {
53 uintptr_t begin; // Not really needed for greedy quantifiers.
54 uintptr_t matchAmount; // Not really needed for fixed quantifiers.
55 };
56 struct BackTrackInfoAlternative {
57 uintptr_t offset;
58 };
59 struct BackTrackInfoParentheticalAssertion {
60 uintptr_t begin;
61 };
62 struct BackTrackInfoParenthesesOnce {
63 uintptr_t begin;
64 };
65 struct BackTrackInfoParenthesesTerminal {
66 uintptr_t begin;
67 };
68 struct BackTrackInfoParentheses {
69 uintptr_t matchAmount;
70 ParenthesesDisjunctionContext* lastContext;
71 };
73 static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
74 {
75 context->next = backTrack->lastContext;
76 backTrack->lastContext = context;
77 ++backTrack->matchAmount;
78 }
80 static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
81 {
82 ASSERT(backTrack->matchAmount);
83 ASSERT(backTrack->lastContext);
84 backTrack->lastContext = backTrack->lastContext->next;
85 --backTrack->matchAmount;
86 }
88 struct DisjunctionContext
89 {
90 DisjunctionContext()
91 : term(0)
92 {
93 }
95 void* operator new(size_t, void* where)
96 {
97 return where;
98 }
100 int term;
101 unsigned matchBegin;
102 unsigned matchEnd;
103 uintptr_t frame[1];
104 };
106 DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
107 {
108 size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
109 allocatorPool = allocatorPool->ensureCapacity(size);
110 if (!allocatorPool)
111 CRASH();
112 return new (allocatorPool->alloc(size)) DisjunctionContext();
113 }
115 void freeDisjunctionContext(DisjunctionContext* context)
116 {
117 allocatorPool = allocatorPool->dealloc(context);
118 }
120 struct ParenthesesDisjunctionContext
121 {
122 ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
123 : next(0)
124 {
125 unsigned firstSubpatternId = term.atom.subpatternId;
126 unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
128 for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
129 subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
130 output[(firstSubpatternId << 1) + i] = offsetNoMatch;
131 }
133 new (getDisjunctionContext(term)) DisjunctionContext();
134 }
136 void* operator new(size_t, void* where)
137 {
138 return where;
139 }
141 void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
142 {
143 for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
144 output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
145 }
147 DisjunctionContext* getDisjunctionContext(ByteTerm& term)
148 {
149 return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
150 }
152 ParenthesesDisjunctionContext* next;
153 unsigned subpatternBackup[1];
154 };
156 ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
157 {
158 size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
159 size = JS_ROUNDUP(size, JS_ALIGNMENT_OF(ParenthesesDisjunctionContext));
160 allocatorPool = allocatorPool->ensureCapacity(size);
161 if (!allocatorPool)
162 CRASH();
163 return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
164 }
166 void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
167 {
168 allocatorPool = allocatorPool->dealloc(context);
169 }
171 class InputStream {
172 public:
173 InputStream(const CharType* input, unsigned start, unsigned length)
174 : input(input)
175 , pos(start)
176 , length(length)
177 {
178 }
180 void next()
181 {
182 ++pos;
183 }
185 void rewind(unsigned amount)
186 {
187 ASSERT(pos >= amount);
188 pos -= amount;
189 }
191 int read()
192 {
193 ASSERT(pos < length);
194 if (pos < length)
195 return input[pos];
196 return -1;
197 }
199 int readPair()
200 {
201 ASSERT(pos + 1 < length);
202 return input[pos] | input[pos + 1] << 16;
203 }
205 int readChecked(unsigned negativePositionOffest)
206 {
207 if (pos < negativePositionOffest)
208 CRASH();
209 unsigned p = pos - negativePositionOffest;
210 ASSERT(p < length);
211 return input[p];
212 }
214 int reread(unsigned from)
215 {
216 ASSERT(from < length);
217 return input[from];
218 }
220 int prev()
221 {
222 ASSERT(!(pos > length));
223 if (pos && length)
224 return input[pos - 1];
225 return -1;
226 }
228 unsigned getPos()
229 {
230 return pos;
231 }
233 void setPos(unsigned p)
234 {
235 pos = p;
236 }
238 bool atStart()
239 {
240 return pos == 0;
241 }
243 bool atEnd()
244 {
245 return pos == length;
246 }
248 unsigned end()
249 {
250 return length;
251 }
253 bool checkInput(unsigned count)
254 {
255 if (((pos + count) <= length) && ((pos + count) >= pos)) {
256 pos += count;
257 return true;
258 }
259 return false;
260 }
262 void uncheckInput(unsigned count)
263 {
264 if (pos < count)
265 CRASH();
266 pos -= count;
267 }
269 bool atStart(unsigned negativePositionOffest)
270 {
271 return pos == negativePositionOffest;
272 }
274 bool atEnd(unsigned negativePositionOffest)
275 {
276 if (pos < negativePositionOffest)
277 CRASH();
278 return (pos - negativePositionOffest) == length;
279 }
281 bool isAvailableInput(unsigned offset)
282 {
283 return (((pos + offset) <= length) && ((pos + offset) >= pos));
284 }
286 private:
287 const CharType* input;
288 unsigned pos;
289 unsigned length;
290 };
292 bool testCharacterClass(CharacterClass* characterClass, int ch)
293 {
294 if (ch & 0xFF80) {
295 for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
296 if (ch == characterClass->m_matchesUnicode[i])
297 return true;
298 for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
299 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
300 return true;
301 } else {
302 for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
303 if (ch == characterClass->m_matches[i])
304 return true;
305 for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
306 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
307 return true;
308 }
310 return false;
311 }
313 bool checkCharacter(int testChar, unsigned negativeInputOffset)
314 {
315 return testChar == input.readChecked(negativeInputOffset);
316 }
318 bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
319 {
320 int ch = input.readChecked(negativeInputOffset);
321 return (loChar == ch) || (hiChar == ch);
322 }
324 bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
325 {
326 bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset));
327 return invert ? !match : match;
328 }
330 bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
331 {
332 unsigned matchSize = (unsigned)(matchEnd - matchBegin);
334 if (!input.checkInput(matchSize))
335 return false;
337 if (pattern->m_ignoreCase) {
338 for (unsigned i = 0; i < matchSize; ++i) {
339 int oldCh = input.reread(matchBegin + i);
340 int ch = input.readChecked(negativeInputOffset + matchSize - i);
342 if (oldCh == ch)
343 continue;
345 // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that
346 // unicode values are never allowed to match against ascii ones.
347 if (isASCII(oldCh) || isASCII(ch)) {
348 if (toASCIIUpper(oldCh) == toASCIIUpper(ch))
349 continue;
350 } else if (areCanonicallyEquivalent(oldCh, ch))
351 continue;
353 input.uncheckInput(matchSize);
354 return false;
355 }
356 } else {
357 for (unsigned i = 0; i < matchSize; ++i) {
358 if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) {
359 input.uncheckInput(matchSize);
360 return false;
361 }
362 }
363 }
365 return true;
366 }
368 bool matchAssertionBOL(ByteTerm& term)
369 {
370 return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1)));
371 }
373 bool matchAssertionEOL(ByteTerm& term)
374 {
375 if (term.inputPosition)
376 return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
378 return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
379 }
381 bool matchAssertionWordBoundary(ByteTerm& term)
382 {
383 bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1));
384 bool readIsWordchar;
385 if (term.inputPosition)
386 readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
387 else
388 readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
390 bool wordBoundary = prevIsWordchar != readIsWordchar;
391 return term.invert() ? !wordBoundary : wordBoundary;
392 }
394 bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
395 {
396 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
398 switch (term.atom.quantityType) {
399 case QuantifierFixedCount:
400 break;
402 case QuantifierGreedy:
403 if (backTrack->matchAmount) {
404 --backTrack->matchAmount;
405 input.uncheckInput(1);
406 return true;
407 }
408 break;
410 case QuantifierNonGreedy:
411 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
412 ++backTrack->matchAmount;
413 if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1))
414 return true;
415 }
416 input.uncheckInput(backTrack->matchAmount);
417 break;
418 }
420 return false;
421 }
423 bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
424 {
425 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
427 switch (term.atom.quantityType) {
428 case QuantifierFixedCount:
429 break;
431 case QuantifierGreedy:
432 if (backTrack->matchAmount) {
433 --backTrack->matchAmount;
434 input.uncheckInput(1);
435 return true;
436 }
437 break;
439 case QuantifierNonGreedy:
440 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
441 ++backTrack->matchAmount;
442 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1))
443 return true;
444 }
445 input.uncheckInput(backTrack->matchAmount);
446 break;
447 }
449 return false;
450 }
452 bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
453 {
454 ASSERT(term.type == ByteTerm::TypeCharacterClass);
455 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
457 switch (term.atom.quantityType) {
458 case QuantifierFixedCount: {
459 for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
460 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount))
461 return false;
462 }
463 return true;
464 }
466 case QuantifierGreedy: {
467 unsigned matchAmount = 0;
468 while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
469 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) {
470 input.uncheckInput(1);
471 break;
472 }
473 ++matchAmount;
474 }
475 backTrack->matchAmount = matchAmount;
477 return true;
478 }
480 case QuantifierNonGreedy:
481 backTrack->matchAmount = 0;
482 return true;
483 }
485 ASSERT_NOT_REACHED();
486 return false;
487 }
489 bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
490 {
491 ASSERT(term.type == ByteTerm::TypeCharacterClass);
492 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
494 switch (term.atom.quantityType) {
495 case QuantifierFixedCount:
496 break;
498 case QuantifierGreedy:
499 if (backTrack->matchAmount) {
500 --backTrack->matchAmount;
501 input.uncheckInput(1);
502 return true;
503 }
504 break;
506 case QuantifierNonGreedy:
507 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
508 ++backTrack->matchAmount;
509 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1))
510 return true;
511 }
512 input.uncheckInput(backTrack->matchAmount);
513 break;
514 }
516 return false;
517 }
519 bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
520 {
521 ASSERT(term.type == ByteTerm::TypeBackReference);
522 BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
524 unsigned matchBegin = output[(term.atom.subpatternId << 1)];
525 unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
527 // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
528 // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
529 // Eg.: /(a\1)/
530 if (matchEnd == offsetNoMatch)
531 return true;
533 if (matchBegin == offsetNoMatch)
534 return true;
536 ASSERT(matchBegin <= matchEnd);
538 if (matchBegin == matchEnd)
539 return true;
541 switch (term.atom.quantityType) {
542 case QuantifierFixedCount: {
543 backTrack->begin = input.getPos();
544 for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
545 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
546 input.setPos(backTrack->begin);
547 return false;
548 }
549 }
550 return true;
551 }
553 case QuantifierGreedy: {
554 unsigned matchAmount = 0;
555 while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
556 ++matchAmount;
557 backTrack->matchAmount = matchAmount;
558 return true;
559 }
561 case QuantifierNonGreedy:
562 backTrack->begin = input.getPos();
563 backTrack->matchAmount = 0;
564 return true;
565 }
567 ASSERT_NOT_REACHED();
568 return false;
569 }
571 bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
572 {
573 ASSERT(term.type == ByteTerm::TypeBackReference);
574 BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
576 unsigned matchBegin = output[(term.atom.subpatternId << 1)];
577 unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1];
579 if (matchBegin == offsetNoMatch)
580 return false;
582 ASSERT(matchBegin <= matchEnd);
584 if (matchBegin == matchEnd)
585 return false;
587 switch (term.atom.quantityType) {
588 case QuantifierFixedCount:
589 // for quantityCount == 1, could rewind.
590 input.setPos(backTrack->begin);
591 break;
593 case QuantifierGreedy:
594 if (backTrack->matchAmount) {
595 --backTrack->matchAmount;
596 input.rewind(matchEnd - matchBegin);
597 return true;
598 }
599 break;
601 case QuantifierNonGreedy:
602 if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
603 ++backTrack->matchAmount;
604 return true;
605 }
606 input.setPos(backTrack->begin);
607 break;
608 }
610 return false;
611 }
613 void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
614 {
615 if (term.capture()) {
616 unsigned subpatternId = term.atom.subpatternId;
617 output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
618 output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
619 }
620 }
621 void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
622 {
623 unsigned firstSubpatternId = term.atom.subpatternId;
624 unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
625 context->restoreOutput(output, firstSubpatternId, count);
626 }
627 JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
628 {
629 while (backTrack->matchAmount) {
630 ParenthesesDisjunctionContext* context = backTrack->lastContext;
632 JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
633 if (result == JSRegExpMatch)
634 return JSRegExpMatch;
636 resetMatches(term, context);
637 popParenthesesDisjunctionContext(backTrack);
638 freeParenthesesDisjunctionContext(context);
640 if (result != JSRegExpNoMatch)
641 return result;
642 }
644 return JSRegExpNoMatch;
645 }
647 bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
648 {
649 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
650 ASSERT(term.atom.quantityCount == 1);
652 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
654 switch (term.atom.quantityType) {
655 case QuantifierGreedy: {
656 // set this speculatively; if we get to the parens end this will be true.
657 backTrack->begin = input.getPos();
658 break;
659 }
660 case QuantifierNonGreedy: {
661 backTrack->begin = notFound;
662 context->term += term.atom.parenthesesWidth;
663 return true;
664 }
665 case QuantifierFixedCount:
666 break;
667 }
669 if (term.capture()) {
670 unsigned subpatternId = term.atom.subpatternId;
671 output[(subpatternId << 1)] = input.getPos() - term.inputPosition;
672 }
674 return true;
675 }
677 bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
678 {
679 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
680 ASSERT(term.atom.quantityCount == 1);
682 if (term.capture()) {
683 unsigned subpatternId = term.atom.subpatternId;
684 output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
685 }
687 if (term.atom.quantityType == QuantifierFixedCount)
688 return true;
690 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
691 return backTrack->begin != input.getPos();
692 }
694 bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
695 {
696 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
697 ASSERT(term.atom.quantityCount == 1);
699 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
701 if (term.capture()) {
702 unsigned subpatternId = term.atom.subpatternId;
703 output[(subpatternId << 1)] = offsetNoMatch;
704 output[(subpatternId << 1) + 1] = offsetNoMatch;
705 }
707 switch (term.atom.quantityType) {
708 case QuantifierGreedy:
709 // if we backtrack to this point, there is another chance - try matching nothing.
710 ASSERT(backTrack->begin != notFound);
711 backTrack->begin = notFound;
712 context->term += term.atom.parenthesesWidth;
713 return true;
714 case QuantifierNonGreedy:
715 ASSERT(backTrack->begin != notFound);
716 case QuantifierFixedCount:
717 break;
718 }
720 return false;
721 }
723 bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
724 {
725 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
726 ASSERT(term.atom.quantityCount == 1);
728 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
730 switch (term.atom.quantityType) {
731 case QuantifierGreedy:
732 if (backTrack->begin == notFound) {
733 context->term -= term.atom.parenthesesWidth;
734 return false;
735 }
736 case QuantifierNonGreedy:
737 if (backTrack->begin == notFound) {
738 backTrack->begin = input.getPos();
739 if (term.capture()) {
740 // Technically this access to inputPosition should be accessing the begin term's
741 // inputPosition, but for repeats other than fixed these values should be
742 // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
743 ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
745 // Disabled, see bug 808478
746 #if 0
747 ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
748 #endif
749 unsigned subpatternId = term.atom.subpatternId;
750 output[subpatternId << 1] = input.getPos() + term.inputPosition;
751 }
752 context->term -= term.atom.parenthesesWidth;
753 return true;
754 }
755 case QuantifierFixedCount:
756 break;
757 }
759 return false;
760 }
762 bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
763 {
764 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
765 ASSERT(term.atom.quantityType == QuantifierGreedy);
766 ASSERT(term.atom.quantityCount == quantifyInfinite);
767 ASSERT(!term.capture());
769 BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
770 backTrack->begin = input.getPos();
771 return true;
772 }
774 bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
775 {
776 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
778 BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
779 // Empty match is a failed match.
780 if (backTrack->begin == input.getPos())
781 return false;
783 // Successful match! Okay, what's next? - loop around and try to match moar!
784 context->term -= (term.atom.parenthesesWidth + 1);
785 return true;
786 }
788 bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
789 {
790 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
791 ASSERT(term.atom.quantityType == QuantifierGreedy);
792 ASSERT(term.atom.quantityCount == quantifyInfinite);
793 ASSERT(!term.capture());
795 // If we backtrack to this point, we have failed to match this iteration of the parens.
796 // Since this is greedy / zero minimum a failed is also accepted as a match!
797 context->term += term.atom.parenthesesWidth;
798 return true;
799 }
801 bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
802 {
803 // 'Terminal' parentheses are at the end of the regex, and as such a match past end
804 // should always be returned as a successful match - we should never backtrack to here.
805 ASSERT_NOT_REACHED();
806 return false;
807 }
809 bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
810 {
811 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
812 ASSERT(term.atom.quantityCount == 1);
814 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
816 backTrack->begin = input.getPos();
817 return true;
818 }
820 bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
821 {
822 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
823 ASSERT(term.atom.quantityCount == 1);
825 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
827 input.setPos(backTrack->begin);
829 // We've reached the end of the parens; if they are inverted, this is failure.
830 if (term.invert()) {
831 context->term -= term.atom.parenthesesWidth;
832 return false;
833 }
835 return true;
836 }
838 bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
839 {
840 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
841 ASSERT(term.atom.quantityCount == 1);
843 // We've failed to match parens; if they are inverted, this is win!
844 if (term.invert()) {
845 context->term += term.atom.parenthesesWidth;
846 return true;
847 }
849 return false;
850 }
852 bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
853 {
854 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
855 ASSERT(term.atom.quantityCount == 1);
857 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
859 input.setPos(backTrack->begin);
861 context->term -= term.atom.parenthesesWidth;
862 return false;
863 }
865 JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
866 {
867 ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
869 BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
870 ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
872 backTrack->matchAmount = 0;
873 backTrack->lastContext = 0;
875 switch (term.atom.quantityType) {
876 case QuantifierFixedCount: {
877 // While we haven't yet reached our fixed limit,
878 while (backTrack->matchAmount < term.atom.quantityCount) {
879 // Try to do a match, and it it succeeds, add it to the list.
880 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
881 JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
882 if (result == JSRegExpMatch)
883 appendParenthesesDisjunctionContext(backTrack, context);
884 else {
885 // The match failed; try to find an alternate point to carry on from.
886 resetMatches(term, context);
887 freeParenthesesDisjunctionContext(context);
889 if (result != JSRegExpNoMatch)
890 return result;
891 JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
892 if (backtrackResult != JSRegExpMatch)
893 return backtrackResult;
894 }
895 }
897 ASSERT(backTrack->matchAmount == term.atom.quantityCount);
898 ParenthesesDisjunctionContext* context = backTrack->lastContext;
899 recordParenthesesMatch(term, context);
900 return JSRegExpMatch;
901 }
903 case QuantifierGreedy: {
904 while (backTrack->matchAmount < term.atom.quantityCount) {
905 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
906 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
907 if (result == JSRegExpMatch)
908 appendParenthesesDisjunctionContext(backTrack, context);
909 else {
910 resetMatches(term, context);
911 freeParenthesesDisjunctionContext(context);
913 if (result != JSRegExpNoMatch)
914 return result;
916 break;
917 }
918 }
920 if (backTrack->matchAmount) {
921 ParenthesesDisjunctionContext* context = backTrack->lastContext;
922 recordParenthesesMatch(term, context);
923 }
924 return JSRegExpMatch;
925 }
927 case QuantifierNonGreedy:
928 return JSRegExpMatch;
929 }
931 ASSERT_NOT_REACHED();
932 return JSRegExpErrorNoMatch;
933 }
935 // Rules for backtracking differ depending on whether this is greedy or non-greedy.
936 //
937 // Greedy matches never should try just adding more - you should already have done
938 // the 'more' cases. Always backtrack, at least a leetle bit. However cases where
939 // you backtrack an item off the list needs checking, since we'll never have matched
940 // the one less case. Tracking forwards, still add as much as possible.
941 //
942 // Non-greedy, we've already done the one less case, so don't match on popping.
943 // We haven't done the one more case, so always try to add that.
944 //
945 JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
946 {
947 ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
949 BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
950 ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
952 switch (term.atom.quantityType) {
953 case QuantifierFixedCount: {
954 ASSERT(backTrack->matchAmount == term.atom.quantityCount);
956 ParenthesesDisjunctionContext* context = 0;
957 JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
959 if (result != JSRegExpMatch)
960 return result;
962 // While we haven't yet reached our fixed limit,
963 while (backTrack->matchAmount < term.atom.quantityCount) {
964 // Try to do a match, and it it succeeds, add it to the list.
965 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
966 result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
968 if (result == JSRegExpMatch)
969 appendParenthesesDisjunctionContext(backTrack, context);
970 else {
971 // The match failed; try to find an alternate point to carry on from.
972 resetMatches(term, context);
973 freeParenthesesDisjunctionContext(context);
975 if (result != JSRegExpNoMatch)
976 return result;
977 JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
978 if (backtrackResult != JSRegExpMatch)
979 return backtrackResult;
980 }
981 }
983 ASSERT(backTrack->matchAmount == term.atom.quantityCount);
984 context = backTrack->lastContext;
985 recordParenthesesMatch(term, context);
986 return JSRegExpMatch;
987 }
989 case QuantifierGreedy: {
990 if (!backTrack->matchAmount)
991 return JSRegExpNoMatch;
993 ParenthesesDisjunctionContext* context = backTrack->lastContext;
994 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
995 if (result == JSRegExpMatch) {
996 while (backTrack->matchAmount < term.atom.quantityCount) {
997 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
998 JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
999 if (parenthesesResult == JSRegExpMatch)
1000 appendParenthesesDisjunctionContext(backTrack, context);
1001 else {
1002 resetMatches(term, context);
1003 freeParenthesesDisjunctionContext(context);
1005 if (parenthesesResult != JSRegExpNoMatch)
1006 return parenthesesResult;
1008 break;
1009 }
1010 }
1011 } else {
1012 // Avoid a topcrash before it occurs.
1013 if (!backTrack->lastContext) {
1014 ASSERT(!"Tripped Bug 856796!");
1015 return JSRegExpErrorInternal;
1016 }
1018 resetMatches(term, context);
1019 popParenthesesDisjunctionContext(backTrack);
1020 freeParenthesesDisjunctionContext(context);
1022 if (result != JSRegExpNoMatch)
1023 return result;
1024 }
1026 if (backTrack->matchAmount) {
1027 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1028 recordParenthesesMatch(term, context);
1029 }
1030 return JSRegExpMatch;
1031 }
1033 case QuantifierNonGreedy: {
1034 // If we've not reached the limit, try to add one more match.
1035 if (backTrack->matchAmount < term.atom.quantityCount) {
1036 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1037 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1038 if (result == JSRegExpMatch) {
1039 appendParenthesesDisjunctionContext(backTrack, context);
1040 recordParenthesesMatch(term, context);
1041 return JSRegExpMatch;
1042 }
1044 resetMatches(term, context);
1045 freeParenthesesDisjunctionContext(context);
1047 if (result != JSRegExpNoMatch)
1048 return result;
1049 }
1051 // Nope - okay backtrack looking for an alternative.
1052 while (backTrack->matchAmount) {
1053 ParenthesesDisjunctionContext* context = backTrack->lastContext;
1054 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1055 if (result == JSRegExpMatch) {
1056 // successful backtrack! we're back in the game!
1057 if (backTrack->matchAmount) {
1058 context = backTrack->lastContext;
1059 recordParenthesesMatch(term, context);
1060 }
1061 return JSRegExpMatch;
1062 }
1064 // Avoid a topcrash before it occurs.
1065 if (!backTrack->lastContext) {
1066 ASSERT(!"Tripped Bug 856796!");
1067 return JSRegExpErrorInternal;
1068 }
1070 // pop a match off the stack
1071 resetMatches(term, context);
1072 popParenthesesDisjunctionContext(backTrack);
1073 freeParenthesesDisjunctionContext(context);
1075 if (result != JSRegExpNoMatch)
1076 return result;
1077 }
1079 return JSRegExpNoMatch;
1080 }
1081 }
1083 ASSERT_NOT_REACHED();
1084 return JSRegExpErrorNoMatch;
1085 }
1087 bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
1088 {
1089 UNUSED_PARAM(term);
1090 unsigned matchBegin = context->matchBegin;
1092 if (matchBegin) {
1093 for (matchBegin--; true; matchBegin--) {
1094 if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) {
1095 ++matchBegin;
1096 break;
1097 }
1099 if (!matchBegin)
1100 break;
1101 }
1102 }
1104 unsigned matchEnd = input.getPos();
1106 for (; (matchEnd != input.end())
1107 && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { }
1109 if (((matchBegin && term.anchors.m_bol)
1110 || ((matchEnd != input.end()) && term.anchors.m_eol))
1111 && !pattern->m_multiline)
1112 return false;
1114 context->matchBegin = matchBegin;
1115 context->matchEnd = matchEnd;
1116 return true;
1117 }
1119 #define MATCH_NEXT() { ++context->term; goto matchAgain; }
1120 #define BACKTRACK() { --context->term; goto backtrack; }
1121 #define currentTerm() (disjunction->terms[context->term])
1122 JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1123 {
1124 if (!--remainingMatchCount)
1125 return JSRegExpErrorHitLimit;
1127 if (btrack)
1128 BACKTRACK();
1130 context->matchBegin = input.getPos();
1131 context->term = 0;
1133 matchAgain:
1134 ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1136 // Prevent jank resulting from getting stuck in Yarr for a long time.
1137 if (!CheckForInterrupt(this->cx))
1138 return JSRegExpErrorInternal;
1140 switch (currentTerm().type) {
1141 case ByteTerm::TypeSubpatternBegin:
1142 MATCH_NEXT();
1143 case ByteTerm::TypeSubpatternEnd:
1144 context->matchEnd = input.getPos();
1145 return JSRegExpMatch;
1147 case ByteTerm::TypeBodyAlternativeBegin:
1148 MATCH_NEXT();
1149 case ByteTerm::TypeBodyAlternativeDisjunction:
1150 case ByteTerm::TypeBodyAlternativeEnd:
1151 context->matchEnd = input.getPos();
1152 return JSRegExpMatch;
1154 case ByteTerm::TypeAlternativeBegin:
1155 MATCH_NEXT();
1156 case ByteTerm::TypeAlternativeDisjunction:
1157 case ByteTerm::TypeAlternativeEnd: {
1158 int offset = currentTerm().alternative.end;
1159 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1160 backTrack->offset = offset;
1161 context->term += offset;
1162 MATCH_NEXT();
1163 }
1165 case ByteTerm::TypeAssertionBOL:
1166 if (matchAssertionBOL(currentTerm()))
1167 MATCH_NEXT();
1168 BACKTRACK();
1169 case ByteTerm::TypeAssertionEOL:
1170 if (matchAssertionEOL(currentTerm()))
1171 MATCH_NEXT();
1172 BACKTRACK();
1173 case ByteTerm::TypeAssertionWordBoundary:
1174 if (matchAssertionWordBoundary(currentTerm()))
1175 MATCH_NEXT();
1176 BACKTRACK();
1178 case ByteTerm::TypePatternCharacterOnce:
1179 case ByteTerm::TypePatternCharacterFixed: {
1180 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1181 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount))
1182 BACKTRACK();
1183 }
1184 MATCH_NEXT();
1185 }
1186 case ByteTerm::TypePatternCharacterGreedy: {
1187 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1188 unsigned matchAmount = 0;
1189 while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1190 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) {
1191 input.uncheckInput(1);
1192 break;
1193 }
1194 ++matchAmount;
1195 }
1196 backTrack->matchAmount = matchAmount;
1198 MATCH_NEXT();
1199 }
1200 case ByteTerm::TypePatternCharacterNonGreedy: {
1201 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1202 backTrack->matchAmount = 0;
1203 MATCH_NEXT();
1204 }
1206 case ByteTerm::TypePatternCasedCharacterOnce:
1207 case ByteTerm::TypePatternCasedCharacterFixed: {
1208 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1209 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
1210 BACKTRACK();
1211 }
1212 MATCH_NEXT();
1213 }
1214 case ByteTerm::TypePatternCasedCharacterGreedy: {
1215 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1216 unsigned matchAmount = 0;
1217 while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1218 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) {
1219 input.uncheckInput(1);
1220 break;
1221 }
1222 ++matchAmount;
1223 }
1224 backTrack->matchAmount = matchAmount;
1226 MATCH_NEXT();
1227 }
1228 case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1229 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1230 backTrack->matchAmount = 0;
1231 MATCH_NEXT();
1232 }
1234 case ByteTerm::TypeCharacterClass:
1235 if (matchCharacterClass(currentTerm(), context))
1236 MATCH_NEXT();
1237 BACKTRACK();
1238 case ByteTerm::TypeBackReference:
1239 if (matchBackReference(currentTerm(), context))
1240 MATCH_NEXT();
1241 BACKTRACK();
1242 case ByteTerm::TypeParenthesesSubpattern: {
1243 JSRegExpResult result = matchParentheses(currentTerm(), context);
1245 if (result == JSRegExpMatch) {
1246 MATCH_NEXT();
1247 } else if (result != JSRegExpNoMatch)
1248 return result;
1250 BACKTRACK();
1251 }
1252 case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1253 if (matchParenthesesOnceBegin(currentTerm(), context))
1254 MATCH_NEXT();
1255 BACKTRACK();
1256 case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1257 if (matchParenthesesOnceEnd(currentTerm(), context))
1258 MATCH_NEXT();
1259 BACKTRACK();
1260 case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1261 if (matchParenthesesTerminalBegin(currentTerm(), context))
1262 MATCH_NEXT();
1263 BACKTRACK();
1264 case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1265 if (matchParenthesesTerminalEnd(currentTerm(), context))
1266 MATCH_NEXT();
1267 BACKTRACK();
1268 case ByteTerm::TypeParentheticalAssertionBegin:
1269 if (matchParentheticalAssertionBegin(currentTerm(), context))
1270 MATCH_NEXT();
1271 BACKTRACK();
1272 case ByteTerm::TypeParentheticalAssertionEnd:
1273 if (matchParentheticalAssertionEnd(currentTerm(), context))
1274 MATCH_NEXT();
1275 BACKTRACK();
1277 case ByteTerm::TypeCheckInput:
1278 if (input.checkInput(currentTerm().checkInputCount))
1279 MATCH_NEXT();
1280 BACKTRACK();
1282 case ByteTerm::TypeUncheckInput:
1283 input.uncheckInput(currentTerm().checkInputCount);
1284 MATCH_NEXT();
1286 case ByteTerm::TypeDotStarEnclosure:
1287 if (matchDotStarEnclosure(currentTerm(), context))
1288 return JSRegExpMatch;
1289 BACKTRACK();
1290 }
1292 // We should never fall-through to here.
1293 ASSERT_NOT_REACHED();
1295 backtrack:
1296 ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1298 // Prevent jank resulting from getting stuck in Yarr for a long time.
1299 if (!CheckForInterrupt(this->cx))
1300 return JSRegExpErrorInternal;
1302 switch (currentTerm().type) {
1303 case ByteTerm::TypeSubpatternBegin:
1304 return JSRegExpNoMatch;
1305 case ByteTerm::TypeSubpatternEnd:
1306 ASSERT_NOT_REACHED();
1308 case ByteTerm::TypeBodyAlternativeBegin:
1309 case ByteTerm::TypeBodyAlternativeDisjunction: {
1310 int offset = currentTerm().alternative.next;
1311 context->term += offset;
1312 if (offset > 0)
1313 MATCH_NEXT();
1315 if (input.atEnd())
1316 return JSRegExpNoMatch;
1318 input.next();
1320 context->matchBegin = input.getPos();
1322 if (currentTerm().alternative.onceThrough)
1323 context->term += currentTerm().alternative.next;
1325 MATCH_NEXT();
1326 }
1327 case ByteTerm::TypeBodyAlternativeEnd:
1328 ASSERT_NOT_REACHED();
1330 case ByteTerm::TypeAlternativeBegin:
1331 case ByteTerm::TypeAlternativeDisjunction: {
1332 int offset = currentTerm().alternative.next;
1333 context->term += offset;
1334 if (offset > 0)
1335 MATCH_NEXT();
1336 BACKTRACK();
1337 }
1338 case ByteTerm::TypeAlternativeEnd: {
1339 // We should never backtrack back into an alternative of the main body of the regex.
1340 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1341 unsigned offset = backTrack->offset;
1342 context->term -= offset;
1343 BACKTRACK();
1344 }
1346 case ByteTerm::TypeAssertionBOL:
1347 case ByteTerm::TypeAssertionEOL:
1348 case ByteTerm::TypeAssertionWordBoundary:
1349 BACKTRACK();
1351 case ByteTerm::TypePatternCharacterOnce:
1352 case ByteTerm::TypePatternCharacterFixed:
1353 case ByteTerm::TypePatternCharacterGreedy:
1354 case ByteTerm::TypePatternCharacterNonGreedy:
1355 if (backtrackPatternCharacter(currentTerm(), context))
1356 MATCH_NEXT();
1357 BACKTRACK();
1358 case ByteTerm::TypePatternCasedCharacterOnce:
1359 case ByteTerm::TypePatternCasedCharacterFixed:
1360 case ByteTerm::TypePatternCasedCharacterGreedy:
1361 case ByteTerm::TypePatternCasedCharacterNonGreedy:
1362 if (backtrackPatternCasedCharacter(currentTerm(), context))
1363 MATCH_NEXT();
1364 BACKTRACK();
1365 case ByteTerm::TypeCharacterClass:
1366 if (backtrackCharacterClass(currentTerm(), context))
1367 MATCH_NEXT();
1368 BACKTRACK();
1369 case ByteTerm::TypeBackReference:
1370 if (backtrackBackReference(currentTerm(), context))
1371 MATCH_NEXT();
1372 BACKTRACK();
1373 case ByteTerm::TypeParenthesesSubpattern: {
1374 JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1376 if (result == JSRegExpMatch) {
1377 MATCH_NEXT();
1378 } else if (result != JSRegExpNoMatch)
1379 return result;
1381 BACKTRACK();
1382 }
1383 case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1384 if (backtrackParenthesesOnceBegin(currentTerm(), context))
1385 MATCH_NEXT();
1386 BACKTRACK();
1387 case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1388 if (backtrackParenthesesOnceEnd(currentTerm(), context))
1389 MATCH_NEXT();
1390 BACKTRACK();
1391 case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1392 if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1393 MATCH_NEXT();
1394 BACKTRACK();
1395 case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1396 if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1397 MATCH_NEXT();
1398 BACKTRACK();
1399 case ByteTerm::TypeParentheticalAssertionBegin:
1400 if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1401 MATCH_NEXT();
1402 BACKTRACK();
1403 case ByteTerm::TypeParentheticalAssertionEnd:
1404 if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1405 MATCH_NEXT();
1406 BACKTRACK();
1408 case ByteTerm::TypeCheckInput:
1409 input.uncheckInput(currentTerm().checkInputCount);
1410 BACKTRACK();
1412 case ByteTerm::TypeUncheckInput:
1413 input.checkInput(currentTerm().checkInputCount);
1414 BACKTRACK();
1416 case ByteTerm::TypeDotStarEnclosure:
1417 ASSERT_NOT_REACHED();
1418 }
1420 ASSERT_NOT_REACHED();
1421 return JSRegExpErrorNoMatch;
1422 }
1424 JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1425 {
1426 JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1428 if (result == JSRegExpMatch) {
1429 while (context->matchBegin == context->matchEnd) {
1430 result = matchDisjunction(disjunction, context, true);
1431 if (result != JSRegExpMatch)
1432 return result;
1433 }
1434 return JSRegExpMatch;
1435 }
1437 return result;
1438 }
1440 unsigned interpret()
1441 {
1442 if (!input.isAvailableInput(0))
1443 return offsetNoMatch;
1445 for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i)
1446 output[i << 1] = offsetNoMatch;
1448 allocatorPool = pattern->m_allocator->startAllocator();
1449 if (!allocatorPool)
1450 CRASH();
1452 DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1454 JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false);
1455 if (result == JSRegExpMatch) {
1456 output[0] = context->matchBegin;
1457 output[1] = context->matchEnd;
1458 }
1460 freeDisjunctionContext(context);
1462 pattern->m_allocator->stopAllocator();
1464 ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch));
1465 return output[0];
1466 }
1468 Interpreter(JSContext *cx, BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
1469 : cx(cx)
1470 , pattern(pattern)
1471 , output(output)
1472 , input(input, start, length)
1473 , allocatorPool(0)
1474 , remainingMatchCount(matchLimit)
1475 {
1476 }
1478 private:
1479 JSContext *cx;
1480 BytecodePattern* pattern;
1481 unsigned* output;
1482 InputStream input;
1483 BumpPointerPool* allocatorPool;
1484 unsigned remainingMatchCount;
1485 };
1489 class ByteCompiler {
1490 struct ParenthesesStackEntry {
1491 unsigned beginTerm;
1492 unsigned savedAlternativeIndex;
1494 // For js::Vector. Does not create a valid object.
1495 ParenthesesStackEntry() { }
1497 ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
1498 : beginTerm(beginTerm)
1499 , savedAlternativeIndex(savedAlternativeIndex)
1500 {
1501 }
1502 };
1504 public:
1505 ByteCompiler(YarrPattern& pattern)
1506 : m_pattern(pattern)
1507 {
1508 m_currentAlternativeIndex = 0;
1509 }
1511 PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
1512 {
1513 regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
1514 emitDisjunction(m_pattern.m_body);
1515 regexEnd();
1517 return adoptPtr(newOrCrash<BytecodePattern>(m_bodyDisjunction.release(), m_allParenthesesInfo, Ref<YarrPattern>(m_pattern), allocator));
1518 }
1520 void checkInput(unsigned count)
1521 {
1522 m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
1523 }
1525 void uncheckInput(unsigned count)
1526 {
1527 m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
1528 }
1530 void assertionBOL(unsigned inputPosition)
1531 {
1532 m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
1533 }
1535 void assertionEOL(unsigned inputPosition)
1536 {
1537 m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
1538 }
1540 void assertionWordBoundary(bool invert, unsigned inputPosition)
1541 {
1542 m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1543 }
1545 void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1546 {
1547 if (m_pattern.m_ignoreCase) {
1548 UChar lo = Unicode::toLower(ch);
1549 UChar hi = Unicode::toUpper(ch);
1551 if (lo != hi) {
1552 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
1553 return;
1554 }
1555 }
1557 m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
1558 }
1560 void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1561 {
1562 m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
1564 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
1565 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1566 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1567 }
1569 void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1570 {
1571 ASSERT(subpatternId);
1573 m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1575 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet();
1576 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1577 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1578 }
1580 void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1581 {
1582 int beginTerm = m_bodyDisjunction->terms.size();
1584 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1585 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1586 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1587 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1589 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1590 m_currentAlternativeIndex = beginTerm + 1;
1591 }
1593 void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1594 {
1595 int beginTerm = m_bodyDisjunction->terms.size();
1597 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1598 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1599 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1600 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1602 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1603 m_currentAlternativeIndex = beginTerm + 1;
1604 }
1606 void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1607 {
1608 // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1609 // then fix this up at the end! - simplifying this should make it much clearer.
1610 // https://bugs.webkit.org/show_bug.cgi?id=50136
1612 int beginTerm = m_bodyDisjunction->terms.size();
1614 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1615 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1616 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1617 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1619 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1620 m_currentAlternativeIndex = beginTerm + 1;
1621 }
1623 void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1624 {
1625 int beginTerm = m_bodyDisjunction->terms.size();
1627 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
1628 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1629 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1630 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1632 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1633 m_currentAlternativeIndex = beginTerm + 1;
1634 }
1636 void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1637 {
1638 unsigned beginTerm = popParenthesesStack();
1639 closeAlternative(beginTerm + 1);
1640 unsigned endTerm = m_bodyDisjunction->terms.size();
1642 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1644 bool invert = m_bodyDisjunction->terms[beginTerm].invert();
1645 unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1647 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1648 m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1649 m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1650 m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1652 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1653 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1654 m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1655 m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1656 }
1658 void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
1659 {
1660 m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored));
1661 }
1663 unsigned popParenthesesStack()
1664 {
1665 ASSERT(m_parenthesesStack.size());
1666 int stackEnd = m_parenthesesStack.size() - 1;
1667 unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
1668 m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
1669 m_parenthesesStack.shrink(stackEnd);
1671 ASSERT(beginTerm < m_bodyDisjunction->terms.size());
1672 ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
1674 return beginTerm;
1675 }
1677 #ifndef NDEBUG
1678 void dumpDisjunction(ByteDisjunction* disjunction)
1679 {
1680 dataLogF("ByteDisjunction(%p):\n\t", (void *)disjunction);
1681 for (unsigned i = 0; i < disjunction->terms.size(); ++i)
1682 dataLogF("{ %d } ", disjunction->terms[i].type);
1683 dataLogF("\n");
1684 }
1685 #endif
1687 void closeAlternative(int beginTerm)
1688 {
1689 int origBeginTerm = beginTerm;
1690 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1691 int endIndex = m_bodyDisjunction->terms.size();
1693 unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1695 if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
1696 m_bodyDisjunction->terms.remove(beginTerm);
1697 else {
1698 while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1699 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1700 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1701 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1702 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1703 }
1705 m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1707 m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
1708 m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1709 }
1710 }
1712 void closeBodyAlternative()
1713 {
1714 int beginTerm = 0;
1715 int origBeginTerm = 0;
1716 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1717 int endIndex = m_bodyDisjunction->terms.size();
1719 unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1721 while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1722 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1723 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1724 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1725 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1726 }
1728 m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1730 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
1731 m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1732 }
1734 void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
1735 {
1736 unsigned beginTerm = popParenthesesStack();
1737 closeAlternative(beginTerm + 1);
1738 unsigned endTerm = m_bodyDisjunction->terms.size();
1740 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1742 ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
1744 bool capture = parenthesesBegin.capture();
1745 unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1747 unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
1748 ByteDisjunction* parenthesesDisjunction = newOrCrash<ByteDisjunction>(numSubpatterns, callFrameSize);
1750 parenthesesDisjunction->terms.reserve(endTerm - beginTerm + 1);
1751 parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
1752 for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
1753 parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
1754 parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
1756 m_bodyDisjunction->terms.shrink(beginTerm);
1758 m_allParenthesesInfo.append(parenthesesDisjunction);
1759 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition));
1761 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1762 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1763 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1764 }
1766 void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1767 {
1768 unsigned beginTerm = popParenthesesStack();
1769 closeAlternative(beginTerm + 1);
1770 unsigned endTerm = m_bodyDisjunction->terms.size();
1772 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1774 bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1775 unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1777 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1778 m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1779 m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1780 m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1782 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1783 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1784 m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1785 m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1786 }
1788 void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
1789 {
1790 unsigned beginTerm = popParenthesesStack();
1791 closeAlternative(beginTerm + 1);
1792 unsigned endTerm = m_bodyDisjunction->terms.size();
1794 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1796 bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1797 unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1799 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1800 m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1801 m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1802 m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1804 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet();
1805 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1806 m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet();
1807 m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1808 }
1810 void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
1811 {
1812 m_bodyDisjunction = adoptPtr(newOrCrash<ByteDisjunction>(numSubpatterns, callFrameSize));
1813 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
1814 m_bodyDisjunction->terms[0].frameLocation = 0;
1815 m_currentAlternativeIndex = 0;
1816 }
1818 void regexEnd()
1819 {
1820 closeBodyAlternative();
1821 }
1823 void alternativeBodyDisjunction(bool onceThrough)
1824 {
1825 int newAlternativeIndex = m_bodyDisjunction->terms.size();
1826 m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1827 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
1829 m_currentAlternativeIndex = newAlternativeIndex;
1830 }
1832 void alternativeDisjunction()
1833 {
1834 int newAlternativeIndex = m_bodyDisjunction->terms.size();
1835 m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1836 m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
1838 m_currentAlternativeIndex = newAlternativeIndex;
1839 }
1841 void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
1842 {
1843 for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
1844 unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
1846 PatternAlternative* alternative = disjunction->m_alternatives[alt];
1848 if (alt) {
1849 if (disjunction == m_pattern.m_body)
1850 alternativeBodyDisjunction(alternative->onceThrough());
1851 else
1852 alternativeDisjunction();
1853 }
1855 unsigned minimumSize = alternative->m_minimumSize;
1856 ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
1857 unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
1859 if (countToCheck) {
1860 checkInput(countToCheck);
1861 currentCountAlreadyChecked += countToCheck;
1862 }
1864 for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
1865 PatternTerm& term = alternative->m_terms[i];
1867 switch (term.type) {
1868 case PatternTerm::TypeAssertionBOL:
1869 assertionBOL(currentCountAlreadyChecked - term.inputPosition);
1870 break;
1872 case PatternTerm::TypeAssertionEOL:
1873 assertionEOL(currentCountAlreadyChecked - term.inputPosition);
1874 break;
1876 case PatternTerm::TypeAssertionWordBoundary:
1877 assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition);
1878 break;
1880 case PatternTerm::TypePatternCharacter:
1881 atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1882 break;
1884 case PatternTerm::TypeCharacterClass:
1885 atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1886 break;
1888 case PatternTerm::TypeBackReference:
1889 atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType);
1890 break;
1892 case PatternTerm::TypeForwardReference:
1893 break;
1895 case PatternTerm::TypeParenthesesSubpattern: {
1896 unsigned disjunctionAlreadyCheckedCount = 0;
1897 if (term.quantityCount == 1 && !term.parentheses.isCopy) {
1898 unsigned alternativeFrameLocation = term.frameLocation;
1899 // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
1900 if (term.quantityType == QuantifierFixedCount)
1901 disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
1902 else
1903 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1904 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1905 atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation);
1906 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1907 atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1908 } else if (term.parentheses.isTerminal) {
1909 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1910 atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
1911 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1912 atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1913 } else {
1914 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1915 atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0);
1916 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
1917 atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
1918 }
1919 break;
1920 }
1922 case PatternTerm::TypeParentheticalAssertion: {
1923 unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
1925 ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
1926 unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition);
1927 unsigned uncheckAmount = 0;
1928 if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
1929 uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
1930 uncheckInput(uncheckAmount);
1931 currentCountAlreadyChecked -= uncheckAmount;
1932 }
1934 atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
1935 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount);
1936 atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
1937 if (uncheckAmount) {
1938 checkInput(uncheckAmount);
1939 currentCountAlreadyChecked += uncheckAmount;
1940 }
1941 break;
1942 }
1944 case PatternTerm::TypeDotStarEnclosure:
1945 assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor);
1946 break;
1947 }
1948 }
1949 }
1950 }
1952 private:
1953 YarrPattern& m_pattern;
1954 OwnPtr<ByteDisjunction> m_bodyDisjunction;
1955 unsigned m_currentAlternativeIndex;
1956 Vector<ParenthesesStackEntry> m_parenthesesStack;
1957 Vector<ByteDisjunction*> m_allParenthesesInfo;
1958 };
1960 PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
1961 {
1962 return ByteCompiler(pattern).compile(allocator);
1963 }
1965 unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
1966 {
1967 #if YARR_8BIT_CHAR_SUPPORT
1968 if (input.is8Bit())
1969 return Interpreter<LChar>(cx, bytecode, output, input.characters8(), input.length(), start).interpret();
1970 #endif
1971 return Interpreter<UChar>(cx, bytecode, output, input.chars(), input.length(), start).interpret();
1972 }
1974 unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
1975 {
1976 return Interpreter<LChar>(cx, bytecode, output, input, length, start).interpret();
1977 }
1979 unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
1980 {
1981 return Interpreter<UChar>(cx, bytecode, output, input, length, start).interpret();
1982 }
1984 // These should be the same for both UChar & LChar.
1985 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
1986 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
1987 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
1988 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
1989 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
1990 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
1991 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
1994 } }