|
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
|
2 * vim: set ts=8 sts=4 et sw=4 tw=99: |
|
3 * |
|
4 * Copyright (C) 2009 Apple Inc. All rights reserved. |
|
5 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged |
|
6 * |
|
7 * Redistribution and use in source and binary forms, with or without |
|
8 * modification, are permitted provided that the following conditions |
|
9 * are met: |
|
10 * 1. Redistributions of source code must retain the above copyright |
|
11 * notice, this list of conditions and the following disclaimer. |
|
12 * 2. Redistributions in binary form must reproduce the above copyright |
|
13 * notice, this list of conditions and the following disclaimer in the |
|
14 * documentation and/or other materials provided with the distribution. |
|
15 * |
|
16 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
|
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
|
20 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
|
24 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
27 */ |
|
28 |
|
29 #include "yarr/YarrInterpreter.h" |
|
30 |
|
31 #include "jscntxt.h" |
|
32 |
|
33 #include "yarr/Yarr.h" |
|
34 #include "yarr/YarrCanonicalizeUCS2.h" |
|
35 #include "yarr/BumpPointerAllocator.h" |
|
36 |
|
37 using namespace WTF; |
|
38 |
|
39 namespace JSC { namespace Yarr { |
|
40 |
|
41 template<typename CharType> |
|
42 class Interpreter { |
|
43 public: |
|
44 struct ParenthesesDisjunctionContext; |
|
45 |
|
46 struct BackTrackInfoPatternCharacter { |
|
47 uintptr_t matchAmount; |
|
48 }; |
|
49 struct BackTrackInfoCharacterClass { |
|
50 uintptr_t matchAmount; |
|
51 }; |
|
52 struct BackTrackInfoBackReference { |
|
53 uintptr_t begin; // Not really needed for greedy quantifiers. |
|
54 uintptr_t matchAmount; // Not really needed for fixed quantifiers. |
|
55 }; |
|
56 struct BackTrackInfoAlternative { |
|
57 uintptr_t offset; |
|
58 }; |
|
59 struct BackTrackInfoParentheticalAssertion { |
|
60 uintptr_t begin; |
|
61 }; |
|
62 struct BackTrackInfoParenthesesOnce { |
|
63 uintptr_t begin; |
|
64 }; |
|
65 struct BackTrackInfoParenthesesTerminal { |
|
66 uintptr_t begin; |
|
67 }; |
|
68 struct BackTrackInfoParentheses { |
|
69 uintptr_t matchAmount; |
|
70 ParenthesesDisjunctionContext* lastContext; |
|
71 }; |
|
72 |
|
73 static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context) |
|
74 { |
|
75 context->next = backTrack->lastContext; |
|
76 backTrack->lastContext = context; |
|
77 ++backTrack->matchAmount; |
|
78 } |
|
79 |
|
80 static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack) |
|
81 { |
|
82 ASSERT(backTrack->matchAmount); |
|
83 ASSERT(backTrack->lastContext); |
|
84 backTrack->lastContext = backTrack->lastContext->next; |
|
85 --backTrack->matchAmount; |
|
86 } |
|
87 |
|
88 struct DisjunctionContext |
|
89 { |
|
90 DisjunctionContext() |
|
91 : term(0) |
|
92 { |
|
93 } |
|
94 |
|
95 void* operator new(size_t, void* where) |
|
96 { |
|
97 return where; |
|
98 } |
|
99 |
|
100 int term; |
|
101 unsigned matchBegin; |
|
102 unsigned matchEnd; |
|
103 uintptr_t frame[1]; |
|
104 }; |
|
105 |
|
106 DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction) |
|
107 { |
|
108 size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t); |
|
109 allocatorPool = allocatorPool->ensureCapacity(size); |
|
110 if (!allocatorPool) |
|
111 CRASH(); |
|
112 return new (allocatorPool->alloc(size)) DisjunctionContext(); |
|
113 } |
|
114 |
|
115 void freeDisjunctionContext(DisjunctionContext* context) |
|
116 { |
|
117 allocatorPool = allocatorPool->dealloc(context); |
|
118 } |
|
119 |
|
120 struct ParenthesesDisjunctionContext |
|
121 { |
|
122 ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term) |
|
123 : next(0) |
|
124 { |
|
125 unsigned firstSubpatternId = term.atom.subpatternId; |
|
126 unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns; |
|
127 |
|
128 for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) { |
|
129 subpatternBackup[i] = output[(firstSubpatternId << 1) + i]; |
|
130 output[(firstSubpatternId << 1) + i] = offsetNoMatch; |
|
131 } |
|
132 |
|
133 new (getDisjunctionContext(term)) DisjunctionContext(); |
|
134 } |
|
135 |
|
136 void* operator new(size_t, void* where) |
|
137 { |
|
138 return where; |
|
139 } |
|
140 |
|
141 void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns) |
|
142 { |
|
143 for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) |
|
144 output[(firstSubpatternId << 1) + i] = subpatternBackup[i]; |
|
145 } |
|
146 |
|
147 DisjunctionContext* getDisjunctionContext(ByteTerm& term) |
|
148 { |
|
149 return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1])); |
|
150 } |
|
151 |
|
152 ParenthesesDisjunctionContext* next; |
|
153 unsigned subpatternBackup[1]; |
|
154 }; |
|
155 |
|
156 ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term) |
|
157 { |
|
158 size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t); |
|
159 size = JS_ROUNDUP(size, JS_ALIGNMENT_OF(ParenthesesDisjunctionContext)); |
|
160 allocatorPool = allocatorPool->ensureCapacity(size); |
|
161 if (!allocatorPool) |
|
162 CRASH(); |
|
163 return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term); |
|
164 } |
|
165 |
|
166 void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context) |
|
167 { |
|
168 allocatorPool = allocatorPool->dealloc(context); |
|
169 } |
|
170 |
|
171 class InputStream { |
|
172 public: |
|
173 InputStream(const CharType* input, unsigned start, unsigned length) |
|
174 : input(input) |
|
175 , pos(start) |
|
176 , length(length) |
|
177 { |
|
178 } |
|
179 |
|
180 void next() |
|
181 { |
|
182 ++pos; |
|
183 } |
|
184 |
|
185 void rewind(unsigned amount) |
|
186 { |
|
187 ASSERT(pos >= amount); |
|
188 pos -= amount; |
|
189 } |
|
190 |
|
191 int read() |
|
192 { |
|
193 ASSERT(pos < length); |
|
194 if (pos < length) |
|
195 return input[pos]; |
|
196 return -1; |
|
197 } |
|
198 |
|
199 int readPair() |
|
200 { |
|
201 ASSERT(pos + 1 < length); |
|
202 return input[pos] | input[pos + 1] << 16; |
|
203 } |
|
204 |
|
205 int readChecked(unsigned negativePositionOffest) |
|
206 { |
|
207 if (pos < negativePositionOffest) |
|
208 CRASH(); |
|
209 unsigned p = pos - negativePositionOffest; |
|
210 ASSERT(p < length); |
|
211 return input[p]; |
|
212 } |
|
213 |
|
214 int reread(unsigned from) |
|
215 { |
|
216 ASSERT(from < length); |
|
217 return input[from]; |
|
218 } |
|
219 |
|
220 int prev() |
|
221 { |
|
222 ASSERT(!(pos > length)); |
|
223 if (pos && length) |
|
224 return input[pos - 1]; |
|
225 return -1; |
|
226 } |
|
227 |
|
228 unsigned getPos() |
|
229 { |
|
230 return pos; |
|
231 } |
|
232 |
|
233 void setPos(unsigned p) |
|
234 { |
|
235 pos = p; |
|
236 } |
|
237 |
|
238 bool atStart() |
|
239 { |
|
240 return pos == 0; |
|
241 } |
|
242 |
|
243 bool atEnd() |
|
244 { |
|
245 return pos == length; |
|
246 } |
|
247 |
|
248 unsigned end() |
|
249 { |
|
250 return length; |
|
251 } |
|
252 |
|
253 bool checkInput(unsigned count) |
|
254 { |
|
255 if (((pos + count) <= length) && ((pos + count) >= pos)) { |
|
256 pos += count; |
|
257 return true; |
|
258 } |
|
259 return false; |
|
260 } |
|
261 |
|
262 void uncheckInput(unsigned count) |
|
263 { |
|
264 if (pos < count) |
|
265 CRASH(); |
|
266 pos -= count; |
|
267 } |
|
268 |
|
269 bool atStart(unsigned negativePositionOffest) |
|
270 { |
|
271 return pos == negativePositionOffest; |
|
272 } |
|
273 |
|
274 bool atEnd(unsigned negativePositionOffest) |
|
275 { |
|
276 if (pos < negativePositionOffest) |
|
277 CRASH(); |
|
278 return (pos - negativePositionOffest) == length; |
|
279 } |
|
280 |
|
281 bool isAvailableInput(unsigned offset) |
|
282 { |
|
283 return (((pos + offset) <= length) && ((pos + offset) >= pos)); |
|
284 } |
|
285 |
|
286 private: |
|
287 const CharType* input; |
|
288 unsigned pos; |
|
289 unsigned length; |
|
290 }; |
|
291 |
|
292 bool testCharacterClass(CharacterClass* characterClass, int ch) |
|
293 { |
|
294 if (ch & 0xFF80) { |
|
295 for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i) |
|
296 if (ch == characterClass->m_matchesUnicode[i]) |
|
297 return true; |
|
298 for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i) |
|
299 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end)) |
|
300 return true; |
|
301 } else { |
|
302 for (unsigned i = 0; i < characterClass->m_matches.size(); ++i) |
|
303 if (ch == characterClass->m_matches[i]) |
|
304 return true; |
|
305 for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i) |
|
306 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end)) |
|
307 return true; |
|
308 } |
|
309 |
|
310 return false; |
|
311 } |
|
312 |
|
313 bool checkCharacter(int testChar, unsigned negativeInputOffset) |
|
314 { |
|
315 return testChar == input.readChecked(negativeInputOffset); |
|
316 } |
|
317 |
|
318 bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset) |
|
319 { |
|
320 int ch = input.readChecked(negativeInputOffset); |
|
321 return (loChar == ch) || (hiChar == ch); |
|
322 } |
|
323 |
|
324 bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset) |
|
325 { |
|
326 bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset)); |
|
327 return invert ? !match : match; |
|
328 } |
|
329 |
|
330 bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset) |
|
331 { |
|
332 unsigned matchSize = (unsigned)(matchEnd - matchBegin); |
|
333 |
|
334 if (!input.checkInput(matchSize)) |
|
335 return false; |
|
336 |
|
337 if (pattern->m_ignoreCase) { |
|
338 for (unsigned i = 0; i < matchSize; ++i) { |
|
339 int oldCh = input.reread(matchBegin + i); |
|
340 int ch = input.readChecked(negativeInputOffset + matchSize - i); |
|
341 |
|
342 if (oldCh == ch) |
|
343 continue; |
|
344 |
|
345 // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that |
|
346 // unicode values are never allowed to match against ascii ones. |
|
347 if (isASCII(oldCh) || isASCII(ch)) { |
|
348 if (toASCIIUpper(oldCh) == toASCIIUpper(ch)) |
|
349 continue; |
|
350 } else if (areCanonicallyEquivalent(oldCh, ch)) |
|
351 continue; |
|
352 |
|
353 input.uncheckInput(matchSize); |
|
354 return false; |
|
355 } |
|
356 } else { |
|
357 for (unsigned i = 0; i < matchSize; ++i) { |
|
358 if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) { |
|
359 input.uncheckInput(matchSize); |
|
360 return false; |
|
361 } |
|
362 } |
|
363 } |
|
364 |
|
365 return true; |
|
366 } |
|
367 |
|
368 bool matchAssertionBOL(ByteTerm& term) |
|
369 { |
|
370 return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1))); |
|
371 } |
|
372 |
|
373 bool matchAssertionEOL(ByteTerm& term) |
|
374 { |
|
375 if (term.inputPosition) |
|
376 return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition))); |
|
377 |
|
378 return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read())); |
|
379 } |
|
380 |
|
381 bool matchAssertionWordBoundary(ByteTerm& term) |
|
382 { |
|
383 bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1)); |
|
384 bool readIsWordchar; |
|
385 if (term.inputPosition) |
|
386 readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition)); |
|
387 else |
|
388 readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read()); |
|
389 |
|
390 bool wordBoundary = prevIsWordchar != readIsWordchar; |
|
391 return term.invert() ? !wordBoundary : wordBoundary; |
|
392 } |
|
393 |
|
394 bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context) |
|
395 { |
|
396 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); |
|
397 |
|
398 switch (term.atom.quantityType) { |
|
399 case QuantifierFixedCount: |
|
400 break; |
|
401 |
|
402 case QuantifierGreedy: |
|
403 if (backTrack->matchAmount) { |
|
404 --backTrack->matchAmount; |
|
405 input.uncheckInput(1); |
|
406 return true; |
|
407 } |
|
408 break; |
|
409 |
|
410 case QuantifierNonGreedy: |
|
411 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { |
|
412 ++backTrack->matchAmount; |
|
413 if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1)) |
|
414 return true; |
|
415 } |
|
416 input.uncheckInput(backTrack->matchAmount); |
|
417 break; |
|
418 } |
|
419 |
|
420 return false; |
|
421 } |
|
422 |
|
423 bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context) |
|
424 { |
|
425 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); |
|
426 |
|
427 switch (term.atom.quantityType) { |
|
428 case QuantifierFixedCount: |
|
429 break; |
|
430 |
|
431 case QuantifierGreedy: |
|
432 if (backTrack->matchAmount) { |
|
433 --backTrack->matchAmount; |
|
434 input.uncheckInput(1); |
|
435 return true; |
|
436 } |
|
437 break; |
|
438 |
|
439 case QuantifierNonGreedy: |
|
440 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { |
|
441 ++backTrack->matchAmount; |
|
442 if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1)) |
|
443 return true; |
|
444 } |
|
445 input.uncheckInput(backTrack->matchAmount); |
|
446 break; |
|
447 } |
|
448 |
|
449 return false; |
|
450 } |
|
451 |
|
452 bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context) |
|
453 { |
|
454 ASSERT(term.type == ByteTerm::TypeCharacterClass); |
|
455 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); |
|
456 |
|
457 switch (term.atom.quantityType) { |
|
458 case QuantifierFixedCount: { |
|
459 for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { |
|
460 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount)) |
|
461 return false; |
|
462 } |
|
463 return true; |
|
464 } |
|
465 |
|
466 case QuantifierGreedy: { |
|
467 unsigned matchAmount = 0; |
|
468 while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) { |
|
469 if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) { |
|
470 input.uncheckInput(1); |
|
471 break; |
|
472 } |
|
473 ++matchAmount; |
|
474 } |
|
475 backTrack->matchAmount = matchAmount; |
|
476 |
|
477 return true; |
|
478 } |
|
479 |
|
480 case QuantifierNonGreedy: |
|
481 backTrack->matchAmount = 0; |
|
482 return true; |
|
483 } |
|
484 |
|
485 ASSERT_NOT_REACHED(); |
|
486 return false; |
|
487 } |
|
488 |
|
489 bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context) |
|
490 { |
|
491 ASSERT(term.type == ByteTerm::TypeCharacterClass); |
|
492 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); |
|
493 |
|
494 switch (term.atom.quantityType) { |
|
495 case QuantifierFixedCount: |
|
496 break; |
|
497 |
|
498 case QuantifierGreedy: |
|
499 if (backTrack->matchAmount) { |
|
500 --backTrack->matchAmount; |
|
501 input.uncheckInput(1); |
|
502 return true; |
|
503 } |
|
504 break; |
|
505 |
|
506 case QuantifierNonGreedy: |
|
507 if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { |
|
508 ++backTrack->matchAmount; |
|
509 if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) |
|
510 return true; |
|
511 } |
|
512 input.uncheckInput(backTrack->matchAmount); |
|
513 break; |
|
514 } |
|
515 |
|
516 return false; |
|
517 } |
|
518 |
|
519 bool matchBackReference(ByteTerm& term, DisjunctionContext* context) |
|
520 { |
|
521 ASSERT(term.type == ByteTerm::TypeBackReference); |
|
522 BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation); |
|
523 |
|
524 unsigned matchBegin = output[(term.atom.subpatternId << 1)]; |
|
525 unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1]; |
|
526 |
|
527 // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that. |
|
528 // In this case the result of match is empty string like when it references to a parentheses with zero-width match. |
|
529 // Eg.: /(a\1)/ |
|
530 if (matchEnd == offsetNoMatch) |
|
531 return true; |
|
532 |
|
533 if (matchBegin == offsetNoMatch) |
|
534 return true; |
|
535 |
|
536 ASSERT(matchBegin <= matchEnd); |
|
537 |
|
538 if (matchBegin == matchEnd) |
|
539 return true; |
|
540 |
|
541 switch (term.atom.quantityType) { |
|
542 case QuantifierFixedCount: { |
|
543 backTrack->begin = input.getPos(); |
|
544 for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { |
|
545 if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { |
|
546 input.setPos(backTrack->begin); |
|
547 return false; |
|
548 } |
|
549 } |
|
550 return true; |
|
551 } |
|
552 |
|
553 case QuantifierGreedy: { |
|
554 unsigned matchAmount = 0; |
|
555 while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) |
|
556 ++matchAmount; |
|
557 backTrack->matchAmount = matchAmount; |
|
558 return true; |
|
559 } |
|
560 |
|
561 case QuantifierNonGreedy: |
|
562 backTrack->begin = input.getPos(); |
|
563 backTrack->matchAmount = 0; |
|
564 return true; |
|
565 } |
|
566 |
|
567 ASSERT_NOT_REACHED(); |
|
568 return false; |
|
569 } |
|
570 |
|
571 bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context) |
|
572 { |
|
573 ASSERT(term.type == ByteTerm::TypeBackReference); |
|
574 BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation); |
|
575 |
|
576 unsigned matchBegin = output[(term.atom.subpatternId << 1)]; |
|
577 unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1]; |
|
578 |
|
579 if (matchBegin == offsetNoMatch) |
|
580 return false; |
|
581 |
|
582 ASSERT(matchBegin <= matchEnd); |
|
583 |
|
584 if (matchBegin == matchEnd) |
|
585 return false; |
|
586 |
|
587 switch (term.atom.quantityType) { |
|
588 case QuantifierFixedCount: |
|
589 // for quantityCount == 1, could rewind. |
|
590 input.setPos(backTrack->begin); |
|
591 break; |
|
592 |
|
593 case QuantifierGreedy: |
|
594 if (backTrack->matchAmount) { |
|
595 --backTrack->matchAmount; |
|
596 input.rewind(matchEnd - matchBegin); |
|
597 return true; |
|
598 } |
|
599 break; |
|
600 |
|
601 case QuantifierNonGreedy: |
|
602 if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { |
|
603 ++backTrack->matchAmount; |
|
604 return true; |
|
605 } |
|
606 input.setPos(backTrack->begin); |
|
607 break; |
|
608 } |
|
609 |
|
610 return false; |
|
611 } |
|
612 |
|
613 void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context) |
|
614 { |
|
615 if (term.capture()) { |
|
616 unsigned subpatternId = term.atom.subpatternId; |
|
617 output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition; |
|
618 output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition; |
|
619 } |
|
620 } |
|
621 void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context) |
|
622 { |
|
623 unsigned firstSubpatternId = term.atom.subpatternId; |
|
624 unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns; |
|
625 context->restoreOutput(output, firstSubpatternId, count); |
|
626 } |
|
627 JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack) |
|
628 { |
|
629 while (backTrack->matchAmount) { |
|
630 ParenthesesDisjunctionContext* context = backTrack->lastContext; |
|
631 |
|
632 JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true); |
|
633 if (result == JSRegExpMatch) |
|
634 return JSRegExpMatch; |
|
635 |
|
636 resetMatches(term, context); |
|
637 popParenthesesDisjunctionContext(backTrack); |
|
638 freeParenthesesDisjunctionContext(context); |
|
639 |
|
640 if (result != JSRegExpNoMatch) |
|
641 return result; |
|
642 } |
|
643 |
|
644 return JSRegExpNoMatch; |
|
645 } |
|
646 |
|
647 bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) |
|
648 { |
|
649 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); |
|
650 ASSERT(term.atom.quantityCount == 1); |
|
651 |
|
652 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); |
|
653 |
|
654 switch (term.atom.quantityType) { |
|
655 case QuantifierGreedy: { |
|
656 // set this speculatively; if we get to the parens end this will be true. |
|
657 backTrack->begin = input.getPos(); |
|
658 break; |
|
659 } |
|
660 case QuantifierNonGreedy: { |
|
661 backTrack->begin = notFound; |
|
662 context->term += term.atom.parenthesesWidth; |
|
663 return true; |
|
664 } |
|
665 case QuantifierFixedCount: |
|
666 break; |
|
667 } |
|
668 |
|
669 if (term.capture()) { |
|
670 unsigned subpatternId = term.atom.subpatternId; |
|
671 output[(subpatternId << 1)] = input.getPos() - term.inputPosition; |
|
672 } |
|
673 |
|
674 return true; |
|
675 } |
|
676 |
|
677 bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context) |
|
678 { |
|
679 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); |
|
680 ASSERT(term.atom.quantityCount == 1); |
|
681 |
|
682 if (term.capture()) { |
|
683 unsigned subpatternId = term.atom.subpatternId; |
|
684 output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition; |
|
685 } |
|
686 |
|
687 if (term.atom.quantityType == QuantifierFixedCount) |
|
688 return true; |
|
689 |
|
690 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); |
|
691 return backTrack->begin != input.getPos(); |
|
692 } |
|
693 |
|
694 bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) |
|
695 { |
|
696 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); |
|
697 ASSERT(term.atom.quantityCount == 1); |
|
698 |
|
699 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); |
|
700 |
|
701 if (term.capture()) { |
|
702 unsigned subpatternId = term.atom.subpatternId; |
|
703 output[(subpatternId << 1)] = offsetNoMatch; |
|
704 output[(subpatternId << 1) + 1] = offsetNoMatch; |
|
705 } |
|
706 |
|
707 switch (term.atom.quantityType) { |
|
708 case QuantifierGreedy: |
|
709 // if we backtrack to this point, there is another chance - try matching nothing. |
|
710 ASSERT(backTrack->begin != notFound); |
|
711 backTrack->begin = notFound; |
|
712 context->term += term.atom.parenthesesWidth; |
|
713 return true; |
|
714 case QuantifierNonGreedy: |
|
715 ASSERT(backTrack->begin != notFound); |
|
716 case QuantifierFixedCount: |
|
717 break; |
|
718 } |
|
719 |
|
720 return false; |
|
721 } |
|
722 |
|
723 bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context) |
|
724 { |
|
725 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); |
|
726 ASSERT(term.atom.quantityCount == 1); |
|
727 |
|
728 BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); |
|
729 |
|
730 switch (term.atom.quantityType) { |
|
731 case QuantifierGreedy: |
|
732 if (backTrack->begin == notFound) { |
|
733 context->term -= term.atom.parenthesesWidth; |
|
734 return false; |
|
735 } |
|
736 case QuantifierNonGreedy: |
|
737 if (backTrack->begin == notFound) { |
|
738 backTrack->begin = input.getPos(); |
|
739 if (term.capture()) { |
|
740 // Technically this access to inputPosition should be accessing the begin term's |
|
741 // inputPosition, but for repeats other than fixed these values should be |
|
742 // the same anyway! (We don't pre-check for greedy or non-greedy matches.) |
|
743 ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin); |
|
744 |
|
745 // Disabled, see bug 808478 |
|
746 #if 0 |
|
747 ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition); |
|
748 #endif |
|
749 unsigned subpatternId = term.atom.subpatternId; |
|
750 output[subpatternId << 1] = input.getPos() + term.inputPosition; |
|
751 } |
|
752 context->term -= term.atom.parenthesesWidth; |
|
753 return true; |
|
754 } |
|
755 case QuantifierFixedCount: |
|
756 break; |
|
757 } |
|
758 |
|
759 return false; |
|
760 } |
|
761 |
|
762 bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context) |
|
763 { |
|
764 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); |
|
765 ASSERT(term.atom.quantityType == QuantifierGreedy); |
|
766 ASSERT(term.atom.quantityCount == quantifyInfinite); |
|
767 ASSERT(!term.capture()); |
|
768 |
|
769 BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation); |
|
770 backTrack->begin = input.getPos(); |
|
771 return true; |
|
772 } |
|
773 |
|
774 bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context) |
|
775 { |
|
776 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd); |
|
777 |
|
778 BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation); |
|
779 // Empty match is a failed match. |
|
780 if (backTrack->begin == input.getPos()) |
|
781 return false; |
|
782 |
|
783 // Successful match! Okay, what's next? - loop around and try to match moar! |
|
784 context->term -= (term.atom.parenthesesWidth + 1); |
|
785 return true; |
|
786 } |
|
787 |
|
788 bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context) |
|
789 { |
|
790 ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); |
|
791 ASSERT(term.atom.quantityType == QuantifierGreedy); |
|
792 ASSERT(term.atom.quantityCount == quantifyInfinite); |
|
793 ASSERT(!term.capture()); |
|
794 |
|
795 // If we backtrack to this point, we have failed to match this iteration of the parens. |
|
796 // Since this is greedy / zero minimum a failed is also accepted as a match! |
|
797 context->term += term.atom.parenthesesWidth; |
|
798 return true; |
|
799 } |
|
800 |
|
801 bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*) |
|
802 { |
|
803 // 'Terminal' parentheses are at the end of the regex, and as such a match past end |
|
804 // should always be returned as a successful match - we should never backtrack to here. |
|
805 ASSERT_NOT_REACHED(); |
|
806 return false; |
|
807 } |
|
808 |
|
809 bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) |
|
810 { |
|
811 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); |
|
812 ASSERT(term.atom.quantityCount == 1); |
|
813 |
|
814 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); |
|
815 |
|
816 backTrack->begin = input.getPos(); |
|
817 return true; |
|
818 } |
|
819 |
|
820 bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) |
|
821 { |
|
822 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); |
|
823 ASSERT(term.atom.quantityCount == 1); |
|
824 |
|
825 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); |
|
826 |
|
827 input.setPos(backTrack->begin); |
|
828 |
|
829 // We've reached the end of the parens; if they are inverted, this is failure. |
|
830 if (term.invert()) { |
|
831 context->term -= term.atom.parenthesesWidth; |
|
832 return false; |
|
833 } |
|
834 |
|
835 return true; |
|
836 } |
|
837 |
|
838 bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) |
|
839 { |
|
840 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); |
|
841 ASSERT(term.atom.quantityCount == 1); |
|
842 |
|
843 // We've failed to match parens; if they are inverted, this is win! |
|
844 if (term.invert()) { |
|
845 context->term += term.atom.parenthesesWidth; |
|
846 return true; |
|
847 } |
|
848 |
|
849 return false; |
|
850 } |
|
851 |
|
852 bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) |
|
853 { |
|
854 ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); |
|
855 ASSERT(term.atom.quantityCount == 1); |
|
856 |
|
857 BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); |
|
858 |
|
859 input.setPos(backTrack->begin); |
|
860 |
|
861 context->term -= term.atom.parenthesesWidth; |
|
862 return false; |
|
863 } |
|
864 |
|
865 JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context) |
|
866 { |
|
867 ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); |
|
868 |
|
869 BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation); |
|
870 ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction; |
|
871 |
|
872 backTrack->matchAmount = 0; |
|
873 backTrack->lastContext = 0; |
|
874 |
|
875 switch (term.atom.quantityType) { |
|
876 case QuantifierFixedCount: { |
|
877 // While we haven't yet reached our fixed limit, |
|
878 while (backTrack->matchAmount < term.atom.quantityCount) { |
|
879 // Try to do a match, and it it succeeds, add it to the list. |
|
880 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); |
|
881 JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); |
|
882 if (result == JSRegExpMatch) |
|
883 appendParenthesesDisjunctionContext(backTrack, context); |
|
884 else { |
|
885 // The match failed; try to find an alternate point to carry on from. |
|
886 resetMatches(term, context); |
|
887 freeParenthesesDisjunctionContext(context); |
|
888 |
|
889 if (result != JSRegExpNoMatch) |
|
890 return result; |
|
891 JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack); |
|
892 if (backtrackResult != JSRegExpMatch) |
|
893 return backtrackResult; |
|
894 } |
|
895 } |
|
896 |
|
897 ASSERT(backTrack->matchAmount == term.atom.quantityCount); |
|
898 ParenthesesDisjunctionContext* context = backTrack->lastContext; |
|
899 recordParenthesesMatch(term, context); |
|
900 return JSRegExpMatch; |
|
901 } |
|
902 |
|
903 case QuantifierGreedy: { |
|
904 while (backTrack->matchAmount < term.atom.quantityCount) { |
|
905 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); |
|
906 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); |
|
907 if (result == JSRegExpMatch) |
|
908 appendParenthesesDisjunctionContext(backTrack, context); |
|
909 else { |
|
910 resetMatches(term, context); |
|
911 freeParenthesesDisjunctionContext(context); |
|
912 |
|
913 if (result != JSRegExpNoMatch) |
|
914 return result; |
|
915 |
|
916 break; |
|
917 } |
|
918 } |
|
919 |
|
920 if (backTrack->matchAmount) { |
|
921 ParenthesesDisjunctionContext* context = backTrack->lastContext; |
|
922 recordParenthesesMatch(term, context); |
|
923 } |
|
924 return JSRegExpMatch; |
|
925 } |
|
926 |
|
927 case QuantifierNonGreedy: |
|
928 return JSRegExpMatch; |
|
929 } |
|
930 |
|
931 ASSERT_NOT_REACHED(); |
|
932 return JSRegExpErrorNoMatch; |
|
933 } |
|
934 |
|
935 // Rules for backtracking differ depending on whether this is greedy or non-greedy. |
|
936 // |
|
937 // Greedy matches never should try just adding more - you should already have done |
|
938 // the 'more' cases. Always backtrack, at least a leetle bit. However cases where |
|
939 // you backtrack an item off the list needs checking, since we'll never have matched |
|
940 // the one less case. Tracking forwards, still add as much as possible. |
|
941 // |
|
942 // Non-greedy, we've already done the one less case, so don't match on popping. |
|
943 // We haven't done the one more case, so always try to add that. |
|
944 // |
|
945 JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context) |
|
946 { |
|
947 ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); |
|
948 |
|
949 BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation); |
|
950 ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction; |
|
951 |
|
952 switch (term.atom.quantityType) { |
|
953 case QuantifierFixedCount: { |
|
954 ASSERT(backTrack->matchAmount == term.atom.quantityCount); |
|
955 |
|
956 ParenthesesDisjunctionContext* context = 0; |
|
957 JSRegExpResult result = parenthesesDoBacktrack(term, backTrack); |
|
958 |
|
959 if (result != JSRegExpMatch) |
|
960 return result; |
|
961 |
|
962 // While we haven't yet reached our fixed limit, |
|
963 while (backTrack->matchAmount < term.atom.quantityCount) { |
|
964 // Try to do a match, and it it succeeds, add it to the list. |
|
965 context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); |
|
966 result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); |
|
967 |
|
968 if (result == JSRegExpMatch) |
|
969 appendParenthesesDisjunctionContext(backTrack, context); |
|
970 else { |
|
971 // The match failed; try to find an alternate point to carry on from. |
|
972 resetMatches(term, context); |
|
973 freeParenthesesDisjunctionContext(context); |
|
974 |
|
975 if (result != JSRegExpNoMatch) |
|
976 return result; |
|
977 JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack); |
|
978 if (backtrackResult != JSRegExpMatch) |
|
979 return backtrackResult; |
|
980 } |
|
981 } |
|
982 |
|
983 ASSERT(backTrack->matchAmount == term.atom.quantityCount); |
|
984 context = backTrack->lastContext; |
|
985 recordParenthesesMatch(term, context); |
|
986 return JSRegExpMatch; |
|
987 } |
|
988 |
|
989 case QuantifierGreedy: { |
|
990 if (!backTrack->matchAmount) |
|
991 return JSRegExpNoMatch; |
|
992 |
|
993 ParenthesesDisjunctionContext* context = backTrack->lastContext; |
|
994 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true); |
|
995 if (result == JSRegExpMatch) { |
|
996 while (backTrack->matchAmount < term.atom.quantityCount) { |
|
997 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); |
|
998 JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); |
|
999 if (parenthesesResult == JSRegExpMatch) |
|
1000 appendParenthesesDisjunctionContext(backTrack, context); |
|
1001 else { |
|
1002 resetMatches(term, context); |
|
1003 freeParenthesesDisjunctionContext(context); |
|
1004 |
|
1005 if (parenthesesResult != JSRegExpNoMatch) |
|
1006 return parenthesesResult; |
|
1007 |
|
1008 break; |
|
1009 } |
|
1010 } |
|
1011 } else { |
|
1012 // Avoid a topcrash before it occurs. |
|
1013 if (!backTrack->lastContext) { |
|
1014 ASSERT(!"Tripped Bug 856796!"); |
|
1015 return JSRegExpErrorInternal; |
|
1016 } |
|
1017 |
|
1018 resetMatches(term, context); |
|
1019 popParenthesesDisjunctionContext(backTrack); |
|
1020 freeParenthesesDisjunctionContext(context); |
|
1021 |
|
1022 if (result != JSRegExpNoMatch) |
|
1023 return result; |
|
1024 } |
|
1025 |
|
1026 if (backTrack->matchAmount) { |
|
1027 ParenthesesDisjunctionContext* context = backTrack->lastContext; |
|
1028 recordParenthesesMatch(term, context); |
|
1029 } |
|
1030 return JSRegExpMatch; |
|
1031 } |
|
1032 |
|
1033 case QuantifierNonGreedy: { |
|
1034 // If we've not reached the limit, try to add one more match. |
|
1035 if (backTrack->matchAmount < term.atom.quantityCount) { |
|
1036 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); |
|
1037 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); |
|
1038 if (result == JSRegExpMatch) { |
|
1039 appendParenthesesDisjunctionContext(backTrack, context); |
|
1040 recordParenthesesMatch(term, context); |
|
1041 return JSRegExpMatch; |
|
1042 } |
|
1043 |
|
1044 resetMatches(term, context); |
|
1045 freeParenthesesDisjunctionContext(context); |
|
1046 |
|
1047 if (result != JSRegExpNoMatch) |
|
1048 return result; |
|
1049 } |
|
1050 |
|
1051 // Nope - okay backtrack looking for an alternative. |
|
1052 while (backTrack->matchAmount) { |
|
1053 ParenthesesDisjunctionContext* context = backTrack->lastContext; |
|
1054 JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true); |
|
1055 if (result == JSRegExpMatch) { |
|
1056 // successful backtrack! we're back in the game! |
|
1057 if (backTrack->matchAmount) { |
|
1058 context = backTrack->lastContext; |
|
1059 recordParenthesesMatch(term, context); |
|
1060 } |
|
1061 return JSRegExpMatch; |
|
1062 } |
|
1063 |
|
1064 // Avoid a topcrash before it occurs. |
|
1065 if (!backTrack->lastContext) { |
|
1066 ASSERT(!"Tripped Bug 856796!"); |
|
1067 return JSRegExpErrorInternal; |
|
1068 } |
|
1069 |
|
1070 // pop a match off the stack |
|
1071 resetMatches(term, context); |
|
1072 popParenthesesDisjunctionContext(backTrack); |
|
1073 freeParenthesesDisjunctionContext(context); |
|
1074 |
|
1075 if (result != JSRegExpNoMatch) |
|
1076 return result; |
|
1077 } |
|
1078 |
|
1079 return JSRegExpNoMatch; |
|
1080 } |
|
1081 } |
|
1082 |
|
1083 ASSERT_NOT_REACHED(); |
|
1084 return JSRegExpErrorNoMatch; |
|
1085 } |
|
1086 |
|
1087 bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context) |
|
1088 { |
|
1089 UNUSED_PARAM(term); |
|
1090 unsigned matchBegin = context->matchBegin; |
|
1091 |
|
1092 if (matchBegin) { |
|
1093 for (matchBegin--; true; matchBegin--) { |
|
1094 if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) { |
|
1095 ++matchBegin; |
|
1096 break; |
|
1097 } |
|
1098 |
|
1099 if (!matchBegin) |
|
1100 break; |
|
1101 } |
|
1102 } |
|
1103 |
|
1104 unsigned matchEnd = input.getPos(); |
|
1105 |
|
1106 for (; (matchEnd != input.end()) |
|
1107 && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { } |
|
1108 |
|
1109 if (((matchBegin && term.anchors.m_bol) |
|
1110 || ((matchEnd != input.end()) && term.anchors.m_eol)) |
|
1111 && !pattern->m_multiline) |
|
1112 return false; |
|
1113 |
|
1114 context->matchBegin = matchBegin; |
|
1115 context->matchEnd = matchEnd; |
|
1116 return true; |
|
1117 } |
|
1118 |
|
1119 #define MATCH_NEXT() { ++context->term; goto matchAgain; } |
|
1120 #define BACKTRACK() { --context->term; goto backtrack; } |
|
1121 #define currentTerm() (disjunction->terms[context->term]) |
|
1122 JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false) |
|
1123 { |
|
1124 if (!--remainingMatchCount) |
|
1125 return JSRegExpErrorHitLimit; |
|
1126 |
|
1127 if (btrack) |
|
1128 BACKTRACK(); |
|
1129 |
|
1130 context->matchBegin = input.getPos(); |
|
1131 context->term = 0; |
|
1132 |
|
1133 matchAgain: |
|
1134 ASSERT(context->term < static_cast<int>(disjunction->terms.size())); |
|
1135 |
|
1136 // Prevent jank resulting from getting stuck in Yarr for a long time. |
|
1137 if (!CheckForInterrupt(this->cx)) |
|
1138 return JSRegExpErrorInternal; |
|
1139 |
|
1140 switch (currentTerm().type) { |
|
1141 case ByteTerm::TypeSubpatternBegin: |
|
1142 MATCH_NEXT(); |
|
1143 case ByteTerm::TypeSubpatternEnd: |
|
1144 context->matchEnd = input.getPos(); |
|
1145 return JSRegExpMatch; |
|
1146 |
|
1147 case ByteTerm::TypeBodyAlternativeBegin: |
|
1148 MATCH_NEXT(); |
|
1149 case ByteTerm::TypeBodyAlternativeDisjunction: |
|
1150 case ByteTerm::TypeBodyAlternativeEnd: |
|
1151 context->matchEnd = input.getPos(); |
|
1152 return JSRegExpMatch; |
|
1153 |
|
1154 case ByteTerm::TypeAlternativeBegin: |
|
1155 MATCH_NEXT(); |
|
1156 case ByteTerm::TypeAlternativeDisjunction: |
|
1157 case ByteTerm::TypeAlternativeEnd: { |
|
1158 int offset = currentTerm().alternative.end; |
|
1159 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation); |
|
1160 backTrack->offset = offset; |
|
1161 context->term += offset; |
|
1162 MATCH_NEXT(); |
|
1163 } |
|
1164 |
|
1165 case ByteTerm::TypeAssertionBOL: |
|
1166 if (matchAssertionBOL(currentTerm())) |
|
1167 MATCH_NEXT(); |
|
1168 BACKTRACK(); |
|
1169 case ByteTerm::TypeAssertionEOL: |
|
1170 if (matchAssertionEOL(currentTerm())) |
|
1171 MATCH_NEXT(); |
|
1172 BACKTRACK(); |
|
1173 case ByteTerm::TypeAssertionWordBoundary: |
|
1174 if (matchAssertionWordBoundary(currentTerm())) |
|
1175 MATCH_NEXT(); |
|
1176 BACKTRACK(); |
|
1177 |
|
1178 case ByteTerm::TypePatternCharacterOnce: |
|
1179 case ByteTerm::TypePatternCharacterFixed: { |
|
1180 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { |
|
1181 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) |
|
1182 BACKTRACK(); |
|
1183 } |
|
1184 MATCH_NEXT(); |
|
1185 } |
|
1186 case ByteTerm::TypePatternCharacterGreedy: { |
|
1187 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); |
|
1188 unsigned matchAmount = 0; |
|
1189 while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) { |
|
1190 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) { |
|
1191 input.uncheckInput(1); |
|
1192 break; |
|
1193 } |
|
1194 ++matchAmount; |
|
1195 } |
|
1196 backTrack->matchAmount = matchAmount; |
|
1197 |
|
1198 MATCH_NEXT(); |
|
1199 } |
|
1200 case ByteTerm::TypePatternCharacterNonGreedy: { |
|
1201 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); |
|
1202 backTrack->matchAmount = 0; |
|
1203 MATCH_NEXT(); |
|
1204 } |
|
1205 |
|
1206 case ByteTerm::TypePatternCasedCharacterOnce: |
|
1207 case ByteTerm::TypePatternCasedCharacterFixed: { |
|
1208 for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { |
|
1209 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) |
|
1210 BACKTRACK(); |
|
1211 } |
|
1212 MATCH_NEXT(); |
|
1213 } |
|
1214 case ByteTerm::TypePatternCasedCharacterGreedy: { |
|
1215 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); |
|
1216 unsigned matchAmount = 0; |
|
1217 while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) { |
|
1218 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) { |
|
1219 input.uncheckInput(1); |
|
1220 break; |
|
1221 } |
|
1222 ++matchAmount; |
|
1223 } |
|
1224 backTrack->matchAmount = matchAmount; |
|
1225 |
|
1226 MATCH_NEXT(); |
|
1227 } |
|
1228 case ByteTerm::TypePatternCasedCharacterNonGreedy: { |
|
1229 BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); |
|
1230 backTrack->matchAmount = 0; |
|
1231 MATCH_NEXT(); |
|
1232 } |
|
1233 |
|
1234 case ByteTerm::TypeCharacterClass: |
|
1235 if (matchCharacterClass(currentTerm(), context)) |
|
1236 MATCH_NEXT(); |
|
1237 BACKTRACK(); |
|
1238 case ByteTerm::TypeBackReference: |
|
1239 if (matchBackReference(currentTerm(), context)) |
|
1240 MATCH_NEXT(); |
|
1241 BACKTRACK(); |
|
1242 case ByteTerm::TypeParenthesesSubpattern: { |
|
1243 JSRegExpResult result = matchParentheses(currentTerm(), context); |
|
1244 |
|
1245 if (result == JSRegExpMatch) { |
|
1246 MATCH_NEXT(); |
|
1247 } else if (result != JSRegExpNoMatch) |
|
1248 return result; |
|
1249 |
|
1250 BACKTRACK(); |
|
1251 } |
|
1252 case ByteTerm::TypeParenthesesSubpatternOnceBegin: |
|
1253 if (matchParenthesesOnceBegin(currentTerm(), context)) |
|
1254 MATCH_NEXT(); |
|
1255 BACKTRACK(); |
|
1256 case ByteTerm::TypeParenthesesSubpatternOnceEnd: |
|
1257 if (matchParenthesesOnceEnd(currentTerm(), context)) |
|
1258 MATCH_NEXT(); |
|
1259 BACKTRACK(); |
|
1260 case ByteTerm::TypeParenthesesSubpatternTerminalBegin: |
|
1261 if (matchParenthesesTerminalBegin(currentTerm(), context)) |
|
1262 MATCH_NEXT(); |
|
1263 BACKTRACK(); |
|
1264 case ByteTerm::TypeParenthesesSubpatternTerminalEnd: |
|
1265 if (matchParenthesesTerminalEnd(currentTerm(), context)) |
|
1266 MATCH_NEXT(); |
|
1267 BACKTRACK(); |
|
1268 case ByteTerm::TypeParentheticalAssertionBegin: |
|
1269 if (matchParentheticalAssertionBegin(currentTerm(), context)) |
|
1270 MATCH_NEXT(); |
|
1271 BACKTRACK(); |
|
1272 case ByteTerm::TypeParentheticalAssertionEnd: |
|
1273 if (matchParentheticalAssertionEnd(currentTerm(), context)) |
|
1274 MATCH_NEXT(); |
|
1275 BACKTRACK(); |
|
1276 |
|
1277 case ByteTerm::TypeCheckInput: |
|
1278 if (input.checkInput(currentTerm().checkInputCount)) |
|
1279 MATCH_NEXT(); |
|
1280 BACKTRACK(); |
|
1281 |
|
1282 case ByteTerm::TypeUncheckInput: |
|
1283 input.uncheckInput(currentTerm().checkInputCount); |
|
1284 MATCH_NEXT(); |
|
1285 |
|
1286 case ByteTerm::TypeDotStarEnclosure: |
|
1287 if (matchDotStarEnclosure(currentTerm(), context)) |
|
1288 return JSRegExpMatch; |
|
1289 BACKTRACK(); |
|
1290 } |
|
1291 |
|
1292 // We should never fall-through to here. |
|
1293 ASSERT_NOT_REACHED(); |
|
1294 |
|
1295 backtrack: |
|
1296 ASSERT(context->term < static_cast<int>(disjunction->terms.size())); |
|
1297 |
|
1298 // Prevent jank resulting from getting stuck in Yarr for a long time. |
|
1299 if (!CheckForInterrupt(this->cx)) |
|
1300 return JSRegExpErrorInternal; |
|
1301 |
|
1302 switch (currentTerm().type) { |
|
1303 case ByteTerm::TypeSubpatternBegin: |
|
1304 return JSRegExpNoMatch; |
|
1305 case ByteTerm::TypeSubpatternEnd: |
|
1306 ASSERT_NOT_REACHED(); |
|
1307 |
|
1308 case ByteTerm::TypeBodyAlternativeBegin: |
|
1309 case ByteTerm::TypeBodyAlternativeDisjunction: { |
|
1310 int offset = currentTerm().alternative.next; |
|
1311 context->term += offset; |
|
1312 if (offset > 0) |
|
1313 MATCH_NEXT(); |
|
1314 |
|
1315 if (input.atEnd()) |
|
1316 return JSRegExpNoMatch; |
|
1317 |
|
1318 input.next(); |
|
1319 |
|
1320 context->matchBegin = input.getPos(); |
|
1321 |
|
1322 if (currentTerm().alternative.onceThrough) |
|
1323 context->term += currentTerm().alternative.next; |
|
1324 |
|
1325 MATCH_NEXT(); |
|
1326 } |
|
1327 case ByteTerm::TypeBodyAlternativeEnd: |
|
1328 ASSERT_NOT_REACHED(); |
|
1329 |
|
1330 case ByteTerm::TypeAlternativeBegin: |
|
1331 case ByteTerm::TypeAlternativeDisjunction: { |
|
1332 int offset = currentTerm().alternative.next; |
|
1333 context->term += offset; |
|
1334 if (offset > 0) |
|
1335 MATCH_NEXT(); |
|
1336 BACKTRACK(); |
|
1337 } |
|
1338 case ByteTerm::TypeAlternativeEnd: { |
|
1339 // We should never backtrack back into an alternative of the main body of the regex. |
|
1340 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation); |
|
1341 unsigned offset = backTrack->offset; |
|
1342 context->term -= offset; |
|
1343 BACKTRACK(); |
|
1344 } |
|
1345 |
|
1346 case ByteTerm::TypeAssertionBOL: |
|
1347 case ByteTerm::TypeAssertionEOL: |
|
1348 case ByteTerm::TypeAssertionWordBoundary: |
|
1349 BACKTRACK(); |
|
1350 |
|
1351 case ByteTerm::TypePatternCharacterOnce: |
|
1352 case ByteTerm::TypePatternCharacterFixed: |
|
1353 case ByteTerm::TypePatternCharacterGreedy: |
|
1354 case ByteTerm::TypePatternCharacterNonGreedy: |
|
1355 if (backtrackPatternCharacter(currentTerm(), context)) |
|
1356 MATCH_NEXT(); |
|
1357 BACKTRACK(); |
|
1358 case ByteTerm::TypePatternCasedCharacterOnce: |
|
1359 case ByteTerm::TypePatternCasedCharacterFixed: |
|
1360 case ByteTerm::TypePatternCasedCharacterGreedy: |
|
1361 case ByteTerm::TypePatternCasedCharacterNonGreedy: |
|
1362 if (backtrackPatternCasedCharacter(currentTerm(), context)) |
|
1363 MATCH_NEXT(); |
|
1364 BACKTRACK(); |
|
1365 case ByteTerm::TypeCharacterClass: |
|
1366 if (backtrackCharacterClass(currentTerm(), context)) |
|
1367 MATCH_NEXT(); |
|
1368 BACKTRACK(); |
|
1369 case ByteTerm::TypeBackReference: |
|
1370 if (backtrackBackReference(currentTerm(), context)) |
|
1371 MATCH_NEXT(); |
|
1372 BACKTRACK(); |
|
1373 case ByteTerm::TypeParenthesesSubpattern: { |
|
1374 JSRegExpResult result = backtrackParentheses(currentTerm(), context); |
|
1375 |
|
1376 if (result == JSRegExpMatch) { |
|
1377 MATCH_NEXT(); |
|
1378 } else if (result != JSRegExpNoMatch) |
|
1379 return result; |
|
1380 |
|
1381 BACKTRACK(); |
|
1382 } |
|
1383 case ByteTerm::TypeParenthesesSubpatternOnceBegin: |
|
1384 if (backtrackParenthesesOnceBegin(currentTerm(), context)) |
|
1385 MATCH_NEXT(); |
|
1386 BACKTRACK(); |
|
1387 case ByteTerm::TypeParenthesesSubpatternOnceEnd: |
|
1388 if (backtrackParenthesesOnceEnd(currentTerm(), context)) |
|
1389 MATCH_NEXT(); |
|
1390 BACKTRACK(); |
|
1391 case ByteTerm::TypeParenthesesSubpatternTerminalBegin: |
|
1392 if (backtrackParenthesesTerminalBegin(currentTerm(), context)) |
|
1393 MATCH_NEXT(); |
|
1394 BACKTRACK(); |
|
1395 case ByteTerm::TypeParenthesesSubpatternTerminalEnd: |
|
1396 if (backtrackParenthesesTerminalEnd(currentTerm(), context)) |
|
1397 MATCH_NEXT(); |
|
1398 BACKTRACK(); |
|
1399 case ByteTerm::TypeParentheticalAssertionBegin: |
|
1400 if (backtrackParentheticalAssertionBegin(currentTerm(), context)) |
|
1401 MATCH_NEXT(); |
|
1402 BACKTRACK(); |
|
1403 case ByteTerm::TypeParentheticalAssertionEnd: |
|
1404 if (backtrackParentheticalAssertionEnd(currentTerm(), context)) |
|
1405 MATCH_NEXT(); |
|
1406 BACKTRACK(); |
|
1407 |
|
1408 case ByteTerm::TypeCheckInput: |
|
1409 input.uncheckInput(currentTerm().checkInputCount); |
|
1410 BACKTRACK(); |
|
1411 |
|
1412 case ByteTerm::TypeUncheckInput: |
|
1413 input.checkInput(currentTerm().checkInputCount); |
|
1414 BACKTRACK(); |
|
1415 |
|
1416 case ByteTerm::TypeDotStarEnclosure: |
|
1417 ASSERT_NOT_REACHED(); |
|
1418 } |
|
1419 |
|
1420 ASSERT_NOT_REACHED(); |
|
1421 return JSRegExpErrorNoMatch; |
|
1422 } |
|
1423 |
|
1424 JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false) |
|
1425 { |
|
1426 JSRegExpResult result = matchDisjunction(disjunction, context, btrack); |
|
1427 |
|
1428 if (result == JSRegExpMatch) { |
|
1429 while (context->matchBegin == context->matchEnd) { |
|
1430 result = matchDisjunction(disjunction, context, true); |
|
1431 if (result != JSRegExpMatch) |
|
1432 return result; |
|
1433 } |
|
1434 return JSRegExpMatch; |
|
1435 } |
|
1436 |
|
1437 return result; |
|
1438 } |
|
1439 |
|
1440 unsigned interpret() |
|
1441 { |
|
1442 if (!input.isAvailableInput(0)) |
|
1443 return offsetNoMatch; |
|
1444 |
|
1445 for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i) |
|
1446 output[i << 1] = offsetNoMatch; |
|
1447 |
|
1448 allocatorPool = pattern->m_allocator->startAllocator(); |
|
1449 if (!allocatorPool) |
|
1450 CRASH(); |
|
1451 |
|
1452 DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get()); |
|
1453 |
|
1454 JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false); |
|
1455 if (result == JSRegExpMatch) { |
|
1456 output[0] = context->matchBegin; |
|
1457 output[1] = context->matchEnd; |
|
1458 } |
|
1459 |
|
1460 freeDisjunctionContext(context); |
|
1461 |
|
1462 pattern->m_allocator->stopAllocator(); |
|
1463 |
|
1464 ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch)); |
|
1465 return output[0]; |
|
1466 } |
|
1467 |
|
1468 Interpreter(JSContext *cx, BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start) |
|
1469 : cx(cx) |
|
1470 , pattern(pattern) |
|
1471 , output(output) |
|
1472 , input(input, start, length) |
|
1473 , allocatorPool(0) |
|
1474 , remainingMatchCount(matchLimit) |
|
1475 { |
|
1476 } |
|
1477 |
|
1478 private: |
|
1479 JSContext *cx; |
|
1480 BytecodePattern* pattern; |
|
1481 unsigned* output; |
|
1482 InputStream input; |
|
1483 BumpPointerPool* allocatorPool; |
|
1484 unsigned remainingMatchCount; |
|
1485 }; |
|
1486 |
|
1487 |
|
1488 |
|
1489 class ByteCompiler { |
|
1490 struct ParenthesesStackEntry { |
|
1491 unsigned beginTerm; |
|
1492 unsigned savedAlternativeIndex; |
|
1493 |
|
1494 // For js::Vector. Does not create a valid object. |
|
1495 ParenthesesStackEntry() { } |
|
1496 |
|
1497 ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/) |
|
1498 : beginTerm(beginTerm) |
|
1499 , savedAlternativeIndex(savedAlternativeIndex) |
|
1500 { |
|
1501 } |
|
1502 }; |
|
1503 |
|
1504 public: |
|
1505 ByteCompiler(YarrPattern& pattern) |
|
1506 : m_pattern(pattern) |
|
1507 { |
|
1508 m_currentAlternativeIndex = 0; |
|
1509 } |
|
1510 |
|
1511 PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator) |
|
1512 { |
|
1513 regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough()); |
|
1514 emitDisjunction(m_pattern.m_body); |
|
1515 regexEnd(); |
|
1516 |
|
1517 return adoptPtr(newOrCrash<BytecodePattern>(m_bodyDisjunction.release(), m_allParenthesesInfo, Ref<YarrPattern>(m_pattern), allocator)); |
|
1518 } |
|
1519 |
|
1520 void checkInput(unsigned count) |
|
1521 { |
|
1522 m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count)); |
|
1523 } |
|
1524 |
|
1525 void uncheckInput(unsigned count) |
|
1526 { |
|
1527 m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count)); |
|
1528 } |
|
1529 |
|
1530 void assertionBOL(unsigned inputPosition) |
|
1531 { |
|
1532 m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition)); |
|
1533 } |
|
1534 |
|
1535 void assertionEOL(unsigned inputPosition) |
|
1536 { |
|
1537 m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition)); |
|
1538 } |
|
1539 |
|
1540 void assertionWordBoundary(bool invert, unsigned inputPosition) |
|
1541 { |
|
1542 m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition)); |
|
1543 } |
|
1544 |
|
1545 void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) |
|
1546 { |
|
1547 if (m_pattern.m_ignoreCase) { |
|
1548 UChar lo = Unicode::toLower(ch); |
|
1549 UChar hi = Unicode::toUpper(ch); |
|
1550 |
|
1551 if (lo != hi) { |
|
1552 m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType)); |
|
1553 return; |
|
1554 } |
|
1555 } |
|
1556 |
|
1557 m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType)); |
|
1558 } |
|
1559 |
|
1560 void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) |
|
1561 { |
|
1562 m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition)); |
|
1563 |
|
1564 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet(); |
|
1565 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; |
|
1566 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; |
|
1567 } |
|
1568 |
|
1569 void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) |
|
1570 { |
|
1571 ASSERT(subpatternId); |
|
1572 |
|
1573 m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition)); |
|
1574 |
|
1575 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet(); |
|
1576 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; |
|
1577 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; |
|
1578 } |
|
1579 |
|
1580 void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) |
|
1581 { |
|
1582 int beginTerm = m_bodyDisjunction->terms.size(); |
|
1583 |
|
1584 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition)); |
|
1585 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; |
|
1586 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); |
|
1587 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; |
|
1588 |
|
1589 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); |
|
1590 m_currentAlternativeIndex = beginTerm + 1; |
|
1591 } |
|
1592 |
|
1593 void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) |
|
1594 { |
|
1595 int beginTerm = m_bodyDisjunction->terms.size(); |
|
1596 |
|
1597 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition)); |
|
1598 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; |
|
1599 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); |
|
1600 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; |
|
1601 |
|
1602 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); |
|
1603 m_currentAlternativeIndex = beginTerm + 1; |
|
1604 } |
|
1605 |
|
1606 void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) |
|
1607 { |
|
1608 // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin, |
|
1609 // then fix this up at the end! - simplifying this should make it much clearer. |
|
1610 // https://bugs.webkit.org/show_bug.cgi?id=50136 |
|
1611 |
|
1612 int beginTerm = m_bodyDisjunction->terms.size(); |
|
1613 |
|
1614 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition)); |
|
1615 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; |
|
1616 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); |
|
1617 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; |
|
1618 |
|
1619 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); |
|
1620 m_currentAlternativeIndex = beginTerm + 1; |
|
1621 } |
|
1622 |
|
1623 void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation) |
|
1624 { |
|
1625 int beginTerm = m_bodyDisjunction->terms.size(); |
|
1626 |
|
1627 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0)); |
|
1628 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; |
|
1629 m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); |
|
1630 m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; |
|
1631 |
|
1632 m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); |
|
1633 m_currentAlternativeIndex = beginTerm + 1; |
|
1634 } |
|
1635 |
|
1636 void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) |
|
1637 { |
|
1638 unsigned beginTerm = popParenthesesStack(); |
|
1639 closeAlternative(beginTerm + 1); |
|
1640 unsigned endTerm = m_bodyDisjunction->terms.size(); |
|
1641 |
|
1642 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin); |
|
1643 |
|
1644 bool invert = m_bodyDisjunction->terms[beginTerm].invert(); |
|
1645 unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; |
|
1646 |
|
1647 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition)); |
|
1648 m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; |
|
1649 m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; |
|
1650 m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; |
|
1651 |
|
1652 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); |
|
1653 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; |
|
1654 m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); |
|
1655 m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; |
|
1656 } |
|
1657 |
|
1658 void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored) |
|
1659 { |
|
1660 m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored)); |
|
1661 } |
|
1662 |
|
1663 unsigned popParenthesesStack() |
|
1664 { |
|
1665 ASSERT(m_parenthesesStack.size()); |
|
1666 int stackEnd = m_parenthesesStack.size() - 1; |
|
1667 unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm; |
|
1668 m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex; |
|
1669 m_parenthesesStack.shrink(stackEnd); |
|
1670 |
|
1671 ASSERT(beginTerm < m_bodyDisjunction->terms.size()); |
|
1672 ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size()); |
|
1673 |
|
1674 return beginTerm; |
|
1675 } |
|
1676 |
|
1677 #ifndef NDEBUG |
|
1678 void dumpDisjunction(ByteDisjunction* disjunction) |
|
1679 { |
|
1680 dataLogF("ByteDisjunction(%p):\n\t", (void *)disjunction); |
|
1681 for (unsigned i = 0; i < disjunction->terms.size(); ++i) |
|
1682 dataLogF("{ %d } ", disjunction->terms[i].type); |
|
1683 dataLogF("\n"); |
|
1684 } |
|
1685 #endif |
|
1686 |
|
1687 void closeAlternative(int beginTerm) |
|
1688 { |
|
1689 int origBeginTerm = beginTerm; |
|
1690 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin); |
|
1691 int endIndex = m_bodyDisjunction->terms.size(); |
|
1692 |
|
1693 unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation; |
|
1694 |
|
1695 if (!m_bodyDisjunction->terms[beginTerm].alternative.next) |
|
1696 m_bodyDisjunction->terms.remove(beginTerm); |
|
1697 else { |
|
1698 while (m_bodyDisjunction->terms[beginTerm].alternative.next) { |
|
1699 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next; |
|
1700 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction); |
|
1701 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm; |
|
1702 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; |
|
1703 } |
|
1704 |
|
1705 m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm; |
|
1706 |
|
1707 m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd()); |
|
1708 m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation; |
|
1709 } |
|
1710 } |
|
1711 |
|
1712 void closeBodyAlternative() |
|
1713 { |
|
1714 int beginTerm = 0; |
|
1715 int origBeginTerm = 0; |
|
1716 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin); |
|
1717 int endIndex = m_bodyDisjunction->terms.size(); |
|
1718 |
|
1719 unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation; |
|
1720 |
|
1721 while (m_bodyDisjunction->terms[beginTerm].alternative.next) { |
|
1722 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next; |
|
1723 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction); |
|
1724 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm; |
|
1725 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; |
|
1726 } |
|
1727 |
|
1728 m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm; |
|
1729 |
|
1730 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd()); |
|
1731 m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation; |
|
1732 } |
|
1733 |
|
1734 void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0) |
|
1735 { |
|
1736 unsigned beginTerm = popParenthesesStack(); |
|
1737 closeAlternative(beginTerm + 1); |
|
1738 unsigned endTerm = m_bodyDisjunction->terms.size(); |
|
1739 |
|
1740 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin); |
|
1741 |
|
1742 ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm]; |
|
1743 |
|
1744 bool capture = parenthesesBegin.capture(); |
|
1745 unsigned subpatternId = parenthesesBegin.atom.subpatternId; |
|
1746 |
|
1747 unsigned numSubpatterns = lastSubpatternId - subpatternId + 1; |
|
1748 ByteDisjunction* parenthesesDisjunction = newOrCrash<ByteDisjunction>(numSubpatterns, callFrameSize); |
|
1749 |
|
1750 parenthesesDisjunction->terms.reserve(endTerm - beginTerm + 1); |
|
1751 parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin()); |
|
1752 for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses) |
|
1753 parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]); |
|
1754 parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd()); |
|
1755 |
|
1756 m_bodyDisjunction->terms.shrink(beginTerm); |
|
1757 |
|
1758 m_allParenthesesInfo.append(parenthesesDisjunction); |
|
1759 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition)); |
|
1760 |
|
1761 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); |
|
1762 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; |
|
1763 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; |
|
1764 } |
|
1765 |
|
1766 void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) |
|
1767 { |
|
1768 unsigned beginTerm = popParenthesesStack(); |
|
1769 closeAlternative(beginTerm + 1); |
|
1770 unsigned endTerm = m_bodyDisjunction->terms.size(); |
|
1771 |
|
1772 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin); |
|
1773 |
|
1774 bool capture = m_bodyDisjunction->terms[beginTerm].capture(); |
|
1775 unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; |
|
1776 |
|
1777 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition)); |
|
1778 m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; |
|
1779 m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; |
|
1780 m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; |
|
1781 |
|
1782 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); |
|
1783 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; |
|
1784 m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); |
|
1785 m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; |
|
1786 } |
|
1787 |
|
1788 void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) |
|
1789 { |
|
1790 unsigned beginTerm = popParenthesesStack(); |
|
1791 closeAlternative(beginTerm + 1); |
|
1792 unsigned endTerm = m_bodyDisjunction->terms.size(); |
|
1793 |
|
1794 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); |
|
1795 |
|
1796 bool capture = m_bodyDisjunction->terms[beginTerm].capture(); |
|
1797 unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; |
|
1798 |
|
1799 m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition)); |
|
1800 m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; |
|
1801 m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; |
|
1802 m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; |
|
1803 |
|
1804 m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); |
|
1805 m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; |
|
1806 m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); |
|
1807 m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; |
|
1808 } |
|
1809 |
|
1810 void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough) |
|
1811 { |
|
1812 m_bodyDisjunction = adoptPtr(newOrCrash<ByteDisjunction>(numSubpatterns, callFrameSize)); |
|
1813 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough)); |
|
1814 m_bodyDisjunction->terms[0].frameLocation = 0; |
|
1815 m_currentAlternativeIndex = 0; |
|
1816 } |
|
1817 |
|
1818 void regexEnd() |
|
1819 { |
|
1820 closeBodyAlternative(); |
|
1821 } |
|
1822 |
|
1823 void alternativeBodyDisjunction(bool onceThrough) |
|
1824 { |
|
1825 int newAlternativeIndex = m_bodyDisjunction->terms.size(); |
|
1826 m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex; |
|
1827 m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough)); |
|
1828 |
|
1829 m_currentAlternativeIndex = newAlternativeIndex; |
|
1830 } |
|
1831 |
|
1832 void alternativeDisjunction() |
|
1833 { |
|
1834 int newAlternativeIndex = m_bodyDisjunction->terms.size(); |
|
1835 m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex; |
|
1836 m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction()); |
|
1837 |
|
1838 m_currentAlternativeIndex = newAlternativeIndex; |
|
1839 } |
|
1840 |
|
1841 void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0) |
|
1842 { |
|
1843 for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) { |
|
1844 unsigned currentCountAlreadyChecked = inputCountAlreadyChecked; |
|
1845 |
|
1846 PatternAlternative* alternative = disjunction->m_alternatives[alt]; |
|
1847 |
|
1848 if (alt) { |
|
1849 if (disjunction == m_pattern.m_body) |
|
1850 alternativeBodyDisjunction(alternative->onceThrough()); |
|
1851 else |
|
1852 alternativeDisjunction(); |
|
1853 } |
|
1854 |
|
1855 unsigned minimumSize = alternative->m_minimumSize; |
|
1856 ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked); |
|
1857 unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked; |
|
1858 |
|
1859 if (countToCheck) { |
|
1860 checkInput(countToCheck); |
|
1861 currentCountAlreadyChecked += countToCheck; |
|
1862 } |
|
1863 |
|
1864 for (unsigned i = 0; i < alternative->m_terms.size(); ++i) { |
|
1865 PatternTerm& term = alternative->m_terms[i]; |
|
1866 |
|
1867 switch (term.type) { |
|
1868 case PatternTerm::TypeAssertionBOL: |
|
1869 assertionBOL(currentCountAlreadyChecked - term.inputPosition); |
|
1870 break; |
|
1871 |
|
1872 case PatternTerm::TypeAssertionEOL: |
|
1873 assertionEOL(currentCountAlreadyChecked - term.inputPosition); |
|
1874 break; |
|
1875 |
|
1876 case PatternTerm::TypeAssertionWordBoundary: |
|
1877 assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition); |
|
1878 break; |
|
1879 |
|
1880 case PatternTerm::TypePatternCharacter: |
|
1881 atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); |
|
1882 break; |
|
1883 |
|
1884 case PatternTerm::TypeCharacterClass: |
|
1885 atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); |
|
1886 break; |
|
1887 |
|
1888 case PatternTerm::TypeBackReference: |
|
1889 atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); |
|
1890 break; |
|
1891 |
|
1892 case PatternTerm::TypeForwardReference: |
|
1893 break; |
|
1894 |
|
1895 case PatternTerm::TypeParenthesesSubpattern: { |
|
1896 unsigned disjunctionAlreadyCheckedCount = 0; |
|
1897 if (term.quantityCount == 1 && !term.parentheses.isCopy) { |
|
1898 unsigned alternativeFrameLocation = term.frameLocation; |
|
1899 // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame. |
|
1900 if (term.quantityType == QuantifierFixedCount) |
|
1901 disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize; |
|
1902 else |
|
1903 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; |
|
1904 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; |
|
1905 atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation); |
|
1906 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); |
|
1907 atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); |
|
1908 } else if (term.parentheses.isTerminal) { |
|
1909 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; |
|
1910 atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce); |
|
1911 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); |
|
1912 atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); |
|
1913 } else { |
|
1914 unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; |
|
1915 atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0); |
|
1916 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0); |
|
1917 atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize); |
|
1918 } |
|
1919 break; |
|
1920 } |
|
1921 |
|
1922 case PatternTerm::TypeParentheticalAssertion: { |
|
1923 unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion; |
|
1924 |
|
1925 ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition)); |
|
1926 unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition); |
|
1927 unsigned uncheckAmount = 0; |
|
1928 if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) { |
|
1929 uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize; |
|
1930 uncheckInput(uncheckAmount); |
|
1931 currentCountAlreadyChecked -= uncheckAmount; |
|
1932 } |
|
1933 |
|
1934 atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation); |
|
1935 emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount); |
|
1936 atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType); |
|
1937 if (uncheckAmount) { |
|
1938 checkInput(uncheckAmount); |
|
1939 currentCountAlreadyChecked += uncheckAmount; |
|
1940 } |
|
1941 break; |
|
1942 } |
|
1943 |
|
1944 case PatternTerm::TypeDotStarEnclosure: |
|
1945 assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor); |
|
1946 break; |
|
1947 } |
|
1948 } |
|
1949 } |
|
1950 } |
|
1951 |
|
1952 private: |
|
1953 YarrPattern& m_pattern; |
|
1954 OwnPtr<ByteDisjunction> m_bodyDisjunction; |
|
1955 unsigned m_currentAlternativeIndex; |
|
1956 Vector<ParenthesesStackEntry> m_parenthesesStack; |
|
1957 Vector<ByteDisjunction*> m_allParenthesesInfo; |
|
1958 }; |
|
1959 |
|
1960 PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator) |
|
1961 { |
|
1962 return ByteCompiler(pattern).compile(allocator); |
|
1963 } |
|
1964 |
|
1965 unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output) |
|
1966 { |
|
1967 #if YARR_8BIT_CHAR_SUPPORT |
|
1968 if (input.is8Bit()) |
|
1969 return Interpreter<LChar>(cx, bytecode, output, input.characters8(), input.length(), start).interpret(); |
|
1970 #endif |
|
1971 return Interpreter<UChar>(cx, bytecode, output, input.chars(), input.length(), start).interpret(); |
|
1972 } |
|
1973 |
|
1974 unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output) |
|
1975 { |
|
1976 return Interpreter<LChar>(cx, bytecode, output, input, length, start).interpret(); |
|
1977 } |
|
1978 |
|
1979 unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output) |
|
1980 { |
|
1981 return Interpreter<UChar>(cx, bytecode, output, input, length, start).interpret(); |
|
1982 } |
|
1983 |
|
1984 // These should be the same for both UChar & LChar. |
|
1985 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter); |
|
1986 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass); |
|
1987 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference); |
|
1988 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative); |
|
1989 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion); |
|
1990 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce); |
|
1991 COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses); |
|
1992 |
|
1993 |
|
1994 } } |