Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 *
4 * Copyright (C) 2009 Apple Inc. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
28 #ifndef yarr_YarrParser_h
29 #define yarr_YarrParser_h
31 #include "yarr/Yarr.h"
33 namespace JSC { namespace Yarr {
// Identifies the predefined character classes that the parser reports to a
// delegate via atomBuiltInCharacterClass() / atomCharacterClassBuiltIn().
enum BuiltInCharacterClassID {
    DigitClassID,   // reported for \d, and inverted for \D
    SpaceClassID,   // reported for \s, and inverted for \S
    WordClassID,    // reported for \w, and inverted for \W
    NewlineClassID  // reported inverted for the '.' atom
};
42 // The Parser class should not be used directly - only via the Yarr::parse() method.
43 template<class Delegate, typename CharType>
44 class Parser {
45 private:
46 template<class FriendDelegate>
47 friend ErrorCode parse(FriendDelegate&, const String& pattern, unsigned backReferenceLimit);
49 /*
50 * CharacterClassParserDelegate:
51 *
52 * The class CharacterClassParserDelegate is used in the parsing of character
53 * classes. This class handles detection of character ranges. This class
54 * implements enough of the delegate interface such that it can be passed to
55 * parseEscape() as an EscapeDelegate. This allows parseEscape() to be reused
56 * to perform the parsing of escape characters in character sets.
57 */
58 class CharacterClassParserDelegate {
59 public:
60 CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err)
61 : m_delegate(delegate)
62 , m_err(err)
63 , m_state(Empty)
64 , m_character(0)
65 {
66 }
68 /*
69 * begin():
70 *
71 * Called at beginning of construction.
72 */
73 void begin(bool invert)
74 {
75 m_delegate.atomCharacterClassBegin(invert);
76 }
78 /*
79 * atomPatternCharacter():
80 *
81 * This method is called either from parseCharacterClass() (for an unescaped
82 * character in a character class), or from parseEscape(). In the former case
83 * the value true will be passed for the argument 'hyphenIsRange', and in this
84 * mode we will allow a hypen to be treated as indicating a range (i.e. /[a-z]/
85 * is different to /[a\-z]/).
86 */
87 void atomPatternCharacter(UChar ch, bool hyphenIsRange = false)
88 {
89 switch (m_state) {
90 case AfterCharacterClass:
91 // Following a builtin character class we need look out for a hyphen.
92 // We're looking for invalid ranges, such as /[\d-x]/ or /[\d-\d]/.
93 // If we see a hyphen following a charater class then unlike usual
94 // we'll report it to the delegate immediately, and put ourself into
95 // a poisoned state. Any following calls to add another character or
96 // character class will result in an error. (A hypen following a
97 // character-class is itself valid, but only at the end of a regex).
98 if (hyphenIsRange && ch == '-') {
99 m_delegate.atomCharacterClassAtom('-');
100 m_state = AfterCharacterClassHyphen;
101 return;
102 }
103 // Otherwise just fall through - cached character so treat this as Empty.
105 case Empty:
106 m_character = ch;
107 m_state = CachedCharacter;
108 return;
110 case CachedCharacter:
111 if (hyphenIsRange && ch == '-')
112 m_state = CachedCharacterHyphen;
113 else {
114 m_delegate.atomCharacterClassAtom(m_character);
115 m_character = ch;
116 }
117 return;
119 case CachedCharacterHyphen:
120 if (ch < m_character) {
121 m_err = CharacterClassOutOfOrder;
122 return;
123 }
124 m_delegate.atomCharacterClassRange(m_character, ch);
125 m_state = Empty;
126 return;
128 case AfterCharacterClassHyphen:
129 m_delegate.atomCharacterClassAtom(ch);
130 m_state = Empty;
131 return;
132 }
133 }
135 /*
136 * atomBuiltInCharacterClass():
137 *
138 * Adds a built-in character class, called by parseEscape().
139 */
140 void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
141 {
142 switch (m_state) {
143 case CachedCharacter:
144 // Flush the currently cached character, then fall through.
145 m_delegate.atomCharacterClassAtom(m_character);
147 case Empty:
148 case AfterCharacterClass:
149 m_state = AfterCharacterClass;
150 m_delegate.atomCharacterClassBuiltIn(classID, invert);
151 return;
153 case CachedCharacterHyphen:
154 // Error! We have a range that looks like [x-\d]. We require
155 // the end of the range to be a single character.
156 m_err = CharacterClassInvalidRange;
157 return;
158 case AfterCharacterClassHyphen:
159 m_delegate.atomCharacterClassBuiltIn(classID, invert);
160 m_state = Empty;
161 return;
162 }
163 }
165 /*
166 * end():
167 *
168 * Called at end of construction.
169 */
170 void end()
171 {
172 if (m_state == CachedCharacter)
173 m_delegate.atomCharacterClassAtom(m_character);
174 else if (m_state == CachedCharacterHyphen) {
175 m_delegate.atomCharacterClassAtom(m_character);
176 m_delegate.atomCharacterClassAtom('-');
177 }
178 m_delegate.atomCharacterClassEnd();
179 }
181 // parseEscape() should never call these delegate methods when
182 // invoked with inCharacterClass set.
183 NO_RETURN_DUE_TO_ASSERT void assertionWordBoundary(bool) { ASSERT_NOT_REACHED(); }
184 NO_RETURN_DUE_TO_ASSERT void atomBackReference(unsigned) { ASSERT_NOT_REACHED(); }
186 private:
187 Delegate& m_delegate;
188 ErrorCode& m_err;
189 enum CharacterClassConstructionState {
190 Empty,
191 CachedCharacter,
192 CachedCharacterHyphen,
193 AfterCharacterClass,
194 AfterCharacterClassHyphen
195 } m_state;
196 UChar m_character;
197 };
199 Parser(Delegate& delegate, const String& pattern, unsigned backReferenceLimit)
200 : m_delegate(delegate)
201 , m_backReferenceLimit(backReferenceLimit)
202 , m_err(NoError)
203 , m_data(pattern.chars())
204 , m_size(pattern.length())
205 , m_index(0)
206 , m_parenthesesNestingDepth(0)
207 {
208 }
210 /*
211 * parseEscape():
212 *
213 * Helper for parseTokens() AND parseCharacterClass().
214 * Unlike the other parser methods, this function does not report tokens
215 * directly to the member delegate (m_delegate), instead tokens are
216 * emitted to the delegate provided as an argument. In the case of atom
217 * escapes, parseTokens() will call parseEscape() passing m_delegate as
218 * an argument, and as such the escape will be reported to the delegate.
219 *
220 * However this method may also be used by parseCharacterClass(), in which
221 * case a CharacterClassParserDelegate will be passed as the delegate that
222 * tokens should be added to. A boolean flag is also provided to indicate
223 * whether that an escape in a CharacterClass is being parsed (some parsing
224 * rules change in this context).
225 *
226 * The boolean value returned by this method indicates whether the token
227 * parsed was an atom (outside of a characted class \b and \B will be
228 * interpreted as assertions).
229 */
230 template<bool inCharacterClass, class EscapeDelegate>
231 bool parseEscape(EscapeDelegate& delegate)
232 {
233 ASSERT(!m_err);
234 ASSERT(peek() == '\\');
235 consume();
237 if (atEndOfPattern()) {
238 m_err = EscapeUnterminated;
239 return false;
240 }
242 switch (peek()) {
243 // Assertions
244 case 'b':
245 consume();
246 if (inCharacterClass)
247 delegate.atomPatternCharacter('\b');
248 else {
249 delegate.assertionWordBoundary(false);
250 return false;
251 }
252 break;
253 case 'B':
254 consume();
255 if (inCharacterClass)
256 delegate.atomPatternCharacter('B');
257 else {
258 delegate.assertionWordBoundary(true);
259 return false;
260 }
261 break;
263 // CharacterClassEscape
264 case 'd':
265 consume();
266 delegate.atomBuiltInCharacterClass(DigitClassID, false);
267 break;
268 case 's':
269 consume();
270 delegate.atomBuiltInCharacterClass(SpaceClassID, false);
271 break;
272 case 'w':
273 consume();
274 delegate.atomBuiltInCharacterClass(WordClassID, false);
275 break;
276 case 'D':
277 consume();
278 delegate.atomBuiltInCharacterClass(DigitClassID, true);
279 break;
280 case 'S':
281 consume();
282 delegate.atomBuiltInCharacterClass(SpaceClassID, true);
283 break;
284 case 'W':
285 consume();
286 delegate.atomBuiltInCharacterClass(WordClassID, true);
287 break;
289 // DecimalEscape
290 case '1':
291 case '2':
292 case '3':
293 case '4':
294 case '5':
295 case '6':
296 case '7':
297 case '8':
298 case '9': {
299 // To match Firefox, we parse an invalid backreference in the range [1-7] as an octal escape.
300 // First, try to parse this as backreference.
301 if (!inCharacterClass) {
302 ParseState state = saveState();
304 unsigned backReference;
305 if (!consumeNumber(backReference))
306 break;
307 if (backReference <= m_backReferenceLimit) {
308 delegate.atomBackReference(backReference);
309 break;
310 }
312 restoreState(state);
313 }
315 // Not a backreference, and not octal.
316 if (peek() >= '8') {
317 delegate.atomPatternCharacter('\\');
318 break;
319 }
321 // Fall-through to handle this as an octal escape.
322 }
324 // Octal escape
325 case '0':
326 delegate.atomPatternCharacter(consumeOctal());
327 break;
329 // ControlEscape
330 case 'f':
331 consume();
332 delegate.atomPatternCharacter('\f');
333 break;
334 case 'n':
335 consume();
336 delegate.atomPatternCharacter('\n');
337 break;
338 case 'r':
339 consume();
340 delegate.atomPatternCharacter('\r');
341 break;
342 case 't':
343 consume();
344 delegate.atomPatternCharacter('\t');
345 break;
346 case 'v':
347 consume();
348 delegate.atomPatternCharacter('\v');
349 break;
351 // ControlLetter
352 case 'c': {
353 ParseState state = saveState();
354 consume();
355 if (!atEndOfPattern()) {
356 int control = consume();
358 // To match Firefox, inside a character class, we also accept numbers and '_' as control characters.
359 if (inCharacterClass ? WTF::isASCIIAlphanumeric(control) || (control == '_') : WTF::isASCIIAlpha(control)) {
360 delegate.atomPatternCharacter(control & 0x1f);
361 break;
362 }
363 }
364 restoreState(state);
365 delegate.atomPatternCharacter('\\');
366 break;
367 }
369 // HexEscape
370 case 'x': {
371 consume();
372 int x = tryConsumeHex(2);
373 if (x == -1)
374 delegate.atomPatternCharacter('x');
375 else
376 delegate.atomPatternCharacter(x);
377 break;
378 }
380 // UnicodeEscape
381 case 'u': {
382 consume();
383 int u = tryConsumeHex(4);
384 if (u == -1)
385 delegate.atomPatternCharacter('u');
386 else
387 delegate.atomPatternCharacter(u);
388 break;
389 }
391 // IdentityEscape
392 default:
393 delegate.atomPatternCharacter(consume());
394 }
396 return true;
397 }
399 /*
400 * parseAtomEscape(), parseCharacterClassEscape():
401 *
402 * These methods alias to parseEscape().
403 */
404 bool parseAtomEscape()
405 {
406 return parseEscape<false>(m_delegate);
407 }
408 void parseCharacterClassEscape(CharacterClassParserDelegate& delegate)
409 {
410 parseEscape<true>(delegate);
411 }
413 /*
414 * parseCharacterClass():
415 *
416 * Helper for parseTokens(); calls dirctly and indirectly (via parseCharacterClassEscape)
417 * to an instance of CharacterClassParserDelegate, to describe the character class to the
418 * delegate.
419 */
420 void parseCharacterClass()
421 {
422 ASSERT(!m_err);
423 ASSERT(peek() == '[');
424 consume();
426 CharacterClassParserDelegate characterClassConstructor(m_delegate, m_err);
428 characterClassConstructor.begin(tryConsume('^'));
430 while (!atEndOfPattern()) {
431 switch (peek()) {
432 case ']':
433 consume();
434 characterClassConstructor.end();
435 return;
437 case '\\':
438 parseCharacterClassEscape(characterClassConstructor);
439 break;
441 default:
442 characterClassConstructor.atomPatternCharacter(consume(), true);
443 }
445 if (m_err)
446 return;
447 }
449 m_err = CharacterClassUnmatched;
450 }
452 /*
453 * parseParenthesesBegin():
454 *
455 * Helper for parseTokens(); checks for parentheses types other than regular capturing subpatterns.
456 */
457 void parseParenthesesBegin()
458 {
459 ASSERT(!m_err);
460 ASSERT(peek() == '(');
461 consume();
463 if (tryConsume('?')) {
464 if (atEndOfPattern()) {
465 m_err = ParenthesesTypeInvalid;
466 return;
467 }
469 switch (consume()) {
470 case ':':
471 m_delegate.atomParenthesesSubpatternBegin(false);
472 break;
474 case '=':
475 m_delegate.atomParentheticalAssertionBegin();
476 break;
478 case '!':
479 m_delegate.atomParentheticalAssertionBegin(true);
480 break;
482 default:
483 m_err = ParenthesesTypeInvalid;
484 }
485 } else
486 m_delegate.atomParenthesesSubpatternBegin();
488 ++m_parenthesesNestingDepth;
489 }
491 /*
492 * parseParenthesesEnd():
493 *
494 * Helper for parseTokens(); checks for parse errors (due to unmatched parentheses).
495 */
496 void parseParenthesesEnd()
497 {
498 ASSERT(!m_err);
499 ASSERT(peek() == ')');
500 consume();
502 if (m_parenthesesNestingDepth > 0)
503 m_delegate.atomParenthesesEnd();
504 else
505 m_err = ParenthesesUnmatched;
507 --m_parenthesesNestingDepth;
508 }
510 /*
511 * parseQuantifier():
512 *
513 * Helper for parseTokens(); checks for parse errors and non-greedy quantifiers.
514 */
515 void parseQuantifier(bool lastTokenWasAnAtom, unsigned min, unsigned max)
516 {
517 ASSERT(!m_err);
518 ASSERT(min <= max);
520 if (min == UINT_MAX) {
521 m_err = QuantifierTooLarge;
522 return;
523 }
525 if (lastTokenWasAnAtom)
526 m_delegate.quantifyAtom(min, max, !tryConsume('?'));
527 else
528 m_err = QuantifierWithoutAtom;
529 }
531 /*
532 * parseTokens():
533 *
534 * This method loops over the input pattern reporting tokens to the delegate.
535 * The method returns when a parse error is detected, or the end of the pattern
536 * is reached. One piece of state is tracked around the loop, which is whether
537 * the last token passed to the delegate was an atom (this is necessary to detect
538 * a parse error when a quantifier provided without an atom to quantify).
539 */
540 void parseTokens()
541 {
542 bool lastTokenWasAnAtom = false;
544 while (!atEndOfPattern()) {
545 switch (peek()) {
546 case '|':
547 consume();
548 m_delegate.disjunction();
549 lastTokenWasAnAtom = false;
550 break;
552 case '(':
553 parseParenthesesBegin();
554 lastTokenWasAnAtom = false;
555 break;
557 case ')':
558 parseParenthesesEnd();
559 lastTokenWasAnAtom = true;
560 break;
562 case '^':
563 consume();
564 m_delegate.assertionBOL();
565 lastTokenWasAnAtom = false;
566 break;
568 case '$':
569 consume();
570 m_delegate.assertionEOL();
571 lastTokenWasAnAtom = false;
572 break;
574 case '.':
575 consume();
576 m_delegate.atomBuiltInCharacterClass(NewlineClassID, true);
577 lastTokenWasAnAtom = true;
578 break;
580 case '[':
581 parseCharacterClass();
582 lastTokenWasAnAtom = true;
583 break;
585 case '\\':
586 lastTokenWasAnAtom = parseAtomEscape();
587 break;
589 case '*':
590 consume();
591 parseQuantifier(lastTokenWasAnAtom, 0, quantifyInfinite);
592 lastTokenWasAnAtom = false;
593 break;
595 case '+':
596 consume();
597 parseQuantifier(lastTokenWasAnAtom, 1, quantifyInfinite);
598 lastTokenWasAnAtom = false;
599 break;
601 case '?':
602 consume();
603 parseQuantifier(lastTokenWasAnAtom, 0, 1);
604 lastTokenWasAnAtom = false;
605 break;
607 case '{': {
608 ParseState state = saveState();
610 consume();
611 if (peekIsDigit()) {
612 unsigned min;
613 if (!consumeNumber(min))
614 break;
616 unsigned max = min;
617 if (tryConsume(',')) {
618 if (peekIsDigit()) {
619 if (!consumeNumber(max))
620 break;
621 } else {
622 max = quantifyInfinite;
623 }
624 }
626 if (tryConsume('}')) {
627 if (min <= max)
628 parseQuantifier(lastTokenWasAnAtom, min, max);
629 else
630 m_err = QuantifierOutOfOrder;
631 lastTokenWasAnAtom = false;
632 break;
633 }
634 }
636 restoreState(state);
637 } // if we did not find a complete quantifer, fall through to the default case.
639 default:
640 m_delegate.atomPatternCharacter(consume());
641 lastTokenWasAnAtom = true;
642 }
644 if (m_err)
645 return;
646 }
648 if (m_parenthesesNestingDepth > 0)
649 m_err = MissingParentheses;
650 }
652 /*
653 * parse():
654 *
655 * This method calls parseTokens() to parse over the input and converts any
656 * error code to a const char* for a result.
657 */
658 ErrorCode parse()
659 {
660 if (m_size > MAX_PATTERN_SIZE)
661 m_err = PatternTooLarge;
662 else
663 parseTokens();
664 ASSERT(atEndOfPattern() || m_err);
666 return m_err;
667 }
669 // Misc helper functions:
671 typedef unsigned ParseState;
673 ParseState saveState()
674 {
675 return m_index;
676 }
678 void restoreState(ParseState state)
679 {
680 m_index = state;
681 }
683 bool atEndOfPattern()
684 {
685 ASSERT(m_index <= m_size);
686 return m_index == m_size;
687 }
689 int peek()
690 {
691 ASSERT(m_index < m_size);
692 return m_data[m_index];
693 }
695 bool peekIsDigit()
696 {
697 return !atEndOfPattern() && WTF::isASCIIDigit(peek());
698 }
700 unsigned peekDigit()
701 {
702 ASSERT(peekIsDigit());
703 return peek() - '0';
704 }
706 int consume()
707 {
708 ASSERT(m_index < m_size);
709 return m_data[m_index++];
710 }
712 unsigned consumeDigit()
713 {
714 ASSERT(peekIsDigit());
715 return consume() - '0';
716 }
718 bool consumeNumber(unsigned &accum)
719 {
720 accum = consumeDigit();
721 while (peekIsDigit()) {
722 unsigned newValue = accum * 10 + peekDigit();
723 if (newValue < accum) { /* Overflow check. */
724 m_err = QuantifierTooLarge;
725 return false;
726 }
727 accum = newValue;
728 consume();
729 }
730 return true;
731 }
733 unsigned consumeOctal()
734 {
735 ASSERT(WTF::isASCIIOctalDigit(peek()));
737 unsigned n = consumeDigit();
738 while (n < 32 && !atEndOfPattern() && WTF::isASCIIOctalDigit(peek()))
739 n = n * 8 + consumeDigit();
740 return n;
741 }
743 bool tryConsume(UChar ch)
744 {
745 if (atEndOfPattern() || (m_data[m_index] != ch))
746 return false;
747 ++m_index;
748 return true;
749 }
751 int tryConsumeHex(int count)
752 {
753 ParseState state = saveState();
755 int n = 0;
756 while (count--) {
757 if (atEndOfPattern() || !WTF::isASCIIHexDigit(peek())) {
758 restoreState(state);
759 return -1;
760 }
761 n = (n << 4) | WTF::toASCIIHexValue(consume());
762 }
763 return n;
764 }
766 Delegate& m_delegate;
767 unsigned m_backReferenceLimit;
768 ErrorCode m_err;
769 const CharType* m_data;
770 unsigned m_size;
771 unsigned m_index;
772 unsigned m_parenthesesNestingDepth;
774 // Derived by empirical testing of compile time in PCRE and WREC.
775 static const unsigned MAX_PATTERN_SIZE = 1024 * 1024;
776 };
778 /*
779 * Yarr::parse():
780 *
781 * The parse method is passed a pattern to be parsed and a delegate upon which
782 * callbacks will be made to record the parsed tokens forming the regex.
783 * Yarr::parse() returns null on success, or a const C string providing an error
784 * message where a parse error occurs.
785 *
786 * The Delegate must implement the following interface:
787 *
788 * void assertionBOL();
789 * void assertionEOL();
790 * void assertionWordBoundary(bool invert);
791 *
792 * void atomPatternCharacter(UChar ch);
793 * void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert);
794 * void atomCharacterClassBegin(bool invert)
795 * void atomCharacterClassAtom(UChar ch)
796 * void atomCharacterClassRange(UChar begin, UChar end)
797 * void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
798 * void atomCharacterClassEnd()
799 * void atomParenthesesSubpatternBegin(bool capture = true);
800 * void atomParentheticalAssertionBegin(bool invert = false);
801 * void atomParenthesesEnd();
802 * void atomBackReference(unsigned subpatternId);
803 *
804 * void quantifyAtom(unsigned min, unsigned max, bool greedy);
805 *
806 * void disjunction();
807 *
808 * The regular expression is described by a sequence of assertion*() and atom*()
809 * callbacks to the delegate, describing the terms in the regular expression.
810 * Following an atom a quantifyAtom() call may occur to indicate that the previous
811 * atom should be quantified. In the case of atoms described across multiple
812 * calls (parentheses and character classes) the call to quantifyAtom() will come
813 * after the call to the atom*End() method, never after atom*Begin().
814 *
815 * Character classes may either be described by a single call to
816 * atomBuiltInCharacterClass(), or by a sequence of atomCharacterClass*() calls.
817 * In the latter case, ...Begin() will be called, followed by a sequence of
818 * calls to ...Atom(), ...Range(), and ...BuiltIn(), followed by a call to ...End().
819 *
820 * Sequences of atoms and assertions are broken into alternatives via calls to
821 * disjunction(). Assertions, atoms, and disjunctions emitted between calls to
822 * atomParenthesesBegin() and atomParenthesesEnd() form the body of a subpattern.
823 * atomParenthesesBegin() is passed a subpatternId. In the case of a regular
824 * capturing subpattern, this will be the subpatternId associated with these
825 * parentheses, and will also by definition be the lowest subpatternId of these
826 * parentheses and of any nested paretheses. The atomParenthesesEnd() method
827 * is passed the subpatternId of the last capturing subexpression nested within
828 * these paretheses. In the case of a capturing subpattern with no nested
829 * capturing subpatterns, the same subpatternId will be passed to the begin and
830 * end functions. In the case of non-capturing subpatterns the subpatternId
831 * passed to the begin method is also the first possible subpatternId that might
832 * be nested within these paretheses. If a set of non-capturing parentheses does
833 * not contain any capturing subpatterns, then the subpatternId passed to begin
834 * will be greater than the subpatternId passed to end.
835 */
837 template<class Delegate>
838 ErrorCode parse(Delegate& delegate, const String& pattern, unsigned backReferenceLimit = quantifyInfinite)
839 {
840 #ifdef YARR_8BIT_CHAR_SUPPORT
841 if (pattern.is8Bit())
842 return Parser<Delegate, LChar>(delegate, pattern, backReferenceLimit).parse();
843 #endif
844 return Parser<Delegate, UChar>(delegate, pattern, backReferenceLimit).parse();
845 }
847 } } // namespace JSC::Yarr
849 #endif /* yarr_YarrParser_h */