js/src/yarr/YarrPattern.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
michael@0 2 * vim: set ts=8 sts=4 et sw=4 tw=99:
michael@0 3 *
michael@0 4 * Copyright (C) 2009 Apple Inc. All rights reserved.
michael@0 5 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
michael@0 6 *
michael@0 7 * Redistribution and use in source and binary forms, with or without
michael@0 8 * modification, are permitted provided that the following conditions
michael@0 9 * are met:
michael@0 10 * 1. Redistributions of source code must retain the above copyright
michael@0 11 * notice, this list of conditions and the following disclaimer.
michael@0 12 * 2. Redistributions in binary form must reproduce the above copyright
michael@0 13 * notice, this list of conditions and the following disclaimer in the
michael@0 14 * documentation and/or other materials provided with the distribution.
michael@0 15 *
michael@0 16 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
michael@0 17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
michael@0 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
michael@0 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
michael@0 20 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
michael@0 21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
michael@0 22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
michael@0 23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
michael@0 24 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
michael@0 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
michael@0 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0 27 */
michael@0 28
michael@0 29 #include "yarr/YarrPattern.h"
michael@0 30
michael@0 31 #include "yarr/Yarr.h"
michael@0 32 #include "yarr/YarrCanonicalizeUCS2.h"
michael@0 33 #include "yarr/YarrParser.h"
michael@0 34
michael@0 35 using namespace WTF;
michael@0 36
michael@0 37 namespace JSC { namespace Yarr {
michael@0 38
michael@0 39 #include "yarr/RegExpJitTables.h"
michael@0 40
michael@0 41 #if WTF_CPU_SPARC
michael@0 42 # define BASE_FRAME_SIZE 24
michael@0 43 #else
michael@0 44 # define BASE_FRAME_SIZE 0
michael@0 45 #endif
michael@0 46
michael@0 47 // Thanks, windows.h!
michael@0 48 #undef min
michael@0 49 #undef max
michael@0 50
michael@0 51 class CharacterClassConstructor {
michael@0 52 public:
michael@0 53 CharacterClassConstructor(bool isCaseInsensitive = false)
michael@0 54 : m_isCaseInsensitive(isCaseInsensitive)
michael@0 55 {
michael@0 56 }
michael@0 57
michael@0 58 void reset()
michael@0 59 {
michael@0 60 m_matches.clear();
michael@0 61 m_ranges.clear();
michael@0 62 m_matchesUnicode.clear();
michael@0 63 m_rangesUnicode.clear();
michael@0 64 }
michael@0 65
michael@0 66 void append(const CharacterClass* other)
michael@0 67 {
michael@0 68 for (size_t i = 0; i < other->m_matches.size(); ++i)
michael@0 69 addSorted(m_matches, other->m_matches[i]);
michael@0 70 for (size_t i = 0; i < other->m_ranges.size(); ++i)
michael@0 71 addSortedRange(m_ranges, other->m_ranges[i].begin, other->m_ranges[i].end);
michael@0 72 for (size_t i = 0; i < other->m_matchesUnicode.size(); ++i)
michael@0 73 addSorted(m_matchesUnicode, other->m_matchesUnicode[i]);
michael@0 74 for (size_t i = 0; i < other->m_rangesUnicode.size(); ++i)
michael@0 75 addSortedRange(m_rangesUnicode, other->m_rangesUnicode[i].begin, other->m_rangesUnicode[i].end);
michael@0 76 }
michael@0 77
michael@0 78 void putChar(UChar ch)
michael@0 79 {
michael@0 80 // Handle ascii cases.
michael@0 81 if (ch <= 0x7f) {
michael@0 82 if (m_isCaseInsensitive && isASCIIAlpha(ch)) {
michael@0 83 addSorted(m_matches, toASCIIUpper(ch));
michael@0 84 addSorted(m_matches, toASCIILower(ch));
michael@0 85 } else
michael@0 86 addSorted(m_matches, ch);
michael@0 87 return;
michael@0 88 }
michael@0 89
michael@0 90 // Simple case, not a case-insensitive match.
michael@0 91 if (!m_isCaseInsensitive) {
michael@0 92 addSorted(m_matchesUnicode, ch);
michael@0 93 return;
michael@0 94 }
michael@0 95
michael@0 96 // Add multiple matches, if necessary.
michael@0 97 const UCS2CanonicalizationRange* info = rangeInfoFor(ch);
michael@0 98 if (info->type == CanonicalizeUnique)
michael@0 99 addSorted(m_matchesUnicode, ch);
michael@0 100 else
michael@0 101 putUnicodeIgnoreCase(ch, info);
michael@0 102 }
michael@0 103
michael@0 104 void putUnicodeIgnoreCase(UChar ch, const UCS2CanonicalizationRange* info)
michael@0 105 {
michael@0 106 ASSERT(m_isCaseInsensitive);
michael@0 107 ASSERT(ch > 0x7f);
michael@0 108 ASSERT(ch >= info->begin && ch <= info->end);
michael@0 109 ASSERT(info->type != CanonicalizeUnique);
michael@0 110 if (info->type == CanonicalizeSet) {
michael@0 111 for (const uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
michael@0 112 addSorted(m_matchesUnicode, ch);
michael@0 113 } else {
michael@0 114 addSorted(m_matchesUnicode, ch);
michael@0 115 addSorted(m_matchesUnicode, getCanonicalPair(info, ch));
michael@0 116 }
michael@0 117 }
michael@0 118
michael@0 119 void putRange(UChar lo, UChar hi)
michael@0 120 {
michael@0 121 if (lo <= 0x7f) {
michael@0 122 char asciiLo = lo;
michael@0 123 char asciiHi = std::min(hi, (UChar)0x7f);
michael@0 124 addSortedRange(m_ranges, lo, asciiHi);
michael@0 125
michael@0 126 if (m_isCaseInsensitive) {
michael@0 127 if ((asciiLo <= 'Z') && (asciiHi >= 'A'))
michael@0 128 addSortedRange(m_ranges, std::max(asciiLo, 'A')+('a'-'A'), std::min(asciiHi, 'Z')+('a'-'A'));
michael@0 129 if ((asciiLo <= 'z') && (asciiHi >= 'a'))
michael@0 130 addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a'));
michael@0 131 }
michael@0 132 }
michael@0 133 if (hi <= 0x7f)
michael@0 134 return;
michael@0 135
michael@0 136 lo = std::max(lo, (UChar)0x80);
michael@0 137 addSortedRange(m_rangesUnicode, lo, hi);
michael@0 138
michael@0 139 if (!m_isCaseInsensitive)
michael@0 140 return;
michael@0 141
michael@0 142 const UCS2CanonicalizationRange* info = rangeInfoFor(lo);
michael@0 143 while (true) {
michael@0 144 // Handle the range [lo .. end]
michael@0 145 UChar end = std::min<UChar>(info->end, hi);
michael@0 146
michael@0 147 switch (info->type) {
michael@0 148 case CanonicalizeUnique:
michael@0 149 // Nothing to do - no canonical equivalents.
michael@0 150 break;
michael@0 151 case CanonicalizeSet: {
michael@0 152 UChar ch;
michael@0 153 for (const uint16_t* set = characterSetInfo[info->value]; (ch = *set); ++set)
michael@0 154 addSorted(m_matchesUnicode, ch);
michael@0 155 break;
michael@0 156 }
michael@0 157 case CanonicalizeRangeLo:
michael@0 158 addSortedRange(m_rangesUnicode, lo + info->value, end + info->value);
michael@0 159 break;
michael@0 160 case CanonicalizeRangeHi:
michael@0 161 addSortedRange(m_rangesUnicode, lo - info->value, end - info->value);
michael@0 162 break;
michael@0 163 case CanonicalizeAlternatingAligned:
michael@0 164 // Use addSortedRange since there is likely an abutting range to combine with.
michael@0 165 if (lo & 1)
michael@0 166 addSortedRange(m_rangesUnicode, lo - 1, lo - 1);
michael@0 167 if (!(end & 1))
michael@0 168 addSortedRange(m_rangesUnicode, end + 1, end + 1);
michael@0 169 break;
michael@0 170 case CanonicalizeAlternatingUnaligned:
michael@0 171 // Use addSortedRange since there is likely an abutting range to combine with.
michael@0 172 if (!(lo & 1))
michael@0 173 addSortedRange(m_rangesUnicode, lo - 1, lo - 1);
michael@0 174 if (end & 1)
michael@0 175 addSortedRange(m_rangesUnicode, end + 1, end + 1);
michael@0 176 break;
michael@0 177 }
michael@0 178
michael@0 179 if (hi == end)
michael@0 180 return;
michael@0 181
michael@0 182 ++info;
michael@0 183 lo = info->begin;
michael@0 184 };
michael@0 185
michael@0 186 }
michael@0 187
michael@0 188 CharacterClass* charClass()
michael@0 189 {
michael@0 190 CharacterClass* characterClass = newOrCrash<CharacterClass>();
michael@0 191
michael@0 192 characterClass->m_matches.swap(m_matches);
michael@0 193 characterClass->m_ranges.swap(m_ranges);
michael@0 194 characterClass->m_matchesUnicode.swap(m_matchesUnicode);
michael@0 195 characterClass->m_rangesUnicode.swap(m_rangesUnicode);
michael@0 196
michael@0 197 return characterClass;
michael@0 198 }
michael@0 199
michael@0 200 private:
michael@0 201 void addSorted(Vector<UChar>& matches, UChar ch)
michael@0 202 {
michael@0 203 unsigned pos = 0;
michael@0 204 unsigned range = matches.size();
michael@0 205
michael@0 206 // binary chop, find position to insert char.
michael@0 207 while (range) {
michael@0 208 unsigned index = range >> 1;
michael@0 209
michael@0 210 int val = matches[pos+index] - ch;
michael@0 211 if (!val)
michael@0 212 return;
michael@0 213 else if (val > 0)
michael@0 214 range = index;
michael@0 215 else {
michael@0 216 pos += (index+1);
michael@0 217 range -= (index+1);
michael@0 218 }
michael@0 219 }
michael@0 220
michael@0 221 if (pos == matches.size())
michael@0 222 matches.append(ch);
michael@0 223 else
michael@0 224 matches.insert(pos, ch);
michael@0 225 }
michael@0 226
michael@0 227 void addSortedRange(Vector<CharacterRange>& ranges, UChar lo, UChar hi)
michael@0 228 {
michael@0 229 unsigned end = ranges.size();
michael@0 230
michael@0 231 // Simple linear scan - I doubt there are that many ranges anyway...
michael@0 232 // feel free to fix this with something faster (eg binary chop).
michael@0 233 for (unsigned i = 0; i < end; ++i) {
michael@0 234 // does the new range fall before the current position in the array
michael@0 235 if (hi < ranges[i].begin) {
michael@0 236 // optional optimization: concatenate appending ranges? - may not be worthwhile.
michael@0 237 if (hi == (ranges[i].begin - 1)) {
michael@0 238 ranges[i].begin = lo;
michael@0 239 return;
michael@0 240 }
michael@0 241 ranges.insert(i, CharacterRange(lo, hi));
michael@0 242 return;
michael@0 243 }
michael@0 244 // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining
michael@0 245 // If the new range start at or before the end of the last range, then the overlap (if it starts one after the
michael@0 246 // end of the last range they concatenate, which is just as good.
michael@0 247 if (lo <= (ranges[i].end + 1)) {
michael@0 248 // found an intersect! we'll replace this entry in the array.
michael@0 249 ranges[i].begin = std::min(ranges[i].begin, lo);
michael@0 250 ranges[i].end = std::max(ranges[i].end, hi);
michael@0 251
michael@0 252 // now check if the new range can subsume any subsequent ranges.
michael@0 253 unsigned next = i+1;
michael@0 254 // each iteration of the loop we will either remove something from the list, or break the loop.
michael@0 255 while (next < ranges.size()) {
michael@0 256 if (ranges[next].begin <= (ranges[i].end + 1)) {
michael@0 257 // the next entry now overlaps / concatenates this one.
michael@0 258 ranges[i].end = std::max(ranges[i].end, ranges[next].end);
michael@0 259 ranges.remove(next);
michael@0 260 } else
michael@0 261 break;
michael@0 262 }
michael@0 263
michael@0 264 return;
michael@0 265 }
michael@0 266 }
michael@0 267
michael@0 268 // CharacterRange comes after all existing ranges.
michael@0 269 ranges.append(CharacterRange(lo, hi));
michael@0 270 }
michael@0 271
michael@0 272 bool m_isCaseInsensitive;
michael@0 273
michael@0 274 Vector<UChar> m_matches;
michael@0 275 Vector<CharacterRange> m_ranges;
michael@0 276 Vector<UChar> m_matchesUnicode;
michael@0 277 Vector<CharacterRange> m_rangesUnicode;
michael@0 278 };
michael@0 279
michael@0 280 class YarrPatternConstructor {
michael@0 281 public:
michael@0 282 YarrPatternConstructor(YarrPattern& pattern)
michael@0 283 : m_pattern(pattern)
michael@0 284 , m_stackBase(nullptr)
michael@0 285 , m_characterClassConstructor(pattern.m_ignoreCase)
michael@0 286 , m_invertParentheticalAssertion(false)
michael@0 287 {
michael@0 288 m_pattern.m_body = newOrCrash<PatternDisjunction>();
michael@0 289 m_alternative = m_pattern.m_body->addNewAlternative();
michael@0 290 m_pattern.m_disjunctions.append(m_pattern.m_body);
michael@0 291 }
michael@0 292
michael@0 293 ~YarrPatternConstructor()
michael@0 294 {
michael@0 295 }
michael@0 296
michael@0 297 void reset()
michael@0 298 {
michael@0 299 m_pattern.reset();
michael@0 300 m_characterClassConstructor.reset();
michael@0 301
michael@0 302 m_pattern.m_body = newOrCrash<PatternDisjunction>();
michael@0 303 m_alternative = m_pattern.m_body->addNewAlternative();
michael@0 304 m_pattern.m_disjunctions.append(m_pattern.m_body);
michael@0 305 }
michael@0 306
michael@0 307 void assertionBOL()
michael@0 308 {
michael@0 309 if (!m_alternative->m_terms.size() & !m_invertParentheticalAssertion) {
michael@0 310 m_alternative->m_startsWithBOL = true;
michael@0 311 m_alternative->m_containsBOL = true;
michael@0 312 m_pattern.m_containsBOL = true;
michael@0 313 }
michael@0 314 m_alternative->m_terms.append(PatternTerm::BOL());
michael@0 315 }
michael@0 316 void assertionEOL()
michael@0 317 {
michael@0 318 m_alternative->m_terms.append(PatternTerm::EOL());
michael@0 319 }
michael@0 320 void assertionWordBoundary(bool invert)
michael@0 321 {
michael@0 322 m_alternative->m_terms.append(PatternTerm::WordBoundary(invert));
michael@0 323 }
michael@0 324
michael@0 325 void atomPatternCharacter(UChar ch)
michael@0 326 {
michael@0 327 // We handle case-insensitive checking of unicode characters which do have both
michael@0 328 // cases by handling them as if they were defined using a CharacterClass.
michael@0 329 if (!m_pattern.m_ignoreCase || isASCII(ch)) {
michael@0 330 m_alternative->m_terms.append(PatternTerm(ch));
michael@0 331 return;
michael@0 332 }
michael@0 333
michael@0 334 const UCS2CanonicalizationRange* info = rangeInfoFor(ch);
michael@0 335 if (info->type == CanonicalizeUnique) {
michael@0 336 m_alternative->m_terms.append(PatternTerm(ch));
michael@0 337 return;
michael@0 338 }
michael@0 339
michael@0 340 m_characterClassConstructor.putUnicodeIgnoreCase(ch, info);
michael@0 341 CharacterClass* newCharacterClass = m_characterClassConstructor.charClass();
michael@0 342 m_pattern.m_userCharacterClasses.append(newCharacterClass);
michael@0 343 m_alternative->m_terms.append(PatternTerm(newCharacterClass, false));
michael@0 344 }
michael@0 345
michael@0 346 void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
michael@0 347 {
michael@0 348 switch (classID) {
michael@0 349 case DigitClassID:
michael@0 350 m_alternative->m_terms.append(PatternTerm(m_pattern.digitsCharacterClass(), invert));
michael@0 351 break;
michael@0 352 case SpaceClassID:
michael@0 353 m_alternative->m_terms.append(PatternTerm(m_pattern.spacesCharacterClass(), invert));
michael@0 354 break;
michael@0 355 case WordClassID:
michael@0 356 m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert));
michael@0 357 break;
michael@0 358 case NewlineClassID:
michael@0 359 m_alternative->m_terms.append(PatternTerm(m_pattern.newlineCharacterClass(), invert));
michael@0 360 break;
michael@0 361 }
michael@0 362 }
michael@0 363
michael@0 364 void atomCharacterClassBegin(bool invert = false)
michael@0 365 {
michael@0 366 m_invertCharacterClass = invert;
michael@0 367 }
michael@0 368
michael@0 369 void atomCharacterClassAtom(UChar ch)
michael@0 370 {
michael@0 371 m_characterClassConstructor.putChar(ch);
michael@0 372 }
michael@0 373
michael@0 374 void atomCharacterClassRange(UChar begin, UChar end)
michael@0 375 {
michael@0 376 m_characterClassConstructor.putRange(begin, end);
michael@0 377 }
michael@0 378
michael@0 379 void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
michael@0 380 {
michael@0 381 ASSERT(classID != NewlineClassID);
michael@0 382
michael@0 383 switch (classID) {
michael@0 384 case DigitClassID:
michael@0 385 m_characterClassConstructor.append(invert ? m_pattern.nondigitsCharacterClass() : m_pattern.digitsCharacterClass());
michael@0 386 break;
michael@0 387
michael@0 388 case SpaceClassID:
michael@0 389 m_characterClassConstructor.append(invert ? m_pattern.nonspacesCharacterClass() : m_pattern.spacesCharacterClass());
michael@0 390 break;
michael@0 391
michael@0 392 case WordClassID:
michael@0 393 m_characterClassConstructor.append(invert ? m_pattern.nonwordcharCharacterClass() : m_pattern.wordcharCharacterClass());
michael@0 394 break;
michael@0 395
michael@0 396 default:
michael@0 397 ASSERT_NOT_REACHED();
michael@0 398 }
michael@0 399 }
michael@0 400
michael@0 401 void atomCharacterClassEnd()
michael@0 402 {
michael@0 403 CharacterClass* newCharacterClass = m_characterClassConstructor.charClass();
michael@0 404 m_pattern.m_userCharacterClasses.append(newCharacterClass);
michael@0 405 m_alternative->m_terms.append(PatternTerm(newCharacterClass, m_invertCharacterClass));
michael@0 406 }
michael@0 407
michael@0 408 void atomParenthesesSubpatternBegin(bool capture = true)
michael@0 409 {
michael@0 410 unsigned subpatternId = m_pattern.m_numSubpatterns + 1;
michael@0 411 if (capture)
michael@0 412 m_pattern.m_numSubpatterns++;
michael@0 413
michael@0 414 PatternDisjunction* parenthesesDisjunction = newOrCrash<PatternDisjunction>(m_alternative);
michael@0 415 m_pattern.m_disjunctions.append(parenthesesDisjunction);
michael@0 416 m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, false));
michael@0 417 m_alternative = parenthesesDisjunction->addNewAlternative();
michael@0 418 }
michael@0 419
michael@0 420 void atomParentheticalAssertionBegin(bool invert = false)
michael@0 421 {
michael@0 422 PatternDisjunction* parenthesesDisjunction = newOrCrash<PatternDisjunction>(m_alternative);
michael@0 423 m_pattern.m_disjunctions.append(parenthesesDisjunction);
michael@0 424 m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, false, invert));
michael@0 425 m_alternative = parenthesesDisjunction->addNewAlternative();
michael@0 426 m_invertParentheticalAssertion = invert;
michael@0 427 }
michael@0 428
michael@0 429 void atomParenthesesEnd()
michael@0 430 {
michael@0 431 ASSERT(m_alternative->m_parent);
michael@0 432 ASSERT(m_alternative->m_parent->m_parent);
michael@0 433
michael@0 434 PatternDisjunction* parenthesesDisjunction = m_alternative->m_parent;
michael@0 435 m_alternative = m_alternative->m_parent->m_parent;
michael@0 436
michael@0 437 PatternTerm& lastTerm = m_alternative->lastTerm();
michael@0 438
michael@0 439 unsigned numParenAlternatives = parenthesesDisjunction->m_alternatives.size();
michael@0 440 unsigned numBOLAnchoredAlts = 0;
michael@0 441
michael@0 442 for (unsigned i = 0; i < numParenAlternatives; i++) {
michael@0 443 // Bubble up BOL flags
michael@0 444 if (parenthesesDisjunction->m_alternatives[i]->m_startsWithBOL)
michael@0 445 numBOLAnchoredAlts++;
michael@0 446 }
michael@0 447
michael@0 448 if (numBOLAnchoredAlts) {
michael@0 449 m_alternative->m_containsBOL = true;
michael@0 450 // If all the alternatives in parens start with BOL, then so does this one
michael@0 451 if (numBOLAnchoredAlts == numParenAlternatives)
michael@0 452 m_alternative->m_startsWithBOL = true;
michael@0 453 }
michael@0 454
michael@0 455 lastTerm.parentheses.lastSubpatternId = m_pattern.m_numSubpatterns;
michael@0 456 m_invertParentheticalAssertion = false;
michael@0 457 }
michael@0 458
michael@0 459 void atomBackReference(unsigned subpatternId)
michael@0 460 {
michael@0 461 ASSERT(subpatternId);
michael@0 462 m_pattern.m_containsBackreferences = true;
michael@0 463 m_pattern.m_maxBackReference = std::max(m_pattern.m_maxBackReference, subpatternId);
michael@0 464
michael@0 465 if (subpatternId > m_pattern.m_numSubpatterns) {
michael@0 466 m_alternative->m_terms.append(PatternTerm::ForwardReference());
michael@0 467 return;
michael@0 468 }
michael@0 469
michael@0 470 PatternAlternative* currentAlternative = m_alternative;
michael@0 471 ASSERT(currentAlternative);
michael@0 472
michael@0 473 // Note to self: if we waited until the AST was baked, we could also remove forwards refs
michael@0 474 while ((currentAlternative = currentAlternative->m_parent->m_parent)) {
michael@0 475 PatternTerm& term = currentAlternative->lastTerm();
michael@0 476 ASSERT((term.type == PatternTerm::TypeParenthesesSubpattern) || (term.type == PatternTerm::TypeParentheticalAssertion));
michael@0 477
michael@0 478 if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.capture() && (subpatternId == term.parentheses.subpatternId)) {
michael@0 479 m_alternative->m_terms.append(PatternTerm::ForwardReference());
michael@0 480 return;
michael@0 481 }
michael@0 482 }
michael@0 483
michael@0 484 m_alternative->m_terms.append(PatternTerm(subpatternId));
michael@0 485 }
michael@0 486
michael@0 487 // deep copy the argument disjunction. If filterStartsWithBOL is true,
michael@0 488 // skip alternatives with m_startsWithBOL set true.
michael@0 489 PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction, bool filterStartsWithBOL = false)
michael@0 490 {
michael@0 491 PatternDisjunction* newDisjunction = 0;
michael@0 492 for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
michael@0 493 PatternAlternative* alternative = disjunction->m_alternatives[alt];
michael@0 494 if (!filterStartsWithBOL || !alternative->m_startsWithBOL) {
michael@0 495 if (!newDisjunction) {
michael@0 496 newDisjunction = newOrCrash<PatternDisjunction>();
michael@0 497 newDisjunction->m_parent = disjunction->m_parent;
michael@0 498 }
michael@0 499 PatternAlternative* newAlternative = newDisjunction->addNewAlternative();
michael@0 500 newAlternative->m_terms.reserve(alternative->m_terms.size());
michael@0 501 for (unsigned i = 0; i < alternative->m_terms.size(); ++i)
michael@0 502 newAlternative->m_terms.append(copyTerm(alternative->m_terms[i], filterStartsWithBOL));
michael@0 503 }
michael@0 504 }
michael@0 505
michael@0 506 if (newDisjunction)
michael@0 507 m_pattern.m_disjunctions.append(newDisjunction);
michael@0 508 return newDisjunction;
michael@0 509 }
michael@0 510
michael@0 511 PatternTerm copyTerm(PatternTerm& term, bool filterStartsWithBOL = false)
michael@0 512 {
michael@0 513 if ((term.type != PatternTerm::TypeParenthesesSubpattern) && (term.type != PatternTerm::TypeParentheticalAssertion))
michael@0 514 return PatternTerm(term);
michael@0 515
michael@0 516 PatternTerm termCopy = term;
michael@0 517 termCopy.parentheses.disjunction = copyDisjunction(termCopy.parentheses.disjunction, filterStartsWithBOL);
michael@0 518 return termCopy;
michael@0 519 }
michael@0 520
michael@0 521 void quantifyAtom(unsigned min, unsigned max, bool greedy)
michael@0 522 {
michael@0 523 ASSERT(min <= max);
michael@0 524 ASSERT(m_alternative->m_terms.size());
michael@0 525
michael@0 526 if (!max) {
michael@0 527 m_alternative->removeLastTerm();
michael@0 528 return;
michael@0 529 }
michael@0 530
michael@0 531 PatternTerm& term = m_alternative->lastTerm();
michael@0 532 ASSERT(term.type > PatternTerm::TypeAssertionWordBoundary);
michael@0 533 ASSERT((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount));
michael@0 534
michael@0 535 if (term.type == PatternTerm::TypeParentheticalAssertion) {
michael@0 536 // If an assertion is quantified with a minimum count of zero, it can simply be removed.
michael@0 537 // This arises from the RepeatMatcher behaviour in the spec. Matching an assertion never
michael@0 538 // results in any input being consumed, however the continuation passed to the assertion
michael@0 539 // (called in steps, 8c and 9 of the RepeatMatcher definition, ES5.1 15.10.2.5) will
michael@0 540 // reject all zero length matches (see step 2.1). A match from the continuation of the
michael@0 541 // expression will still be accepted regardless (via steps 8a and 11) - the upshot of all
michael@0 542 // this is that matches from the assertion are not required, and won't be accepted anyway,
michael@0 543 // so no need to ever run it.
michael@0 544 if (!min)
michael@0 545 m_alternative->removeLastTerm();
michael@0 546 // We never need to run an assertion more than once. Subsequent interations will be run
michael@0 547 // with the same start index (since assertions are non-capturing) and the same captures
michael@0 548 // (per step 4 of RepeatMatcher in ES5.1 15.10.2.5), and as such will always produce the
michael@0 549 // same result and captures. If the first match succeeds then the subsequent (min - 1)
michael@0 550 // matches will too. Any additional optional matches will fail (on the same basis as the
michael@0 551 // minimum zero quantified assertions, above), but this will still result in a match.
michael@0 552 return;
michael@0 553 }
michael@0 554
michael@0 555 if (min == 0)
michael@0 556 term.quantify(max, greedy ? QuantifierGreedy : QuantifierNonGreedy);
michael@0 557 else if (min == max)
michael@0 558 term.quantify(min, QuantifierFixedCount);
michael@0 559 else {
michael@0 560 term.quantify(min, QuantifierFixedCount);
michael@0 561 m_alternative->m_terms.append(copyTerm(term));
michael@0 562 // NOTE: this term is interesting from an analysis perspective, in that it can be ignored.....
michael@0 563 m_alternative->lastTerm().quantify((max == quantifyInfinite) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy);
michael@0 564 if (m_alternative->lastTerm().type == PatternTerm::TypeParenthesesSubpattern)
michael@0 565 m_alternative->lastTerm().parentheses.isCopy = true;
michael@0 566 }
michael@0 567 }
michael@0 568
michael@0 569 void disjunction()
michael@0 570 {
michael@0 571 m_alternative = m_alternative->m_parent->addNewAlternative();
michael@0 572 }
michael@0 573
michael@0 574 ErrorCode setupAlternativeOffsets(PatternAlternative* alternative, unsigned currentCallFrameSize, unsigned initialInputPosition,
michael@0 575 unsigned *callFrameSizeOut)
michael@0 576 {
michael@0 577 /*
michael@0 578 * Attempt detection of over-recursion:
michael@0 579 * "1MB should be enough stack for anyone."
michael@0 580 */
michael@0 581 uint8_t stackDummy_;
michael@0 582 if (m_stackBase - &stackDummy_ > 1024*1024)
michael@0 583 return PatternTooLarge;
michael@0 584
michael@0 585 alternative->m_hasFixedSize = true;
michael@0 586 Checked<unsigned> currentInputPosition = initialInputPosition;
michael@0 587
michael@0 588 for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
michael@0 589 PatternTerm& term = alternative->m_terms[i];
michael@0 590
michael@0 591 switch (term.type) {
michael@0 592 case PatternTerm::TypeAssertionBOL:
michael@0 593 case PatternTerm::TypeAssertionEOL:
michael@0 594 case PatternTerm::TypeAssertionWordBoundary:
michael@0 595 if (Checked<int>(currentInputPosition).safeGet(term.inputPosition))
michael@0 596 return RuntimeError;
michael@0 597 break;
michael@0 598
michael@0 599 case PatternTerm::TypeBackReference:
michael@0 600 if (Checked<int>(currentInputPosition).safeGet(term.inputPosition))
michael@0 601 return RuntimeError;
michael@0 602 term.frameLocation = currentCallFrameSize;
michael@0 603 currentCallFrameSize += YarrStackSpaceForBackTrackInfoBackReference;
michael@0 604 alternative->m_hasFixedSize = false;
michael@0 605 break;
michael@0 606
michael@0 607 case PatternTerm::TypeForwardReference:
michael@0 608 break;
michael@0 609
michael@0 610 case PatternTerm::TypePatternCharacter:
michael@0 611 if (Checked<int>(currentInputPosition).safeGet(term.inputPosition))
michael@0 612 return RuntimeError;
michael@0 613 if (term.quantityType != QuantifierFixedCount) {
michael@0 614 term.frameLocation = currentCallFrameSize;
michael@0 615 currentCallFrameSize += YarrStackSpaceForBackTrackInfoPatternCharacter;
michael@0 616 alternative->m_hasFixedSize = false;
michael@0 617 } else
michael@0 618 currentInputPosition += term.quantityCount;
michael@0 619 break;
michael@0 620
michael@0 621 case PatternTerm::TypeCharacterClass:
michael@0 622 if (Checked<int>(currentInputPosition).safeGet(term.inputPosition))
michael@0 623 return RuntimeError;
michael@0 624 if (term.quantityType != QuantifierFixedCount) {
michael@0 625 term.frameLocation = currentCallFrameSize;
michael@0 626 currentCallFrameSize += YarrStackSpaceForBackTrackInfoCharacterClass;
michael@0 627 alternative->m_hasFixedSize = false;
michael@0 628 } else
michael@0 629 currentInputPosition += term.quantityCount;
michael@0 630 break;
michael@0 631
michael@0 632 case PatternTerm::TypeParenthesesSubpattern:
michael@0 633 // Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own.
michael@0 634 term.frameLocation = currentCallFrameSize;
michael@0 635 unsigned position;
michael@0 636 if (currentInputPosition.safeGet(position))
michael@0 637 return RuntimeError;
michael@0 638 if (term.quantityCount == 1 && !term.parentheses.isCopy) {
michael@0 639 if (term.quantityType != QuantifierFixedCount)
michael@0 640 currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesOnce;
michael@0 641 if (ErrorCode error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, position, &currentCallFrameSize))
michael@0 642 return error;
michael@0 643 // If quantity is fixed, then pre-check its minimum size.
michael@0 644 if (term.quantityType == QuantifierFixedCount)
michael@0 645 currentInputPosition += term.parentheses.disjunction->m_minimumSize;
michael@0 646 if (Checked<int>(currentInputPosition).safeGet(term.inputPosition))
michael@0 647 return RuntimeError;
michael@0 648 } else if (term.parentheses.isTerminal) {
michael@0 649 currentCallFrameSize += YarrStackSpaceForBackTrackInfoParenthesesTerminal;
michael@0 650 if (ErrorCode error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, position, &currentCallFrameSize))
michael@0 651 return error;
michael@0 652 if (Checked<int>(currentInputPosition).safeGet(term.inputPosition))
michael@0 653 return RuntimeError;
michael@0 654 } else {
michael@0 655 if (Checked<int>(currentInputPosition).safeGet(term.inputPosition))
michael@0 656 return RuntimeError;
michael@0 657 unsigned dummy;
michael@0 658 if (ErrorCode error = setupDisjunctionOffsets(term.parentheses.disjunction, BASE_FRAME_SIZE, position, &dummy))
michael@0 659 return error;
michael@0 660 currentCallFrameSize += YarrStackSpaceForBackTrackInfoParentheses;
michael@0 661 }
michael@0 662 // Fixed count of 1 could be accepted, if they have a fixed size *AND* if all alternatives are of the same length.
michael@0 663 alternative->m_hasFixedSize = false;
michael@0 664 break;
michael@0 665
michael@0 666 case PatternTerm::TypeParentheticalAssertion:
michael@0 667 if (Checked<int>(currentInputPosition).safeGet(term.inputPosition))
michael@0 668 return RuntimeError;
michael@0 669 term.frameLocation = currentCallFrameSize;
michael@0 670 if (ErrorCode error = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + YarrStackSpaceForBackTrackInfoParentheticalAssertion, term.inputPosition, &currentCallFrameSize))
michael@0 671 return error;
michael@0 672 break;
michael@0 673
michael@0 674 case PatternTerm::TypeDotStarEnclosure:
michael@0 675 alternative->m_hasFixedSize = false;
michael@0 676 term.inputPosition = initialInputPosition;
michael@0 677 break;
michael@0 678 }
michael@0 679 }
michael@0 680
michael@0 681 if ((currentInputPosition - initialInputPosition).safeGet(alternative->m_minimumSize))
michael@0 682 return RuntimeError;
michael@0 683 *callFrameSizeOut = currentCallFrameSize;
michael@0 684 return NoError;
michael@0 685 }
michael@0 686
michael@0 687 ErrorCode setupDisjunctionOffsets(PatternDisjunction* disjunction, unsigned initialCallFrameSize, unsigned initialInputPosition, unsigned *maximumCallFrameSizeOut)
michael@0 688 {
michael@0 689 if ((disjunction != m_pattern.m_body) && (disjunction->m_alternatives.size() > 1))
michael@0 690 initialCallFrameSize += YarrStackSpaceForBackTrackInfoAlternative;
michael@0 691
michael@0 692 unsigned minimumInputSize = UINT_MAX;
michael@0 693 unsigned maximumCallFrameSize = 0;
michael@0 694 bool hasFixedSize = true;
michael@0 695
michael@0 696 for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
michael@0 697 PatternAlternative* alternative = disjunction->m_alternatives[alt];
michael@0 698 unsigned currentAlternativeCallFrameSize;
michael@0 699 if (ErrorCode error = setupAlternativeOffsets(alternative, initialCallFrameSize, initialInputPosition, &currentAlternativeCallFrameSize))
michael@0 700 return error;
michael@0 701 minimumInputSize = std::min(minimumInputSize, alternative->m_minimumSize);
michael@0 702 maximumCallFrameSize = std::max(maximumCallFrameSize, currentAlternativeCallFrameSize);
michael@0 703 hasFixedSize &= alternative->m_hasFixedSize;
michael@0 704 }
michael@0 705
michael@0 706 ASSERT(minimumInputSize != UINT_MAX);
michael@0 707 if (minimumInputSize == UINT_MAX)
michael@0 708 return PatternTooLarge;
michael@0 709
michael@0 710 ASSERT(minimumInputSize != UINT_MAX);
michael@0 711 ASSERT(maximumCallFrameSize >= initialCallFrameSize);
michael@0 712
michael@0 713 disjunction->m_hasFixedSize = hasFixedSize;
michael@0 714 disjunction->m_minimumSize = minimumInputSize;
michael@0 715 disjunction->m_callFrameSize = maximumCallFrameSize;
michael@0 716 *maximumCallFrameSizeOut = maximumCallFrameSize;
michael@0 717 return NoError;
michael@0 718 }
michael@0 719
michael@0 720 ErrorCode setupOffsets()
michael@0 721 {
michael@0 722 unsigned dummy;
michael@0 723 return setupDisjunctionOffsets(m_pattern.m_body, BASE_FRAME_SIZE, 0, &dummy);
michael@0 724 }
michael@0 725
michael@0 726 // This optimization identifies sets of parentheses that we will never need to backtrack.
michael@0 727 // In these cases we do not need to store state from prior iterations.
michael@0 728 // We can presently avoid backtracking for:
michael@0 729 // * where the parens are at the end of the regular expression (last term in any of the
michael@0 730 // alternatives of the main body disjunction).
michael@0 731 // * where the parens are non-capturing, and quantified unbounded greedy (*).
michael@0 732 // * where the parens do not contain any capturing subpatterns.
michael@0 733 void checkForTerminalParentheses()
michael@0 734 {
michael@0 735 // This check is much too crude; should be just checking whether the candidate
michael@0 736 // node contains nested capturing subpatterns, not the whole expression!
michael@0 737 if (m_pattern.m_numSubpatterns)
michael@0 738 return;
michael@0 739
michael@0 740 Vector<PatternAlternative*>& alternatives = m_pattern.m_body->m_alternatives;
michael@0 741 for (size_t i = 0; i < alternatives.size(); ++i) {
michael@0 742 Vector<PatternTerm>& terms = alternatives[i]->m_terms;
michael@0 743 if (terms.size()) {
michael@0 744 PatternTerm& term = terms.last();
michael@0 745 if (term.type == PatternTerm::TypeParenthesesSubpattern
michael@0 746 && term.quantityType == QuantifierGreedy
michael@0 747 && term.quantityCount == quantifyInfinite
michael@0 748 && !term.capture())
michael@0 749 term.parentheses.isTerminal = true;
michael@0 750 }
michael@0 751 }
michael@0 752 }
michael@0 753
michael@0 754 void optimizeBOL()
michael@0 755 {
michael@0 756 // Look for expressions containing beginning of line (^) anchoring and unroll them.
michael@0 757 // e.g. /^a|^b|c/ becomes /^a|^b|c/ which is executed once followed by /c/ which loops
michael@0 758 // This code relies on the parsing code tagging alternatives with m_containsBOL and
michael@0 759 // m_startsWithBOL and rolling those up to containing alternatives.
michael@0 760 // At this point, this is only valid for non-multiline expressions.
michael@0 761 PatternDisjunction* disjunction = m_pattern.m_body;
michael@0 762
michael@0 763 if (!m_pattern.m_containsBOL || m_pattern.m_multiline)
michael@0 764 return;
michael@0 765
michael@0 766 PatternDisjunction* loopDisjunction = copyDisjunction(disjunction, true);
michael@0 767
michael@0 768 // Set alternatives in disjunction to "onceThrough"
michael@0 769 for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt)
michael@0 770 disjunction->m_alternatives[alt]->setOnceThrough();
michael@0 771
michael@0 772 if (loopDisjunction) {
michael@0 773 // Move alternatives from loopDisjunction to disjunction
michael@0 774 for (unsigned alt = 0; alt < loopDisjunction->m_alternatives.size(); ++alt)
michael@0 775 disjunction->m_alternatives.append(loopDisjunction->m_alternatives[alt]);
michael@0 776
michael@0 777 loopDisjunction->m_alternatives.clear();
michael@0 778 }
michael@0 779 }
michael@0 780
michael@0 781 bool containsCapturingTerms(PatternAlternative* alternative, size_t firstTermIndex, size_t lastTermIndex)
michael@0 782 {
michael@0 783 Vector<PatternTerm>& terms = alternative->m_terms;
michael@0 784
michael@0 785 for (size_t termIndex = firstTermIndex; termIndex <= lastTermIndex; ++termIndex) {
michael@0 786 PatternTerm& term = terms[termIndex];
michael@0 787
michael@0 788 if (term.m_capture)
michael@0 789 return true;
michael@0 790
michael@0 791 if (term.type == PatternTerm::TypeParenthesesSubpattern) {
michael@0 792 PatternDisjunction* nestedDisjunction = term.parentheses.disjunction;
michael@0 793 for (unsigned alt = 0; alt < nestedDisjunction->m_alternatives.size(); ++alt) {
michael@0 794 PatternAlternative *pattern = nestedDisjunction->m_alternatives[alt];
michael@0 795 if (pattern->m_terms.size() == 0)
michael@0 796 continue;
michael@0 797 if (containsCapturingTerms(pattern, 0, pattern->m_terms.size() - 1))
michael@0 798 return true;
michael@0 799 }
michael@0 800 }
michael@0 801 }
michael@0 802
michael@0 803 return false;
michael@0 804 }
michael@0 805
michael@0 806 // This optimization identifies alternatives in the form of
michael@0 807 // [^].*[?]<expression>.*[$] for expressions that don't have any
michael@0 808 // capturing terms. The alternative is changed to <expression>
michael@0 809 // followed by processing of the dot stars to find and adjust the
michael@0 810 // beginning and the end of the match.
michael@0 811 void optimizeDotStarWrappedExpressions()
michael@0 812 {
michael@0 813 Vector<PatternAlternative*>& alternatives = m_pattern.m_body->m_alternatives;
michael@0 814 if (alternatives.size() != 1)
michael@0 815 return;
michael@0 816
michael@0 817 PatternAlternative* alternative = alternatives[0];
michael@0 818 Vector<PatternTerm>& terms = alternative->m_terms;
michael@0 819 if (terms.size() >= 3) {
michael@0 820 bool startsWithBOL = false;
michael@0 821 bool endsWithEOL = false;
michael@0 822 size_t termIndex, firstExpressionTerm, lastExpressionTerm;
michael@0 823
michael@0 824 termIndex = 0;
michael@0 825 if (terms[termIndex].type == PatternTerm::TypeAssertionBOL) {
michael@0 826 startsWithBOL = true;
michael@0 827 ++termIndex;
michael@0 828 }
michael@0 829
michael@0 830 PatternTerm& firstNonAnchorTerm = terms[termIndex];
michael@0 831 if ((firstNonAnchorTerm.type != PatternTerm::TypeCharacterClass) || (firstNonAnchorTerm.characterClass != m_pattern.newlineCharacterClass()) || !((firstNonAnchorTerm.quantityType == QuantifierGreedy) || (firstNonAnchorTerm.quantityType == QuantifierNonGreedy)))
michael@0 832 return;
michael@0 833
michael@0 834 firstExpressionTerm = termIndex + 1;
michael@0 835
michael@0 836 termIndex = terms.size() - 1;
michael@0 837 if (terms[termIndex].type == PatternTerm::TypeAssertionEOL) {
michael@0 838 endsWithEOL = true;
michael@0 839 --termIndex;
michael@0 840 }
michael@0 841
michael@0 842 PatternTerm& lastNonAnchorTerm = terms[termIndex];
michael@0 843 if ((lastNonAnchorTerm.type != PatternTerm::TypeCharacterClass) || (lastNonAnchorTerm.characterClass != m_pattern.newlineCharacterClass()) || (lastNonAnchorTerm.quantityType != QuantifierGreedy))
michael@0 844 return;
michael@0 845
michael@0 846 lastExpressionTerm = termIndex - 1;
michael@0 847
michael@0 848 if (firstExpressionTerm > lastExpressionTerm)
michael@0 849 return;
michael@0 850
michael@0 851 if (!containsCapturingTerms(alternative, firstExpressionTerm, lastExpressionTerm)) {
michael@0 852 for (termIndex = terms.size() - 1; termIndex > lastExpressionTerm; --termIndex)
michael@0 853 terms.remove(termIndex);
michael@0 854
michael@0 855 for (termIndex = firstExpressionTerm; termIndex > 0; --termIndex)
michael@0 856 terms.remove(termIndex - 1);
michael@0 857
michael@0 858 terms.append(PatternTerm(startsWithBOL, endsWithEOL));
michael@0 859
michael@0 860 m_pattern.m_containsBOL = false;
michael@0 861 }
michael@0 862 }
michael@0 863 }
michael@0 864
michael@0 865 void setStackBase(uint8_t *stackBase) {
michael@0 866 m_stackBase = stackBase;
michael@0 867 }
michael@0 868
michael@0 869 private:
michael@0 870 YarrPattern& m_pattern;
michael@0 871 uint8_t * m_stackBase;
michael@0 872 PatternAlternative* m_alternative;
michael@0 873 CharacterClassConstructor m_characterClassConstructor;
michael@0 874 bool m_invertCharacterClass;
michael@0 875 bool m_invertParentheticalAssertion;
michael@0 876 };
michael@0 877
michael@0 878 ErrorCode YarrPattern::compile(const String& patternString)
michael@0 879 {
michael@0 880 YarrPatternConstructor constructor(*this);
michael@0 881
michael@0 882 if (ErrorCode error = parse(constructor, patternString))
michael@0 883 return error;
michael@0 884
michael@0 885 // If the pattern contains illegal backreferences reset & reparse.
michael@0 886 // Quoting Netscape's "What's new in JavaScript 1.2",
michael@0 887 // "Note: if the number of left parentheses is less than the number specified
michael@0 888 // in \#, the \# is taken as an octal escape as described in the next row."
michael@0 889 if (containsIllegalBackReference()) {
michael@0 890 unsigned numSubpatterns = m_numSubpatterns;
michael@0 891
michael@0 892 constructor.reset();
michael@0 893 #if !ASSERT_DISABLED
michael@0 894 ErrorCode error =
michael@0 895 #endif
michael@0 896 parse(constructor, patternString, numSubpatterns);
michael@0 897
michael@0 898 ASSERT(!error);
michael@0 899 ASSERT(numSubpatterns == m_numSubpatterns);
michael@0 900 }
michael@0 901
michael@0 902 uint8_t stackDummy_;
michael@0 903 constructor.setStackBase(&stackDummy_);
michael@0 904
michael@0 905 constructor.checkForTerminalParentheses();
michael@0 906 constructor.optimizeDotStarWrappedExpressions();
michael@0 907 constructor.optimizeBOL();
michael@0 908
michael@0 909 if (ErrorCode error = constructor.setupOffsets())
michael@0 910 return error;
michael@0 911
michael@0 912 return NoError;
michael@0 913 }
michael@0 914
michael@0 915 YarrPattern::YarrPattern(const String& pattern, bool ignoreCase, bool multiline, ErrorCode* error)
michael@0 916 : m_ignoreCase(ignoreCase)
michael@0 917 , m_multiline(multiline)
michael@0 918 , m_containsBackreferences(false)
michael@0 919 , m_containsBOL(false)
michael@0 920 , m_numSubpatterns(0)
michael@0 921 , m_maxBackReference(0)
michael@0 922 , newlineCached(0)
michael@0 923 , digitsCached(0)
michael@0 924 , spacesCached(0)
michael@0 925 , wordcharCached(0)
michael@0 926 , nondigitsCached(0)
michael@0 927 , nonspacesCached(0)
michael@0 928 , nonwordcharCached(0)
michael@0 929 {
michael@0 930 *error = compile(pattern);
michael@0 931 }
michael@0 932
michael@0 933 } }

mercurial