Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
michael@0 | 2 | * vim: set ts=8 sts=4 et sw=4 tw=99: |
michael@0 | 3 | * |
michael@0 | 4 | * Copyright (C) 2009 Apple Inc. All rights reserved. |
michael@0 | 5 | * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged |
michael@0 | 6 | * |
michael@0 | 7 | * Redistribution and use in source and binary forms, with or without |
michael@0 | 8 | * modification, are permitted provided that the following conditions |
michael@0 | 9 | * are met: |
michael@0 | 10 | * 1. Redistributions of source code must retain the above copyright |
michael@0 | 11 | * notice, this list of conditions and the following disclaimer. |
michael@0 | 12 | * 2. Redistributions in binary form must reproduce the above copyright |
michael@0 | 13 | * notice, this list of conditions and the following disclaimer in the |
michael@0 | 14 | * documentation and/or other materials provided with the distribution. |
michael@0 | 15 | * |
michael@0 | 16 | * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
michael@0 | 17 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
michael@0 | 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
michael@0 | 19 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
michael@0 | 20 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
michael@0 | 21 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
michael@0 | 22 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
michael@0 | 23 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
michael@0 | 24 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
michael@0 | 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
michael@0 | 26 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
michael@0 | 27 | */ |
michael@0 | 28 | |
michael@0 | 29 | #include "yarr/YarrInterpreter.h" |
michael@0 | 30 | |
michael@0 | 31 | #include "jscntxt.h" |
michael@0 | 32 | |
michael@0 | 33 | #include "yarr/Yarr.h" |
michael@0 | 34 | #include "yarr/YarrCanonicalizeUCS2.h" |
michael@0 | 35 | #include "yarr/BumpPointerAllocator.h" |
michael@0 | 36 | |
michael@0 | 37 | using namespace WTF; |
michael@0 | 38 | |
michael@0 | 39 | namespace JSC { namespace Yarr { |
michael@0 | 40 | |
michael@0 | 41 | template<typename CharType> |
michael@0 | 42 | class Interpreter { |
michael@0 | 43 | public: |
michael@0 | 44 | struct ParenthesesDisjunctionContext; |
michael@0 | 45 | |
michael@0 | 46 | struct BackTrackInfoPatternCharacter { |
michael@0 | 47 | uintptr_t matchAmount; |
michael@0 | 48 | }; |
michael@0 | 49 | struct BackTrackInfoCharacterClass { |
michael@0 | 50 | uintptr_t matchAmount; |
michael@0 | 51 | }; |
michael@0 | 52 | struct BackTrackInfoBackReference { |
michael@0 | 53 | uintptr_t begin; // Not really needed for greedy quantifiers. |
michael@0 | 54 | uintptr_t matchAmount; // Not really needed for fixed quantifiers. |
michael@0 | 55 | }; |
michael@0 | 56 | struct BackTrackInfoAlternative { |
michael@0 | 57 | uintptr_t offset; |
michael@0 | 58 | }; |
michael@0 | 59 | struct BackTrackInfoParentheticalAssertion { |
michael@0 | 60 | uintptr_t begin; |
michael@0 | 61 | }; |
michael@0 | 62 | struct BackTrackInfoParenthesesOnce { |
michael@0 | 63 | uintptr_t begin; |
michael@0 | 64 | }; |
michael@0 | 65 | struct BackTrackInfoParenthesesTerminal { |
michael@0 | 66 | uintptr_t begin; |
michael@0 | 67 | }; |
michael@0 | 68 | struct BackTrackInfoParentheses { |
michael@0 | 69 | uintptr_t matchAmount; |
michael@0 | 70 | ParenthesesDisjunctionContext* lastContext; |
michael@0 | 71 | }; |
michael@0 | 72 | |
michael@0 | 73 | static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context) |
michael@0 | 74 | { |
michael@0 | 75 | context->next = backTrack->lastContext; |
michael@0 | 76 | backTrack->lastContext = context; |
michael@0 | 77 | ++backTrack->matchAmount; |
michael@0 | 78 | } |
michael@0 | 79 | |
michael@0 | 80 | static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack) |
michael@0 | 81 | { |
michael@0 | 82 | ASSERT(backTrack->matchAmount); |
michael@0 | 83 | ASSERT(backTrack->lastContext); |
michael@0 | 84 | backTrack->lastContext = backTrack->lastContext->next; |
michael@0 | 85 | --backTrack->matchAmount; |
michael@0 | 86 | } |
michael@0 | 87 | |
michael@0 | 88 | struct DisjunctionContext |
michael@0 | 89 | { |
michael@0 | 90 | DisjunctionContext() |
michael@0 | 91 | : term(0) |
michael@0 | 92 | { |
michael@0 | 93 | } |
michael@0 | 94 | |
michael@0 | 95 | void* operator new(size_t, void* where) |
michael@0 | 96 | { |
michael@0 | 97 | return where; |
michael@0 | 98 | } |
michael@0 | 99 | |
michael@0 | 100 | int term; |
michael@0 | 101 | unsigned matchBegin; |
michael@0 | 102 | unsigned matchEnd; |
michael@0 | 103 | uintptr_t frame[1]; |
michael@0 | 104 | }; |
michael@0 | 105 | |
michael@0 | 106 | DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction) |
michael@0 | 107 | { |
michael@0 | 108 | size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t); |
michael@0 | 109 | allocatorPool = allocatorPool->ensureCapacity(size); |
michael@0 | 110 | if (!allocatorPool) |
michael@0 | 111 | CRASH(); |
michael@0 | 112 | return new (allocatorPool->alloc(size)) DisjunctionContext(); |
michael@0 | 113 | } |
michael@0 | 114 | |
michael@0 | 115 | void freeDisjunctionContext(DisjunctionContext* context) |
michael@0 | 116 | { |
michael@0 | 117 | allocatorPool = allocatorPool->dealloc(context); |
michael@0 | 118 | } |
michael@0 | 119 | |
michael@0 | 120 | struct ParenthesesDisjunctionContext |
michael@0 | 121 | { |
michael@0 | 122 | ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term) |
michael@0 | 123 | : next(0) |
michael@0 | 124 | { |
michael@0 | 125 | unsigned firstSubpatternId = term.atom.subpatternId; |
michael@0 | 126 | unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns; |
michael@0 | 127 | |
michael@0 | 128 | for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) { |
michael@0 | 129 | subpatternBackup[i] = output[(firstSubpatternId << 1) + i]; |
michael@0 | 130 | output[(firstSubpatternId << 1) + i] = offsetNoMatch; |
michael@0 | 131 | } |
michael@0 | 132 | |
michael@0 | 133 | new (getDisjunctionContext(term)) DisjunctionContext(); |
michael@0 | 134 | } |
michael@0 | 135 | |
michael@0 | 136 | void* operator new(size_t, void* where) |
michael@0 | 137 | { |
michael@0 | 138 | return where; |
michael@0 | 139 | } |
michael@0 | 140 | |
michael@0 | 141 | void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns) |
michael@0 | 142 | { |
michael@0 | 143 | for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) |
michael@0 | 144 | output[(firstSubpatternId << 1) + i] = subpatternBackup[i]; |
michael@0 | 145 | } |
michael@0 | 146 | |
michael@0 | 147 | DisjunctionContext* getDisjunctionContext(ByteTerm& term) |
michael@0 | 148 | { |
michael@0 | 149 | return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1])); |
michael@0 | 150 | } |
michael@0 | 151 | |
michael@0 | 152 | ParenthesesDisjunctionContext* next; |
michael@0 | 153 | unsigned subpatternBackup[1]; |
michael@0 | 154 | }; |
michael@0 | 155 | |
michael@0 | 156 | ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term) |
michael@0 | 157 | { |
michael@0 | 158 | size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(unsigned) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t); |
michael@0 | 159 | size = JS_ROUNDUP(size, JS_ALIGNMENT_OF(ParenthesesDisjunctionContext)); |
michael@0 | 160 | allocatorPool = allocatorPool->ensureCapacity(size); |
michael@0 | 161 | if (!allocatorPool) |
michael@0 | 162 | CRASH(); |
michael@0 | 163 | return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term); |
michael@0 | 164 | } |
michael@0 | 165 | |
michael@0 | 166 | void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context) |
michael@0 | 167 | { |
michael@0 | 168 | allocatorPool = allocatorPool->dealloc(context); |
michael@0 | 169 | } |
michael@0 | 170 | |
michael@0 | 171 | class InputStream { |
michael@0 | 172 | public: |
michael@0 | 173 | InputStream(const CharType* input, unsigned start, unsigned length) |
michael@0 | 174 | : input(input) |
michael@0 | 175 | , pos(start) |
michael@0 | 176 | , length(length) |
michael@0 | 177 | { |
michael@0 | 178 | } |
michael@0 | 179 | |
michael@0 | 180 | void next() |
michael@0 | 181 | { |
michael@0 | 182 | ++pos; |
michael@0 | 183 | } |
michael@0 | 184 | |
michael@0 | 185 | void rewind(unsigned amount) |
michael@0 | 186 | { |
michael@0 | 187 | ASSERT(pos >= amount); |
michael@0 | 188 | pos -= amount; |
michael@0 | 189 | } |
michael@0 | 190 | |
michael@0 | 191 | int read() |
michael@0 | 192 | { |
michael@0 | 193 | ASSERT(pos < length); |
michael@0 | 194 | if (pos < length) |
michael@0 | 195 | return input[pos]; |
michael@0 | 196 | return -1; |
michael@0 | 197 | } |
michael@0 | 198 | |
michael@0 | 199 | int readPair() |
michael@0 | 200 | { |
michael@0 | 201 | ASSERT(pos + 1 < length); |
michael@0 | 202 | return input[pos] | input[pos + 1] << 16; |
michael@0 | 203 | } |
michael@0 | 204 | |
michael@0 | 205 | int readChecked(unsigned negativePositionOffest) |
michael@0 | 206 | { |
michael@0 | 207 | if (pos < negativePositionOffest) |
michael@0 | 208 | CRASH(); |
michael@0 | 209 | unsigned p = pos - negativePositionOffest; |
michael@0 | 210 | ASSERT(p < length); |
michael@0 | 211 | return input[p]; |
michael@0 | 212 | } |
michael@0 | 213 | |
michael@0 | 214 | int reread(unsigned from) |
michael@0 | 215 | { |
michael@0 | 216 | ASSERT(from < length); |
michael@0 | 217 | return input[from]; |
michael@0 | 218 | } |
michael@0 | 219 | |
michael@0 | 220 | int prev() |
michael@0 | 221 | { |
michael@0 | 222 | ASSERT(!(pos > length)); |
michael@0 | 223 | if (pos && length) |
michael@0 | 224 | return input[pos - 1]; |
michael@0 | 225 | return -1; |
michael@0 | 226 | } |
michael@0 | 227 | |
michael@0 | 228 | unsigned getPos() |
michael@0 | 229 | { |
michael@0 | 230 | return pos; |
michael@0 | 231 | } |
michael@0 | 232 | |
michael@0 | 233 | void setPos(unsigned p) |
michael@0 | 234 | { |
michael@0 | 235 | pos = p; |
michael@0 | 236 | } |
michael@0 | 237 | |
michael@0 | 238 | bool atStart() |
michael@0 | 239 | { |
michael@0 | 240 | return pos == 0; |
michael@0 | 241 | } |
michael@0 | 242 | |
michael@0 | 243 | bool atEnd() |
michael@0 | 244 | { |
michael@0 | 245 | return pos == length; |
michael@0 | 246 | } |
michael@0 | 247 | |
michael@0 | 248 | unsigned end() |
michael@0 | 249 | { |
michael@0 | 250 | return length; |
michael@0 | 251 | } |
michael@0 | 252 | |
michael@0 | 253 | bool checkInput(unsigned count) |
michael@0 | 254 | { |
michael@0 | 255 | if (((pos + count) <= length) && ((pos + count) >= pos)) { |
michael@0 | 256 | pos += count; |
michael@0 | 257 | return true; |
michael@0 | 258 | } |
michael@0 | 259 | return false; |
michael@0 | 260 | } |
michael@0 | 261 | |
michael@0 | 262 | void uncheckInput(unsigned count) |
michael@0 | 263 | { |
michael@0 | 264 | if (pos < count) |
michael@0 | 265 | CRASH(); |
michael@0 | 266 | pos -= count; |
michael@0 | 267 | } |
michael@0 | 268 | |
michael@0 | 269 | bool atStart(unsigned negativePositionOffest) |
michael@0 | 270 | { |
michael@0 | 271 | return pos == negativePositionOffest; |
michael@0 | 272 | } |
michael@0 | 273 | |
michael@0 | 274 | bool atEnd(unsigned negativePositionOffest) |
michael@0 | 275 | { |
michael@0 | 276 | if (pos < negativePositionOffest) |
michael@0 | 277 | CRASH(); |
michael@0 | 278 | return (pos - negativePositionOffest) == length; |
michael@0 | 279 | } |
michael@0 | 280 | |
michael@0 | 281 | bool isAvailableInput(unsigned offset) |
michael@0 | 282 | { |
michael@0 | 283 | return (((pos + offset) <= length) && ((pos + offset) >= pos)); |
michael@0 | 284 | } |
michael@0 | 285 | |
michael@0 | 286 | private: |
michael@0 | 287 | const CharType* input; |
michael@0 | 288 | unsigned pos; |
michael@0 | 289 | unsigned length; |
michael@0 | 290 | }; |
michael@0 | 291 | |
michael@0 | 292 | bool testCharacterClass(CharacterClass* characterClass, int ch) |
michael@0 | 293 | { |
michael@0 | 294 | if (ch & 0xFF80) { |
michael@0 | 295 | for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i) |
michael@0 | 296 | if (ch == characterClass->m_matchesUnicode[i]) |
michael@0 | 297 | return true; |
michael@0 | 298 | for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i) |
michael@0 | 299 | if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end)) |
michael@0 | 300 | return true; |
michael@0 | 301 | } else { |
michael@0 | 302 | for (unsigned i = 0; i < characterClass->m_matches.size(); ++i) |
michael@0 | 303 | if (ch == characterClass->m_matches[i]) |
michael@0 | 304 | return true; |
michael@0 | 305 | for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i) |
michael@0 | 306 | if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end)) |
michael@0 | 307 | return true; |
michael@0 | 308 | } |
michael@0 | 309 | |
michael@0 | 310 | return false; |
michael@0 | 311 | } |
michael@0 | 312 | |
michael@0 | 313 | bool checkCharacter(int testChar, unsigned negativeInputOffset) |
michael@0 | 314 | { |
michael@0 | 315 | return testChar == input.readChecked(negativeInputOffset); |
michael@0 | 316 | } |
michael@0 | 317 | |
michael@0 | 318 | bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset) |
michael@0 | 319 | { |
michael@0 | 320 | int ch = input.readChecked(negativeInputOffset); |
michael@0 | 321 | return (loChar == ch) || (hiChar == ch); |
michael@0 | 322 | } |
michael@0 | 323 | |
michael@0 | 324 | bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset) |
michael@0 | 325 | { |
michael@0 | 326 | bool match = testCharacterClass(characterClass, input.readChecked(negativeInputOffset)); |
michael@0 | 327 | return invert ? !match : match; |
michael@0 | 328 | } |
michael@0 | 329 | |
michael@0 | 330 | bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset) |
michael@0 | 331 | { |
michael@0 | 332 | unsigned matchSize = (unsigned)(matchEnd - matchBegin); |
michael@0 | 333 | |
michael@0 | 334 | if (!input.checkInput(matchSize)) |
michael@0 | 335 | return false; |
michael@0 | 336 | |
michael@0 | 337 | if (pattern->m_ignoreCase) { |
michael@0 | 338 | for (unsigned i = 0; i < matchSize; ++i) { |
michael@0 | 339 | int oldCh = input.reread(matchBegin + i); |
michael@0 | 340 | int ch = input.readChecked(negativeInputOffset + matchSize - i); |
michael@0 | 341 | |
michael@0 | 342 | if (oldCh == ch) |
michael@0 | 343 | continue; |
michael@0 | 344 | |
michael@0 | 345 | // The definition for canonicalize (see ES 5.1, 15.10.2.8) means that |
michael@0 | 346 | // unicode values are never allowed to match against ascii ones. |
michael@0 | 347 | if (isASCII(oldCh) || isASCII(ch)) { |
michael@0 | 348 | if (toASCIIUpper(oldCh) == toASCIIUpper(ch)) |
michael@0 | 349 | continue; |
michael@0 | 350 | } else if (areCanonicallyEquivalent(oldCh, ch)) |
michael@0 | 351 | continue; |
michael@0 | 352 | |
michael@0 | 353 | input.uncheckInput(matchSize); |
michael@0 | 354 | return false; |
michael@0 | 355 | } |
michael@0 | 356 | } else { |
michael@0 | 357 | for (unsigned i = 0; i < matchSize; ++i) { |
michael@0 | 358 | if (!checkCharacter(input.reread(matchBegin + i), negativeInputOffset + matchSize - i)) { |
michael@0 | 359 | input.uncheckInput(matchSize); |
michael@0 | 360 | return false; |
michael@0 | 361 | } |
michael@0 | 362 | } |
michael@0 | 363 | } |
michael@0 | 364 | |
michael@0 | 365 | return true; |
michael@0 | 366 | } |
michael@0 | 367 | |
michael@0 | 368 | bool matchAssertionBOL(ByteTerm& term) |
michael@0 | 369 | { |
michael@0 | 370 | return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition + 1))); |
michael@0 | 371 | } |
michael@0 | 372 | |
michael@0 | 373 | bool matchAssertionEOL(ByteTerm& term) |
michael@0 | 374 | { |
michael@0 | 375 | if (term.inputPosition) |
michael@0 | 376 | return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition))); |
michael@0 | 377 | |
michael@0 | 378 | return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read())); |
michael@0 | 379 | } |
michael@0 | 380 | |
michael@0 | 381 | bool matchAssertionWordBoundary(ByteTerm& term) |
michael@0 | 382 | { |
michael@0 | 383 | bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition + 1)); |
michael@0 | 384 | bool readIsWordchar; |
michael@0 | 385 | if (term.inputPosition) |
michael@0 | 386 | readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition)); |
michael@0 | 387 | else |
michael@0 | 388 | readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read()); |
michael@0 | 389 | |
michael@0 | 390 | bool wordBoundary = prevIsWordchar != readIsWordchar; |
michael@0 | 391 | return term.invert() ? !wordBoundary : wordBoundary; |
michael@0 | 392 | } |
michael@0 | 393 | |
michael@0 | 394 | bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 395 | { |
michael@0 | 396 | BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); |
michael@0 | 397 | |
michael@0 | 398 | switch (term.atom.quantityType) { |
michael@0 | 399 | case QuantifierFixedCount: |
michael@0 | 400 | break; |
michael@0 | 401 | |
michael@0 | 402 | case QuantifierGreedy: |
michael@0 | 403 | if (backTrack->matchAmount) { |
michael@0 | 404 | --backTrack->matchAmount; |
michael@0 | 405 | input.uncheckInput(1); |
michael@0 | 406 | return true; |
michael@0 | 407 | } |
michael@0 | 408 | break; |
michael@0 | 409 | |
michael@0 | 410 | case QuantifierNonGreedy: |
michael@0 | 411 | if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { |
michael@0 | 412 | ++backTrack->matchAmount; |
michael@0 | 413 | if (checkCharacter(term.atom.patternCharacter, term.inputPosition + 1)) |
michael@0 | 414 | return true; |
michael@0 | 415 | } |
michael@0 | 416 | input.uncheckInput(backTrack->matchAmount); |
michael@0 | 417 | break; |
michael@0 | 418 | } |
michael@0 | 419 | |
michael@0 | 420 | return false; |
michael@0 | 421 | } |
michael@0 | 422 | |
michael@0 | 423 | bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 424 | { |
michael@0 | 425 | BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); |
michael@0 | 426 | |
michael@0 | 427 | switch (term.atom.quantityType) { |
michael@0 | 428 | case QuantifierFixedCount: |
michael@0 | 429 | break; |
michael@0 | 430 | |
michael@0 | 431 | case QuantifierGreedy: |
michael@0 | 432 | if (backTrack->matchAmount) { |
michael@0 | 433 | --backTrack->matchAmount; |
michael@0 | 434 | input.uncheckInput(1); |
michael@0 | 435 | return true; |
michael@0 | 436 | } |
michael@0 | 437 | break; |
michael@0 | 438 | |
michael@0 | 439 | case QuantifierNonGreedy: |
michael@0 | 440 | if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { |
michael@0 | 441 | ++backTrack->matchAmount; |
michael@0 | 442 | if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition + 1)) |
michael@0 | 443 | return true; |
michael@0 | 444 | } |
michael@0 | 445 | input.uncheckInput(backTrack->matchAmount); |
michael@0 | 446 | break; |
michael@0 | 447 | } |
michael@0 | 448 | |
michael@0 | 449 | return false; |
michael@0 | 450 | } |
michael@0 | 451 | |
michael@0 | 452 | bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 453 | { |
michael@0 | 454 | ASSERT(term.type == ByteTerm::TypeCharacterClass); |
michael@0 | 455 | BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); |
michael@0 | 456 | |
michael@0 | 457 | switch (term.atom.quantityType) { |
michael@0 | 458 | case QuantifierFixedCount: { |
michael@0 | 459 | for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { |
michael@0 | 460 | if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - matchAmount)) |
michael@0 | 461 | return false; |
michael@0 | 462 | } |
michael@0 | 463 | return true; |
michael@0 | 464 | } |
michael@0 | 465 | |
michael@0 | 466 | case QuantifierGreedy: { |
michael@0 | 467 | unsigned matchAmount = 0; |
michael@0 | 468 | while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) { |
michael@0 | 469 | if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) { |
michael@0 | 470 | input.uncheckInput(1); |
michael@0 | 471 | break; |
michael@0 | 472 | } |
michael@0 | 473 | ++matchAmount; |
michael@0 | 474 | } |
michael@0 | 475 | backTrack->matchAmount = matchAmount; |
michael@0 | 476 | |
michael@0 | 477 | return true; |
michael@0 | 478 | } |
michael@0 | 479 | |
michael@0 | 480 | case QuantifierNonGreedy: |
michael@0 | 481 | backTrack->matchAmount = 0; |
michael@0 | 482 | return true; |
michael@0 | 483 | } |
michael@0 | 484 | |
michael@0 | 485 | ASSERT_NOT_REACHED(); |
michael@0 | 486 | return false; |
michael@0 | 487 | } |
michael@0 | 488 | |
michael@0 | 489 | bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 490 | { |
michael@0 | 491 | ASSERT(term.type == ByteTerm::TypeCharacterClass); |
michael@0 | 492 | BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); |
michael@0 | 493 | |
michael@0 | 494 | switch (term.atom.quantityType) { |
michael@0 | 495 | case QuantifierFixedCount: |
michael@0 | 496 | break; |
michael@0 | 497 | |
michael@0 | 498 | case QuantifierGreedy: |
michael@0 | 499 | if (backTrack->matchAmount) { |
michael@0 | 500 | --backTrack->matchAmount; |
michael@0 | 501 | input.uncheckInput(1); |
michael@0 | 502 | return true; |
michael@0 | 503 | } |
michael@0 | 504 | break; |
michael@0 | 505 | |
michael@0 | 506 | case QuantifierNonGreedy: |
michael@0 | 507 | if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) { |
michael@0 | 508 | ++backTrack->matchAmount; |
michael@0 | 509 | if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + 1)) |
michael@0 | 510 | return true; |
michael@0 | 511 | } |
michael@0 | 512 | input.uncheckInput(backTrack->matchAmount); |
michael@0 | 513 | break; |
michael@0 | 514 | } |
michael@0 | 515 | |
michael@0 | 516 | return false; |
michael@0 | 517 | } |
michael@0 | 518 | |
michael@0 | 519 | bool matchBackReference(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 520 | { |
michael@0 | 521 | ASSERT(term.type == ByteTerm::TypeBackReference); |
michael@0 | 522 | BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation); |
michael@0 | 523 | |
michael@0 | 524 | unsigned matchBegin = output[(term.atom.subpatternId << 1)]; |
michael@0 | 525 | unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1]; |
michael@0 | 526 | |
michael@0 | 527 | // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that. |
michael@0 | 528 | // In this case the result of match is empty string like when it references to a parentheses with zero-width match. |
michael@0 | 529 | // Eg.: /(a\1)/ |
michael@0 | 530 | if (matchEnd == offsetNoMatch) |
michael@0 | 531 | return true; |
michael@0 | 532 | |
michael@0 | 533 | if (matchBegin == offsetNoMatch) |
michael@0 | 534 | return true; |
michael@0 | 535 | |
michael@0 | 536 | ASSERT(matchBegin <= matchEnd); |
michael@0 | 537 | |
michael@0 | 538 | if (matchBegin == matchEnd) |
michael@0 | 539 | return true; |
michael@0 | 540 | |
michael@0 | 541 | switch (term.atom.quantityType) { |
michael@0 | 542 | case QuantifierFixedCount: { |
michael@0 | 543 | backTrack->begin = input.getPos(); |
michael@0 | 544 | for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { |
michael@0 | 545 | if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { |
michael@0 | 546 | input.setPos(backTrack->begin); |
michael@0 | 547 | return false; |
michael@0 | 548 | } |
michael@0 | 549 | } |
michael@0 | 550 | return true; |
michael@0 | 551 | } |
michael@0 | 552 | |
michael@0 | 553 | case QuantifierGreedy: { |
michael@0 | 554 | unsigned matchAmount = 0; |
michael@0 | 555 | while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) |
michael@0 | 556 | ++matchAmount; |
michael@0 | 557 | backTrack->matchAmount = matchAmount; |
michael@0 | 558 | return true; |
michael@0 | 559 | } |
michael@0 | 560 | |
michael@0 | 561 | case QuantifierNonGreedy: |
michael@0 | 562 | backTrack->begin = input.getPos(); |
michael@0 | 563 | backTrack->matchAmount = 0; |
michael@0 | 564 | return true; |
michael@0 | 565 | } |
michael@0 | 566 | |
michael@0 | 567 | ASSERT_NOT_REACHED(); |
michael@0 | 568 | return false; |
michael@0 | 569 | } |
michael@0 | 570 | |
michael@0 | 571 | bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 572 | { |
michael@0 | 573 | ASSERT(term.type == ByteTerm::TypeBackReference); |
michael@0 | 574 | BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation); |
michael@0 | 575 | |
michael@0 | 576 | unsigned matchBegin = output[(term.atom.subpatternId << 1)]; |
michael@0 | 577 | unsigned matchEnd = output[(term.atom.subpatternId << 1) + 1]; |
michael@0 | 578 | |
michael@0 | 579 | if (matchBegin == offsetNoMatch) |
michael@0 | 580 | return false; |
michael@0 | 581 | |
michael@0 | 582 | ASSERT(matchBegin <= matchEnd); |
michael@0 | 583 | |
michael@0 | 584 | if (matchBegin == matchEnd) |
michael@0 | 585 | return false; |
michael@0 | 586 | |
michael@0 | 587 | switch (term.atom.quantityType) { |
michael@0 | 588 | case QuantifierFixedCount: |
michael@0 | 589 | // for quantityCount == 1, could rewind. |
michael@0 | 590 | input.setPos(backTrack->begin); |
michael@0 | 591 | break; |
michael@0 | 592 | |
michael@0 | 593 | case QuantifierGreedy: |
michael@0 | 594 | if (backTrack->matchAmount) { |
michael@0 | 595 | --backTrack->matchAmount; |
michael@0 | 596 | input.rewind(matchEnd - matchBegin); |
michael@0 | 597 | return true; |
michael@0 | 598 | } |
michael@0 | 599 | break; |
michael@0 | 600 | |
michael@0 | 601 | case QuantifierNonGreedy: |
michael@0 | 602 | if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) { |
michael@0 | 603 | ++backTrack->matchAmount; |
michael@0 | 604 | return true; |
michael@0 | 605 | } |
michael@0 | 606 | input.setPos(backTrack->begin); |
michael@0 | 607 | break; |
michael@0 | 608 | } |
michael@0 | 609 | |
michael@0 | 610 | return false; |
michael@0 | 611 | } |
michael@0 | 612 | |
michael@0 | 613 | void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context) |
michael@0 | 614 | { |
michael@0 | 615 | if (term.capture()) { |
michael@0 | 616 | unsigned subpatternId = term.atom.subpatternId; |
michael@0 | 617 | output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition; |
michael@0 | 618 | output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition; |
michael@0 | 619 | } |
michael@0 | 620 | } |
michael@0 | 621 | void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context) |
michael@0 | 622 | { |
michael@0 | 623 | unsigned firstSubpatternId = term.atom.subpatternId; |
michael@0 | 624 | unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns; |
michael@0 | 625 | context->restoreOutput(output, firstSubpatternId, count); |
michael@0 | 626 | } |
michael@0 | 627 | JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack) |
michael@0 | 628 | { |
michael@0 | 629 | while (backTrack->matchAmount) { |
michael@0 | 630 | ParenthesesDisjunctionContext* context = backTrack->lastContext; |
michael@0 | 631 | |
michael@0 | 632 | JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true); |
michael@0 | 633 | if (result == JSRegExpMatch) |
michael@0 | 634 | return JSRegExpMatch; |
michael@0 | 635 | |
michael@0 | 636 | resetMatches(term, context); |
michael@0 | 637 | popParenthesesDisjunctionContext(backTrack); |
michael@0 | 638 | freeParenthesesDisjunctionContext(context); |
michael@0 | 639 | |
michael@0 | 640 | if (result != JSRegExpNoMatch) |
michael@0 | 641 | return result; |
michael@0 | 642 | } |
michael@0 | 643 | |
michael@0 | 644 | return JSRegExpNoMatch; |
michael@0 | 645 | } |
michael@0 | 646 | |
michael@0 | 647 | bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 648 | { |
michael@0 | 649 | ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); |
michael@0 | 650 | ASSERT(term.atom.quantityCount == 1); |
michael@0 | 651 | |
michael@0 | 652 | BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); |
michael@0 | 653 | |
michael@0 | 654 | switch (term.atom.quantityType) { |
michael@0 | 655 | case QuantifierGreedy: { |
michael@0 | 656 | // set this speculatively; if we get to the parens end this will be true. |
michael@0 | 657 | backTrack->begin = input.getPos(); |
michael@0 | 658 | break; |
michael@0 | 659 | } |
michael@0 | 660 | case QuantifierNonGreedy: { |
michael@0 | 661 | backTrack->begin = notFound; |
michael@0 | 662 | context->term += term.atom.parenthesesWidth; |
michael@0 | 663 | return true; |
michael@0 | 664 | } |
michael@0 | 665 | case QuantifierFixedCount: |
michael@0 | 666 | break; |
michael@0 | 667 | } |
michael@0 | 668 | |
michael@0 | 669 | if (term.capture()) { |
michael@0 | 670 | unsigned subpatternId = term.atom.subpatternId; |
michael@0 | 671 | output[(subpatternId << 1)] = input.getPos() - term.inputPosition; |
michael@0 | 672 | } |
michael@0 | 673 | |
michael@0 | 674 | return true; |
michael@0 | 675 | } |
michael@0 | 676 | |
michael@0 | 677 | bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 678 | { |
michael@0 | 679 | ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); |
michael@0 | 680 | ASSERT(term.atom.quantityCount == 1); |
michael@0 | 681 | |
michael@0 | 682 | if (term.capture()) { |
michael@0 | 683 | unsigned subpatternId = term.atom.subpatternId; |
michael@0 | 684 | output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition; |
michael@0 | 685 | } |
michael@0 | 686 | |
michael@0 | 687 | if (term.atom.quantityType == QuantifierFixedCount) |
michael@0 | 688 | return true; |
michael@0 | 689 | |
michael@0 | 690 | BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); |
michael@0 | 691 | return backTrack->begin != input.getPos(); |
michael@0 | 692 | } |
michael@0 | 693 | |
michael@0 | 694 | bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 695 | { |
michael@0 | 696 | ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); |
michael@0 | 697 | ASSERT(term.atom.quantityCount == 1); |
michael@0 | 698 | |
michael@0 | 699 | BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); |
michael@0 | 700 | |
michael@0 | 701 | if (term.capture()) { |
michael@0 | 702 | unsigned subpatternId = term.atom.subpatternId; |
michael@0 | 703 | output[(subpatternId << 1)] = offsetNoMatch; |
michael@0 | 704 | output[(subpatternId << 1) + 1] = offsetNoMatch; |
michael@0 | 705 | } |
michael@0 | 706 | |
michael@0 | 707 | switch (term.atom.quantityType) { |
michael@0 | 708 | case QuantifierGreedy: |
michael@0 | 709 | // if we backtrack to this point, there is another chance - try matching nothing. |
michael@0 | 710 | ASSERT(backTrack->begin != notFound); |
michael@0 | 711 | backTrack->begin = notFound; |
michael@0 | 712 | context->term += term.atom.parenthesesWidth; |
michael@0 | 713 | return true; |
michael@0 | 714 | case QuantifierNonGreedy: |
michael@0 | 715 | ASSERT(backTrack->begin != notFound); |
michael@0 | 716 | case QuantifierFixedCount: |
michael@0 | 717 | break; |
michael@0 | 718 | } |
michael@0 | 719 | |
michael@0 | 720 | return false; |
michael@0 | 721 | } |
michael@0 | 722 | |
michael@0 | 723 | bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 724 | { |
michael@0 | 725 | ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); |
michael@0 | 726 | ASSERT(term.atom.quantityCount == 1); |
michael@0 | 727 | |
michael@0 | 728 | BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); |
michael@0 | 729 | |
michael@0 | 730 | switch (term.atom.quantityType) { |
michael@0 | 731 | case QuantifierGreedy: |
michael@0 | 732 | if (backTrack->begin == notFound) { |
michael@0 | 733 | context->term -= term.atom.parenthesesWidth; |
michael@0 | 734 | return false; |
michael@0 | 735 | } |
michael@0 | 736 | case QuantifierNonGreedy: |
michael@0 | 737 | if (backTrack->begin == notFound) { |
michael@0 | 738 | backTrack->begin = input.getPos(); |
michael@0 | 739 | if (term.capture()) { |
michael@0 | 740 | // Technically this access to inputPosition should be accessing the begin term's |
michael@0 | 741 | // inputPosition, but for repeats other than fixed these values should be |
michael@0 | 742 | // the same anyway! (We don't pre-check for greedy or non-greedy matches.) |
michael@0 | 743 | ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin); |
michael@0 | 744 | |
michael@0 | 745 | // Disabled, see bug 808478 |
michael@0 | 746 | #if 0 |
michael@0 | 747 | ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition); |
michael@0 | 748 | #endif |
michael@0 | 749 | unsigned subpatternId = term.atom.subpatternId; |
michael@0 | 750 | output[subpatternId << 1] = input.getPos() + term.inputPosition; |
michael@0 | 751 | } |
michael@0 | 752 | context->term -= term.atom.parenthesesWidth; |
michael@0 | 753 | return true; |
michael@0 | 754 | } |
michael@0 | 755 | case QuantifierFixedCount: |
michael@0 | 756 | break; |
michael@0 | 757 | } |
michael@0 | 758 | |
michael@0 | 759 | return false; |
michael@0 | 760 | } |
michael@0 | 761 | |
michael@0 | 762 | bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 763 | { |
michael@0 | 764 | ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); |
michael@0 | 765 | ASSERT(term.atom.quantityType == QuantifierGreedy); |
michael@0 | 766 | ASSERT(term.atom.quantityCount == quantifyInfinite); |
michael@0 | 767 | ASSERT(!term.capture()); |
michael@0 | 768 | |
michael@0 | 769 | BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation); |
michael@0 | 770 | backTrack->begin = input.getPos(); |
michael@0 | 771 | return true; |
michael@0 | 772 | } |
michael@0 | 773 | |
michael@0 | 774 | bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 775 | { |
michael@0 | 776 | ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd); |
michael@0 | 777 | |
michael@0 | 778 | BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation); |
michael@0 | 779 | // Empty match is a failed match. |
michael@0 | 780 | if (backTrack->begin == input.getPos()) |
michael@0 | 781 | return false; |
michael@0 | 782 | |
michael@0 | 783 | // Successful match! Okay, what's next? - loop around and try to match moar! |
michael@0 | 784 | context->term -= (term.atom.parenthesesWidth + 1); |
michael@0 | 785 | return true; |
michael@0 | 786 | } |
michael@0 | 787 | |
michael@0 | 788 | bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 789 | { |
michael@0 | 790 | ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); |
michael@0 | 791 | ASSERT(term.atom.quantityType == QuantifierGreedy); |
michael@0 | 792 | ASSERT(term.atom.quantityCount == quantifyInfinite); |
michael@0 | 793 | ASSERT(!term.capture()); |
michael@0 | 794 | |
michael@0 | 795 | // If we backtrack to this point, we have failed to match this iteration of the parens. |
michael@0 | 796 | // Since this is greedy / zero minimum a failed is also accepted as a match! |
michael@0 | 797 | context->term += term.atom.parenthesesWidth; |
michael@0 | 798 | return true; |
michael@0 | 799 | } |
michael@0 | 800 | |
michael@0 | 801 | bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*) |
michael@0 | 802 | { |
michael@0 | 803 | // 'Terminal' parentheses are at the end of the regex, and as such a match past end |
michael@0 | 804 | // should always be returned as a successful match - we should never backtrack to here. |
michael@0 | 805 | ASSERT_NOT_REACHED(); |
michael@0 | 806 | return false; |
michael@0 | 807 | } |
michael@0 | 808 | |
michael@0 | 809 | bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 810 | { |
michael@0 | 811 | ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); |
michael@0 | 812 | ASSERT(term.atom.quantityCount == 1); |
michael@0 | 813 | |
michael@0 | 814 | BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); |
michael@0 | 815 | |
michael@0 | 816 | backTrack->begin = input.getPos(); |
michael@0 | 817 | return true; |
michael@0 | 818 | } |
michael@0 | 819 | |
michael@0 | 820 | bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 821 | { |
michael@0 | 822 | ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); |
michael@0 | 823 | ASSERT(term.atom.quantityCount == 1); |
michael@0 | 824 | |
michael@0 | 825 | BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); |
michael@0 | 826 | |
michael@0 | 827 | input.setPos(backTrack->begin); |
michael@0 | 828 | |
michael@0 | 829 | // We've reached the end of the parens; if they are inverted, this is failure. |
michael@0 | 830 | if (term.invert()) { |
michael@0 | 831 | context->term -= term.atom.parenthesesWidth; |
michael@0 | 832 | return false; |
michael@0 | 833 | } |
michael@0 | 834 | |
michael@0 | 835 | return true; |
michael@0 | 836 | } |
michael@0 | 837 | |
michael@0 | 838 | bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 839 | { |
michael@0 | 840 | ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); |
michael@0 | 841 | ASSERT(term.atom.quantityCount == 1); |
michael@0 | 842 | |
michael@0 | 843 | // We've failed to match parens; if they are inverted, this is win! |
michael@0 | 844 | if (term.invert()) { |
michael@0 | 845 | context->term += term.atom.parenthesesWidth; |
michael@0 | 846 | return true; |
michael@0 | 847 | } |
michael@0 | 848 | |
michael@0 | 849 | return false; |
michael@0 | 850 | } |
michael@0 | 851 | |
michael@0 | 852 | bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 853 | { |
michael@0 | 854 | ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); |
michael@0 | 855 | ASSERT(term.atom.quantityCount == 1); |
michael@0 | 856 | |
michael@0 | 857 | BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); |
michael@0 | 858 | |
michael@0 | 859 | input.setPos(backTrack->begin); |
michael@0 | 860 | |
michael@0 | 861 | context->term -= term.atom.parenthesesWidth; |
michael@0 | 862 | return false; |
michael@0 | 863 | } |
michael@0 | 864 | |
michael@0 | 865 | JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 866 | { |
michael@0 | 867 | ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); |
michael@0 | 868 | |
michael@0 | 869 | BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation); |
michael@0 | 870 | ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction; |
michael@0 | 871 | |
michael@0 | 872 | backTrack->matchAmount = 0; |
michael@0 | 873 | backTrack->lastContext = 0; |
michael@0 | 874 | |
michael@0 | 875 | switch (term.atom.quantityType) { |
michael@0 | 876 | case QuantifierFixedCount: { |
michael@0 | 877 | // While we haven't yet reached our fixed limit, |
michael@0 | 878 | while (backTrack->matchAmount < term.atom.quantityCount) { |
michael@0 | 879 | // Try to do a match, and it it succeeds, add it to the list. |
michael@0 | 880 | ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); |
michael@0 | 881 | JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); |
michael@0 | 882 | if (result == JSRegExpMatch) |
michael@0 | 883 | appendParenthesesDisjunctionContext(backTrack, context); |
michael@0 | 884 | else { |
michael@0 | 885 | // The match failed; try to find an alternate point to carry on from. |
michael@0 | 886 | resetMatches(term, context); |
michael@0 | 887 | freeParenthesesDisjunctionContext(context); |
michael@0 | 888 | |
michael@0 | 889 | if (result != JSRegExpNoMatch) |
michael@0 | 890 | return result; |
michael@0 | 891 | JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack); |
michael@0 | 892 | if (backtrackResult != JSRegExpMatch) |
michael@0 | 893 | return backtrackResult; |
michael@0 | 894 | } |
michael@0 | 895 | } |
michael@0 | 896 | |
michael@0 | 897 | ASSERT(backTrack->matchAmount == term.atom.quantityCount); |
michael@0 | 898 | ParenthesesDisjunctionContext* context = backTrack->lastContext; |
michael@0 | 899 | recordParenthesesMatch(term, context); |
michael@0 | 900 | return JSRegExpMatch; |
michael@0 | 901 | } |
michael@0 | 902 | |
michael@0 | 903 | case QuantifierGreedy: { |
michael@0 | 904 | while (backTrack->matchAmount < term.atom.quantityCount) { |
michael@0 | 905 | ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); |
michael@0 | 906 | JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); |
michael@0 | 907 | if (result == JSRegExpMatch) |
michael@0 | 908 | appendParenthesesDisjunctionContext(backTrack, context); |
michael@0 | 909 | else { |
michael@0 | 910 | resetMatches(term, context); |
michael@0 | 911 | freeParenthesesDisjunctionContext(context); |
michael@0 | 912 | |
michael@0 | 913 | if (result != JSRegExpNoMatch) |
michael@0 | 914 | return result; |
michael@0 | 915 | |
michael@0 | 916 | break; |
michael@0 | 917 | } |
michael@0 | 918 | } |
michael@0 | 919 | |
michael@0 | 920 | if (backTrack->matchAmount) { |
michael@0 | 921 | ParenthesesDisjunctionContext* context = backTrack->lastContext; |
michael@0 | 922 | recordParenthesesMatch(term, context); |
michael@0 | 923 | } |
michael@0 | 924 | return JSRegExpMatch; |
michael@0 | 925 | } |
michael@0 | 926 | |
michael@0 | 927 | case QuantifierNonGreedy: |
michael@0 | 928 | return JSRegExpMatch; |
michael@0 | 929 | } |
michael@0 | 930 | |
michael@0 | 931 | ASSERT_NOT_REACHED(); |
michael@0 | 932 | return JSRegExpErrorNoMatch; |
michael@0 | 933 | } |
michael@0 | 934 | |
michael@0 | 935 | // Rules for backtracking differ depending on whether this is greedy or non-greedy. |
michael@0 | 936 | // |
michael@0 | 937 | // Greedy matches never should try just adding more - you should already have done |
michael@0 | 938 | // the 'more' cases. Always backtrack, at least a leetle bit. However cases where |
michael@0 | 939 | // you backtrack an item off the list needs checking, since we'll never have matched |
michael@0 | 940 | // the one less case. Tracking forwards, still add as much as possible. |
michael@0 | 941 | // |
michael@0 | 942 | // Non-greedy, we've already done the one less case, so don't match on popping. |
michael@0 | 943 | // We haven't done the one more case, so always try to add that. |
michael@0 | 944 | // |
michael@0 | 945 | JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 946 | { |
michael@0 | 947 | ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); |
michael@0 | 948 | |
michael@0 | 949 | BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation); |
michael@0 | 950 | ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction; |
michael@0 | 951 | |
michael@0 | 952 | switch (term.atom.quantityType) { |
michael@0 | 953 | case QuantifierFixedCount: { |
michael@0 | 954 | ASSERT(backTrack->matchAmount == term.atom.quantityCount); |
michael@0 | 955 | |
michael@0 | 956 | ParenthesesDisjunctionContext* context = 0; |
michael@0 | 957 | JSRegExpResult result = parenthesesDoBacktrack(term, backTrack); |
michael@0 | 958 | |
michael@0 | 959 | if (result != JSRegExpMatch) |
michael@0 | 960 | return result; |
michael@0 | 961 | |
michael@0 | 962 | // While we haven't yet reached our fixed limit, |
michael@0 | 963 | while (backTrack->matchAmount < term.atom.quantityCount) { |
michael@0 | 964 | // Try to do a match, and it it succeeds, add it to the list. |
michael@0 | 965 | context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); |
michael@0 | 966 | result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)); |
michael@0 | 967 | |
michael@0 | 968 | if (result == JSRegExpMatch) |
michael@0 | 969 | appendParenthesesDisjunctionContext(backTrack, context); |
michael@0 | 970 | else { |
michael@0 | 971 | // The match failed; try to find an alternate point to carry on from. |
michael@0 | 972 | resetMatches(term, context); |
michael@0 | 973 | freeParenthesesDisjunctionContext(context); |
michael@0 | 974 | |
michael@0 | 975 | if (result != JSRegExpNoMatch) |
michael@0 | 976 | return result; |
michael@0 | 977 | JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack); |
michael@0 | 978 | if (backtrackResult != JSRegExpMatch) |
michael@0 | 979 | return backtrackResult; |
michael@0 | 980 | } |
michael@0 | 981 | } |
michael@0 | 982 | |
michael@0 | 983 | ASSERT(backTrack->matchAmount == term.atom.quantityCount); |
michael@0 | 984 | context = backTrack->lastContext; |
michael@0 | 985 | recordParenthesesMatch(term, context); |
michael@0 | 986 | return JSRegExpMatch; |
michael@0 | 987 | } |
michael@0 | 988 | |
michael@0 | 989 | case QuantifierGreedy: { |
michael@0 | 990 | if (!backTrack->matchAmount) |
michael@0 | 991 | return JSRegExpNoMatch; |
michael@0 | 992 | |
michael@0 | 993 | ParenthesesDisjunctionContext* context = backTrack->lastContext; |
michael@0 | 994 | JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true); |
michael@0 | 995 | if (result == JSRegExpMatch) { |
michael@0 | 996 | while (backTrack->matchAmount < term.atom.quantityCount) { |
michael@0 | 997 | ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); |
michael@0 | 998 | JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); |
michael@0 | 999 | if (parenthesesResult == JSRegExpMatch) |
michael@0 | 1000 | appendParenthesesDisjunctionContext(backTrack, context); |
michael@0 | 1001 | else { |
michael@0 | 1002 | resetMatches(term, context); |
michael@0 | 1003 | freeParenthesesDisjunctionContext(context); |
michael@0 | 1004 | |
michael@0 | 1005 | if (parenthesesResult != JSRegExpNoMatch) |
michael@0 | 1006 | return parenthesesResult; |
michael@0 | 1007 | |
michael@0 | 1008 | break; |
michael@0 | 1009 | } |
michael@0 | 1010 | } |
michael@0 | 1011 | } else { |
michael@0 | 1012 | // Avoid a topcrash before it occurs. |
michael@0 | 1013 | if (!backTrack->lastContext) { |
michael@0 | 1014 | ASSERT(!"Tripped Bug 856796!"); |
michael@0 | 1015 | return JSRegExpErrorInternal; |
michael@0 | 1016 | } |
michael@0 | 1017 | |
michael@0 | 1018 | resetMatches(term, context); |
michael@0 | 1019 | popParenthesesDisjunctionContext(backTrack); |
michael@0 | 1020 | freeParenthesesDisjunctionContext(context); |
michael@0 | 1021 | |
michael@0 | 1022 | if (result != JSRegExpNoMatch) |
michael@0 | 1023 | return result; |
michael@0 | 1024 | } |
michael@0 | 1025 | |
michael@0 | 1026 | if (backTrack->matchAmount) { |
michael@0 | 1027 | ParenthesesDisjunctionContext* context = backTrack->lastContext; |
michael@0 | 1028 | recordParenthesesMatch(term, context); |
michael@0 | 1029 | } |
michael@0 | 1030 | return JSRegExpMatch; |
michael@0 | 1031 | } |
michael@0 | 1032 | |
michael@0 | 1033 | case QuantifierNonGreedy: { |
michael@0 | 1034 | // If we've not reached the limit, try to add one more match. |
michael@0 | 1035 | if (backTrack->matchAmount < term.atom.quantityCount) { |
michael@0 | 1036 | ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term); |
michael@0 | 1037 | JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)); |
michael@0 | 1038 | if (result == JSRegExpMatch) { |
michael@0 | 1039 | appendParenthesesDisjunctionContext(backTrack, context); |
michael@0 | 1040 | recordParenthesesMatch(term, context); |
michael@0 | 1041 | return JSRegExpMatch; |
michael@0 | 1042 | } |
michael@0 | 1043 | |
michael@0 | 1044 | resetMatches(term, context); |
michael@0 | 1045 | freeParenthesesDisjunctionContext(context); |
michael@0 | 1046 | |
michael@0 | 1047 | if (result != JSRegExpNoMatch) |
michael@0 | 1048 | return result; |
michael@0 | 1049 | } |
michael@0 | 1050 | |
michael@0 | 1051 | // Nope - okay backtrack looking for an alternative. |
michael@0 | 1052 | while (backTrack->matchAmount) { |
michael@0 | 1053 | ParenthesesDisjunctionContext* context = backTrack->lastContext; |
michael@0 | 1054 | JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true); |
michael@0 | 1055 | if (result == JSRegExpMatch) { |
michael@0 | 1056 | // successful backtrack! we're back in the game! |
michael@0 | 1057 | if (backTrack->matchAmount) { |
michael@0 | 1058 | context = backTrack->lastContext; |
michael@0 | 1059 | recordParenthesesMatch(term, context); |
michael@0 | 1060 | } |
michael@0 | 1061 | return JSRegExpMatch; |
michael@0 | 1062 | } |
michael@0 | 1063 | |
michael@0 | 1064 | // Avoid a topcrash before it occurs. |
michael@0 | 1065 | if (!backTrack->lastContext) { |
michael@0 | 1066 | ASSERT(!"Tripped Bug 856796!"); |
michael@0 | 1067 | return JSRegExpErrorInternal; |
michael@0 | 1068 | } |
michael@0 | 1069 | |
michael@0 | 1070 | // pop a match off the stack |
michael@0 | 1071 | resetMatches(term, context); |
michael@0 | 1072 | popParenthesesDisjunctionContext(backTrack); |
michael@0 | 1073 | freeParenthesesDisjunctionContext(context); |
michael@0 | 1074 | |
michael@0 | 1075 | if (result != JSRegExpNoMatch) |
michael@0 | 1076 | return result; |
michael@0 | 1077 | } |
michael@0 | 1078 | |
michael@0 | 1079 | return JSRegExpNoMatch; |
michael@0 | 1080 | } |
michael@0 | 1081 | } |
michael@0 | 1082 | |
michael@0 | 1083 | ASSERT_NOT_REACHED(); |
michael@0 | 1084 | return JSRegExpErrorNoMatch; |
michael@0 | 1085 | } |
michael@0 | 1086 | |
michael@0 | 1087 | bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context) |
michael@0 | 1088 | { |
michael@0 | 1089 | UNUSED_PARAM(term); |
michael@0 | 1090 | unsigned matchBegin = context->matchBegin; |
michael@0 | 1091 | |
michael@0 | 1092 | if (matchBegin) { |
michael@0 | 1093 | for (matchBegin--; true; matchBegin--) { |
michael@0 | 1094 | if (testCharacterClass(pattern->newlineCharacterClass, input.reread(matchBegin))) { |
michael@0 | 1095 | ++matchBegin; |
michael@0 | 1096 | break; |
michael@0 | 1097 | } |
michael@0 | 1098 | |
michael@0 | 1099 | if (!matchBegin) |
michael@0 | 1100 | break; |
michael@0 | 1101 | } |
michael@0 | 1102 | } |
michael@0 | 1103 | |
michael@0 | 1104 | unsigned matchEnd = input.getPos(); |
michael@0 | 1105 | |
michael@0 | 1106 | for (; (matchEnd != input.end()) |
michael@0 | 1107 | && (!testCharacterClass(pattern->newlineCharacterClass, input.reread(matchEnd))); matchEnd++) { } |
michael@0 | 1108 | |
michael@0 | 1109 | if (((matchBegin && term.anchors.m_bol) |
michael@0 | 1110 | || ((matchEnd != input.end()) && term.anchors.m_eol)) |
michael@0 | 1111 | && !pattern->m_multiline) |
michael@0 | 1112 | return false; |
michael@0 | 1113 | |
michael@0 | 1114 | context->matchBegin = matchBegin; |
michael@0 | 1115 | context->matchEnd = matchEnd; |
michael@0 | 1116 | return true; |
michael@0 | 1117 | } |
michael@0 | 1118 | |
michael@0 | 1119 | #define MATCH_NEXT() { ++context->term; goto matchAgain; } |
michael@0 | 1120 | #define BACKTRACK() { --context->term; goto backtrack; } |
michael@0 | 1121 | #define currentTerm() (disjunction->terms[context->term]) |
michael@0 | 1122 | JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false) |
michael@0 | 1123 | { |
michael@0 | 1124 | if (!--remainingMatchCount) |
michael@0 | 1125 | return JSRegExpErrorHitLimit; |
michael@0 | 1126 | |
michael@0 | 1127 | if (btrack) |
michael@0 | 1128 | BACKTRACK(); |
michael@0 | 1129 | |
michael@0 | 1130 | context->matchBegin = input.getPos(); |
michael@0 | 1131 | context->term = 0; |
michael@0 | 1132 | |
michael@0 | 1133 | matchAgain: |
michael@0 | 1134 | ASSERT(context->term < static_cast<int>(disjunction->terms.size())); |
michael@0 | 1135 | |
michael@0 | 1136 | // Prevent jank resulting from getting stuck in Yarr for a long time. |
michael@0 | 1137 | if (!CheckForInterrupt(this->cx)) |
michael@0 | 1138 | return JSRegExpErrorInternal; |
michael@0 | 1139 | |
michael@0 | 1140 | switch (currentTerm().type) { |
michael@0 | 1141 | case ByteTerm::TypeSubpatternBegin: |
michael@0 | 1142 | MATCH_NEXT(); |
michael@0 | 1143 | case ByteTerm::TypeSubpatternEnd: |
michael@0 | 1144 | context->matchEnd = input.getPos(); |
michael@0 | 1145 | return JSRegExpMatch; |
michael@0 | 1146 | |
michael@0 | 1147 | case ByteTerm::TypeBodyAlternativeBegin: |
michael@0 | 1148 | MATCH_NEXT(); |
michael@0 | 1149 | case ByteTerm::TypeBodyAlternativeDisjunction: |
michael@0 | 1150 | case ByteTerm::TypeBodyAlternativeEnd: |
michael@0 | 1151 | context->matchEnd = input.getPos(); |
michael@0 | 1152 | return JSRegExpMatch; |
michael@0 | 1153 | |
michael@0 | 1154 | case ByteTerm::TypeAlternativeBegin: |
michael@0 | 1155 | MATCH_NEXT(); |
michael@0 | 1156 | case ByteTerm::TypeAlternativeDisjunction: |
michael@0 | 1157 | case ByteTerm::TypeAlternativeEnd: { |
michael@0 | 1158 | int offset = currentTerm().alternative.end; |
michael@0 | 1159 | BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation); |
michael@0 | 1160 | backTrack->offset = offset; |
michael@0 | 1161 | context->term += offset; |
michael@0 | 1162 | MATCH_NEXT(); |
michael@0 | 1163 | } |
michael@0 | 1164 | |
michael@0 | 1165 | case ByteTerm::TypeAssertionBOL: |
michael@0 | 1166 | if (matchAssertionBOL(currentTerm())) |
michael@0 | 1167 | MATCH_NEXT(); |
michael@0 | 1168 | BACKTRACK(); |
michael@0 | 1169 | case ByteTerm::TypeAssertionEOL: |
michael@0 | 1170 | if (matchAssertionEOL(currentTerm())) |
michael@0 | 1171 | MATCH_NEXT(); |
michael@0 | 1172 | BACKTRACK(); |
michael@0 | 1173 | case ByteTerm::TypeAssertionWordBoundary: |
michael@0 | 1174 | if (matchAssertionWordBoundary(currentTerm())) |
michael@0 | 1175 | MATCH_NEXT(); |
michael@0 | 1176 | BACKTRACK(); |
michael@0 | 1177 | |
michael@0 | 1178 | case ByteTerm::TypePatternCharacterOnce: |
michael@0 | 1179 | case ByteTerm::TypePatternCharacterFixed: { |
michael@0 | 1180 | for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { |
michael@0 | 1181 | if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) |
michael@0 | 1182 | BACKTRACK(); |
michael@0 | 1183 | } |
michael@0 | 1184 | MATCH_NEXT(); |
michael@0 | 1185 | } |
michael@0 | 1186 | case ByteTerm::TypePatternCharacterGreedy: { |
michael@0 | 1187 | BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); |
michael@0 | 1188 | unsigned matchAmount = 0; |
michael@0 | 1189 | while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) { |
michael@0 | 1190 | if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + 1)) { |
michael@0 | 1191 | input.uncheckInput(1); |
michael@0 | 1192 | break; |
michael@0 | 1193 | } |
michael@0 | 1194 | ++matchAmount; |
michael@0 | 1195 | } |
michael@0 | 1196 | backTrack->matchAmount = matchAmount; |
michael@0 | 1197 | |
michael@0 | 1198 | MATCH_NEXT(); |
michael@0 | 1199 | } |
michael@0 | 1200 | case ByteTerm::TypePatternCharacterNonGreedy: { |
michael@0 | 1201 | BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); |
michael@0 | 1202 | backTrack->matchAmount = 0; |
michael@0 | 1203 | MATCH_NEXT(); |
michael@0 | 1204 | } |
michael@0 | 1205 | |
michael@0 | 1206 | case ByteTerm::TypePatternCasedCharacterOnce: |
michael@0 | 1207 | case ByteTerm::TypePatternCasedCharacterFixed: { |
michael@0 | 1208 | for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { |
michael@0 | 1209 | if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) |
michael@0 | 1210 | BACKTRACK(); |
michael@0 | 1211 | } |
michael@0 | 1212 | MATCH_NEXT(); |
michael@0 | 1213 | } |
michael@0 | 1214 | case ByteTerm::TypePatternCasedCharacterGreedy: { |
michael@0 | 1215 | BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); |
michael@0 | 1216 | unsigned matchAmount = 0; |
michael@0 | 1217 | while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) { |
michael@0 | 1218 | if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + 1)) { |
michael@0 | 1219 | input.uncheckInput(1); |
michael@0 | 1220 | break; |
michael@0 | 1221 | } |
michael@0 | 1222 | ++matchAmount; |
michael@0 | 1223 | } |
michael@0 | 1224 | backTrack->matchAmount = matchAmount; |
michael@0 | 1225 | |
michael@0 | 1226 | MATCH_NEXT(); |
michael@0 | 1227 | } |
michael@0 | 1228 | case ByteTerm::TypePatternCasedCharacterNonGreedy: { |
michael@0 | 1229 | BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); |
michael@0 | 1230 | backTrack->matchAmount = 0; |
michael@0 | 1231 | MATCH_NEXT(); |
michael@0 | 1232 | } |
michael@0 | 1233 | |
michael@0 | 1234 | case ByteTerm::TypeCharacterClass: |
michael@0 | 1235 | if (matchCharacterClass(currentTerm(), context)) |
michael@0 | 1236 | MATCH_NEXT(); |
michael@0 | 1237 | BACKTRACK(); |
michael@0 | 1238 | case ByteTerm::TypeBackReference: |
michael@0 | 1239 | if (matchBackReference(currentTerm(), context)) |
michael@0 | 1240 | MATCH_NEXT(); |
michael@0 | 1241 | BACKTRACK(); |
michael@0 | 1242 | case ByteTerm::TypeParenthesesSubpattern: { |
michael@0 | 1243 | JSRegExpResult result = matchParentheses(currentTerm(), context); |
michael@0 | 1244 | |
michael@0 | 1245 | if (result == JSRegExpMatch) { |
michael@0 | 1246 | MATCH_NEXT(); |
michael@0 | 1247 | } else if (result != JSRegExpNoMatch) |
michael@0 | 1248 | return result; |
michael@0 | 1249 | |
michael@0 | 1250 | BACKTRACK(); |
michael@0 | 1251 | } |
michael@0 | 1252 | case ByteTerm::TypeParenthesesSubpatternOnceBegin: |
michael@0 | 1253 | if (matchParenthesesOnceBegin(currentTerm(), context)) |
michael@0 | 1254 | MATCH_NEXT(); |
michael@0 | 1255 | BACKTRACK(); |
michael@0 | 1256 | case ByteTerm::TypeParenthesesSubpatternOnceEnd: |
michael@0 | 1257 | if (matchParenthesesOnceEnd(currentTerm(), context)) |
michael@0 | 1258 | MATCH_NEXT(); |
michael@0 | 1259 | BACKTRACK(); |
michael@0 | 1260 | case ByteTerm::TypeParenthesesSubpatternTerminalBegin: |
michael@0 | 1261 | if (matchParenthesesTerminalBegin(currentTerm(), context)) |
michael@0 | 1262 | MATCH_NEXT(); |
michael@0 | 1263 | BACKTRACK(); |
michael@0 | 1264 | case ByteTerm::TypeParenthesesSubpatternTerminalEnd: |
michael@0 | 1265 | if (matchParenthesesTerminalEnd(currentTerm(), context)) |
michael@0 | 1266 | MATCH_NEXT(); |
michael@0 | 1267 | BACKTRACK(); |
michael@0 | 1268 | case ByteTerm::TypeParentheticalAssertionBegin: |
michael@0 | 1269 | if (matchParentheticalAssertionBegin(currentTerm(), context)) |
michael@0 | 1270 | MATCH_NEXT(); |
michael@0 | 1271 | BACKTRACK(); |
michael@0 | 1272 | case ByteTerm::TypeParentheticalAssertionEnd: |
michael@0 | 1273 | if (matchParentheticalAssertionEnd(currentTerm(), context)) |
michael@0 | 1274 | MATCH_NEXT(); |
michael@0 | 1275 | BACKTRACK(); |
michael@0 | 1276 | |
michael@0 | 1277 | case ByteTerm::TypeCheckInput: |
michael@0 | 1278 | if (input.checkInput(currentTerm().checkInputCount)) |
michael@0 | 1279 | MATCH_NEXT(); |
michael@0 | 1280 | BACKTRACK(); |
michael@0 | 1281 | |
michael@0 | 1282 | case ByteTerm::TypeUncheckInput: |
michael@0 | 1283 | input.uncheckInput(currentTerm().checkInputCount); |
michael@0 | 1284 | MATCH_NEXT(); |
michael@0 | 1285 | |
michael@0 | 1286 | case ByteTerm::TypeDotStarEnclosure: |
michael@0 | 1287 | if (matchDotStarEnclosure(currentTerm(), context)) |
michael@0 | 1288 | return JSRegExpMatch; |
michael@0 | 1289 | BACKTRACK(); |
michael@0 | 1290 | } |
michael@0 | 1291 | |
michael@0 | 1292 | // We should never fall-through to here. |
michael@0 | 1293 | ASSERT_NOT_REACHED(); |
michael@0 | 1294 | |
michael@0 | 1295 | backtrack: |
michael@0 | 1296 | ASSERT(context->term < static_cast<int>(disjunction->terms.size())); |
michael@0 | 1297 | |
michael@0 | 1298 | // Prevent jank resulting from getting stuck in Yarr for a long time. |
michael@0 | 1299 | if (!CheckForInterrupt(this->cx)) |
michael@0 | 1300 | return JSRegExpErrorInternal; |
michael@0 | 1301 | |
michael@0 | 1302 | switch (currentTerm().type) { |
michael@0 | 1303 | case ByteTerm::TypeSubpatternBegin: |
michael@0 | 1304 | return JSRegExpNoMatch; |
michael@0 | 1305 | case ByteTerm::TypeSubpatternEnd: |
michael@0 | 1306 | ASSERT_NOT_REACHED(); |
michael@0 | 1307 | |
michael@0 | 1308 | case ByteTerm::TypeBodyAlternativeBegin: |
michael@0 | 1309 | case ByteTerm::TypeBodyAlternativeDisjunction: { |
michael@0 | 1310 | int offset = currentTerm().alternative.next; |
michael@0 | 1311 | context->term += offset; |
michael@0 | 1312 | if (offset > 0) |
michael@0 | 1313 | MATCH_NEXT(); |
michael@0 | 1314 | |
michael@0 | 1315 | if (input.atEnd()) |
michael@0 | 1316 | return JSRegExpNoMatch; |
michael@0 | 1317 | |
michael@0 | 1318 | input.next(); |
michael@0 | 1319 | |
michael@0 | 1320 | context->matchBegin = input.getPos(); |
michael@0 | 1321 | |
michael@0 | 1322 | if (currentTerm().alternative.onceThrough) |
michael@0 | 1323 | context->term += currentTerm().alternative.next; |
michael@0 | 1324 | |
michael@0 | 1325 | MATCH_NEXT(); |
michael@0 | 1326 | } |
michael@0 | 1327 | case ByteTerm::TypeBodyAlternativeEnd: |
michael@0 | 1328 | ASSERT_NOT_REACHED(); |
michael@0 | 1329 | |
michael@0 | 1330 | case ByteTerm::TypeAlternativeBegin: |
michael@0 | 1331 | case ByteTerm::TypeAlternativeDisjunction: { |
michael@0 | 1332 | int offset = currentTerm().alternative.next; |
michael@0 | 1333 | context->term += offset; |
michael@0 | 1334 | if (offset > 0) |
michael@0 | 1335 | MATCH_NEXT(); |
michael@0 | 1336 | BACKTRACK(); |
michael@0 | 1337 | } |
michael@0 | 1338 | case ByteTerm::TypeAlternativeEnd: { |
michael@0 | 1339 | // We should never backtrack back into an alternative of the main body of the regex. |
michael@0 | 1340 | BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation); |
michael@0 | 1341 | unsigned offset = backTrack->offset; |
michael@0 | 1342 | context->term -= offset; |
michael@0 | 1343 | BACKTRACK(); |
michael@0 | 1344 | } |
michael@0 | 1345 | |
michael@0 | 1346 | case ByteTerm::TypeAssertionBOL: |
michael@0 | 1347 | case ByteTerm::TypeAssertionEOL: |
michael@0 | 1348 | case ByteTerm::TypeAssertionWordBoundary: |
michael@0 | 1349 | BACKTRACK(); |
michael@0 | 1350 | |
michael@0 | 1351 | case ByteTerm::TypePatternCharacterOnce: |
michael@0 | 1352 | case ByteTerm::TypePatternCharacterFixed: |
michael@0 | 1353 | case ByteTerm::TypePatternCharacterGreedy: |
michael@0 | 1354 | case ByteTerm::TypePatternCharacterNonGreedy: |
michael@0 | 1355 | if (backtrackPatternCharacter(currentTerm(), context)) |
michael@0 | 1356 | MATCH_NEXT(); |
michael@0 | 1357 | BACKTRACK(); |
michael@0 | 1358 | case ByteTerm::TypePatternCasedCharacterOnce: |
michael@0 | 1359 | case ByteTerm::TypePatternCasedCharacterFixed: |
michael@0 | 1360 | case ByteTerm::TypePatternCasedCharacterGreedy: |
michael@0 | 1361 | case ByteTerm::TypePatternCasedCharacterNonGreedy: |
michael@0 | 1362 | if (backtrackPatternCasedCharacter(currentTerm(), context)) |
michael@0 | 1363 | MATCH_NEXT(); |
michael@0 | 1364 | BACKTRACK(); |
michael@0 | 1365 | case ByteTerm::TypeCharacterClass: |
michael@0 | 1366 | if (backtrackCharacterClass(currentTerm(), context)) |
michael@0 | 1367 | MATCH_NEXT(); |
michael@0 | 1368 | BACKTRACK(); |
michael@0 | 1369 | case ByteTerm::TypeBackReference: |
michael@0 | 1370 | if (backtrackBackReference(currentTerm(), context)) |
michael@0 | 1371 | MATCH_NEXT(); |
michael@0 | 1372 | BACKTRACK(); |
michael@0 | 1373 | case ByteTerm::TypeParenthesesSubpattern: { |
michael@0 | 1374 | JSRegExpResult result = backtrackParentheses(currentTerm(), context); |
michael@0 | 1375 | |
michael@0 | 1376 | if (result == JSRegExpMatch) { |
michael@0 | 1377 | MATCH_NEXT(); |
michael@0 | 1378 | } else if (result != JSRegExpNoMatch) |
michael@0 | 1379 | return result; |
michael@0 | 1380 | |
michael@0 | 1381 | BACKTRACK(); |
michael@0 | 1382 | } |
michael@0 | 1383 | case ByteTerm::TypeParenthesesSubpatternOnceBegin: |
michael@0 | 1384 | if (backtrackParenthesesOnceBegin(currentTerm(), context)) |
michael@0 | 1385 | MATCH_NEXT(); |
michael@0 | 1386 | BACKTRACK(); |
michael@0 | 1387 | case ByteTerm::TypeParenthesesSubpatternOnceEnd: |
michael@0 | 1388 | if (backtrackParenthesesOnceEnd(currentTerm(), context)) |
michael@0 | 1389 | MATCH_NEXT(); |
michael@0 | 1390 | BACKTRACK(); |
michael@0 | 1391 | case ByteTerm::TypeParenthesesSubpatternTerminalBegin: |
michael@0 | 1392 | if (backtrackParenthesesTerminalBegin(currentTerm(), context)) |
michael@0 | 1393 | MATCH_NEXT(); |
michael@0 | 1394 | BACKTRACK(); |
michael@0 | 1395 | case ByteTerm::TypeParenthesesSubpatternTerminalEnd: |
michael@0 | 1396 | if (backtrackParenthesesTerminalEnd(currentTerm(), context)) |
michael@0 | 1397 | MATCH_NEXT(); |
michael@0 | 1398 | BACKTRACK(); |
michael@0 | 1399 | case ByteTerm::TypeParentheticalAssertionBegin: |
michael@0 | 1400 | if (backtrackParentheticalAssertionBegin(currentTerm(), context)) |
michael@0 | 1401 | MATCH_NEXT(); |
michael@0 | 1402 | BACKTRACK(); |
michael@0 | 1403 | case ByteTerm::TypeParentheticalAssertionEnd: |
michael@0 | 1404 | if (backtrackParentheticalAssertionEnd(currentTerm(), context)) |
michael@0 | 1405 | MATCH_NEXT(); |
michael@0 | 1406 | BACKTRACK(); |
michael@0 | 1407 | |
michael@0 | 1408 | case ByteTerm::TypeCheckInput: |
michael@0 | 1409 | input.uncheckInput(currentTerm().checkInputCount); |
michael@0 | 1410 | BACKTRACK(); |
michael@0 | 1411 | |
michael@0 | 1412 | case ByteTerm::TypeUncheckInput: |
michael@0 | 1413 | input.checkInput(currentTerm().checkInputCount); |
michael@0 | 1414 | BACKTRACK(); |
michael@0 | 1415 | |
michael@0 | 1416 | case ByteTerm::TypeDotStarEnclosure: |
michael@0 | 1417 | ASSERT_NOT_REACHED(); |
michael@0 | 1418 | } |
michael@0 | 1419 | |
michael@0 | 1420 | ASSERT_NOT_REACHED(); |
michael@0 | 1421 | return JSRegExpErrorNoMatch; |
michael@0 | 1422 | } |
michael@0 | 1423 | |
michael@0 | 1424 | JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false) |
michael@0 | 1425 | { |
michael@0 | 1426 | JSRegExpResult result = matchDisjunction(disjunction, context, btrack); |
michael@0 | 1427 | |
michael@0 | 1428 | if (result == JSRegExpMatch) { |
michael@0 | 1429 | while (context->matchBegin == context->matchEnd) { |
michael@0 | 1430 | result = matchDisjunction(disjunction, context, true); |
michael@0 | 1431 | if (result != JSRegExpMatch) |
michael@0 | 1432 | return result; |
michael@0 | 1433 | } |
michael@0 | 1434 | return JSRegExpMatch; |
michael@0 | 1435 | } |
michael@0 | 1436 | |
michael@0 | 1437 | return result; |
michael@0 | 1438 | } |
michael@0 | 1439 | |
michael@0 | 1440 | unsigned interpret() |
michael@0 | 1441 | { |
michael@0 | 1442 | if (!input.isAvailableInput(0)) |
michael@0 | 1443 | return offsetNoMatch; |
michael@0 | 1444 | |
michael@0 | 1445 | for (unsigned i = 0; i < pattern->m_body->m_numSubpatterns + 1; ++i) |
michael@0 | 1446 | output[i << 1] = offsetNoMatch; |
michael@0 | 1447 | |
michael@0 | 1448 | allocatorPool = pattern->m_allocator->startAllocator(); |
michael@0 | 1449 | if (!allocatorPool) |
michael@0 | 1450 | CRASH(); |
michael@0 | 1451 | |
michael@0 | 1452 | DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get()); |
michael@0 | 1453 | |
michael@0 | 1454 | JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false); |
michael@0 | 1455 | if (result == JSRegExpMatch) { |
michael@0 | 1456 | output[0] = context->matchBegin; |
michael@0 | 1457 | output[1] = context->matchEnd; |
michael@0 | 1458 | } |
michael@0 | 1459 | |
michael@0 | 1460 | freeDisjunctionContext(context); |
michael@0 | 1461 | |
michael@0 | 1462 | pattern->m_allocator->stopAllocator(); |
michael@0 | 1463 | |
michael@0 | 1464 | ASSERT((result == JSRegExpMatch) == (output[0] != offsetNoMatch)); |
michael@0 | 1465 | return output[0]; |
michael@0 | 1466 | } |
michael@0 | 1467 | |
michael@0 | 1468 | Interpreter(JSContext *cx, BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start) |
michael@0 | 1469 | : cx(cx) |
michael@0 | 1470 | , pattern(pattern) |
michael@0 | 1471 | , output(output) |
michael@0 | 1472 | , input(input, start, length) |
michael@0 | 1473 | , allocatorPool(0) |
michael@0 | 1474 | , remainingMatchCount(matchLimit) |
michael@0 | 1475 | { |
michael@0 | 1476 | } |
michael@0 | 1477 | |
michael@0 | 1478 | private: |
michael@0 | 1479 | JSContext *cx; |
michael@0 | 1480 | BytecodePattern* pattern; |
michael@0 | 1481 | unsigned* output; |
michael@0 | 1482 | InputStream input; |
michael@0 | 1483 | BumpPointerPool* allocatorPool; |
michael@0 | 1484 | unsigned remainingMatchCount; |
michael@0 | 1485 | }; |
michael@0 | 1486 | |
michael@0 | 1487 | |
michael@0 | 1488 | |
michael@0 | 1489 | class ByteCompiler { |
michael@0 | 1490 | struct ParenthesesStackEntry { |
michael@0 | 1491 | unsigned beginTerm; |
michael@0 | 1492 | unsigned savedAlternativeIndex; |
michael@0 | 1493 | |
michael@0 | 1494 | // For js::Vector. Does not create a valid object. |
michael@0 | 1495 | ParenthesesStackEntry() { } |
michael@0 | 1496 | |
michael@0 | 1497 | ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/) |
michael@0 | 1498 | : beginTerm(beginTerm) |
michael@0 | 1499 | , savedAlternativeIndex(savedAlternativeIndex) |
michael@0 | 1500 | { |
michael@0 | 1501 | } |
michael@0 | 1502 | }; |
michael@0 | 1503 | |
michael@0 | 1504 | public: |
michael@0 | 1505 | ByteCompiler(YarrPattern& pattern) |
michael@0 | 1506 | : m_pattern(pattern) |
michael@0 | 1507 | { |
michael@0 | 1508 | m_currentAlternativeIndex = 0; |
michael@0 | 1509 | } |
michael@0 | 1510 | |
michael@0 | 1511 | PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator) |
michael@0 | 1512 | { |
michael@0 | 1513 | regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough()); |
michael@0 | 1514 | emitDisjunction(m_pattern.m_body); |
michael@0 | 1515 | regexEnd(); |
michael@0 | 1516 | |
michael@0 | 1517 | return adoptPtr(newOrCrash<BytecodePattern>(m_bodyDisjunction.release(), m_allParenthesesInfo, Ref<YarrPattern>(m_pattern), allocator)); |
michael@0 | 1518 | } |
michael@0 | 1519 | |
michael@0 | 1520 | void checkInput(unsigned count) |
michael@0 | 1521 | { |
michael@0 | 1522 | m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count)); |
michael@0 | 1523 | } |
michael@0 | 1524 | |
michael@0 | 1525 | void uncheckInput(unsigned count) |
michael@0 | 1526 | { |
michael@0 | 1527 | m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count)); |
michael@0 | 1528 | } |
michael@0 | 1529 | |
michael@0 | 1530 | void assertionBOL(unsigned inputPosition) |
michael@0 | 1531 | { |
michael@0 | 1532 | m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition)); |
michael@0 | 1533 | } |
michael@0 | 1534 | |
michael@0 | 1535 | void assertionEOL(unsigned inputPosition) |
michael@0 | 1536 | { |
michael@0 | 1537 | m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition)); |
michael@0 | 1538 | } |
michael@0 | 1539 | |
michael@0 | 1540 | void assertionWordBoundary(bool invert, unsigned inputPosition) |
michael@0 | 1541 | { |
michael@0 | 1542 | m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition)); |
michael@0 | 1543 | } |
michael@0 | 1544 | |
michael@0 | 1545 | void atomPatternCharacter(UChar ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) |
michael@0 | 1546 | { |
michael@0 | 1547 | if (m_pattern.m_ignoreCase) { |
michael@0 | 1548 | UChar lo = Unicode::toLower(ch); |
michael@0 | 1549 | UChar hi = Unicode::toUpper(ch); |
michael@0 | 1550 | |
michael@0 | 1551 | if (lo != hi) { |
michael@0 | 1552 | m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType)); |
michael@0 | 1553 | return; |
michael@0 | 1554 | } |
michael@0 | 1555 | } |
michael@0 | 1556 | |
michael@0 | 1557 | m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType)); |
michael@0 | 1558 | } |
michael@0 | 1559 | |
michael@0 | 1560 | void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) |
michael@0 | 1561 | { |
michael@0 | 1562 | m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition)); |
michael@0 | 1563 | |
michael@0 | 1564 | m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet(); |
michael@0 | 1565 | m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; |
michael@0 | 1566 | m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; |
michael@0 | 1567 | } |
michael@0 | 1568 | |
michael@0 | 1569 | void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) |
michael@0 | 1570 | { |
michael@0 | 1571 | ASSERT(subpatternId); |
michael@0 | 1572 | |
michael@0 | 1573 | m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition)); |
michael@0 | 1574 | |
michael@0 | 1575 | m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount.unsafeGet(); |
michael@0 | 1576 | m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; |
michael@0 | 1577 | m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; |
michael@0 | 1578 | } |
michael@0 | 1579 | |
michael@0 | 1580 | void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) |
michael@0 | 1581 | { |
michael@0 | 1582 | int beginTerm = m_bodyDisjunction->terms.size(); |
michael@0 | 1583 | |
michael@0 | 1584 | m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition)); |
michael@0 | 1585 | m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; |
michael@0 | 1586 | m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); |
michael@0 | 1587 | m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; |
michael@0 | 1588 | |
michael@0 | 1589 | m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); |
michael@0 | 1590 | m_currentAlternativeIndex = beginTerm + 1; |
michael@0 | 1591 | } |
michael@0 | 1592 | |
michael@0 | 1593 | void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) |
michael@0 | 1594 | { |
michael@0 | 1595 | int beginTerm = m_bodyDisjunction->terms.size(); |
michael@0 | 1596 | |
michael@0 | 1597 | m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition)); |
michael@0 | 1598 | m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; |
michael@0 | 1599 | m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); |
michael@0 | 1600 | m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; |
michael@0 | 1601 | |
michael@0 | 1602 | m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); |
michael@0 | 1603 | m_currentAlternativeIndex = beginTerm + 1; |
michael@0 | 1604 | } |
michael@0 | 1605 | |
michael@0 | 1606 | void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) |
michael@0 | 1607 | { |
michael@0 | 1608 | // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin, |
michael@0 | 1609 | // then fix this up at the end! - simplifying this should make it much clearer. |
michael@0 | 1610 | // https://bugs.webkit.org/show_bug.cgi?id=50136 |
michael@0 | 1611 | |
michael@0 | 1612 | int beginTerm = m_bodyDisjunction->terms.size(); |
michael@0 | 1613 | |
michael@0 | 1614 | m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition)); |
michael@0 | 1615 | m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; |
michael@0 | 1616 | m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); |
michael@0 | 1617 | m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; |
michael@0 | 1618 | |
michael@0 | 1619 | m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); |
michael@0 | 1620 | m_currentAlternativeIndex = beginTerm + 1; |
michael@0 | 1621 | } |
michael@0 | 1622 | |
michael@0 | 1623 | void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation) |
michael@0 | 1624 | { |
michael@0 | 1625 | int beginTerm = m_bodyDisjunction->terms.size(); |
michael@0 | 1626 | |
michael@0 | 1627 | m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0)); |
michael@0 | 1628 | m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; |
michael@0 | 1629 | m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin()); |
michael@0 | 1630 | m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; |
michael@0 | 1631 | |
michael@0 | 1632 | m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); |
michael@0 | 1633 | m_currentAlternativeIndex = beginTerm + 1; |
michael@0 | 1634 | } |
michael@0 | 1635 | |
michael@0 | 1636 | void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) |
michael@0 | 1637 | { |
michael@0 | 1638 | unsigned beginTerm = popParenthesesStack(); |
michael@0 | 1639 | closeAlternative(beginTerm + 1); |
michael@0 | 1640 | unsigned endTerm = m_bodyDisjunction->terms.size(); |
michael@0 | 1641 | |
michael@0 | 1642 | ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin); |
michael@0 | 1643 | |
michael@0 | 1644 | bool invert = m_bodyDisjunction->terms[beginTerm].invert(); |
michael@0 | 1645 | unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; |
michael@0 | 1646 | |
michael@0 | 1647 | m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition)); |
michael@0 | 1648 | m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; |
michael@0 | 1649 | m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; |
michael@0 | 1650 | m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; |
michael@0 | 1651 | |
michael@0 | 1652 | m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); |
michael@0 | 1653 | m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; |
michael@0 | 1654 | m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); |
michael@0 | 1655 | m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; |
michael@0 | 1656 | } |
michael@0 | 1657 | |
michael@0 | 1658 | void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored) |
michael@0 | 1659 | { |
michael@0 | 1660 | m_bodyDisjunction->terms.append(ByteTerm::DotStarEnclosure(bolAnchored, eolAnchored)); |
michael@0 | 1661 | } |
michael@0 | 1662 | |
michael@0 | 1663 | unsigned popParenthesesStack() |
michael@0 | 1664 | { |
michael@0 | 1665 | ASSERT(m_parenthesesStack.size()); |
michael@0 | 1666 | int stackEnd = m_parenthesesStack.size() - 1; |
michael@0 | 1667 | unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm; |
michael@0 | 1668 | m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex; |
michael@0 | 1669 | m_parenthesesStack.shrink(stackEnd); |
michael@0 | 1670 | |
michael@0 | 1671 | ASSERT(beginTerm < m_bodyDisjunction->terms.size()); |
michael@0 | 1672 | ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size()); |
michael@0 | 1673 | |
michael@0 | 1674 | return beginTerm; |
michael@0 | 1675 | } |
michael@0 | 1676 | |
michael@0 | 1677 | #ifndef NDEBUG |
michael@0 | 1678 | void dumpDisjunction(ByteDisjunction* disjunction) |
michael@0 | 1679 | { |
michael@0 | 1680 | dataLogF("ByteDisjunction(%p):\n\t", (void *)disjunction); |
michael@0 | 1681 | for (unsigned i = 0; i < disjunction->terms.size(); ++i) |
michael@0 | 1682 | dataLogF("{ %d } ", disjunction->terms[i].type); |
michael@0 | 1683 | dataLogF("\n"); |
michael@0 | 1684 | } |
michael@0 | 1685 | #endif |
michael@0 | 1686 | |
michael@0 | 1687 | void closeAlternative(int beginTerm) |
michael@0 | 1688 | { |
michael@0 | 1689 | int origBeginTerm = beginTerm; |
michael@0 | 1690 | ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin); |
michael@0 | 1691 | int endIndex = m_bodyDisjunction->terms.size(); |
michael@0 | 1692 | |
michael@0 | 1693 | unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation; |
michael@0 | 1694 | |
michael@0 | 1695 | if (!m_bodyDisjunction->terms[beginTerm].alternative.next) |
michael@0 | 1696 | m_bodyDisjunction->terms.remove(beginTerm); |
michael@0 | 1697 | else { |
michael@0 | 1698 | while (m_bodyDisjunction->terms[beginTerm].alternative.next) { |
michael@0 | 1699 | beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next; |
michael@0 | 1700 | ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction); |
michael@0 | 1701 | m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm; |
michael@0 | 1702 | m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; |
michael@0 | 1703 | } |
michael@0 | 1704 | |
michael@0 | 1705 | m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm; |
michael@0 | 1706 | |
michael@0 | 1707 | m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd()); |
michael@0 | 1708 | m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation; |
michael@0 | 1709 | } |
michael@0 | 1710 | } |
michael@0 | 1711 | |
michael@0 | 1712 | void closeBodyAlternative() |
michael@0 | 1713 | { |
michael@0 | 1714 | int beginTerm = 0; |
michael@0 | 1715 | int origBeginTerm = 0; |
michael@0 | 1716 | ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin); |
michael@0 | 1717 | int endIndex = m_bodyDisjunction->terms.size(); |
michael@0 | 1718 | |
michael@0 | 1719 | unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation; |
michael@0 | 1720 | |
michael@0 | 1721 | while (m_bodyDisjunction->terms[beginTerm].alternative.next) { |
michael@0 | 1722 | beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next; |
michael@0 | 1723 | ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction); |
michael@0 | 1724 | m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm; |
michael@0 | 1725 | m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; |
michael@0 | 1726 | } |
michael@0 | 1727 | |
michael@0 | 1728 | m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm; |
michael@0 | 1729 | |
michael@0 | 1730 | m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd()); |
michael@0 | 1731 | m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation; |
michael@0 | 1732 | } |
michael@0 | 1733 | |
michael@0 | 1734 | void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0) |
michael@0 | 1735 | { |
michael@0 | 1736 | unsigned beginTerm = popParenthesesStack(); |
michael@0 | 1737 | closeAlternative(beginTerm + 1); |
michael@0 | 1738 | unsigned endTerm = m_bodyDisjunction->terms.size(); |
michael@0 | 1739 | |
michael@0 | 1740 | ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin); |
michael@0 | 1741 | |
michael@0 | 1742 | ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm]; |
michael@0 | 1743 | |
michael@0 | 1744 | bool capture = parenthesesBegin.capture(); |
michael@0 | 1745 | unsigned subpatternId = parenthesesBegin.atom.subpatternId; |
michael@0 | 1746 | |
michael@0 | 1747 | unsigned numSubpatterns = lastSubpatternId - subpatternId + 1; |
michael@0 | 1748 | ByteDisjunction* parenthesesDisjunction = newOrCrash<ByteDisjunction>(numSubpatterns, callFrameSize); |
michael@0 | 1749 | |
michael@0 | 1750 | parenthesesDisjunction->terms.reserve(endTerm - beginTerm + 1); |
michael@0 | 1751 | parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin()); |
michael@0 | 1752 | for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses) |
michael@0 | 1753 | parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]); |
michael@0 | 1754 | parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd()); |
michael@0 | 1755 | |
michael@0 | 1756 | m_bodyDisjunction->terms.shrink(beginTerm); |
michael@0 | 1757 | |
michael@0 | 1758 | m_allParenthesesInfo.append(parenthesesDisjunction); |
michael@0 | 1759 | m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition)); |
michael@0 | 1760 | |
michael@0 | 1761 | m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); |
michael@0 | 1762 | m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; |
michael@0 | 1763 | m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; |
michael@0 | 1764 | } |
michael@0 | 1765 | |
michael@0 | 1766 | void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) |
michael@0 | 1767 | { |
michael@0 | 1768 | unsigned beginTerm = popParenthesesStack(); |
michael@0 | 1769 | closeAlternative(beginTerm + 1); |
michael@0 | 1770 | unsigned endTerm = m_bodyDisjunction->terms.size(); |
michael@0 | 1771 | |
michael@0 | 1772 | ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin); |
michael@0 | 1773 | |
michael@0 | 1774 | bool capture = m_bodyDisjunction->terms[beginTerm].capture(); |
michael@0 | 1775 | unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; |
michael@0 | 1776 | |
michael@0 | 1777 | m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition)); |
michael@0 | 1778 | m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; |
michael@0 | 1779 | m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; |
michael@0 | 1780 | m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; |
michael@0 | 1781 | |
michael@0 | 1782 | m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); |
michael@0 | 1783 | m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; |
michael@0 | 1784 | m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); |
michael@0 | 1785 | m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; |
michael@0 | 1786 | } |
michael@0 | 1787 | |
michael@0 | 1788 | void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType) |
michael@0 | 1789 | { |
michael@0 | 1790 | unsigned beginTerm = popParenthesesStack(); |
michael@0 | 1791 | closeAlternative(beginTerm + 1); |
michael@0 | 1792 | unsigned endTerm = m_bodyDisjunction->terms.size(); |
michael@0 | 1793 | |
michael@0 | 1794 | ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin); |
michael@0 | 1795 | |
michael@0 | 1796 | bool capture = m_bodyDisjunction->terms[beginTerm].capture(); |
michael@0 | 1797 | unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; |
michael@0 | 1798 | |
michael@0 | 1799 | m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition)); |
michael@0 | 1800 | m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; |
michael@0 | 1801 | m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; |
michael@0 | 1802 | m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; |
michael@0 | 1803 | |
michael@0 | 1804 | m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount.unsafeGet(); |
michael@0 | 1805 | m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; |
michael@0 | 1806 | m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount.unsafeGet(); |
michael@0 | 1807 | m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; |
michael@0 | 1808 | } |
michael@0 | 1809 | |
michael@0 | 1810 | void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough) |
michael@0 | 1811 | { |
michael@0 | 1812 | m_bodyDisjunction = adoptPtr(newOrCrash<ByteDisjunction>(numSubpatterns, callFrameSize)); |
michael@0 | 1813 | m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough)); |
michael@0 | 1814 | m_bodyDisjunction->terms[0].frameLocation = 0; |
michael@0 | 1815 | m_currentAlternativeIndex = 0; |
michael@0 | 1816 | } |
michael@0 | 1817 | |
michael@0 | 1818 | void regexEnd() |
michael@0 | 1819 | { |
michael@0 | 1820 | closeBodyAlternative(); |
michael@0 | 1821 | } |
michael@0 | 1822 | |
michael@0 | 1823 | void alternativeBodyDisjunction(bool onceThrough) |
michael@0 | 1824 | { |
michael@0 | 1825 | int newAlternativeIndex = m_bodyDisjunction->terms.size(); |
michael@0 | 1826 | m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex; |
michael@0 | 1827 | m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough)); |
michael@0 | 1828 | |
michael@0 | 1829 | m_currentAlternativeIndex = newAlternativeIndex; |
michael@0 | 1830 | } |
michael@0 | 1831 | |
michael@0 | 1832 | void alternativeDisjunction() |
michael@0 | 1833 | { |
michael@0 | 1834 | int newAlternativeIndex = m_bodyDisjunction->terms.size(); |
michael@0 | 1835 | m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex; |
michael@0 | 1836 | m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction()); |
michael@0 | 1837 | |
michael@0 | 1838 | m_currentAlternativeIndex = newAlternativeIndex; |
michael@0 | 1839 | } |
michael@0 | 1840 | |
michael@0 | 1841 | void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0) |
michael@0 | 1842 | { |
michael@0 | 1843 | for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) { |
michael@0 | 1844 | unsigned currentCountAlreadyChecked = inputCountAlreadyChecked; |
michael@0 | 1845 | |
michael@0 | 1846 | PatternAlternative* alternative = disjunction->m_alternatives[alt]; |
michael@0 | 1847 | |
michael@0 | 1848 | if (alt) { |
michael@0 | 1849 | if (disjunction == m_pattern.m_body) |
michael@0 | 1850 | alternativeBodyDisjunction(alternative->onceThrough()); |
michael@0 | 1851 | else |
michael@0 | 1852 | alternativeDisjunction(); |
michael@0 | 1853 | } |
michael@0 | 1854 | |
michael@0 | 1855 | unsigned minimumSize = alternative->m_minimumSize; |
michael@0 | 1856 | ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked); |
michael@0 | 1857 | unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked; |
michael@0 | 1858 | |
michael@0 | 1859 | if (countToCheck) { |
michael@0 | 1860 | checkInput(countToCheck); |
michael@0 | 1861 | currentCountAlreadyChecked += countToCheck; |
michael@0 | 1862 | } |
michael@0 | 1863 | |
michael@0 | 1864 | for (unsigned i = 0; i < alternative->m_terms.size(); ++i) { |
michael@0 | 1865 | PatternTerm& term = alternative->m_terms[i]; |
michael@0 | 1866 | |
michael@0 | 1867 | switch (term.type) { |
michael@0 | 1868 | case PatternTerm::TypeAssertionBOL: |
michael@0 | 1869 | assertionBOL(currentCountAlreadyChecked - term.inputPosition); |
michael@0 | 1870 | break; |
michael@0 | 1871 | |
michael@0 | 1872 | case PatternTerm::TypeAssertionEOL: |
michael@0 | 1873 | assertionEOL(currentCountAlreadyChecked - term.inputPosition); |
michael@0 | 1874 | break; |
michael@0 | 1875 | |
michael@0 | 1876 | case PatternTerm::TypeAssertionWordBoundary: |
michael@0 | 1877 | assertionWordBoundary(term.invert(), currentCountAlreadyChecked - term.inputPosition); |
michael@0 | 1878 | break; |
michael@0 | 1879 | |
michael@0 | 1880 | case PatternTerm::TypePatternCharacter: |
michael@0 | 1881 | atomPatternCharacter(term.patternCharacter, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); |
michael@0 | 1882 | break; |
michael@0 | 1883 | |
michael@0 | 1884 | case PatternTerm::TypeCharacterClass: |
michael@0 | 1885 | atomCharacterClass(term.characterClass, term.invert(), currentCountAlreadyChecked- term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); |
michael@0 | 1886 | break; |
michael@0 | 1887 | |
michael@0 | 1888 | case PatternTerm::TypeBackReference: |
michael@0 | 1889 | atomBackReference(term.backReferenceSubpatternId, currentCountAlreadyChecked - term.inputPosition, term.frameLocation, term.quantityCount, term.quantityType); |
michael@0 | 1890 | break; |
michael@0 | 1891 | |
michael@0 | 1892 | case PatternTerm::TypeForwardReference: |
michael@0 | 1893 | break; |
michael@0 | 1894 | |
michael@0 | 1895 | case PatternTerm::TypeParenthesesSubpattern: { |
michael@0 | 1896 | unsigned disjunctionAlreadyCheckedCount = 0; |
michael@0 | 1897 | if (term.quantityCount == 1 && !term.parentheses.isCopy) { |
michael@0 | 1898 | unsigned alternativeFrameLocation = term.frameLocation; |
michael@0 | 1899 | // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame. |
michael@0 | 1900 | if (term.quantityType == QuantifierFixedCount) |
michael@0 | 1901 | disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize; |
michael@0 | 1902 | else |
michael@0 | 1903 | alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; |
michael@0 | 1904 | unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; |
michael@0 | 1905 | atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, alternativeFrameLocation); |
michael@0 | 1906 | emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); |
michael@0 | 1907 | atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); |
michael@0 | 1908 | } else if (term.parentheses.isTerminal) { |
michael@0 | 1909 | unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; |
michael@0 | 1910 | atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce); |
michael@0 | 1911 | emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount); |
michael@0 | 1912 | atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType); |
michael@0 | 1913 | } else { |
michael@0 | 1914 | unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; |
michael@0 | 1915 | atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), disjunctionAlreadyCheckedCount - delegateEndInputOffset, term.frameLocation, 0); |
michael@0 | 1916 | emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0); |
michael@0 | 1917 | atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize); |
michael@0 | 1918 | } |
michael@0 | 1919 | break; |
michael@0 | 1920 | } |
michael@0 | 1921 | |
michael@0 | 1922 | case PatternTerm::TypeParentheticalAssertion: { |
michael@0 | 1923 | unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion; |
michael@0 | 1924 | |
michael@0 | 1925 | ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition)); |
michael@0 | 1926 | unsigned positiveInputOffset = currentCountAlreadyChecked - static_cast<unsigned>(term.inputPosition); |
michael@0 | 1927 | unsigned uncheckAmount = 0; |
michael@0 | 1928 | if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) { |
michael@0 | 1929 | uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize; |
michael@0 | 1930 | uncheckInput(uncheckAmount); |
michael@0 | 1931 | currentCountAlreadyChecked -= uncheckAmount; |
michael@0 | 1932 | } |
michael@0 | 1933 | |
michael@0 | 1934 | atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation); |
michael@0 | 1935 | emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset - uncheckAmount); |
michael@0 | 1936 | atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType); |
michael@0 | 1937 | if (uncheckAmount) { |
michael@0 | 1938 | checkInput(uncheckAmount); |
michael@0 | 1939 | currentCountAlreadyChecked += uncheckAmount; |
michael@0 | 1940 | } |
michael@0 | 1941 | break; |
michael@0 | 1942 | } |
michael@0 | 1943 | |
michael@0 | 1944 | case PatternTerm::TypeDotStarEnclosure: |
michael@0 | 1945 | assertionDotStarEnclosure(term.anchors.bolAnchor, term.anchors.eolAnchor); |
michael@0 | 1946 | break; |
michael@0 | 1947 | } |
michael@0 | 1948 | } |
michael@0 | 1949 | } |
michael@0 | 1950 | } |
michael@0 | 1951 | |
michael@0 | 1952 | private: |
michael@0 | 1953 | YarrPattern& m_pattern; |
michael@0 | 1954 | OwnPtr<ByteDisjunction> m_bodyDisjunction; |
michael@0 | 1955 | unsigned m_currentAlternativeIndex; |
michael@0 | 1956 | Vector<ParenthesesStackEntry> m_parenthesesStack; |
michael@0 | 1957 | Vector<ByteDisjunction*> m_allParenthesesInfo; |
michael@0 | 1958 | }; |
michael@0 | 1959 | |
michael@0 | 1960 | PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator) |
michael@0 | 1961 | { |
michael@0 | 1962 | return ByteCompiler(pattern).compile(allocator); |
michael@0 | 1963 | } |
michael@0 | 1964 | |
michael@0 | 1965 | unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output) |
michael@0 | 1966 | { |
michael@0 | 1967 | #if YARR_8BIT_CHAR_SUPPORT |
michael@0 | 1968 | if (input.is8Bit()) |
michael@0 | 1969 | return Interpreter<LChar>(cx, bytecode, output, input.characters8(), input.length(), start).interpret(); |
michael@0 | 1970 | #endif |
michael@0 | 1971 | return Interpreter<UChar>(cx, bytecode, output, input.chars(), input.length(), start).interpret(); |
michael@0 | 1972 | } |
michael@0 | 1973 | |
michael@0 | 1974 | unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output) |
michael@0 | 1975 | { |
michael@0 | 1976 | return Interpreter<LChar>(cx, bytecode, output, input, length, start).interpret(); |
michael@0 | 1977 | } |
michael@0 | 1978 | |
michael@0 | 1979 | unsigned interpret(JSContext *cx, BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output) |
michael@0 | 1980 | { |
michael@0 | 1981 | return Interpreter<UChar>(cx, bytecode, output, input, length, start).interpret(); |
michael@0 | 1982 | } |
michael@0 | 1983 | |
michael@0 | 1984 | // These should be the same for both UChar & LChar. |
michael@0 | 1985 | COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter); |
michael@0 | 1986 | COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass); |
michael@0 | 1987 | COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference); |
michael@0 | 1988 | COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative); |
michael@0 | 1989 | COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion); |
michael@0 | 1990 | COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce); |
michael@0 | 1991 | COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses); |
michael@0 | 1992 | |
michael@0 | 1993 | |
michael@0 | 1994 | } } |