|
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
|
2 * vim: set ts=8 sts=4 et sw=4 tw=99: |
|
3 * |
|
4 * Copyright (C) 2009 Apple Inc. All rights reserved. |
|
5 * |
|
6 * Redistribution and use in source and binary forms, with or without |
|
7 * modification, are permitted provided that the following conditions |
|
8 * are met: |
|
9 * 1. Redistributions of source code must retain the above copyright |
|
10 * notice, this list of conditions and the following disclaimer. |
|
11 * 2. Redistributions in binary form must reproduce the above copyright |
|
12 * notice, this list of conditions and the following disclaimer in the |
|
13 * documentation and/or other materials provided with the distribution. |
|
14 * |
|
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
|
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
|
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
|
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
26 */ |
|
27 |
|
28 #include "yarr/YarrJIT.h" |
|
29 |
|
30 #include "assembler/assembler/LinkBuffer.h" |
|
31 #include "yarr/Yarr.h" |
|
32 #include "yarr/YarrCanonicalizeUCS2.h" |
|
33 |
|
34 #if ENABLE_YARR_JIT |
|
35 |
|
36 using namespace WTF; |
|
37 |
|
38 namespace JSC { namespace Yarr { |
|
39 |
|
40 template<YarrJITCompileMode compileMode> |
|
41 class YarrGenerator : private MacroAssembler { |
|
42 friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const String& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline); |
|
43 |
|
44 #if WTF_CPU_ARM |
|
45 static const RegisterID input = ARMRegisters::r0; |
|
46 static const RegisterID index = ARMRegisters::r1; |
|
47 static const RegisterID length = ARMRegisters::r2; |
|
48 static const RegisterID output = ARMRegisters::r4; |
|
49 |
|
50 static const RegisterID regT0 = ARMRegisters::r5; |
|
51 static const RegisterID regT1 = ARMRegisters::r6; |
|
52 |
|
53 static const RegisterID returnRegister = ARMRegisters::r0; |
|
54 static const RegisterID returnRegister2 = ARMRegisters::r1; |
|
55 #elif WTF_CPU_MIPS |
|
56 static const RegisterID input = MIPSRegisters::a0; |
|
57 static const RegisterID index = MIPSRegisters::a1; |
|
58 static const RegisterID length = MIPSRegisters::a2; |
|
59 static const RegisterID output = MIPSRegisters::a3; |
|
60 |
|
61 static const RegisterID regT0 = MIPSRegisters::t4; |
|
62 static const RegisterID regT1 = MIPSRegisters::t5; |
|
63 |
|
64 static const RegisterID returnRegister = MIPSRegisters::v0; |
|
65 static const RegisterID returnRegister2 = MIPSRegisters::v1; |
|
66 #elif WTF_CPU_SH4 |
|
67 static const RegisterID input = SH4Registers::r4; |
|
68 static const RegisterID index = SH4Registers::r5; |
|
69 static const RegisterID length = SH4Registers::r6; |
|
70 static const RegisterID output = SH4Registers::r7; |
|
71 |
|
72 static const RegisterID regT0 = SH4Registers::r0; |
|
73 static const RegisterID regT1 = SH4Registers::r1; |
|
74 |
|
75 static const RegisterID returnRegister = SH4Registers::r0; |
|
76 static const RegisterID returnRegister2 = SH4Registers::r1; |
|
77 #elif WTF_CPU_SPARC |
|
78 static const RegisterID input = SparcRegisters::i0; |
|
79 static const RegisterID index = SparcRegisters::i1; |
|
80 static const RegisterID length = SparcRegisters::i2; |
|
81 static const RegisterID output = SparcRegisters::i3; |
|
82 |
|
83 static const RegisterID regT0 = SparcRegisters::i4; |
|
84 static const RegisterID regT1 = SparcRegisters::i5; |
|
85 |
|
86 static const RegisterID returnRegister = SparcRegisters::i0; |
|
87 #elif WTF_CPU_X86 |
|
88 static const RegisterID input = X86Registers::eax; |
|
89 static const RegisterID index = X86Registers::edx; |
|
90 static const RegisterID length = X86Registers::ecx; |
|
91 static const RegisterID output = X86Registers::edi; |
|
92 |
|
93 static const RegisterID regT0 = X86Registers::ebx; |
|
94 static const RegisterID regT1 = X86Registers::esi; |
|
95 |
|
96 static const RegisterID returnRegister = X86Registers::eax; |
|
97 static const RegisterID returnRegister2 = X86Registers::edx; |
|
98 #elif WTF_CPU_X86_64 |
|
99 # if WTF_PLATFORM_WIN |
|
100 static const RegisterID input = X86Registers::ecx; |
|
101 static const RegisterID index = X86Registers::edx; |
|
102 static const RegisterID length = X86Registers::r8; |
|
103 static const RegisterID output = X86Registers::r9; |
|
104 # else |
|
105 static const RegisterID input = X86Registers::edi; |
|
106 static const RegisterID index = X86Registers::esi; |
|
107 static const RegisterID length = X86Registers::edx; |
|
108 static const RegisterID output = X86Registers::ecx; |
|
109 # endif |
|
110 |
|
111 static const RegisterID regT0 = X86Registers::eax; |
|
112 static const RegisterID regT1 = X86Registers::ebx; |
|
113 |
|
114 static const RegisterID returnRegister = X86Registers::eax; |
|
115 |
|
116 # if !WTF_PLATFORM_WIN |
|
117 // no way to use int128_t as return value on Win64 ABI |
|
118 static const RegisterID returnRegister2 = X86Registers::edx; |
|
119 # endif |
|
120 #endif |
|
121 |
|
122 void optimizeAlternative(PatternAlternative* alternative) |
|
123 { |
|
124 if (!alternative->m_terms.size()) |
|
125 return; |
|
126 |
|
127 for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) { |
|
128 PatternTerm& term = alternative->m_terms[i]; |
|
129 PatternTerm& nextTerm = alternative->m_terms[i + 1]; |
|
130 |
|
131 if ((term.type == PatternTerm::TypeCharacterClass) |
|
132 && (term.quantityType == QuantifierFixedCount) |
|
133 && (nextTerm.type == PatternTerm::TypePatternCharacter) |
|
134 && (nextTerm.quantityType == QuantifierFixedCount)) { |
|
135 PatternTerm termCopy = term; |
|
136 alternative->m_terms[i] = nextTerm; |
|
137 alternative->m_terms[i + 1] = termCopy; |
|
138 } |
|
139 } |
|
140 } |
|
141 |
|
142 void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount) |
|
143 { |
|
144 do { |
|
145 // pick which range we're going to generate |
|
146 int which = count >> 1; |
|
147 char lo = ranges[which].begin; |
|
148 char hi = ranges[which].end; |
|
149 |
|
150 // check if there are any ranges or matches below lo. If not, just jl to failure - |
|
151 // if there is anything else to check, check that first, if it falls through jmp to failure. |
|
152 if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { |
|
153 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); |
|
154 |
|
155 // generate code for all ranges before this one |
|
156 if (which) |
|
157 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); |
|
158 |
|
159 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { |
|
160 matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex]))); |
|
161 ++*matchIndex; |
|
162 } |
|
163 failures.append(jump()); |
|
164 |
|
165 loOrAbove.link(this); |
|
166 } else if (which) { |
|
167 Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); |
|
168 |
|
169 matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount); |
|
170 failures.append(jump()); |
|
171 |
|
172 loOrAbove.link(this); |
|
173 } else |
|
174 failures.append(branch32(LessThan, character, Imm32((unsigned short)lo))); |
|
175 |
|
176 while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi)) |
|
177 ++*matchIndex; |
|
178 |
|
179 matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi))); |
|
180 // fall through to here, the value is above hi. |
|
181 |
|
182 // shuffle along & loop around if there are any more matches to handle. |
|
183 unsigned next = which + 1; |
|
184 ranges += next; |
|
185 count -= next; |
|
186 } while (count); |
|
187 } |
|
188 |
|
189 void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass) |
|
190 { |
|
191 if (charClass->m_table) { |
|
192 ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table)); |
|
193 matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry)); |
|
194 return; |
|
195 } |
|
196 Jump unicodeFail; |
|
197 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) { |
|
198 Jump isAscii = branch32(LessThanOrEqual, character, TrustedImm32(0x7f)); |
|
199 |
|
200 if (charClass->m_matchesUnicode.size()) { |
|
201 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) { |
|
202 UChar ch = charClass->m_matchesUnicode[i]; |
|
203 matchDest.append(branch32(Equal, character, Imm32(ch))); |
|
204 } |
|
205 } |
|
206 |
|
207 if (charClass->m_rangesUnicode.size()) { |
|
208 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) { |
|
209 UChar lo = charClass->m_rangesUnicode[i].begin; |
|
210 UChar hi = charClass->m_rangesUnicode[i].end; |
|
211 |
|
212 Jump below = branch32(LessThan, character, Imm32(lo)); |
|
213 matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi))); |
|
214 below.link(this); |
|
215 } |
|
216 } |
|
217 |
|
218 unicodeFail = jump(); |
|
219 isAscii.link(this); |
|
220 } |
|
221 |
|
222 if (charClass->m_ranges.size()) { |
|
223 unsigned matchIndex = 0; |
|
224 JumpList failures; |
|
225 matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size()); |
|
226 while (matchIndex < charClass->m_matches.size()) |
|
227 matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++]))); |
|
228 |
|
229 failures.link(this); |
|
230 } else if (charClass->m_matches.size()) { |
|
231 // optimization: gather 'a','A' etc back together, can mask & test once. |
|
232 Vector<char> matchesAZaz; |
|
233 |
|
234 for (unsigned i = 0; i < charClass->m_matches.size(); ++i) { |
|
235 char ch = charClass->m_matches[i]; |
|
236 if (m_pattern.m_ignoreCase) { |
|
237 if (isASCIILower(ch)) { |
|
238 matchesAZaz.append(ch); |
|
239 continue; |
|
240 } |
|
241 if (isASCIIUpper(ch)) |
|
242 continue; |
|
243 } |
|
244 matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch))); |
|
245 } |
|
246 |
|
247 if (unsigned countAZaz = matchesAZaz.size()) { |
|
248 or32(TrustedImm32(32), character); |
|
249 for (unsigned i = 0; i < countAZaz; ++i) |
|
250 matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i]))); |
|
251 } |
|
252 } |
|
253 |
|
254 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) |
|
255 unicodeFail.link(this); |
|
256 } |
|
257 |
|
258 // Jumps if input not available; will have (incorrectly) incremented already! |
|
259 Jump jumpIfNoAvailableInput(unsigned countToCheck = 0) |
|
260 { |
|
261 if (countToCheck) |
|
262 add32(Imm32(countToCheck), index); |
|
263 return branch32(Above, index, length); |
|
264 } |
|
265 |
|
266 Jump jumpIfAvailableInput(unsigned countToCheck) |
|
267 { |
|
268 add32(Imm32(countToCheck), index); |
|
269 return branch32(BelowOrEqual, index, length); |
|
270 } |
|
271 |
|
272 Jump checkInput() |
|
273 { |
|
274 return branch32(BelowOrEqual, index, length); |
|
275 } |
|
276 |
|
277 Jump atEndOfInput() |
|
278 { |
|
279 return branch32(Equal, index, length); |
|
280 } |
|
281 |
|
282 Jump notAtEndOfInput() |
|
283 { |
|
284 return branch32(NotEqual, index, length); |
|
285 } |
|
286 |
|
287 Jump jumpIfCharNotEquals(UChar ch, int inputPosition, RegisterID character) |
|
288 { |
|
289 readCharacter(inputPosition, character); |
|
290 |
|
291 // For case-insesitive compares, non-ascii characters that have different |
|
292 // upper & lower case representations are converted to a character class. |
|
293 ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch)); |
|
294 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { |
|
295 or32(TrustedImm32(0x20), character); |
|
296 ch |= 0x20; |
|
297 } |
|
298 |
|
299 return branch32(NotEqual, character, Imm32(ch)); |
|
300 } |
|
301 |
|
302 void readCharacter(int inputPosition, RegisterID reg) |
|
303 { |
|
304 if (m_charSize == Char8) |
|
305 load8(BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), reg); |
|
306 else |
|
307 load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg); |
|
308 } |
|
309 |
|
310 void storeToFrame(RegisterID reg, unsigned frameLocation) |
|
311 { |
|
312 poke(reg, frameLocation); |
|
313 } |
|
314 |
|
315 void storeToFrame(TrustedImm32 imm, unsigned frameLocation) |
|
316 { |
|
317 poke(imm, frameLocation); |
|
318 } |
|
319 |
|
320 DataLabelPtr storeToFrameWithPatch(unsigned frameLocation) |
|
321 { |
|
322 return storePtrWithPatch(TrustedImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*))); |
|
323 } |
|
324 |
|
325 void loadFromFrame(unsigned frameLocation, RegisterID reg) |
|
326 { |
|
327 peek(reg, frameLocation); |
|
328 } |
|
329 |
|
330 void loadFromFrameAndJump(unsigned frameLocation) |
|
331 { |
|
332 jump(Address(stackPointerRegister, frameLocation * sizeof(void*))); |
|
333 } |
|
334 |
|
335 void initCallFrame() |
|
336 { |
|
337 unsigned callFrameSize = m_pattern.m_body->m_callFrameSize; |
|
338 if (callFrameSize) |
|
339 subPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister); |
|
340 } |
|
341 void removeCallFrame() |
|
342 { |
|
343 unsigned callFrameSize = m_pattern.m_body->m_callFrameSize; |
|
344 if (callFrameSize) |
|
345 addPtr(Imm32(callFrameSize * sizeof(void*)), stackPointerRegister); |
|
346 } |
|
347 |
|
348 // Used to record subpatters, should only be called if compileMode is IncludeSubpatterns. |
|
349 void setSubpatternStart(RegisterID reg, unsigned subpattern) |
|
350 { |
|
351 ASSERT(subpattern); |
|
352 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-( |
|
353 store32(reg, Address(output, (subpattern << 1) * sizeof(int))); |
|
354 } |
|
355 void setSubpatternEnd(RegisterID reg, unsigned subpattern) |
|
356 { |
|
357 ASSERT(subpattern); |
|
358 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-( |
|
359 store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int))); |
|
360 } |
|
361 void clearSubpatternStart(unsigned subpattern) |
|
362 { |
|
363 ASSERT(subpattern); |
|
364 // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-( |
|
365 store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int))); |
|
366 } |
|
367 |
|
368 // We use one of three different strategies to track the start of the current match, |
|
369 // while matching. |
|
370 // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily |
|
371 // at the end of matching. This is irrespective of compileMode, and in this case |
|
372 // these methods should never be called. |
|
373 // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output |
|
374 // vector, store the match start in the output vector. |
|
375 // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly |
|
376 // in this register. |
|
377 void setMatchStart(RegisterID reg) |
|
378 { |
|
379 ASSERT(!m_pattern.m_body->m_hasFixedSize); |
|
380 if (compileMode == IncludeSubpatterns) |
|
381 store32(reg, output); |
|
382 else |
|
383 move(reg, output); |
|
384 } |
|
385 void getMatchStart(RegisterID reg) |
|
386 { |
|
387 ASSERT(!m_pattern.m_body->m_hasFixedSize); |
|
388 if (compileMode == IncludeSubpatterns) |
|
389 load32(output, reg); |
|
390 else |
|
391 move(output, reg); |
|
392 } |
|
393 |
|
394 enum YarrOpCode { |
|
395 // These nodes wrap body alternatives - those in the main disjunction, |
|
396 // rather than subpatterns or assertions. These are chained together in |
|
397 // a doubly linked list, with a 'begin' node for the first alternative, |
|
398 // a 'next' node for each subsequent alternative, and an 'end' node at |
|
399 // the end. In the case of repeating alternatives, the 'end' node also |
|
400 // has a reference back to 'begin'. |
|
401 OpBodyAlternativeBegin, |
|
402 OpBodyAlternativeNext, |
|
403 OpBodyAlternativeEnd, |
|
404 // Similar to the body alternatives, but used for subpatterns with two |
|
405 // or more alternatives. |
|
406 OpNestedAlternativeBegin, |
|
407 OpNestedAlternativeNext, |
|
408 OpNestedAlternativeEnd, |
|
409 // Used for alternatives in subpatterns where there is only a single |
|
410 // alternative (backtrackingis easier in these cases), or for alternatives |
|
411 // which never need to be backtracked (those in parenthetical assertions, |
|
412 // terminal subpatterns). |
|
413 OpSimpleNestedAlternativeBegin, |
|
414 OpSimpleNestedAlternativeNext, |
|
415 OpSimpleNestedAlternativeEnd, |
|
416 // Used to wrap 'Once' subpattern matches (quantityCount == 1). |
|
417 OpParenthesesSubpatternOnceBegin, |
|
418 OpParenthesesSubpatternOnceEnd, |
|
419 // Used to wrap 'Terminal' subpattern matches (at the end of the regexp). |
|
420 OpParenthesesSubpatternTerminalBegin, |
|
421 OpParenthesesSubpatternTerminalEnd, |
|
422 // Used to wrap parenthetical assertions. |
|
423 OpParentheticalAssertionBegin, |
|
424 OpParentheticalAssertionEnd, |
|
425 // Wraps all simple terms (pattern characters, character classes). |
|
426 OpTerm, |
|
427 // Where an expression contains only 'once through' body alternatives |
|
428 // and no repeating ones, this op is used to return match failure. |
|
429 OpMatchFailed |
|
430 }; |
|
431 |
|
432 // This structure is used to hold the compiled opcode information, |
|
433 // including reference back to the original PatternTerm/PatternAlternatives, |
|
434 // and JIT compilation data structures. |
|
435 struct YarrOp { |
|
436 explicit YarrOp(PatternTerm* term) |
|
437 : m_op(OpTerm) |
|
438 , m_term(term) |
|
439 , m_isDeadCode(false) |
|
440 { |
|
441 } |
|
442 |
|
443 explicit YarrOp(YarrOpCode op) |
|
444 : m_op(op) |
|
445 , m_isDeadCode(false) |
|
446 { |
|
447 } |
|
448 |
|
449 // The operation, as a YarrOpCode, and also a reference to the PatternTerm. |
|
450 YarrOpCode m_op; |
|
451 PatternTerm* m_term; |
|
452 |
|
453 // For alternatives, this holds the PatternAlternative and doubly linked |
|
454 // references to this alternative's siblings. In the case of the |
|
455 // OpBodyAlternativeEnd node at the end of a section of repeating nodes, |
|
456 // m_nextOp will reference the OpBodyAlternativeBegin node of the first |
|
457 // repeating alternative. |
|
458 PatternAlternative* m_alternative; |
|
459 size_t m_previousOp; |
|
460 size_t m_nextOp; |
|
461 |
|
462 // Used to record a set of Jumps out of the generated code, typically |
|
463 // used for jumps out to backtracking code, and a single reentry back |
|
464 // into the code for a node (likely where a backtrack will trigger |
|
465 // rematching). |
|
466 Label m_reentry; |
|
467 JumpList m_jumps; |
|
468 |
|
469 // Used for backtracking when the prior alternative did not consume any |
|
470 // characters but matched. |
|
471 Jump m_zeroLengthMatch; |
|
472 |
|
473 // This flag is used to null out the second pattern character, when |
|
474 // two are fused to match a pair together. |
|
475 bool m_isDeadCode; |
|
476 |
|
477 // Currently used in the case of some of the more complex management of |
|
478 // 'm_checked', to cache the offset used in this alternative, to avoid |
|
479 // recalculating it. |
|
480 int m_checkAdjust; |
|
481 |
|
482 // Used by OpNestedAlternativeNext/End to hold the pointer to the |
|
483 // value that will be pushed into the pattern's frame to return to, |
|
484 // upon backtracking back into the disjunction. |
|
485 DataLabelPtr m_returnAddress; |
|
486 }; |
|
487 |
|
488 // BacktrackingState |
|
489 // This class encapsulates information about the state of code generation |
|
490 // whilst generating the code for backtracking, when a term fails to match. |
|
491 // Upon entry to code generation of the backtracking code for a given node, |
|
492 // the Backtracking state will hold references to all control flow sources |
|
493 // that are outputs in need of further backtracking from the prior node |
|
494 // generated (which is the subsequent operation in the regular expression, |
|
495 // and in the m_ops Vector, since we generated backtracking backwards). |
|
496 // These references to control flow take the form of: |
|
497 // - A jump list of jumps, to be linked to code that will backtrack them |
|
498 // further. |
|
499 // - A set of DataLabelPtr values, to be populated with values to be |
|
500 // treated effectively as return addresses backtracking into complex |
|
501 // subpatterns. |
|
502 // - A flag indicating that the current sequence of generated code up to |
|
503 // this point requires backtracking. |
|
504 class BacktrackingState { |
|
505 public: |
|
506 BacktrackingState() |
|
507 : m_pendingFallthrough(false) |
|
508 { |
|
509 } |
|
510 |
|
511 // Add a jump or jumps, a return address, or set the flag indicating |
|
512 // that the current 'fallthrough' control flow requires backtracking. |
|
513 void append(const Jump& jump) |
|
514 { |
|
515 m_laterFailures.append(jump); |
|
516 } |
|
517 void append(JumpList& jumpList) |
|
518 { |
|
519 m_laterFailures.append(jumpList); |
|
520 } |
|
521 void append(const DataLabelPtr& returnAddress) |
|
522 { |
|
523 m_pendingReturns.append(returnAddress); |
|
524 } |
|
525 void fallthrough() |
|
526 { |
|
527 ASSERT(!m_pendingFallthrough); |
|
528 m_pendingFallthrough = true; |
|
529 } |
|
530 |
|
531 // These methods clear the backtracking state, either linking to the |
|
532 // current location, a provided label, or copying the backtracking out |
|
533 // to a JumpList. All actions may require code generation to take place, |
|
534 // and as such are passed a pointer to the assembler. |
|
535 void link(MacroAssembler* assembler) |
|
536 { |
|
537 if (m_pendingReturns.size()) { |
|
538 Label here(assembler); |
|
539 for (unsigned i = 0; i < m_pendingReturns.size(); ++i) |
|
540 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here)); |
|
541 m_pendingReturns.clear(); |
|
542 } |
|
543 m_laterFailures.link(assembler); |
|
544 m_laterFailures.clear(); |
|
545 m_pendingFallthrough = false; |
|
546 } |
|
547 void linkTo(Label label, MacroAssembler* assembler) |
|
548 { |
|
549 if (m_pendingReturns.size()) { |
|
550 for (unsigned i = 0; i < m_pendingReturns.size(); ++i) |
|
551 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label)); |
|
552 m_pendingReturns.clear(); |
|
553 } |
|
554 if (m_pendingFallthrough) |
|
555 assembler->jump(label); |
|
556 m_laterFailures.linkTo(label, assembler); |
|
557 m_laterFailures.clear(); |
|
558 m_pendingFallthrough = false; |
|
559 } |
|
560 void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler) |
|
561 { |
|
562 if (m_pendingReturns.size()) { |
|
563 Label here(assembler); |
|
564 for (unsigned i = 0; i < m_pendingReturns.size(); ++i) |
|
565 m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here)); |
|
566 m_pendingReturns.clear(); |
|
567 m_pendingFallthrough = true; |
|
568 } |
|
569 if (m_pendingFallthrough) |
|
570 jumpList.append(assembler->jump()); |
|
571 jumpList.append(m_laterFailures); |
|
572 m_laterFailures.clear(); |
|
573 m_pendingFallthrough = false; |
|
574 } |
|
575 |
|
576 bool isEmpty() |
|
577 { |
|
578 return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough; |
|
579 } |
|
580 |
|
581 // Called at the end of code generation to link all return addresses. |
|
582 void linkDataLabels(LinkBuffer& linkBuffer) |
|
583 { |
|
584 ASSERT(isEmpty()); |
|
585 for (unsigned i = 0; i < m_backtrackRecords.size(); ++i) |
|
586 linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation)); |
|
587 } |
|
588 |
|
589 private: |
|
590 struct ReturnAddressRecord { |
|
591 ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation) |
|
592 : m_dataLabel(dataLabel) |
|
593 , m_backtrackLocation(backtrackLocation) |
|
594 { |
|
595 } |
|
596 |
|
597 DataLabelPtr m_dataLabel; |
|
598 Label m_backtrackLocation; |
|
599 }; |
|
600 |
|
601 JumpList m_laterFailures; |
|
602 bool m_pendingFallthrough; |
|
603 Vector<DataLabelPtr, 4> m_pendingReturns; |
|
604 Vector<ReturnAddressRecord, 4> m_backtrackRecords; |
|
605 }; |
|
606 |
|
607 // Generation methods: |
|
608 // =================== |
|
609 |
|
610 // This method provides a default implementation of backtracking common |
|
611 // to many terms; terms commonly jump out of the forwards matching path |
|
612 // on any failed conditions, and add these jumps to the m_jumps list. If |
|
613 // no special handling is required we can often just backtrack to m_jumps. |
|
614 bool backtrackTermDefault(size_t opIndex) |
|
615 { |
|
616 YarrOp& op = m_ops[opIndex]; |
|
617 m_backtrackingState.append(op.m_jumps); |
|
618 return true; |
|
619 } |
|
620 |
|
621 bool generateAssertionBOL(size_t opIndex) |
|
622 { |
|
623 YarrOp& op = m_ops[opIndex]; |
|
624 PatternTerm* term = op.m_term; |
|
625 |
|
626 if (m_pattern.m_multiline) { |
|
627 const RegisterID character = regT0; |
|
628 |
|
629 JumpList matchDest; |
|
630 if (!term->inputPosition) |
|
631 matchDest.append(branch32(Equal, index, Imm32(m_checked))); |
|
632 |
|
633 readCharacter((term->inputPosition - m_checked) - 1, character); |
|
634 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); |
|
635 op.m_jumps.append(jump()); |
|
636 |
|
637 matchDest.link(this); |
|
638 } else { |
|
639 // Erk, really should poison out these alternatives early. :-/ |
|
640 if (term->inputPosition) |
|
641 op.m_jumps.append(jump()); |
|
642 else |
|
643 op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked))); |
|
644 } |
|
645 return true; |
|
646 } |
|
647 bool backtrackAssertionBOL(size_t opIndex) |
|
648 { |
|
649 return backtrackTermDefault(opIndex); |
|
650 } |
|
651 |
|
652 bool generateAssertionEOL(size_t opIndex) |
|
653 { |
|
654 YarrOp& op = m_ops[opIndex]; |
|
655 PatternTerm* term = op.m_term; |
|
656 |
|
657 if (m_pattern.m_multiline) { |
|
658 const RegisterID character = regT0; |
|
659 |
|
660 JumpList matchDest; |
|
661 if (term->inputPosition == m_checked) |
|
662 matchDest.append(atEndOfInput()); |
|
663 |
|
664 readCharacter(term->inputPosition - m_checked, character); |
|
665 matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass()); |
|
666 op.m_jumps.append(jump()); |
|
667 |
|
668 matchDest.link(this); |
|
669 } else { |
|
670 if (term->inputPosition == m_checked) |
|
671 op.m_jumps.append(notAtEndOfInput()); |
|
672 // Erk, really should poison out these alternatives early. :-/ |
|
673 else |
|
674 op.m_jumps.append(jump()); |
|
675 } |
|
676 return true; |
|
677 } |
|
678 bool backtrackAssertionEOL(size_t opIndex) |
|
679 { |
|
680 return backtrackTermDefault(opIndex); |
|
681 } |
|
682 |
|
683 // Also falls though on nextIsNotWordChar. |
|
684 void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar) |
|
685 { |
|
686 YarrOp& op = m_ops[opIndex]; |
|
687 PatternTerm* term = op.m_term; |
|
688 |
|
689 const RegisterID character = regT0; |
|
690 |
|
691 if (term->inputPosition == m_checked) |
|
692 nextIsNotWordChar.append(atEndOfInput()); |
|
693 |
|
694 readCharacter((term->inputPosition - m_checked), character); |
|
695 matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass()); |
|
696 } |
|
697 |
|
698 bool generateAssertionWordBoundary(size_t opIndex) |
|
699 { |
|
700 YarrOp& op = m_ops[opIndex]; |
|
701 PatternTerm* term = op.m_term; |
|
702 |
|
703 const RegisterID character = regT0; |
|
704 |
|
705 Jump atBegin; |
|
706 JumpList matchDest; |
|
707 if (!term->inputPosition) |
|
708 atBegin = branch32(Equal, index, Imm32(m_checked)); |
|
709 readCharacter((term->inputPosition - m_checked) - 1, character); |
|
710 matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass()); |
|
711 if (!term->inputPosition) |
|
712 atBegin.link(this); |
|
713 |
|
714 // We fall through to here if the last character was not a wordchar. |
|
715 JumpList nonWordCharThenWordChar; |
|
716 JumpList nonWordCharThenNonWordChar; |
|
717 if (term->invert()) { |
|
718 matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar); |
|
719 nonWordCharThenWordChar.append(jump()); |
|
720 } else { |
|
721 matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar); |
|
722 nonWordCharThenNonWordChar.append(jump()); |
|
723 } |
|
724 op.m_jumps.append(nonWordCharThenNonWordChar); |
|
725 |
|
726 // We jump here if the last character was a wordchar. |
|
727 matchDest.link(this); |
|
728 JumpList wordCharThenWordChar; |
|
729 JumpList wordCharThenNonWordChar; |
|
730 if (term->invert()) { |
|
731 matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar); |
|
732 wordCharThenWordChar.append(jump()); |
|
733 } else { |
|
734 matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar); |
|
735 // This can fall-though! |
|
736 } |
|
737 |
|
738 op.m_jumps.append(wordCharThenWordChar); |
|
739 |
|
740 nonWordCharThenWordChar.link(this); |
|
741 wordCharThenNonWordChar.link(this); |
|
742 return true; |
|
743 } |
|
744 bool backtrackAssertionWordBoundary(size_t opIndex) |
|
745 { |
|
746 return backtrackTermDefault(opIndex); |
|
747 } |
|
748 |
|
749 bool generatePatternCharacterOnce(size_t opIndex) |
|
750 { |
|
751 YarrOp& op = m_ops[opIndex]; |
|
752 |
|
753 if (op.m_isDeadCode) |
|
754 return true; |
|
755 |
|
756 // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed |
|
757 // node, so there must always be at least one more node. |
|
758 ASSERT(opIndex + 1 < m_ops.size()); |
|
759 YarrOp* nextOp = &m_ops[opIndex + 1]; |
|
760 |
|
761 PatternTerm* term = op.m_term; |
|
762 UChar ch = term->patternCharacter; |
|
763 |
|
764 if ((ch > 0xff) && (m_charSize == Char8)) { |
|
765 // Have a 16 bit pattern character and an 8 bit string - short circuit |
|
766 op.m_jumps.append(jump()); |
|
767 return true; |
|
768 } |
|
769 |
|
770 const RegisterID character = regT0; |
|
771 int maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2; |
|
772 unsigned ignoreCaseMask = 0; |
|
773 #if CPU(BIG_ENDIAN) |
|
774 int allCharacters = ch << (m_charSize == Char8 ? 24 : 16); |
|
775 #else |
|
776 int allCharacters = ch; |
|
777 #endif |
|
778 int numberCharacters; |
|
779 int startTermPosition = term->inputPosition; |
|
780 |
|
781 // For case-insesitive compares, non-ascii characters that have different |
|
782 // upper & lower case representations are converted to a character class. |
|
783 ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch)); |
|
784 |
|
785 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) |
|
786 #if CPU(BIG_ENDIAN) |
|
787 ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16); |
|
788 #else |
|
789 ignoreCaseMask |= 32; |
|
790 #endif |
|
791 |
|
792 for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) { |
|
793 PatternTerm* nextTerm = nextOp->m_term; |
|
794 |
|
795 if (nextTerm->type != PatternTerm::TypePatternCharacter |
|
796 || nextTerm->quantityType != QuantifierFixedCount |
|
797 || nextTerm->quantityCount != 1 |
|
798 || nextTerm->inputPosition != (startTermPosition + numberCharacters)) |
|
799 break; |
|
800 |
|
801 nextOp->m_isDeadCode = true; |
|
802 |
|
803 #if CPU(BIG_ENDIAN) |
|
804 int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters); |
|
805 #else |
|
806 int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters; |
|
807 #endif |
|
808 |
|
809 UChar currentCharacter = nextTerm->patternCharacter; |
|
810 |
|
811 if ((currentCharacter > 0xff) && (m_charSize == Char8)) { |
|
812 // Have a 16 bit pattern character and an 8 bit string - short circuit |
|
813 op.m_jumps.append(jump()); |
|
814 return true; |
|
815 } |
|
816 |
|
817 // For case-insesitive compares, non-ascii characters that have different |
|
818 // upper & lower case representations are converted to a character class. |
|
819 ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter)); |
|
820 |
|
821 allCharacters |= (currentCharacter << shiftAmount); |
|
822 |
|
823 if ((m_pattern.m_ignoreCase) && (isASCIIAlpha(currentCharacter))) |
|
824 ignoreCaseMask |= 32 << shiftAmount; |
|
825 } |
|
826 |
|
827 if (m_charSize == Char8) { |
|
828 switch (numberCharacters) { |
|
829 case 1: |
|
830 op.m_jumps.append(jumpIfCharNotEquals(ch, startTermPosition - m_checked, character)); |
|
831 return true; |
|
832 case 2: { |
|
833 BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar)); |
|
834 load16Unaligned(address, character); |
|
835 break; |
|
836 } |
|
837 case 3: { |
|
838 BaseIndex highAddress(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar)); |
|
839 load16Unaligned(highAddress, character); |
|
840 if (ignoreCaseMask) |
|
841 or32(Imm32(ignoreCaseMask), character); |
|
842 op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask))); |
|
843 op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, startTermPosition + 2 - m_checked, character)); |
|
844 return true; |
|
845 } |
|
846 case 4: { |
|
847 BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar)); |
|
848 load32WithUnalignedHalfWords(address, character); |
|
849 break; |
|
850 } |
|
851 } |
|
852 } else { |
|
853 switch (numberCharacters) { |
|
854 case 1: |
|
855 op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character)); |
|
856 return true; |
|
857 case 2: |
|
858 BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar)); |
|
859 load32WithUnalignedHalfWords(address, character); |
|
860 break; |
|
861 } |
|
862 } |
|
863 |
|
864 if (ignoreCaseMask) |
|
865 or32(Imm32(ignoreCaseMask), character); |
|
866 op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask))); |
|
867 return true; |
|
868 } |
|
869 bool backtrackPatternCharacterOnce(size_t opIndex) |
|
870 { |
|
871 return backtrackTermDefault(opIndex); |
|
872 } |
|
873 |
|
874 bool generatePatternCharacterFixed(size_t opIndex) |
|
875 { |
|
876 YarrOp& op = m_ops[opIndex]; |
|
877 PatternTerm* term = op.m_term; |
|
878 UChar ch = term->patternCharacter; |
|
879 |
|
880 const RegisterID character = regT0; |
|
881 const RegisterID countRegister = regT1; |
|
882 |
|
883 move(index, countRegister); |
|
884 if (term->quantityCount.hasOverflowed()) |
|
885 return false; |
|
886 sub32(Imm32(term->quantityCount.unsafeGet()), countRegister); |
|
887 |
|
888 Label loop(this); |
|
889 int offset; |
|
890 if ((Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(m_charSize == Char8 ? sizeof(char) : sizeof(UChar))).safeGet(offset)) |
|
891 return false; |
|
892 BaseIndex address(input, countRegister, m_charScale, offset); |
|
893 |
|
894 if (m_charSize == Char8) |
|
895 load8(address, character); |
|
896 else |
|
897 load16(address, character); |
|
898 |
|
899 // For case-insesitive compares, non-ascii characters that have different |
|
900 // upper & lower case representations are converted to a character class. |
|
901 ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch)); |
|
902 if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) { |
|
903 or32(TrustedImm32(0x20), character); |
|
904 ch |= 0x20; |
|
905 } |
|
906 |
|
907 op.m_jumps.append(branch32(NotEqual, character, Imm32(ch))); |
|
908 add32(TrustedImm32(1), countRegister); |
|
909 branch32(NotEqual, countRegister, index).linkTo(loop, this); |
|
910 |
|
911 return true; |
|
912 } |
|
913 bool backtrackPatternCharacterFixed(size_t opIndex) |
|
914 { |
|
915 return backtrackTermDefault(opIndex); |
|
916 } |
|
917 |
|
918 bool generatePatternCharacterGreedy(size_t opIndex) |
|
919 { |
|
920 YarrOp& op = m_ops[opIndex]; |
|
921 PatternTerm* term = op.m_term; |
|
922 UChar ch = term->patternCharacter; |
|
923 |
|
924 const RegisterID character = regT0; |
|
925 const RegisterID countRegister = regT1; |
|
926 |
|
927 move(TrustedImm32(0), countRegister); |
|
928 |
|
929 // Unless have a 16 bit pattern character and an 8 bit string - short circuit |
|
930 if (!((ch > 0xff) && (m_charSize == Char8))) { |
|
931 JumpList failures; |
|
932 Label loop(this); |
|
933 failures.append(atEndOfInput()); |
|
934 failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character)); |
|
935 |
|
936 add32(TrustedImm32(1), countRegister); |
|
937 add32(TrustedImm32(1), index); |
|
938 if (term->quantityCount == quantifyInfinite) { |
|
939 jump(loop); |
|
940 } else { |
|
941 if (term->quantityCount.hasOverflowed()) |
|
942 return false; |
|
943 branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this); |
|
944 } |
|
945 |
|
946 failures.link(this); |
|
947 } |
|
948 op.m_reentry = label(); |
|
949 |
|
950 storeToFrame(countRegister, term->frameLocation); |
|
951 return true; |
|
952 } |
|
953 bool backtrackPatternCharacterGreedy(size_t opIndex) |
|
954 { |
|
955 YarrOp& op = m_ops[opIndex]; |
|
956 PatternTerm* term = op.m_term; |
|
957 |
|
958 const RegisterID countRegister = regT1; |
|
959 |
|
960 m_backtrackingState.link(this); |
|
961 |
|
962 loadFromFrame(term->frameLocation, countRegister); |
|
963 m_backtrackingState.append(branchTest32(Zero, countRegister)); |
|
964 sub32(TrustedImm32(1), countRegister); |
|
965 sub32(TrustedImm32(1), index); |
|
966 jump(op.m_reentry); |
|
967 |
|
968 return true; |
|
969 } |
|
970 |
|
971 bool generatePatternCharacterNonGreedy(size_t opIndex) |
|
972 { |
|
973 YarrOp& op = m_ops[opIndex]; |
|
974 PatternTerm* term = op.m_term; |
|
975 |
|
976 const RegisterID countRegister = regT1; |
|
977 |
|
978 move(TrustedImm32(0), countRegister); |
|
979 op.m_reentry = label(); |
|
980 storeToFrame(countRegister, term->frameLocation); |
|
981 return true; |
|
982 } |
|
983 bool backtrackPatternCharacterNonGreedy(size_t opIndex) |
|
984 { |
|
985 YarrOp& op = m_ops[opIndex]; |
|
986 PatternTerm* term = op.m_term; |
|
987 UChar ch = term->patternCharacter; |
|
988 |
|
989 const RegisterID character = regT0; |
|
990 const RegisterID countRegister = regT1; |
|
991 |
|
992 m_backtrackingState.link(this); |
|
993 |
|
994 loadFromFrame(term->frameLocation, countRegister); |
|
995 |
|
996 // Unless have a 16 bit pattern character and an 8 bit string - short circuit |
|
997 if (!((ch > 0xff) && (m_charSize == Char8))) { |
|
998 JumpList nonGreedyFailures; |
|
999 nonGreedyFailures.append(atEndOfInput()); |
|
1000 if (term->quantityCount != quantifyInfinite) { |
|
1001 if (term->quantityCount.hasOverflowed()) |
|
1002 return false; |
|
1003 nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet()))); |
|
1004 } |
|
1005 nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character)); |
|
1006 |
|
1007 add32(TrustedImm32(1), countRegister); |
|
1008 add32(TrustedImm32(1), index); |
|
1009 |
|
1010 jump(op.m_reentry); |
|
1011 nonGreedyFailures.link(this); |
|
1012 } |
|
1013 |
|
1014 sub32(countRegister, index); |
|
1015 m_backtrackingState.fallthrough(); |
|
1016 |
|
1017 return true; |
|
1018 } |
|
1019 |
|
1020 bool generateCharacterClassOnce(size_t opIndex) |
|
1021 { |
|
1022 YarrOp& op = m_ops[opIndex]; |
|
1023 PatternTerm* term = op.m_term; |
|
1024 |
|
1025 const RegisterID character = regT0; |
|
1026 |
|
1027 JumpList matchDest; |
|
1028 readCharacter(term->inputPosition - m_checked, character); |
|
1029 matchCharacterClass(character, matchDest, term->characterClass); |
|
1030 |
|
1031 if (term->invert()) |
|
1032 op.m_jumps.append(matchDest); |
|
1033 else { |
|
1034 op.m_jumps.append(jump()); |
|
1035 matchDest.link(this); |
|
1036 } |
|
1037 return true; |
|
1038 } |
|
1039 bool backtrackCharacterClassOnce(size_t opIndex) |
|
1040 { |
|
1041 return backtrackTermDefault(opIndex); |
|
1042 } |
|
1043 |
|
1044 bool generateCharacterClassFixed(size_t opIndex) |
|
1045 { |
|
1046 YarrOp& op = m_ops[opIndex]; |
|
1047 PatternTerm* term = op.m_term; |
|
1048 |
|
1049 const RegisterID character = regT0; |
|
1050 const RegisterID countRegister = regT1; |
|
1051 |
|
1052 move(index, countRegister); |
|
1053 if (term->quantityCount.hasOverflowed()) |
|
1054 return false; |
|
1055 sub32(Imm32(term->quantityCount.unsafeGet()), countRegister); |
|
1056 |
|
1057 Label loop(this); |
|
1058 JumpList matchDest; |
|
1059 |
|
1060 int offset; |
|
1061 Checked<int64_t> checkedOffset(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)); |
|
1062 |
|
1063 if (m_charSize == Char8) { |
|
1064 if ((Checked<int>(checkedOffset) * static_cast<int>(sizeof(char))).safeGet(offset)) |
|
1065 return false; |
|
1066 load8(BaseIndex(input, countRegister, TimesOne, offset), character); |
|
1067 } else { |
|
1068 if ((Checked<int>(checkedOffset) * static_cast<int>(sizeof(UChar))).safeGet(offset)) |
|
1069 return false; |
|
1070 load16(BaseIndex(input, countRegister, TimesTwo, offset), character); |
|
1071 } |
|
1072 matchCharacterClass(character, matchDest, term->characterClass); |
|
1073 |
|
1074 if (term->invert()) |
|
1075 op.m_jumps.append(matchDest); |
|
1076 else { |
|
1077 op.m_jumps.append(jump()); |
|
1078 matchDest.link(this); |
|
1079 } |
|
1080 |
|
1081 add32(TrustedImm32(1), countRegister); |
|
1082 branch32(NotEqual, countRegister, index).linkTo(loop, this); |
|
1083 return true; |
|
1084 } |
|
1085 bool backtrackCharacterClassFixed(size_t opIndex) |
|
1086 { |
|
1087 return backtrackTermDefault(opIndex); |
|
1088 } |
|
1089 |
|
1090 bool generateCharacterClassGreedy(size_t opIndex) |
|
1091 { |
|
1092 YarrOp& op = m_ops[opIndex]; |
|
1093 PatternTerm* term = op.m_term; |
|
1094 |
|
1095 const RegisterID character = regT0; |
|
1096 const RegisterID countRegister = regT1; |
|
1097 |
|
1098 move(TrustedImm32(0), countRegister); |
|
1099 |
|
1100 JumpList failures; |
|
1101 Label loop(this); |
|
1102 failures.append(atEndOfInput()); |
|
1103 |
|
1104 if (term->invert()) { |
|
1105 readCharacter(term->inputPosition - m_checked, character); |
|
1106 matchCharacterClass(character, failures, term->characterClass); |
|
1107 } else { |
|
1108 JumpList matchDest; |
|
1109 readCharacter(term->inputPosition - m_checked, character); |
|
1110 matchCharacterClass(character, matchDest, term->characterClass); |
|
1111 failures.append(jump()); |
|
1112 matchDest.link(this); |
|
1113 } |
|
1114 |
|
1115 add32(TrustedImm32(1), countRegister); |
|
1116 add32(TrustedImm32(1), index); |
|
1117 if (term->quantityCount != quantifyInfinite) { |
|
1118 unsigned quantityCount; |
|
1119 if (term->quantityCount.safeGet(quantityCount)) |
|
1120 return false; |
|
1121 branch32(NotEqual, countRegister, Imm32(quantityCount)).linkTo(loop, this); |
|
1122 failures.append(jump()); |
|
1123 } else |
|
1124 jump(loop); |
|
1125 |
|
1126 failures.link(this); |
|
1127 op.m_reentry = label(); |
|
1128 |
|
1129 storeToFrame(countRegister, term->frameLocation); |
|
1130 return true; |
|
1131 } |
|
1132 bool backtrackCharacterClassGreedy(size_t opIndex) |
|
1133 { |
|
1134 YarrOp& op = m_ops[opIndex]; |
|
1135 PatternTerm* term = op.m_term; |
|
1136 |
|
1137 const RegisterID countRegister = regT1; |
|
1138 |
|
1139 m_backtrackingState.link(this); |
|
1140 |
|
1141 loadFromFrame(term->frameLocation, countRegister); |
|
1142 m_backtrackingState.append(branchTest32(Zero, countRegister)); |
|
1143 sub32(TrustedImm32(1), countRegister); |
|
1144 sub32(TrustedImm32(1), index); |
|
1145 jump(op.m_reentry); |
|
1146 |
|
1147 return true; |
|
1148 } |
|
1149 |
|
1150 bool generateCharacterClassNonGreedy(size_t opIndex) |
|
1151 { |
|
1152 YarrOp& op = m_ops[opIndex]; |
|
1153 PatternTerm* term = op.m_term; |
|
1154 |
|
1155 const RegisterID countRegister = regT1; |
|
1156 |
|
1157 move(TrustedImm32(0), countRegister); |
|
1158 op.m_reentry = label(); |
|
1159 storeToFrame(countRegister, term->frameLocation); |
|
1160 return true; |
|
1161 } |
|
1162 bool backtrackCharacterClassNonGreedy(size_t opIndex) |
|
1163 { |
|
1164 YarrOp& op = m_ops[opIndex]; |
|
1165 PatternTerm* term = op.m_term; |
|
1166 |
|
1167 const RegisterID character = regT0; |
|
1168 const RegisterID countRegister = regT1; |
|
1169 |
|
1170 JumpList nonGreedyFailures; |
|
1171 |
|
1172 m_backtrackingState.link(this); |
|
1173 |
|
1174 loadFromFrame(term->frameLocation, countRegister); |
|
1175 |
|
1176 nonGreedyFailures.append(atEndOfInput()); |
|
1177 if (term->quantityCount.hasOverflowed()) |
|
1178 return false; |
|
1179 nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet()))); |
|
1180 |
|
1181 JumpList matchDest; |
|
1182 readCharacter(term->inputPosition - m_checked, character); |
|
1183 matchCharacterClass(character, matchDest, term->characterClass); |
|
1184 |
|
1185 if (term->invert()) |
|
1186 nonGreedyFailures.append(matchDest); |
|
1187 else { |
|
1188 nonGreedyFailures.append(jump()); |
|
1189 matchDest.link(this); |
|
1190 } |
|
1191 |
|
1192 add32(TrustedImm32(1), countRegister); |
|
1193 add32(TrustedImm32(1), index); |
|
1194 |
|
1195 jump(op.m_reentry); |
|
1196 |
|
1197 nonGreedyFailures.link(this); |
|
1198 sub32(countRegister, index); |
|
1199 m_backtrackingState.fallthrough(); |
|
1200 |
|
1201 return true; |
|
1202 } |
|
1203 |
|
1204 bool generateDotStarEnclosure(size_t opIndex) |
|
1205 { |
|
1206 YarrOp& op = m_ops[opIndex]; |
|
1207 PatternTerm* term = op.m_term; |
|
1208 |
|
1209 const RegisterID character = regT0; |
|
1210 const RegisterID matchPos = regT1; |
|
1211 |
|
1212 JumpList foundBeginningNewLine; |
|
1213 JumpList saveStartIndex; |
|
1214 JumpList foundEndingNewLine; |
|
1215 |
|
1216 ASSERT(!m_pattern.m_body->m_hasFixedSize); |
|
1217 getMatchStart(matchPos); |
|
1218 |
|
1219 saveStartIndex.append(branchTest32(Zero, matchPos)); |
|
1220 Label findBOLLoop(this); |
|
1221 sub32(TrustedImm32(1), matchPos); |
|
1222 if (m_charSize == Char8) |
|
1223 load8(BaseIndex(input, matchPos, TimesOne, 0), character); |
|
1224 else |
|
1225 load16(BaseIndex(input, matchPos, TimesTwo, 0), character); |
|
1226 matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass()); |
|
1227 branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this); |
|
1228 saveStartIndex.append(jump()); |
|
1229 |
|
1230 foundBeginningNewLine.link(this); |
|
1231 add32(TrustedImm32(1), matchPos); // Advance past newline |
|
1232 saveStartIndex.link(this); |
|
1233 |
|
1234 if (!m_pattern.m_multiline && term->anchors.bolAnchor) |
|
1235 op.m_jumps.append(branchTest32(NonZero, matchPos)); |
|
1236 |
|
1237 ASSERT(!m_pattern.m_body->m_hasFixedSize); |
|
1238 setMatchStart(matchPos); |
|
1239 |
|
1240 move(index, matchPos); |
|
1241 |
|
1242 Label findEOLLoop(this); |
|
1243 foundEndingNewLine.append(branch32(Equal, matchPos, length)); |
|
1244 if (m_charSize == Char8) |
|
1245 load8(BaseIndex(input, matchPos, TimesOne, 0), character); |
|
1246 else |
|
1247 load16(BaseIndex(input, matchPos, TimesTwo, 0), character); |
|
1248 matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass()); |
|
1249 add32(TrustedImm32(1), matchPos); |
|
1250 jump(findEOLLoop); |
|
1251 |
|
1252 foundEndingNewLine.link(this); |
|
1253 |
|
1254 if (!m_pattern.m_multiline && term->anchors.eolAnchor) |
|
1255 op.m_jumps.append(branch32(NotEqual, matchPos, length)); |
|
1256 |
|
1257 move(matchPos, index); |
|
1258 return true; |
|
1259 } |
|
1260 |
|
1261 bool backtrackDotStarEnclosure(size_t opIndex) |
|
1262 { |
|
1263 return backtrackTermDefault(opIndex); |
|
1264 } |
|
1265 |
|
1266 // Code generation/backtracking for simple terms |
|
1267 // (pattern characters, character classes, and assertions). |
|
1268 // These methods farm out work to the set of functions above. |
|
1269 bool generateTerm(size_t opIndex) |
|
1270 { |
|
1271 YarrOp& op = m_ops[opIndex]; |
|
1272 PatternTerm* term = op.m_term; |
|
1273 |
|
1274 switch (term->type) { |
|
1275 case PatternTerm::TypePatternCharacter: |
|
1276 switch (term->quantityType) { |
|
1277 case QuantifierFixedCount: |
|
1278 if (term->quantityCount == 1) |
|
1279 return generatePatternCharacterOnce(opIndex); |
|
1280 else |
|
1281 return generatePatternCharacterFixed(opIndex); |
|
1282 break; |
|
1283 case QuantifierGreedy: |
|
1284 return generatePatternCharacterGreedy(opIndex); |
|
1285 case QuantifierNonGreedy: |
|
1286 return generatePatternCharacterNonGreedy(opIndex); |
|
1287 } |
|
1288 break; |
|
1289 |
|
1290 case PatternTerm::TypeCharacterClass: |
|
1291 switch (term->quantityType) { |
|
1292 case QuantifierFixedCount: |
|
1293 if (term->quantityCount == 1) |
|
1294 return generateCharacterClassOnce(opIndex); |
|
1295 else |
|
1296 return generateCharacterClassFixed(opIndex); |
|
1297 break; |
|
1298 case QuantifierGreedy: |
|
1299 return generateCharacterClassGreedy(opIndex); |
|
1300 case QuantifierNonGreedy: |
|
1301 return generateCharacterClassNonGreedy(opIndex); |
|
1302 } |
|
1303 break; |
|
1304 |
|
1305 case PatternTerm::TypeAssertionBOL: |
|
1306 return generateAssertionBOL(opIndex); |
|
1307 |
|
1308 case PatternTerm::TypeAssertionEOL: |
|
1309 return generateAssertionEOL(opIndex); |
|
1310 |
|
1311 case PatternTerm::TypeAssertionWordBoundary: |
|
1312 return generateAssertionWordBoundary(opIndex); |
|
1313 |
|
1314 case PatternTerm::TypeForwardReference: |
|
1315 return true; |
|
1316 |
|
1317 case PatternTerm::TypeParenthesesSubpattern: |
|
1318 case PatternTerm::TypeParentheticalAssertion: |
|
1319 ASSERT_NOT_REACHED(); |
|
1320 return false; |
|
1321 case PatternTerm::TypeBackReference: |
|
1322 return false; |
|
1323 case PatternTerm::TypeDotStarEnclosure: |
|
1324 return generateDotStarEnclosure(opIndex); |
|
1325 } |
|
1326 |
|
1327 return false; |
|
1328 } |
|
1329 bool backtrackTerm(size_t opIndex) |
|
1330 { |
|
1331 YarrOp& op = m_ops[opIndex]; |
|
1332 PatternTerm* term = op.m_term; |
|
1333 |
|
1334 switch (term->type) { |
|
1335 case PatternTerm::TypePatternCharacter: |
|
1336 switch (term->quantityType) { |
|
1337 case QuantifierFixedCount: |
|
1338 if (term->quantityCount == 1) |
|
1339 return backtrackPatternCharacterOnce(opIndex); |
|
1340 else |
|
1341 return backtrackPatternCharacterFixed(opIndex); |
|
1342 case QuantifierGreedy: |
|
1343 return backtrackPatternCharacterGreedy(opIndex); |
|
1344 case QuantifierNonGreedy: |
|
1345 return backtrackPatternCharacterNonGreedy(opIndex); |
|
1346 } |
|
1347 break; |
|
1348 |
|
1349 case PatternTerm::TypeCharacterClass: |
|
1350 switch (term->quantityType) { |
|
1351 case QuantifierFixedCount: |
|
1352 if (term->quantityCount == 1) |
|
1353 return backtrackCharacterClassOnce(opIndex); |
|
1354 else |
|
1355 return backtrackCharacterClassFixed(opIndex); |
|
1356 case QuantifierGreedy: |
|
1357 return backtrackCharacterClassGreedy(opIndex); |
|
1358 case QuantifierNonGreedy: |
|
1359 return backtrackCharacterClassNonGreedy(opIndex); |
|
1360 } |
|
1361 break; |
|
1362 |
|
1363 case PatternTerm::TypeAssertionBOL: |
|
1364 return backtrackAssertionBOL(opIndex); |
|
1365 |
|
1366 case PatternTerm::TypeAssertionEOL: |
|
1367 return backtrackAssertionEOL(opIndex); |
|
1368 |
|
1369 case PatternTerm::TypeAssertionWordBoundary: |
|
1370 return backtrackAssertionWordBoundary(opIndex); |
|
1371 |
|
1372 case PatternTerm::TypeForwardReference: |
|
1373 return true; |
|
1374 |
|
1375 case PatternTerm::TypeParenthesesSubpattern: |
|
1376 case PatternTerm::TypeParentheticalAssertion: |
|
1377 ASSERT_NOT_REACHED(); |
|
1378 return false; |
|
1379 |
|
1380 case PatternTerm::TypeDotStarEnclosure: |
|
1381 return backtrackDotStarEnclosure(opIndex); |
|
1382 |
|
1383 case PatternTerm::TypeBackReference: |
|
1384 return false; |
|
1385 } |
|
1386 return true; |
|
1387 } |
|
1388 |
|
1389 bool generate() |
|
1390 { |
|
1391 // Forwards generate the matching code. |
|
1392 ASSERT(m_ops.size()); |
|
1393 size_t opIndex = 0; |
|
1394 |
|
1395 do { |
|
1396 YarrOp& op = m_ops[opIndex]; |
|
1397 switch (op.m_op) { |
|
1398 |
|
1399 case OpTerm: |
|
1400 if (!generateTerm(opIndex)) |
|
1401 return false; |
|
1402 break; |
|
1403 |
|
1404 // OpBodyAlternativeBegin/Next/End |
|
1405 // |
|
1406 // These nodes wrap the set of alternatives in the body of the regular expression. |
|
1407 // There may be either one or two chains of OpBodyAlternative nodes, one representing |
|
1408 // the 'once through' sequence of alternatives (if any exist), and one representing |
|
1409 // the repeating alternatives (again, if any exist). |
|
1410 // |
|
1411 // Upon normal entry to the Begin alternative, we will check that input is available. |
|
1412 // Reentry to the Begin alternative will take place after the check has taken place, |
|
1413 // and will assume that the input position has already been progressed as appropriate. |
|
1414 // |
|
1415 // Entry to subsequent Next/End alternatives occurs when the prior alternative has |
|
1416 // successfully completed a match - return a success state from JIT code. |
|
1417 // |
|
1418 // Next alternatives allow for reentry optimized to suit backtracking from its |
|
1419 // preceding alternative. It expects the input position to still be set to a position |
|
1420 // appropriate to its predecessor, and it will only perform an input check if the |
|
1421 // predecessor had a minimum size less than its own. |
|
1422 // |
|
1423 // In the case 'once through' expressions, the End node will also have a reentry |
|
1424 // point to jump to when the last alternative fails. Again, this expects the input |
|
1425 // position to still reflect that expected by the prior alternative. |
|
1426 case OpBodyAlternativeBegin: { |
|
1427 PatternAlternative* alternative = op.m_alternative; |
|
1428 |
|
1429 // Upon entry at the head of the set of alternatives, check if input is available |
|
1430 // to run the first alternative. (This progresses the input position). |
|
1431 op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize)); |
|
1432 // We will reenter after the check, and assume the input position to have been |
|
1433 // set as appropriate to this alternative. |
|
1434 op.m_reentry = label(); |
|
1435 |
|
1436 if (alternative->m_minimumSize > INT_MAX) |
|
1437 return false; |
|
1438 m_checked = alternative->m_minimumSize; |
|
1439 break; |
|
1440 } |
|
1441 case OpBodyAlternativeNext: |
|
1442 case OpBodyAlternativeEnd: { |
|
1443 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; |
|
1444 PatternAlternative* alternative = op.m_alternative; |
|
1445 |
|
1446 // If we get here, the prior alternative matched - return success. |
|
1447 |
|
1448 // Adjust the stack pointer to remove the pattern's frame. |
|
1449 #if !WTF_CPU_SPARC |
|
1450 removeCallFrame(); |
|
1451 #endif |
|
1452 |
|
1453 // Load appropriate values into the return register and the first output |
|
1454 // slot, and return. In the case of pattern with a fixed size, we will |
|
1455 // not have yet set the value in the first |
|
1456 ASSERT(index != returnRegister); |
|
1457 if (m_pattern.m_body->m_hasFixedSize) { |
|
1458 move(index, returnRegister); |
|
1459 if (priorAlternative->m_minimumSize) |
|
1460 sub32(Imm32(priorAlternative->m_minimumSize), returnRegister); |
|
1461 if (compileMode == IncludeSubpatterns) |
|
1462 store32(returnRegister, output); |
|
1463 } else |
|
1464 getMatchStart(returnRegister); |
|
1465 if (compileMode == IncludeSubpatterns) |
|
1466 store32(index, Address(output, 4)); |
|
1467 #if WTF_CPU_X86_64 |
|
1468 // upper 32bit to 0 |
|
1469 move32(returnRegister, returnRegister); |
|
1470 lshiftPtr(Imm32(32), index); |
|
1471 orPtr(index, returnRegister); |
|
1472 #else |
|
1473 move(index, returnRegister2); |
|
1474 #endif |
|
1475 |
|
1476 generateReturn(); |
|
1477 |
|
1478 // This is the divide between the tail of the prior alternative, above, and |
|
1479 // the head of the subsequent alternative, below. |
|
1480 |
|
1481 if (op.m_op == OpBodyAlternativeNext) { |
|
1482 // This is the reentry point for the Next alternative. We expect any code |
|
1483 // that jumps here to do so with the input position matching that of the |
|
1484 // PRIOR alteranative, and we will only check input availability if we |
|
1485 // need to progress it forwards. |
|
1486 op.m_reentry = label(); |
|
1487 if (alternative->m_minimumSize > priorAlternative->m_minimumSize) { |
|
1488 add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index); |
|
1489 op.m_jumps.append(jumpIfNoAvailableInput()); |
|
1490 } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize) |
|
1491 sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index); |
|
1492 } else if (op.m_nextOp == notFound) { |
|
1493 // This is the reentry point for the End of 'once through' alternatives, |
|
1494 // jumped to when the last alternative fails to match. |
|
1495 op.m_reentry = label(); |
|
1496 sub32(Imm32(priorAlternative->m_minimumSize), index); |
|
1497 } |
|
1498 |
|
1499 if (op.m_op == OpBodyAlternativeNext) |
|
1500 m_checked += alternative->m_minimumSize; |
|
1501 m_checked -= priorAlternative->m_minimumSize; |
|
1502 break; |
|
1503 } |
|
1504 |
|
1505 // OpSimpleNestedAlternativeBegin/Next/End |
|
1506 // OpNestedAlternativeBegin/Next/End |
|
1507 // |
|
1508 // These nodes are used to handle sets of alternatives that are nested within |
|
1509 // subpatterns and parenthetical assertions. The 'simple' forms are used where |
|
1510 // we do not need to be able to backtrack back into any alternative other than |
|
1511 // the last, the normal forms allow backtracking into any alternative. |
|
1512 // |
|
1513 // Each Begin/Next node is responsible for planting an input check to ensure |
|
1514 // sufficient input is available on entry. Next nodes additionally need to |
|
1515 // jump to the end - Next nodes use the End node's m_jumps list to hold this |
|
1516 // set of jumps. |
|
1517 // |
|
1518 // In the non-simple forms, successful alternative matches must store a |
|
1519 // 'return address' using a DataLabelPtr, used to store the address to jump |
|
1520 // to when backtracking, to get to the code for the appropriate alternative. |
|
1521 case OpSimpleNestedAlternativeBegin: |
|
1522 case OpNestedAlternativeBegin: { |
|
1523 PatternTerm* term = op.m_term; |
|
1524 PatternAlternative* alternative = op.m_alternative; |
|
1525 PatternDisjunction* disjunction = term->parentheses.disjunction; |
|
1526 |
|
1527 // Calculate how much input we need to check for, and if non-zero check. |
|
1528 op.m_checkAdjust = alternative->m_minimumSize; |
|
1529 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion)) |
|
1530 op.m_checkAdjust -= disjunction->m_minimumSize; |
|
1531 if (op.m_checkAdjust) |
|
1532 op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust)); |
|
1533 |
|
1534 m_checked += op.m_checkAdjust; |
|
1535 break; |
|
1536 } |
|
1537 case OpSimpleNestedAlternativeNext: |
|
1538 case OpNestedAlternativeNext: { |
|
1539 PatternTerm* term = op.m_term; |
|
1540 PatternAlternative* alternative = op.m_alternative; |
|
1541 PatternDisjunction* disjunction = term->parentheses.disjunction; |
|
1542 |
|
1543 // In the non-simple case, store a 'return address' so we can backtrack correctly. |
|
1544 if (op.m_op == OpNestedAlternativeNext) { |
|
1545 unsigned parenthesesFrameLocation = term->frameLocation; |
|
1546 unsigned alternativeFrameLocation = parenthesesFrameLocation; |
|
1547 if (term->quantityType != QuantifierFixedCount) |
|
1548 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; |
|
1549 op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation); |
|
1550 } |
|
1551 |
|
1552 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) { |
|
1553 // If the previous alternative matched without consuming characters then |
|
1554 // backtrack to try to match while consumming some input. |
|
1555 op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); |
|
1556 } |
|
1557 |
|
1558 // If we reach here then the last alternative has matched - jump to the |
|
1559 // End node, to skip over any further alternatives. |
|
1560 // |
|
1561 // FIXME: this is logically O(N^2) (though N can be expected to be very |
|
1562 // small). We could avoid this either by adding an extra jump to the JIT |
|
1563 // data structures, or by making backtracking code that jumps to Next |
|
1564 // alternatives are responsible for checking that input is available (if |
|
1565 // we didn't need to plant the input checks, then m_jumps would be free). |
|
1566 YarrOp* endOp = &m_ops[op.m_nextOp]; |
|
1567 while (endOp->m_nextOp != notFound) { |
|
1568 ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext); |
|
1569 endOp = &m_ops[endOp->m_nextOp]; |
|
1570 } |
|
1571 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd); |
|
1572 endOp->m_jumps.append(jump()); |
|
1573 |
|
1574 // This is the entry point for the next alternative. |
|
1575 op.m_reentry = label(); |
|
1576 |
|
1577 // Calculate how much input we need to check for, and if non-zero check. |
|
1578 op.m_checkAdjust = alternative->m_minimumSize; |
|
1579 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion)) |
|
1580 op.m_checkAdjust -= disjunction->m_minimumSize; |
|
1581 if (op.m_checkAdjust) |
|
1582 op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust)); |
|
1583 |
|
1584 YarrOp& lastOp = m_ops[op.m_previousOp]; |
|
1585 m_checked -= lastOp.m_checkAdjust; |
|
1586 m_checked += op.m_checkAdjust; |
|
1587 break; |
|
1588 } |
|
1589 case OpSimpleNestedAlternativeEnd: |
|
1590 case OpNestedAlternativeEnd: { |
|
1591 PatternTerm* term = op.m_term; |
|
1592 |
|
1593 // In the non-simple case, store a 'return address' so we can backtrack correctly. |
|
1594 if (op.m_op == OpNestedAlternativeEnd) { |
|
1595 unsigned parenthesesFrameLocation = term->frameLocation; |
|
1596 unsigned alternativeFrameLocation = parenthesesFrameLocation; |
|
1597 if (term->quantityType != QuantifierFixedCount) |
|
1598 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; |
|
1599 op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation); |
|
1600 } |
|
1601 |
|
1602 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) { |
|
1603 // If the previous alternative matched without consuming characters then |
|
1604 // backtrack to try to match while consumming some input. |
|
1605 op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); |
|
1606 } |
|
1607 |
|
1608 // If this set of alternatives contains more than one alternative, |
|
1609 // then the Next nodes will have planted jumps to the End, and added |
|
1610 // them to this node's m_jumps list. |
|
1611 op.m_jumps.link(this); |
|
1612 op.m_jumps.clear(); |
|
1613 |
|
1614 YarrOp& lastOp = m_ops[op.m_previousOp]; |
|
1615 m_checked -= lastOp.m_checkAdjust; |
|
1616 break; |
|
1617 } |
|
1618 |
|
1619 // OpParenthesesSubpatternOnceBegin/End |
|
1620 // |
|
1621 // These nodes support (optionally) capturing subpatterns, that have a |
|
1622 // quantity count of 1 (this covers fixed once, and ?/?? quantifiers). |
|
1623 case OpParenthesesSubpatternOnceBegin: { |
|
1624 PatternTerm* term = op.m_term; |
|
1625 unsigned parenthesesFrameLocation = term->frameLocation; |
|
1626 const RegisterID indexTemporary = regT0; |
|
1627 ASSERT(term->quantityCount == 1); |
|
1628 |
|
1629 // Upon entry to a Greedy quantified set of parenthese store the index. |
|
1630 // We'll use this for two purposes: |
|
1631 // - To indicate which iteration we are on of mathing the remainder of |
|
1632 // the expression after the parentheses - the first, including the |
|
1633 // match within the parentheses, or the second having skipped over them. |
|
1634 // - To check for empty matches, which must be rejected. |
|
1635 // |
|
1636 // At the head of a NonGreedy set of parentheses we'll immediately set the |
|
1637 // value on the stack to -1 (indicating a match skipping the subpattern), |
|
1638 // and plant a jump to the end. We'll also plant a label to backtrack to |
|
1639 // to reenter the subpattern later, with a store to set up index on the |
|
1640 // second iteration. |
|
1641 // |
|
1642 // FIXME: for capturing parens, could use the index in the capture array? |
|
1643 if (term->quantityType == QuantifierGreedy) |
|
1644 storeToFrame(index, parenthesesFrameLocation); |
|
1645 else if (term->quantityType == QuantifierNonGreedy) { |
|
1646 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); |
|
1647 op.m_jumps.append(jump()); |
|
1648 op.m_reentry = label(); |
|
1649 storeToFrame(index, parenthesesFrameLocation); |
|
1650 } |
|
1651 |
|
1652 // If the parenthese are capturing, store the starting index value to the |
|
1653 // captures array, offsetting as necessary. |
|
1654 // |
|
1655 // FIXME: could avoid offsetting this value in JIT code, apply |
|
1656 // offsets only afterwards, at the point the results array is |
|
1657 // being accessed. |
|
1658 if (term->capture() && compileMode == IncludeSubpatterns) { |
|
1659 int inputOffset = term->inputPosition - m_checked; |
|
1660 if (term->quantityType == QuantifierFixedCount) |
|
1661 inputOffset -= term->parentheses.disjunction->m_minimumSize; |
|
1662 if (inputOffset) { |
|
1663 move(index, indexTemporary); |
|
1664 add32(Imm32(inputOffset), indexTemporary); |
|
1665 setSubpatternStart(indexTemporary, term->parentheses.subpatternId); |
|
1666 } else |
|
1667 setSubpatternStart(index, term->parentheses.subpatternId); |
|
1668 } |
|
1669 break; |
|
1670 } |
|
1671 case OpParenthesesSubpatternOnceEnd: { |
|
1672 PatternTerm* term = op.m_term; |
|
1673 const RegisterID indexTemporary = regT0; |
|
1674 ASSERT(term->quantityCount == 1); |
|
1675 |
|
1676 #ifndef NDEBUG |
|
1677 // Runtime ASSERT to make sure that the nested alternative handled the |
|
1678 // "no input consumed" check. |
|
1679 if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) { |
|
1680 Jump pastBreakpoint; |
|
1681 pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); |
|
1682 breakpoint(); |
|
1683 pastBreakpoint.link(this); |
|
1684 } |
|
1685 #endif |
|
1686 |
|
1687 // If the parenthese are capturing, store the ending index value to the |
|
1688 // captures array, offsetting as necessary. |
|
1689 // |
|
1690 // FIXME: could avoid offsetting this value in JIT code, apply |
|
1691 // offsets only afterwards, at the point the results array is |
|
1692 // being accessed. |
|
1693 if (term->capture() && compileMode == IncludeSubpatterns) { |
|
1694 int inputOffset = term->inputPosition - m_checked; |
|
1695 if (inputOffset) { |
|
1696 move(index, indexTemporary); |
|
1697 add32(Imm32(inputOffset), indexTemporary); |
|
1698 setSubpatternEnd(indexTemporary, term->parentheses.subpatternId); |
|
1699 } else |
|
1700 setSubpatternEnd(index, term->parentheses.subpatternId); |
|
1701 } |
|
1702 |
|
1703 // If the parentheses are quantified Greedy then add a label to jump back |
|
1704 // to if get a failed match from after the parentheses. For NonGreedy |
|
1705 // parentheses, link the jump from before the subpattern to here. |
|
1706 if (term->quantityType == QuantifierGreedy) |
|
1707 op.m_reentry = label(); |
|
1708 else if (term->quantityType == QuantifierNonGreedy) { |
|
1709 YarrOp& beginOp = m_ops[op.m_previousOp]; |
|
1710 beginOp.m_jumps.link(this); |
|
1711 } |
|
1712 break; |
|
1713 } |
|
1714 |
|
1715 // OpParenthesesSubpatternTerminalBegin/End |
|
1716 case OpParenthesesSubpatternTerminalBegin: { |
|
1717 PatternTerm* term = op.m_term; |
|
1718 ASSERT(term->quantityType == QuantifierGreedy); |
|
1719 ASSERT(term->quantityCount == quantifyInfinite); |
|
1720 ASSERT(!term->capture()); |
|
1721 |
|
1722 // Upon entry set a label to loop back to. |
|
1723 op.m_reentry = label(); |
|
1724 |
|
1725 // Store the start index of the current match; we need to reject zero |
|
1726 // length matches. |
|
1727 storeToFrame(index, term->frameLocation); |
|
1728 break; |
|
1729 } |
|
1730 case OpParenthesesSubpatternTerminalEnd: { |
|
1731 YarrOp& beginOp = m_ops[op.m_previousOp]; |
|
1732 #ifndef NDEBUG |
|
1733 PatternTerm* term = op.m_term; |
|
1734 |
|
1735 // Runtime ASSERT to make sure that the nested alternative handled the |
|
1736 // "no input consumed" check. |
|
1737 Jump pastBreakpoint; |
|
1738 pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*))); |
|
1739 breakpoint(); |
|
1740 pastBreakpoint.link(this); |
|
1741 #endif |
|
1742 |
|
1743 // We know that the match is non-zero, we can accept it and |
|
1744 // loop back up to the head of the subpattern. |
|
1745 jump(beginOp.m_reentry); |
|
1746 |
|
1747 // This is the entry point to jump to when we stop matching - we will |
|
1748 // do so once the subpattern cannot match any more. |
|
1749 op.m_reentry = label(); |
|
1750 break; |
|
1751 } |
|
1752 |
|
1753 // OpParentheticalAssertionBegin/End |
|
1754 case OpParentheticalAssertionBegin: { |
|
1755 PatternTerm* term = op.m_term; |
|
1756 |
|
1757 // Store the current index - assertions should not update index, so |
|
1758 // we will need to restore it upon a successful match. |
|
1759 unsigned parenthesesFrameLocation = term->frameLocation; |
|
1760 storeToFrame(index, parenthesesFrameLocation); |
|
1761 |
|
1762 // Check |
|
1763 op.m_checkAdjust = m_checked - term->inputPosition; |
|
1764 if (op.m_checkAdjust) |
|
1765 sub32(Imm32(op.m_checkAdjust), index); |
|
1766 |
|
1767 m_checked -= op.m_checkAdjust; |
|
1768 break; |
|
1769 } |
|
1770 case OpParentheticalAssertionEnd: { |
|
1771 PatternTerm* term = op.m_term; |
|
1772 |
|
1773 // Restore the input index value. |
|
1774 unsigned parenthesesFrameLocation = term->frameLocation; |
|
1775 loadFromFrame(parenthesesFrameLocation, index); |
|
1776 |
|
1777 // If inverted, a successful match of the assertion must be treated |
|
1778 // as a failure, so jump to backtracking. |
|
1779 if (term->invert()) { |
|
1780 op.m_jumps.append(jump()); |
|
1781 op.m_reentry = label(); |
|
1782 } |
|
1783 |
|
1784 YarrOp& lastOp = m_ops[op.m_previousOp]; |
|
1785 m_checked += lastOp.m_checkAdjust; |
|
1786 break; |
|
1787 } |
|
1788 |
|
1789 case OpMatchFailed: |
|
1790 #if !WTF_CPU_SPARC |
|
1791 removeCallFrame(); |
|
1792 #endif |
|
1793 #if WTF_CPU_X86_64 |
|
1794 move(TrustedImm32(int(WTF::notFound)), returnRegister); |
|
1795 #else |
|
1796 move(TrustedImmPtr((void*)WTF::notFound), returnRegister); |
|
1797 move(TrustedImm32(0), returnRegister2); |
|
1798 #endif |
|
1799 generateReturn(); |
|
1800 break; |
|
1801 } |
|
1802 |
|
1803 ++opIndex; |
|
1804 } while (opIndex < m_ops.size()); |
|
1805 |
|
1806 return true; |
|
1807 } |
|
1808 |
|
1809 bool backtrack() |
|
1810 { |
|
1811 // Backwards generate the backtracking code. |
|
1812 size_t opIndex = m_ops.size(); |
|
1813 ASSERT(opIndex); |
|
1814 |
|
1815 do { |
|
1816 --opIndex; |
|
1817 YarrOp& op = m_ops[opIndex]; |
|
1818 switch (op.m_op) { |
|
1819 |
|
1820 case OpTerm: |
|
1821 if (!backtrackTerm(opIndex)) |
|
1822 return false; |
|
1823 break; |
|
1824 |
|
1825 // OpBodyAlternativeBegin/Next/End |
|
1826 // |
|
1827 // For each Begin/Next node representing an alternative, we need to decide what to do |
|
1828 // in two circumstances: |
|
1829 // - If we backtrack back into this node, from within the alternative. |
|
1830 // - If the input check at the head of the alternative fails (if this exists). |
|
1831 // |
|
1832 // We treat these two cases differently since in the former case we have slightly |
|
1833 // more information - since we are backtracking out of a prior alternative we know |
|
1834 // that at least enough input was available to run it. For example, given the regular |
|
1835 // expression /a|b/, if we backtrack out of the first alternative (a failed pattern |
|
1836 // character match of 'a'), then we need not perform an additional input availability |
|
1837 // check before running the second alternative. |
|
1838 // |
|
1839 // Backtracking required differs for the last alternative, which in the case of the |
|
1840 // repeating set of alternatives must loop. The code generated for the last alternative |
|
1841 // will also be used to handle all input check failures from any prior alternatives - |
|
1842 // these require similar functionality, in seeking the next available alternative for |
|
1843 // which there is sufficient input. |
|
1844 // |
|
1845 // Since backtracking of all other alternatives simply requires us to link backtracks |
|
1846 // to the reentry point for the subsequent alternative, we will only be generating any |
|
1847 // code when backtracking the last alternative. |
|
1848 case OpBodyAlternativeBegin: |
|
1849 case OpBodyAlternativeNext: { |
|
1850 PatternAlternative* alternative = op.m_alternative; |
|
1851 |
|
1852 if (op.m_op == OpBodyAlternativeNext) { |
|
1853 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; |
|
1854 m_checked += priorAlternative->m_minimumSize; |
|
1855 } |
|
1856 m_checked -= alternative->m_minimumSize; |
|
1857 |
|
1858 // Is this the last alternative? If not, then if we backtrack to this point we just |
|
1859 // need to jump to try to match the next alternative. |
|
1860 if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) { |
|
1861 m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this); |
|
1862 break; |
|
1863 } |
|
1864 YarrOp& endOp = m_ops[op.m_nextOp]; |
|
1865 |
|
1866 YarrOp* beginOp = &op; |
|
1867 while (beginOp->m_op != OpBodyAlternativeBegin) { |
|
1868 ASSERT(beginOp->m_op == OpBodyAlternativeNext); |
|
1869 beginOp = &m_ops[beginOp->m_previousOp]; |
|
1870 } |
|
1871 |
|
1872 bool onceThrough = endOp.m_nextOp == notFound; |
|
1873 |
|
1874 // First, generate code to handle cases where we backtrack out of an attempted match |
|
1875 // of the last alternative. If this is a 'once through' set of alternatives then we |
|
1876 // have nothing to do - link this straight through to the End. |
|
1877 if (onceThrough) |
|
1878 m_backtrackingState.linkTo(endOp.m_reentry, this); |
|
1879 else { |
|
1880 // If we don't need to move the input poistion, and the pattern has a fixed size |
|
1881 // (in which case we omit the store of the start index until the pattern has matched) |
|
1882 // then we can just link the backtrack out of the last alternative straight to the |
|
1883 // head of the first alternative. |
|
1884 if (m_pattern.m_body->m_hasFixedSize |
|
1885 && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) |
|
1886 && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1)) |
|
1887 m_backtrackingState.linkTo(beginOp->m_reentry, this); |
|
1888 else { |
|
1889 // We need to generate a trampoline of code to execute before looping back |
|
1890 // around to the first alternative. |
|
1891 m_backtrackingState.link(this); |
|
1892 |
|
1893 // If the pattern size is not fixed, then store the start index, for use if we match. |
|
1894 if (!m_pattern.m_body->m_hasFixedSize) { |
|
1895 if (alternative->m_minimumSize == 1) |
|
1896 setMatchStart(index); |
|
1897 else { |
|
1898 move(index, regT0); |
|
1899 if (alternative->m_minimumSize) |
|
1900 sub32(Imm32(alternative->m_minimumSize - 1), regT0); |
|
1901 else |
|
1902 add32(TrustedImm32(1), regT0); |
|
1903 setMatchStart(regT0); |
|
1904 } |
|
1905 } |
|
1906 |
|
1907 // Generate code to loop. Check whether the last alternative is longer than the |
|
1908 // first (e.g. /a|xy/ or /a|xyz/). |
|
1909 if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) { |
|
1910 // We want to loop, and increment input position. If the delta is 1, it is |
|
1911 // already correctly incremented, if more than one then decrement as appropriate. |
|
1912 unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize; |
|
1913 ASSERT(delta); |
|
1914 if (delta != 1) |
|
1915 sub32(Imm32(delta - 1), index); |
|
1916 jump(beginOp->m_reentry); |
|
1917 } else { |
|
1918 // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot |
|
1919 // be sufficent input available to handle this, so just fall through. |
|
1920 unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize; |
|
1921 if (delta != 0xFFFFFFFFu) { |
|
1922 // We need to check input because we are incrementing the input. |
|
1923 add32(Imm32(delta + 1), index); |
|
1924 checkInput().linkTo(beginOp->m_reentry, this); |
|
1925 } |
|
1926 } |
|
1927 } |
|
1928 } |
|
1929 |
|
1930 // We can reach this point in the code in two ways: |
|
1931 // - Fallthrough from the code above (a repeating alternative backtracked out of its |
|
1932 // last alternative, and did not have sufficent input to run the first). |
|
1933 // - We will loop back up to the following label when a releating alternative loops, |
|
1934 // following a failed input check. |
|
1935 // |
|
1936 // Either way, we have just failed the input check for the first alternative. |
|
1937 Label firstInputCheckFailed(this); |
|
1938 |
|
1939 // Generate code to handle input check failures from alternatives except the last. |
|
1940 // prevOp is the alternative we're handling a bail out from (initially Begin), and |
|
1941 // nextOp is the alternative we will be attempting to reenter into. |
|
1942 // |
|
1943 // We will link input check failures from the forwards matching path back to the code |
|
1944 // that can handle them. |
|
1945 YarrOp* prevOp = beginOp; |
|
1946 YarrOp* nextOp = &m_ops[beginOp->m_nextOp]; |
|
1947 while (nextOp->m_op != OpBodyAlternativeEnd) { |
|
1948 prevOp->m_jumps.link(this); |
|
1949 |
|
1950 // We only get here if an input check fails, it is only worth checking again |
|
1951 // if the next alternative has a minimum size less than the last. |
|
1952 if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) { |
|
1953 // FIXME: if we added an extra label to YarrOp, we could avoid needing to |
|
1954 // subtract delta back out, and reduce this code. Should performance test |
|
1955 // the benefit of this. |
|
1956 unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize; |
|
1957 sub32(Imm32(delta), index); |
|
1958 Jump fail = jumpIfNoAvailableInput(); |
|
1959 add32(Imm32(delta), index); |
|
1960 jump(nextOp->m_reentry); |
|
1961 fail.link(this); |
|
1962 } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize) |
|
1963 add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index); |
|
1964 prevOp = nextOp; |
|
1965 nextOp = &m_ops[nextOp->m_nextOp]; |
|
1966 } |
|
1967 |
|
1968 // We fall through to here if there is insufficient input to run the last alternative. |
|
1969 |
|
1970 // If there is insufficient input to run the last alternative, then for 'once through' |
|
1971 // alternatives we are done - just jump back up into the forwards matching path at the End. |
|
1972 if (onceThrough) { |
|
1973 op.m_jumps.linkTo(endOp.m_reentry, this); |
|
1974 jump(endOp.m_reentry); |
|
1975 break; |
|
1976 } |
|
1977 |
|
1978 // For repeating alternatives, link any input check failure from the last alternative to |
|
1979 // this point. |
|
1980 op.m_jumps.link(this); |
|
1981 |
|
1982 bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize; |
|
1983 |
|
1984 // Check for cases where input position is already incremented by 1 for the last |
|
1985 // alternative (this is particularly useful where the minimum size of the body |
|
1986 // disjunction is 0, e.g. /a*|b/). |
|
1987 if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) { |
|
1988 // index is already incremented by 1, so just store it now! |
|
1989 setMatchStart(index); |
|
1990 needsToUpdateMatchStart = false; |
|
1991 } |
|
1992 |
|
1993 // Check whether there is sufficient input to loop. Increment the input position by |
|
1994 // one, and check. Also add in the minimum disjunction size before checking - there |
|
1995 // is no point in looping if we're just going to fail all the input checks around |
|
1996 // the next iteration. |
|
1997 ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize); |
|
1998 if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) { |
|
1999 // If the last alternative had the same minimum size as the disjunction, |
|
2000 // just simply increment input pos by 1, no adjustment based on minimum size. |
|
2001 add32(TrustedImm32(1), index); |
|
2002 } else { |
|
2003 // If the minumum for the last alternative was one greater than than that |
|
2004 // for the disjunction, we're already progressed by 1, nothing to do! |
|
2005 unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1; |
|
2006 if (delta) |
|
2007 sub32(Imm32(delta), index); |
|
2008 } |
|
2009 Jump matchFailed = jumpIfNoAvailableInput(); |
|
2010 |
|
2011 if (needsToUpdateMatchStart) { |
|
2012 if (!m_pattern.m_body->m_minimumSize) |
|
2013 setMatchStart(index); |
|
2014 else { |
|
2015 move(index, regT0); |
|
2016 sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0); |
|
2017 setMatchStart(regT0); |
|
2018 } |
|
2019 } |
|
2020 |
|
2021 // Calculate how much more input the first alternative requires than the minimum |
|
2022 // for the body as a whole. If no more is needed then we dont need an additional |
|
2023 // input check here - jump straight back up to the start of the first alternative. |
|
2024 if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) |
|
2025 jump(beginOp->m_reentry); |
|
2026 else { |
|
2027 if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize) |
|
2028 add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index); |
|
2029 else |
|
2030 sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index); |
|
2031 checkInput().linkTo(beginOp->m_reentry, this); |
|
2032 jump(firstInputCheckFailed); |
|
2033 } |
|
2034 |
|
2035 // We jump to here if we iterate to the point that there is insufficient input to |
|
2036 // run any matches, and need to return a failure state from JIT code. |
|
2037 matchFailed.link(this); |
|
2038 |
|
2039 #if !WTF_CPU_SPARC |
|
2040 removeCallFrame(); |
|
2041 #endif |
|
2042 #if WTF_CPU_X86_64 |
|
2043 move(TrustedImm32(int(WTF::notFound)), returnRegister); |
|
2044 #else |
|
2045 move(TrustedImmPtr((void*)WTF::notFound), returnRegister); |
|
2046 move(TrustedImm32(0), returnRegister2); |
|
2047 #endif |
|
2048 generateReturn(); |
|
2049 break; |
|
2050 } |
|
2051 case OpBodyAlternativeEnd: { |
|
2052 // We should never backtrack back into a body disjunction. |
|
2053 ASSERT(m_backtrackingState.isEmpty()); |
|
2054 |
|
2055 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative; |
|
2056 m_checked += priorAlternative->m_minimumSize; |
|
2057 break; |
|
2058 } |
|
2059 |
|
2060 // OpSimpleNestedAlternativeBegin/Next/End |
|
2061 // OpNestedAlternativeBegin/Next/End |
|
2062 // |
|
2063 // Generate code for when we backtrack back out of an alternative into |
|
2064 // a Begin or Next node, or when the entry input count check fails. If |
|
2065 // there are more alternatives we need to jump to the next alternative, |
|
2066 // if not we backtrack back out of the current set of parentheses. |
|
2067 // |
|
2068 // In the case of non-simple nested assertions we need to also link the |
|
2069 // 'return address' appropriately to backtrack back out into the correct |
|
2070 // alternative. |
|
2071 case OpSimpleNestedAlternativeBegin: |
|
2072 case OpSimpleNestedAlternativeNext: |
|
2073 case OpNestedAlternativeBegin: |
|
2074 case OpNestedAlternativeNext: { |
|
2075 YarrOp& nextOp = m_ops[op.m_nextOp]; |
|
2076 bool isBegin = op.m_previousOp == notFound; |
|
2077 bool isLastAlternative = nextOp.m_nextOp == notFound; |
|
2078 ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin)); |
|
2079 ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd)); |
|
2080 |
|
2081 // Treat an input check failure the same as a failed match. |
|
2082 m_backtrackingState.append(op.m_jumps); |
|
2083 |
|
2084 // Set the backtracks to jump to the appropriate place. We may need |
|
2085 // to link the backtracks in one of three different way depending on |
|
2086 // the type of alternative we are dealing with: |
|
2087 // - A single alternative, with no simplings. |
|
2088 // - The last alternative of a set of two or more. |
|
2089 // - An alternative other than the last of a set of two or more. |
|
2090 // |
|
2091 // In the case of a single alternative on its own, we don't need to |
|
2092 // jump anywhere - if the alternative fails to match we can just |
|
2093 // continue to backtrack out of the parentheses without jumping. |
|
2094 // |
|
2095 // In the case of the last alternative in a set of more than one, we |
|
2096 // need to jump to return back out to the beginning. We'll do so by |
|
2097 // adding a jump to the End node's m_jumps list, and linking this |
|
2098 // when we come to generate the Begin node. For alternatives other |
|
2099 // than the last, we need to jump to the next alternative. |
|
2100 // |
|
2101 // If the alternative had adjusted the input position we must link |
|
2102 // backtracking to here, correct, and then jump on. If not we can |
|
2103 // link the backtracks directly to their destination. |
|
2104 if (op.m_checkAdjust) { |
|
2105 // Handle the cases where we need to link the backtracks here. |
|
2106 m_backtrackingState.link(this); |
|
2107 sub32(Imm32(op.m_checkAdjust), index); |
|
2108 if (!isLastAlternative) { |
|
2109 // An alternative that is not the last should jump to its successor. |
|
2110 jump(nextOp.m_reentry); |
|
2111 } else if (!isBegin) { |
|
2112 // The last of more than one alternatives must jump back to the beginning. |
|
2113 nextOp.m_jumps.append(jump()); |
|
2114 } else { |
|
2115 // A single alternative on its own can fall through. |
|
2116 m_backtrackingState.fallthrough(); |
|
2117 } |
|
2118 } else { |
|
2119 // Handle the cases where we can link the backtracks directly to their destinations. |
|
2120 if (!isLastAlternative) { |
|
2121 // An alternative that is not the last should jump to its successor. |
|
2122 m_backtrackingState.linkTo(nextOp.m_reentry, this); |
|
2123 } else if (!isBegin) { |
|
2124 // The last of more than one alternatives must jump back to the beginning. |
|
2125 m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this); |
|
2126 } |
|
2127 // In the case of a single alternative on its own do nothing - it can fall through. |
|
2128 } |
|
2129 |
|
2130 // If there is a backtrack jump from a zero length match link it here. |
|
2131 if (op.m_zeroLengthMatch.isSet()) |
|
2132 m_backtrackingState.append(op.m_zeroLengthMatch); |
|
2133 |
|
2134 // At this point we've handled the backtracking back into this node. |
|
2135 // Now link any backtracks that need to jump to here. |
|
2136 |
|
2137 // For non-simple alternatives, link the alternative's 'return address' |
|
2138 // so that we backtrack back out into the previous alternative. |
|
2139 if (op.m_op == OpNestedAlternativeNext) |
|
2140 m_backtrackingState.append(op.m_returnAddress); |
|
2141 |
|
2142 // If there is more than one alternative, then the last alternative will |
|
2143 // have planted a jump to be linked to the end. This jump was added to the |
|
2144 // End node's m_jumps list. If we are back at the beginning, link it here. |
|
2145 if (isBegin) { |
|
2146 YarrOp* endOp = &m_ops[op.m_nextOp]; |
|
2147 while (endOp->m_nextOp != notFound) { |
|
2148 ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext); |
|
2149 endOp = &m_ops[endOp->m_nextOp]; |
|
2150 } |
|
2151 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd); |
|
2152 m_backtrackingState.append(endOp->m_jumps); |
|
2153 } |
|
2154 |
|
2155 if (!isBegin) { |
|
2156 YarrOp& lastOp = m_ops[op.m_previousOp]; |
|
2157 m_checked += lastOp.m_checkAdjust; |
|
2158 } |
|
2159 m_checked -= op.m_checkAdjust; |
|
2160 break; |
|
2161 } |
|
2162 case OpSimpleNestedAlternativeEnd: |
|
2163 case OpNestedAlternativeEnd: { |
|
2164 PatternTerm* term = op.m_term; |
|
2165 |
|
2166 // If there is a backtrack jump from a zero length match link it here. |
|
2167 if (op.m_zeroLengthMatch.isSet()) |
|
2168 m_backtrackingState.append(op.m_zeroLengthMatch); |
|
2169 |
|
2170 // If we backtrack into the end of a simple subpattern do nothing; |
|
2171 // just continue through into the last alternative. If we backtrack |
|
2172 // into the end of a non-simple set of alterntives we need to jump |
|
2173 // to the backtracking return address set up during generation. |
|
2174 if (op.m_op == OpNestedAlternativeEnd) { |
|
2175 m_backtrackingState.link(this); |
|
2176 |
|
2177 // Plant a jump to the return address. |
|
2178 unsigned parenthesesFrameLocation = term->frameLocation; |
|
2179 unsigned alternativeFrameLocation = parenthesesFrameLocation; |
|
2180 if (term->quantityType != QuantifierFixedCount) |
|
2181 alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce; |
|
2182 loadFromFrameAndJump(alternativeFrameLocation); |
|
2183 |
|
2184 // Link the DataLabelPtr associated with the end of the last |
|
2185 // alternative to this point. |
|
2186 m_backtrackingState.append(op.m_returnAddress); |
|
2187 } |
|
2188 |
|
2189 YarrOp& lastOp = m_ops[op.m_previousOp]; |
|
2190 m_checked += lastOp.m_checkAdjust; |
|
2191 break; |
|
2192 } |
|
2193 |
|
2194 // OpParenthesesSubpatternOnceBegin/End |
|
2195 // |
|
2196 // When we are backtracking back out of a capturing subpattern we need |
|
2197 // to clear the start index in the matches output array, to record that |
|
2198 // this subpattern has not been captured. |
|
2199 // |
|
2200 // When backtracking back out of a Greedy quantified subpattern we need |
|
2201 // to catch this, and try running the remainder of the alternative after |
|
2202 // the subpattern again, skipping the parentheses. |
|
2203 // |
|
2204 // Upon backtracking back into a quantified set of parentheses we need to |
|
2205 // check whether we were currently skipping the subpattern. If not, we |
|
2206 // can backtrack into them, if we were we need to either backtrack back |
|
2207 // out of the start of the parentheses, or jump back to the forwards |
|
2208 // matching start, depending of whether the match is Greedy or NonGreedy. |
|
2209 case OpParenthesesSubpatternOnceBegin: { |
|
2210 PatternTerm* term = op.m_term; |
|
2211 ASSERT(term->quantityCount == 1); |
|
2212 |
|
2213 // We only need to backtrack to thispoint if capturing or greedy. |
|
2214 if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) { |
|
2215 m_backtrackingState.link(this); |
|
2216 |
|
2217 // If capturing, clear the capture (we only need to reset start). |
|
2218 if (term->capture() && compileMode == IncludeSubpatterns) |
|
2219 clearSubpatternStart(term->parentheses.subpatternId); |
|
2220 |
|
2221 // If Greedy, jump to the end. |
|
2222 if (term->quantityType == QuantifierGreedy) { |
|
2223 // Clear the flag in the stackframe indicating we ran through the subpattern. |
|
2224 unsigned parenthesesFrameLocation = term->frameLocation; |
|
2225 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation); |
|
2226 // Jump to after the parentheses, skipping the subpattern. |
|
2227 jump(m_ops[op.m_nextOp].m_reentry); |
|
2228 // A backtrack from after the parentheses, when skipping the subpattern, |
|
2229 // will jump back to here. |
|
2230 op.m_jumps.link(this); |
|
2231 } |
|
2232 |
|
2233 m_backtrackingState.fallthrough(); |
|
2234 } |
|
2235 break; |
|
2236 } |
|
2237 case OpParenthesesSubpatternOnceEnd: { |
|
2238 PatternTerm* term = op.m_term; |
|
2239 |
|
2240 if (term->quantityType != QuantifierFixedCount) { |
|
2241 m_backtrackingState.link(this); |
|
2242 |
|
2243 // Check whether we should backtrack back into the parentheses, or if we |
|
2244 // are currently in a state where we had skipped over the subpattern |
|
2245 // (in which case the flag value on the stack will be -1). |
|
2246 unsigned parenthesesFrameLocation = term->frameLocation; |
|
2247 Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1)); |
|
2248 |
|
2249 if (term->quantityType == QuantifierGreedy) { |
|
2250 // For Greedy parentheses, we skip after having already tried going |
|
2251 // through the subpattern, so if we get here we're done. |
|
2252 YarrOp& beginOp = m_ops[op.m_previousOp]; |
|
2253 beginOp.m_jumps.append(hadSkipped); |
|
2254 } else { |
|
2255 // For NonGreedy parentheses, we try skipping the subpattern first, |
|
2256 // so if we get here we need to try running through the subpattern |
|
2257 // next. Jump back to the start of the parentheses in the forwards |
|
2258 // matching path. |
|
2259 ASSERT(term->quantityType == QuantifierNonGreedy); |
|
2260 YarrOp& beginOp = m_ops[op.m_previousOp]; |
|
2261 hadSkipped.linkTo(beginOp.m_reentry, this); |
|
2262 } |
|
2263 |
|
2264 m_backtrackingState.fallthrough(); |
|
2265 } |
|
2266 |
|
2267 m_backtrackingState.append(op.m_jumps); |
|
2268 break; |
|
2269 } |
|
2270 |
|
2271 // OpParenthesesSubpatternTerminalBegin/End |
|
2272 // |
|
2273 // Terminal subpatterns will always match - there is nothing after them to |
|
2274 // force a backtrack, and they have a minimum count of 0, and as such will |
|
2275 // always produce an acceptable result. |
|
2276 case OpParenthesesSubpatternTerminalBegin: { |
|
2277 // We will backtrack to this point once the subpattern cannot match any |
|
2278 // more. Since no match is accepted as a successful match (we are Greedy |
|
2279 // quantified with a minimum of zero) jump back to the forwards matching |
|
2280 // path at the end. |
|
2281 YarrOp& endOp = m_ops[op.m_nextOp]; |
|
2282 m_backtrackingState.linkTo(endOp.m_reentry, this); |
|
2283 break; |
|
2284 } |
|
2285 case OpParenthesesSubpatternTerminalEnd: |
|
2286 // We should never be backtracking to here (hence the 'terminal' in the name). |
|
2287 ASSERT(m_backtrackingState.isEmpty()); |
|
2288 m_backtrackingState.append(op.m_jumps); |
|
2289 break; |
|
2290 |
|
2291 // OpParentheticalAssertionBegin/End |
|
2292 case OpParentheticalAssertionBegin: { |
|
2293 PatternTerm* term = op.m_term; |
|
2294 YarrOp& endOp = m_ops[op.m_nextOp]; |
|
2295 |
|
2296 // We need to handle the backtracks upon backtracking back out |
|
2297 // of a parenthetical assertion if either we need to correct |
|
2298 // the input index, or the assertion was inverted. |
|
2299 if (op.m_checkAdjust || term->invert()) { |
|
2300 m_backtrackingState.link(this); |
|
2301 |
|
2302 if (op.m_checkAdjust) |
|
2303 add32(Imm32(op.m_checkAdjust), index); |
|
2304 |
|
2305 // In an inverted assertion failure to match the subpattern |
|
2306 // is treated as a successful match - jump to the end of the |
|
2307 // subpattern. We already have adjusted the input position |
|
2308 // back to that before the assertion, which is correct. |
|
2309 if (term->invert()) |
|
2310 jump(endOp.m_reentry); |
|
2311 |
|
2312 m_backtrackingState.fallthrough(); |
|
2313 } |
|
2314 |
|
2315 // The End node's jump list will contain any backtracks into |
|
2316 // the end of the assertion. Also, if inverted, we will have |
|
2317 // added the failure caused by a successful match to this. |
|
2318 m_backtrackingState.append(endOp.m_jumps); |
|
2319 |
|
2320 m_checked += op.m_checkAdjust; |
|
2321 break; |
|
2322 } |
|
2323 case OpParentheticalAssertionEnd: { |
|
2324 // FIXME: We should really be clearing any nested subpattern |
|
2325 // matches on bailing out from after the pattern. Firefox has |
|
2326 // this bug too (presumably because they use YARR!) |
|
2327 |
|
2328 // Never backtrack into an assertion; later failures bail to before the begin. |
|
2329 m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this); |
|
2330 |
|
2331 YarrOp& lastOp = m_ops[op.m_previousOp]; |
|
2332 m_checked -= lastOp.m_checkAdjust; |
|
2333 break; |
|
2334 } |
|
2335 |
|
2336 case OpMatchFailed: |
|
2337 break; |
|
2338 } |
|
2339 |
|
2340 } while (opIndex); |
|
2341 |
|
2342 return true; |
|
2343 } |
|
2344 |
|
2345 // Compilation methods: |
|
2346 // ==================== |
|
2347 |
|
2348 // opCompileParenthesesSubpattern |
|
2349 // Emits ops for a subpattern (set of parentheses). These consist |
|
2350 // of a set of alternatives wrapped in an outer set of nodes for |
|
2351 // the parentheses. |
|
2352 // Supported types of parentheses are 'Once' (quantityCount == 1) |
|
2353 // and 'Terminal' (non-capturing parentheses quantified as greedy |
|
2354 // and infinite). |
|
2355 // Alternatives will use the 'Simple' set of ops if either the |
|
2356 // subpattern is terminal (in which case we will never need to |
|
2357 // backtrack), or if the subpattern only contains one alternative. |
|
2358 void opCompileParenthesesSubpattern(PatternTerm* term) |
|
2359 { |
|
2360 YarrOpCode parenthesesBeginOpCode; |
|
2361 YarrOpCode parenthesesEndOpCode; |
|
2362 YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin; |
|
2363 YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext; |
|
2364 YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd; |
|
2365 |
|
2366 // We can currently only compile quantity 1 subpatterns that are |
|
2367 // not copies. We generate a copy in the case of a range quantifier, |
|
2368 // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to |
|
2369 // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ repectively). The problem |
|
2370 // comes where the subpattern is capturing, in which case we would |
|
2371 // need to restore the capture from the first subpattern upon a |
|
2372 // failure in the second. |
|
2373 if (term->quantityCount == 1 && !term->parentheses.isCopy) { |
|
2374 // Select the 'Once' nodes. |
|
2375 parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin; |
|
2376 parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd; |
|
2377 |
|
2378 // If there is more than one alternative we cannot use the 'simple' nodes. |
|
2379 if (term->parentheses.disjunction->m_alternatives.size() != 1) { |
|
2380 alternativeBeginOpCode = OpNestedAlternativeBegin; |
|
2381 alternativeNextOpCode = OpNestedAlternativeNext; |
|
2382 alternativeEndOpCode = OpNestedAlternativeEnd; |
|
2383 } |
|
2384 } else if (term->parentheses.isTerminal) { |
|
2385 // Terminal groups are optimized on the assumption that matching will never |
|
2386 // backtrack into the terminal group. But this is false if there is more |
|
2387 // than one alternative and one of the alternatives can match empty. In that |
|
2388 // case, the empty match is counted as a failure, so we would need to backtrack. |
|
2389 // The backtracking code doesn't handle this case correctly, so we fall back |
|
2390 // to the interpreter. |
|
2391 Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives; |
|
2392 if (alternatives.size() != 1) { |
|
2393 for (unsigned i = 0; i < alternatives.size(); ++i) { |
|
2394 if (alternatives[i]->m_minimumSize == 0) { |
|
2395 m_shouldFallBack = true; |
|
2396 return; |
|
2397 } |
|
2398 } |
|
2399 } |
|
2400 |
|
2401 // Select the 'Terminal' nodes. |
|
2402 parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin; |
|
2403 parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd; |
|
2404 } else { |
|
2405 // This subpattern is not supported by the JIT. |
|
2406 m_shouldFallBack = true; |
|
2407 return; |
|
2408 } |
|
2409 |
|
2410 size_t parenBegin = m_ops.size(); |
|
2411 m_ops.append(parenthesesBeginOpCode); |
|
2412 |
|
2413 m_ops.append(alternativeBeginOpCode); |
|
2414 m_ops.last().m_previousOp = notFound; |
|
2415 m_ops.last().m_term = term; |
|
2416 Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives; |
|
2417 for (unsigned i = 0; i < alternatives.size(); ++i) { |
|
2418 size_t lastOpIndex = m_ops.size() - 1; |
|
2419 |
|
2420 PatternAlternative* nestedAlternative = alternatives[i]; |
|
2421 opCompileAlternative(nestedAlternative); |
|
2422 |
|
2423 size_t thisOpIndex = m_ops.size(); |
|
2424 m_ops.append(YarrOp(alternativeNextOpCode)); |
|
2425 |
|
2426 YarrOp& lastOp = m_ops[lastOpIndex]; |
|
2427 YarrOp& thisOp = m_ops[thisOpIndex]; |
|
2428 |
|
2429 lastOp.m_alternative = nestedAlternative; |
|
2430 lastOp.m_nextOp = thisOpIndex; |
|
2431 thisOp.m_previousOp = lastOpIndex; |
|
2432 thisOp.m_term = term; |
|
2433 } |
|
2434 YarrOp& lastOp = m_ops.last(); |
|
2435 ASSERT(lastOp.m_op == alternativeNextOpCode); |
|
2436 lastOp.m_op = alternativeEndOpCode; |
|
2437 lastOp.m_alternative = 0; |
|
2438 lastOp.m_nextOp = notFound; |
|
2439 |
|
2440 size_t parenEnd = m_ops.size(); |
|
2441 m_ops.append(parenthesesEndOpCode); |
|
2442 |
|
2443 m_ops[parenBegin].m_term = term; |
|
2444 m_ops[parenBegin].m_previousOp = notFound; |
|
2445 m_ops[parenBegin].m_nextOp = parenEnd; |
|
2446 m_ops[parenEnd].m_term = term; |
|
2447 m_ops[parenEnd].m_previousOp = parenBegin; |
|
2448 m_ops[parenEnd].m_nextOp = notFound; |
|
2449 } |
|
2450 |
|
2451 // opCompileParentheticalAssertion |
|
2452 // Emits ops for a parenthetical assertion. These consist of an |
|
2453 // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping |
|
2454 // the alternatives, with these wrapped by an outer pair of |
|
2455 // OpParentheticalAssertionBegin/End nodes. |
|
2456 // We can always use the OpSimpleNestedAlternative nodes in the |
|
2457 // case of parenthetical assertions since these only ever match |
|
2458 // once, and will never backtrack back into the assertion. |
|
2459 void opCompileParentheticalAssertion(PatternTerm* term) |
|
2460 { |
|
2461 size_t parenBegin = m_ops.size(); |
|
2462 m_ops.append(OpParentheticalAssertionBegin); |
|
2463 |
|
2464 m_ops.append(OpSimpleNestedAlternativeBegin); |
|
2465 m_ops.last().m_previousOp = notFound; |
|
2466 m_ops.last().m_term = term; |
|
2467 Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives; |
|
2468 for (unsigned i = 0; i < alternatives.size(); ++i) { |
|
2469 size_t lastOpIndex = m_ops.size() - 1; |
|
2470 |
|
2471 PatternAlternative* nestedAlternative = alternatives[i]; |
|
2472 opCompileAlternative(nestedAlternative); |
|
2473 |
|
2474 size_t thisOpIndex = m_ops.size(); |
|
2475 m_ops.append(YarrOp(OpSimpleNestedAlternativeNext)); |
|
2476 |
|
2477 YarrOp& lastOp = m_ops[lastOpIndex]; |
|
2478 YarrOp& thisOp = m_ops[thisOpIndex]; |
|
2479 |
|
2480 lastOp.m_alternative = nestedAlternative; |
|
2481 lastOp.m_nextOp = thisOpIndex; |
|
2482 thisOp.m_previousOp = lastOpIndex; |
|
2483 thisOp.m_term = term; |
|
2484 } |
|
2485 YarrOp& lastOp = m_ops.last(); |
|
2486 ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext); |
|
2487 lastOp.m_op = OpSimpleNestedAlternativeEnd; |
|
2488 lastOp.m_alternative = 0; |
|
2489 lastOp.m_nextOp = notFound; |
|
2490 |
|
2491 size_t parenEnd = m_ops.size(); |
|
2492 m_ops.append(OpParentheticalAssertionEnd); |
|
2493 |
|
2494 m_ops[parenBegin].m_term = term; |
|
2495 m_ops[parenBegin].m_previousOp = notFound; |
|
2496 m_ops[parenBegin].m_nextOp = parenEnd; |
|
2497 m_ops[parenEnd].m_term = term; |
|
2498 m_ops[parenEnd].m_previousOp = parenBegin; |
|
2499 m_ops[parenEnd].m_nextOp = notFound; |
|
2500 } |
|
2501 |
|
2502 // opCompileAlternative |
|
2503 // Called to emit nodes for all terms in an alternative. |
|
2504 void opCompileAlternative(PatternAlternative* alternative) |
|
2505 { |
|
2506 optimizeAlternative(alternative); |
|
2507 |
|
2508 for (unsigned i = 0; i < alternative->m_terms.size(); ++i) { |
|
2509 PatternTerm* term = &alternative->m_terms[i]; |
|
2510 |
|
2511 switch (term->type) { |
|
2512 case PatternTerm::TypeParenthesesSubpattern: |
|
2513 opCompileParenthesesSubpattern(term); |
|
2514 break; |
|
2515 |
|
2516 case PatternTerm::TypeParentheticalAssertion: |
|
2517 opCompileParentheticalAssertion(term); |
|
2518 break; |
|
2519 |
|
2520 default: |
|
2521 m_ops.append(term); |
|
2522 } |
|
2523 } |
|
2524 } |
|
2525 |
|
2526 // opCompileBody |
|
2527 // This method compiles the body disjunction of the regular expression. |
|
2528 // The body consists of two sets of alternatives - zero or more 'once |
|
2529 // through' (BOL anchored) alternatives, followed by zero or more |
|
2530 // repeated alternatives. |
|
2531 // For each of these two sets of alteratives, if not empty they will be |
|
2532 // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the |
|
2533 // 'begin' node referencing the first alternative, and 'next' nodes |
|
2534 // referencing any further alternatives. The begin/next/end nodes are |
|
2535 // linked together in a doubly linked list. In the case of repeating |
|
2536 // alternatives, the end node is also linked back to the beginning. |
|
2537 // If no repeating alternatives exist, then a OpMatchFailed node exists |
|
2538 // to return the failing result. |
|
2539 void opCompileBody(PatternDisjunction* disjunction) |
|
2540 { |
|
2541 Vector<PatternAlternative*>& alternatives = disjunction->m_alternatives; |
|
2542 size_t currentAlternativeIndex = 0; |
|
2543 |
|
2544 // Emit the 'once through' alternatives. |
|
2545 if (alternatives.size() && alternatives[0]->onceThrough()) { |
|
2546 m_ops.append(YarrOp(OpBodyAlternativeBegin)); |
|
2547 m_ops.last().m_previousOp = notFound; |
|
2548 |
|
2549 do { |
|
2550 size_t lastOpIndex = m_ops.size() - 1; |
|
2551 PatternAlternative* alternative = alternatives[currentAlternativeIndex]; |
|
2552 opCompileAlternative(alternative); |
|
2553 |
|
2554 size_t thisOpIndex = m_ops.size(); |
|
2555 m_ops.append(YarrOp(OpBodyAlternativeNext)); |
|
2556 |
|
2557 YarrOp& lastOp = m_ops[lastOpIndex]; |
|
2558 YarrOp& thisOp = m_ops[thisOpIndex]; |
|
2559 |
|
2560 lastOp.m_alternative = alternative; |
|
2561 lastOp.m_nextOp = thisOpIndex; |
|
2562 thisOp.m_previousOp = lastOpIndex; |
|
2563 |
|
2564 ++currentAlternativeIndex; |
|
2565 } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough()); |
|
2566 |
|
2567 YarrOp& lastOp = m_ops.last(); |
|
2568 |
|
2569 ASSERT(lastOp.m_op == OpBodyAlternativeNext); |
|
2570 lastOp.m_op = OpBodyAlternativeEnd; |
|
2571 lastOp.m_alternative = 0; |
|
2572 lastOp.m_nextOp = notFound; |
|
2573 } |
|
2574 |
|
2575 if (currentAlternativeIndex == alternatives.size()) { |
|
2576 m_ops.append(YarrOp(OpMatchFailed)); |
|
2577 return; |
|
2578 } |
|
2579 |
|
2580 // Emit the repeated alternatives. |
|
2581 size_t repeatLoop = m_ops.size(); |
|
2582 m_ops.append(YarrOp(OpBodyAlternativeBegin)); |
|
2583 m_ops.last().m_previousOp = notFound; |
|
2584 do { |
|
2585 size_t lastOpIndex = m_ops.size() - 1; |
|
2586 PatternAlternative* alternative = alternatives[currentAlternativeIndex]; |
|
2587 ASSERT(!alternative->onceThrough()); |
|
2588 opCompileAlternative(alternative); |
|
2589 |
|
2590 size_t thisOpIndex = m_ops.size(); |
|
2591 m_ops.append(YarrOp(OpBodyAlternativeNext)); |
|
2592 |
|
2593 YarrOp& lastOp = m_ops[lastOpIndex]; |
|
2594 YarrOp& thisOp = m_ops[thisOpIndex]; |
|
2595 |
|
2596 lastOp.m_alternative = alternative; |
|
2597 lastOp.m_nextOp = thisOpIndex; |
|
2598 thisOp.m_previousOp = lastOpIndex; |
|
2599 |
|
2600 ++currentAlternativeIndex; |
|
2601 } while (currentAlternativeIndex < alternatives.size()); |
|
2602 YarrOp& lastOp = m_ops.last(); |
|
2603 ASSERT(lastOp.m_op == OpBodyAlternativeNext); |
|
2604 lastOp.m_op = OpBodyAlternativeEnd; |
|
2605 lastOp.m_alternative = 0; |
|
2606 lastOp.m_nextOp = repeatLoop; |
|
2607 } |
|
2608 |
|
2609 void generateEnter() |
|
2610 { |
|
2611 #if WTF_CPU_X86_64 |
|
2612 push(X86Registers::ebp); |
|
2613 move(stackPointerRegister, X86Registers::ebp); |
|
2614 push(X86Registers::ebx); |
|
2615 // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves. |
|
2616 zeroExtend32ToPtr(index, index); |
|
2617 zeroExtend32ToPtr(length, length); |
|
2618 #elif WTF_CPU_X86 |
|
2619 push(X86Registers::ebp); |
|
2620 move(stackPointerRegister, X86Registers::ebp); |
|
2621 // TODO: do we need spill registers to fill the output pointer if there are no sub captures? |
|
2622 push(X86Registers::ebx); |
|
2623 push(X86Registers::edi); |
|
2624 push(X86Registers::esi); |
|
2625 // load output into edi (2 = saved ebp + return address). |
|
2626 # if WTF_COMPILER_MSVC || WTF_COMPILER_SUNCC |
|
2627 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input); |
|
2628 loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index); |
|
2629 loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length); |
|
2630 if (compileMode == IncludeSubpatterns) |
|
2631 loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output); |
|
2632 # else |
|
2633 if (compileMode == IncludeSubpatterns) |
|
2634 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output); |
|
2635 # endif |
|
2636 #elif WTF_CPU_ARM |
|
2637 push(ARMRegisters::r4); |
|
2638 push(ARMRegisters::r5); |
|
2639 push(ARMRegisters::r6); |
|
2640 # if WTF_CPU_ARM_TRADITIONAL |
|
2641 push(ARMRegisters::r8); // scratch register |
|
2642 # endif |
|
2643 if (compileMode == IncludeSubpatterns) |
|
2644 move(ARMRegisters::r3, output); |
|
2645 #elif WTF_CPU_SH4 |
|
2646 push(SH4Registers::r11); |
|
2647 push(SH4Registers::r13); |
|
2648 #elif WTF_CPU_SPARC |
|
2649 save(Imm32(-m_pattern.m_body->m_callFrameSize * sizeof(void*))); |
|
2650 #elif WTF_CPU_MIPS |
|
2651 // Do nothing. |
|
2652 #endif |
|
2653 } |
|
2654 |
|
2655 void generateReturn() |
|
2656 { |
|
2657 #if WTF_CPU_X86_64 |
|
2658 pop(X86Registers::ebx); |
|
2659 pop(X86Registers::ebp); |
|
2660 #elif WTF_CPU_X86 |
|
2661 pop(X86Registers::esi); |
|
2662 pop(X86Registers::edi); |
|
2663 pop(X86Registers::ebx); |
|
2664 pop(X86Registers::ebp); |
|
2665 #elif WTF_CPU_ARM |
|
2666 # if WTF_CPU_ARM_TRADITIONAL |
|
2667 pop(ARMRegisters::r8); // scratch register |
|
2668 # endif |
|
2669 pop(ARMRegisters::r6); |
|
2670 pop(ARMRegisters::r5); |
|
2671 pop(ARMRegisters::r4); |
|
2672 #elif WTF_CPU_SH4 |
|
2673 pop(SH4Registers::r13); |
|
2674 pop(SH4Registers::r11); |
|
2675 #elif WTF_CPU_SPARC |
|
2676 ret_and_restore(); |
|
2677 return; |
|
2678 #elif WTF_CPU_MIPS |
|
2679 // Do nothing |
|
2680 #endif |
|
2681 ret(); |
|
2682 } |
|
2683 |
|
2684 public: |
|
2685 YarrGenerator(YarrPattern& pattern, YarrCharSize charSize) |
|
2686 : m_pattern(pattern) |
|
2687 , m_charSize(charSize) |
|
2688 , m_charScale(m_charSize == Char8 ? TimesOne: TimesTwo) |
|
2689 , m_shouldFallBack(false) |
|
2690 , m_checked(0) |
|
2691 { |
|
2692 } |
|
2693 |
|
2694 void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject) |
|
2695 { |
|
2696 generateEnter(); |
|
2697 |
|
2698 Jump hasInput = checkInput(); |
|
2699 #if WTF_CPU_X86_64 |
|
2700 move(TrustedImm32(int(WTF::notFound)), returnRegister); |
|
2701 #else |
|
2702 move(TrustedImmPtr((void*)WTF::notFound), returnRegister); |
|
2703 move(TrustedImm32(0), returnRegister2); |
|
2704 #endif |
|
2705 generateReturn(); |
|
2706 hasInput.link(this); |
|
2707 |
|
2708 if (compileMode == IncludeSubpatterns) { |
|
2709 for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i) |
|
2710 store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int))); |
|
2711 } |
|
2712 |
|
2713 if (!m_pattern.m_body->m_hasFixedSize) |
|
2714 setMatchStart(index); |
|
2715 |
|
2716 initCallFrame(); |
|
2717 |
|
2718 // Compile the pattern to the internal 'YarrOp' representation. |
|
2719 opCompileBody(m_pattern.m_body); |
|
2720 |
|
2721 // If we encountered anything we can't handle in the JIT code |
|
2722 // (e.g. backreferences) then return early. |
|
2723 if (m_shouldFallBack) { |
|
2724 jitObject.setFallBack(true); |
|
2725 return; |
|
2726 } |
|
2727 |
|
2728 if (!generate() || !backtrack()) { |
|
2729 jitObject.setFallBack(true); |
|
2730 return; |
|
2731 } |
|
2732 |
|
2733 // Link & finalize the code. |
|
2734 ExecutablePool *pool; |
|
2735 bool ok; |
|
2736 LinkBuffer linkBuffer(this, globalData->regexAllocator, &pool, &ok, REGEXP_CODE); |
|
2737 |
|
2738 // Attempt to detect OOM during linkBuffer creation. |
|
2739 if (linkBuffer.unsafeCode() == nullptr) { |
|
2740 jitObject.setFallBack(true); |
|
2741 return; |
|
2742 } |
|
2743 |
|
2744 m_backtrackingState.linkDataLabels(linkBuffer); |
|
2745 |
|
2746 if (compileMode == MatchOnly) { |
|
2747 #if YARR_8BIT_CHAR_SUPPORT |
|
2748 if (m_charSize == Char8) |
|
2749 jitObject.set8BitCodeMatchOnly(linkBuffer.finalizeCode()); |
|
2750 else |
|
2751 #endif |
|
2752 jitObject.set16BitCodeMatchOnly(linkBuffer.finalizeCode()); |
|
2753 } else { |
|
2754 #if YARR_8BIT_CHAR_SUPPORT |
|
2755 if (m_charSize == Char8) |
|
2756 jitObject.set8BitCode(linkBuffer.finalizeCode()); |
|
2757 else |
|
2758 #endif |
|
2759 jitObject.set16BitCode(linkBuffer.finalizeCode()); |
|
2760 } |
|
2761 jitObject.setFallBack(m_shouldFallBack); |
|
2762 } |
|
2763 |
|
2764 private: |
|
2765 YarrPattern& m_pattern; |
|
2766 |
|
2767 YarrCharSize m_charSize; |
|
2768 |
|
2769 Scale m_charScale; |
|
2770 |
|
2771 // Used to detect regular expression constructs that are not currently |
|
2772 // supported in the JIT; fall back to the interpreter when this is detected. |
|
2773 bool m_shouldFallBack; |
|
2774 |
|
2775 // The regular expression expressed as a linear sequence of operations. |
|
2776 Vector<YarrOp, 128> m_ops; |
|
2777 |
|
2778 // This records the current input offset being applied due to the current |
|
2779 // set of alternatives we are nested within. E.g. when matching the |
|
2780 // character 'b' within the regular expression /abc/, we will know that |
|
2781 // the minimum size for the alternative is 3, checked upon entry to the |
|
2782 // alternative, and that 'b' is at offset 1 from the start, and as such |
|
2783 // when matching 'b' we need to apply an offset of -2 to the load. |
|
2784 // |
|
2785 // FIXME: This should go away. Rather than tracking this value throughout |
|
2786 // code generation, we should gather this information up front & store it |
|
2787 // on the YarrOp structure. |
|
2788 int m_checked; |
|
2789 |
|
2790 // This class records state whilst generating the backtracking path of code. |
|
2791 BacktrackingState m_backtrackingState; |
|
2792 }; |
|
2793 |
|
2794 void jitCompile(YarrPattern& pattern, YarrCharSize charSize, JSGlobalData* globalData, YarrCodeBlock& jitObject, YarrJITCompileMode mode) |
|
2795 { |
|
2796 if (mode == MatchOnly) |
|
2797 YarrGenerator<MatchOnly>(pattern, charSize).compile(globalData, jitObject); |
|
2798 else |
|
2799 YarrGenerator<IncludeSubpatterns>(pattern, charSize).compile(globalData, jitObject); |
|
2800 } |
|
2801 |
|
2802 }} |
|
2803 |
|
2804 #endif |