Wed, 31 Dec 2014 07:53:36 +0100
Correct small whitespace inconsistency, lost while renaming variables.
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 *
4 * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
28 #ifndef yarr_YarrInterpreter_h
29 #define yarr_YarrInterpreter_h
31 #include "yarr/YarrPattern.h"
33 namespace WTF {
34 class BumpPointerAllocator;
35 }
36 using WTF::BumpPointerAllocator;
38 namespace JSC { namespace Yarr {
40 class ByteDisjunction;
42 struct ByteTerm {
43 enum Type {
44 TypeBodyAlternativeBegin,
45 TypeBodyAlternativeDisjunction,
46 TypeBodyAlternativeEnd,
47 TypeAlternativeBegin,
48 TypeAlternativeDisjunction,
49 TypeAlternativeEnd,
50 TypeSubpatternBegin,
51 TypeSubpatternEnd,
52 TypeAssertionBOL,
53 TypeAssertionEOL,
54 TypeAssertionWordBoundary,
55 TypePatternCharacterOnce,
56 TypePatternCharacterFixed,
57 TypePatternCharacterGreedy,
58 TypePatternCharacterNonGreedy,
59 TypePatternCasedCharacterOnce,
60 TypePatternCasedCharacterFixed,
61 TypePatternCasedCharacterGreedy,
62 TypePatternCasedCharacterNonGreedy,
63 TypeCharacterClass,
64 TypeBackReference,
65 TypeParenthesesSubpattern,
66 TypeParenthesesSubpatternOnceBegin,
67 TypeParenthesesSubpatternOnceEnd,
68 TypeParenthesesSubpatternTerminalBegin,
69 TypeParenthesesSubpatternTerminalEnd,
70 TypeParentheticalAssertionBegin,
71 TypeParentheticalAssertionEnd,
72 TypeCheckInput,
73 TypeUncheckInput,
74 TypeDotStarEnclosure
75 } type;
76 union {
77 struct {
78 union {
79 UChar patternCharacter;
80 struct {
81 UChar lo;
82 UChar hi;
83 } casedCharacter;
84 CharacterClass* characterClass;
85 unsigned subpatternId;
86 };
87 union {
88 ByteDisjunction* parenthesesDisjunction;
89 unsigned parenthesesWidth;
90 };
91 QuantifierType quantityType;
92 unsigned quantityCount;
93 } atom;
94 struct {
95 int next;
96 int end;
97 bool onceThrough;
98 } alternative;
99 struct {
100 bool m_bol : 1;
101 bool m_eol : 1;
102 } anchors;
103 unsigned checkInputCount;
104 };
105 unsigned frameLocation;
106 bool m_capture : 1;
107 bool m_invert : 1;
108 unsigned inputPosition;
110 ByteTerm(UChar ch, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
111 : frameLocation(frameLocation)
112 , m_capture(false)
113 , m_invert(false)
114 {
115 switch (quantityType) {
116 case QuantifierFixedCount:
117 type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
118 break;
119 case QuantifierGreedy:
120 type = ByteTerm::TypePatternCharacterGreedy;
121 break;
122 case QuantifierNonGreedy:
123 type = ByteTerm::TypePatternCharacterNonGreedy;
124 break;
125 }
127 atom.patternCharacter = ch;
128 atom.quantityType = quantityType;
129 atom.quantityCount = quantityCount.unsafeGet();
130 inputPosition = inputPos;
131 }
133 ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, Checked<unsigned> quantityCount, QuantifierType quantityType)
134 : frameLocation(frameLocation)
135 , m_capture(false)
136 , m_invert(false)
137 {
138 switch (quantityType) {
139 case QuantifierFixedCount:
140 type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
141 break;
142 case QuantifierGreedy:
143 type = ByteTerm::TypePatternCasedCharacterGreedy;
144 break;
145 case QuantifierNonGreedy:
146 type = ByteTerm::TypePatternCasedCharacterNonGreedy;
147 break;
148 }
150 atom.casedCharacter.lo = lo;
151 atom.casedCharacter.hi = hi;
152 atom.quantityType = quantityType;
153 atom.quantityCount = quantityCount.unsafeGet();
154 inputPosition = inputPos;
155 }
157 ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
158 : type(ByteTerm::TypeCharacterClass)
159 , m_capture(false)
160 , m_invert(invert)
161 {
162 atom.characterClass = characterClass;
163 atom.quantityType = QuantifierFixedCount;
164 atom.quantityCount = 1;
165 inputPosition = inputPos;
166 }
168 ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool capture, int inputPos)
169 : type(type)
170 , m_capture(capture)
171 , m_invert(false)
172 {
173 atom.subpatternId = subpatternId;
174 atom.parenthesesDisjunction = parenthesesInfo;
175 atom.quantityType = QuantifierFixedCount;
176 atom.quantityCount = 1;
177 inputPosition = inputPos;
178 }
180 ByteTerm(Type type, bool invert = false)
181 : type(type)
182 , m_capture(false)
183 , m_invert(invert)
184 {
185 atom.quantityType = QuantifierFixedCount;
186 atom.quantityCount = 1;
187 }
189 ByteTerm(Type type, unsigned subpatternId, bool capture, bool invert, int inputPos)
190 : type(type)
191 , m_capture(capture)
192 , m_invert(invert)
193 {
194 atom.subpatternId = subpatternId;
195 atom.quantityType = QuantifierFixedCount;
196 atom.quantityCount = 1;
197 inputPosition = inputPos;
198 }
200 // For js::Vector. Does not create a valid object.
201 ByteTerm()
202 {
203 }
205 static ByteTerm BOL(int inputPos)
206 {
207 ByteTerm term(TypeAssertionBOL);
208 term.inputPosition = inputPos;
209 return term;
210 }
212 static ByteTerm CheckInput(Checked<unsigned> count)
213 {
214 ByteTerm term(TypeCheckInput);
215 term.checkInputCount = count.unsafeGet();
216 return term;
217 }
219 static ByteTerm UncheckInput(Checked<unsigned> count)
220 {
221 ByteTerm term(TypeUncheckInput);
222 term.checkInputCount = count.unsafeGet();
223 return term;
224 }
226 static ByteTerm EOL(int inputPos)
227 {
228 ByteTerm term(TypeAssertionEOL);
229 term.inputPosition = inputPos;
230 return term;
231 }
233 static ByteTerm WordBoundary(bool invert, int inputPos)
234 {
235 ByteTerm term(TypeAssertionWordBoundary, invert);
236 term.inputPosition = inputPos;
237 return term;
238 }
240 static ByteTerm BackReference(unsigned subpatternId, int inputPos)
241 {
242 return ByteTerm(TypeBackReference, subpatternId, false, false, inputPos);
243 }
245 static ByteTerm BodyAlternativeBegin(bool onceThrough)
246 {
247 ByteTerm term(TypeBodyAlternativeBegin);
248 term.alternative.next = 0;
249 term.alternative.end = 0;
250 term.alternative.onceThrough = onceThrough;
251 return term;
252 }
254 static ByteTerm BodyAlternativeDisjunction(bool onceThrough)
255 {
256 ByteTerm term(TypeBodyAlternativeDisjunction);
257 term.alternative.next = 0;
258 term.alternative.end = 0;
259 term.alternative.onceThrough = onceThrough;
260 return term;
261 }
263 static ByteTerm BodyAlternativeEnd()
264 {
265 ByteTerm term(TypeBodyAlternativeEnd);
266 term.alternative.next = 0;
267 term.alternative.end = 0;
268 term.alternative.onceThrough = false;
269 return term;
270 }
272 static ByteTerm AlternativeBegin()
273 {
274 ByteTerm term(TypeAlternativeBegin);
275 term.alternative.next = 0;
276 term.alternative.end = 0;
277 term.alternative.onceThrough = false;
278 return term;
279 }
281 static ByteTerm AlternativeDisjunction()
282 {
283 ByteTerm term(TypeAlternativeDisjunction);
284 term.alternative.next = 0;
285 term.alternative.end = 0;
286 term.alternative.onceThrough = false;
287 return term;
288 }
290 static ByteTerm AlternativeEnd()
291 {
292 ByteTerm term(TypeAlternativeEnd);
293 term.alternative.next = 0;
294 term.alternative.end = 0;
295 term.alternative.onceThrough = false;
296 return term;
297 }
299 static ByteTerm SubpatternBegin()
300 {
301 return ByteTerm(TypeSubpatternBegin);
302 }
304 static ByteTerm SubpatternEnd()
305 {
306 return ByteTerm(TypeSubpatternEnd);
307 }
309 static ByteTerm DotStarEnclosure(bool bolAnchor, bool eolAnchor)
310 {
311 ByteTerm term(TypeDotStarEnclosure);
312 term.anchors.m_bol = bolAnchor;
313 term.anchors.m_eol = eolAnchor;
314 return term;
315 }
317 bool invert()
318 {
319 return m_invert;
320 }
322 bool capture()
323 {
324 return m_capture;
325 }
326 };
328 class ByteDisjunction {
329 WTF_MAKE_FAST_ALLOCATED;
330 public:
331 ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
332 : m_numSubpatterns(numSubpatterns)
333 , m_frameSize(frameSize)
334 {
335 }
337 Vector<ByteTerm> terms;
338 unsigned m_numSubpatterns;
339 unsigned m_frameSize;
340 };
342 struct BytecodePattern {
343 WTF_MAKE_FAST_ALLOCATED;
344 public:
345 BytecodePattern(PassOwnPtr<ByteDisjunction> body, Vector<ByteDisjunction*> &allParenthesesInfo, YarrPattern& pattern, BumpPointerAllocator* allocator)
346 : m_body(body)
347 , m_ignoreCase(pattern.m_ignoreCase)
348 , m_multiline(pattern.m_multiline)
349 , m_allocator(allocator)
350 {
351 newlineCharacterClass = pattern.newlineCharacterClass();
352 wordcharCharacterClass = pattern.wordcharCharacterClass();
354 // Trick: 'Steal' the YarrPattern's ParenthesesInfo!
355 // The input vector isn't used afterwards anymore,
356 // that way we don't have to copy the input.
357 JS_ASSERT(m_allParenthesesInfo.size() == 0);
358 m_allParenthesesInfo.swap(allParenthesesInfo);
360 // Trick: 'Steal' the YarrPattern's CharacterClasses!
361 // The input vector isn't used afterwards anymore,
362 // that way we don't have to copy the input.
363 JS_ASSERT(m_userCharacterClasses.size() == 0);
364 m_userCharacterClasses.swap(pattern.m_userCharacterClasses);
365 }
367 ~BytecodePattern()
368 {
369 deleteAllValues(m_allParenthesesInfo);
370 deleteAllValues(m_userCharacterClasses);
371 }
373 OwnPtr<ByteDisjunction> m_body;
374 bool m_ignoreCase;
375 bool m_multiline;
376 // Each BytecodePattern is associated with a RegExp, each RegExp is associated
377 // with a JSGlobalData. Cache a pointer to out JSGlobalData's m_regExpAllocator.
378 BumpPointerAllocator* m_allocator;
380 CharacterClass* newlineCharacterClass;
381 CharacterClass* wordcharCharacterClass;
383 private:
384 Vector<ByteDisjunction*> m_allParenthesesInfo;
385 Vector<CharacterClass*> m_userCharacterClasses;
386 };
388 JS_EXPORT_PRIVATE PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
389 JS_EXPORT_PRIVATE unsigned interpret(JSContext *cx, BytecodePattern*, const String& input, unsigned start, unsigned* output);
390 unsigned interpret(JSContext *cx, BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output);
391 unsigned interpret(JSContext *cx, BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output);
393 } } // namespace JSC::Yarr
395 #endif /* yarr_YarrInterpreter_h */