|
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
|
2 * vim: set ts=8 sts=4 et sw=4 tw=99: |
|
3 * This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 #ifndef vm_RegExpObject_h |
|
8 #define vm_RegExpObject_h |
|
9 |
|
10 #include "mozilla/Attributes.h" |
|
11 #include "mozilla/MemoryReporting.h" |
|
12 |
|
13 #include "jscntxt.h" |
|
14 #include "jsproxy.h" |
|
15 |
|
16 #include "gc/Marking.h" |
|
17 #include "gc/Zone.h" |
|
18 #include "vm/Shape.h" |
|
19 #if ENABLE_YARR_JIT |
|
20 #include "yarr/YarrJIT.h" |
|
21 #else |
|
22 #include "yarr/YarrInterpreter.h" |
|
23 #endif |
|
24 |
|
25 /* |
|
26 * JavaScript Regular Expressions |
|
27 * |
|
28 * There are several engine concepts associated with a single logical regexp: |
|
29 * |
|
30 * RegExpObject - The JS-visible object whose .[[Class]] equals "RegExp" |
|
31 * |
|
32 * RegExpShared - The compiled representation of the regexp. |
|
33 * |
|
34 * RegExpCompartment - Owns all RegExpShared instances in a compartment. |
|
35 * |
|
36 * To save memory, a RegExpShared is not created for a RegExpObject until it is |
|
37 * needed for execution. When a RegExpShared needs to be created, it is looked |
|
38 * up in a per-compartment table to allow reuse between objects. Lastly, on |
|
39 * GC, every RegExpShared (that is not active on the callstack) is discarded. |
|
40 * Because of the last point, any code using a RegExpShared (viz., by executing |
|
41 * a regexp) must indicate the RegExpShared is active via RegExpGuard. |
|
42 */ |
|
43 namespace js { |
|
44 |
|
/*
 * Forward declarations, so this header can name these types without pulling
 * in their definitions.
 */
class MatchConduit;
class MatchPair;
class MatchPairs;

/* The compiled regexp representation; its definition follows in this header. */
class RegExpShared;

/* TokenStream is only ever passed by pointer in this header. */
namespace frontend {
class TokenStream;
} /* namespace frontend */
|
51 |
|
/*
 * Bit flags describing the modifiers of a regular expression. Each individual
 * flag occupies its own bit, so flags can be combined with bitwise OR.
 */
enum RegExpFlag
{
    NoFlags        = 0x00,

    IgnoreCaseFlag = 1 << 0,
    GlobalFlag     = 1 << 1,
    MultilineFlag  = 1 << 2,
    StickyFlag     = 1 << 3,

    /* Every individual flag bit set at once. */
    AllFlags       = IgnoreCaseFlag | GlobalFlag | MultilineFlag | StickyFlag
};
|
62 |
|
/*
 * Result of running a regular expression.
 *
 * NOTE(review): judging by the names, Success_NotFound presumably means the
 * run completed without error but produced no match -- confirm against the
 * implementation of RegExpShared::execute.
 */
enum RegExpRunStatus
{
    RegExpRunStatus_Error            = 0,
    RegExpRunStatus_Success          = 1,
    RegExpRunStatus_Success_NotFound = 2
};
|
69 |
|
70 class RegExpObjectBuilder |
|
71 { |
|
72 ExclusiveContext *cx; |
|
73 Rooted<RegExpObject*> reobj_; |
|
74 |
|
75 bool getOrCreate(); |
|
76 bool getOrCreateClone(HandleTypeObject type); |
|
77 |
|
78 public: |
|
79 RegExpObjectBuilder(ExclusiveContext *cx, RegExpObject *reobj = nullptr); |
|
80 |
|
81 RegExpObject *reobj() { return reobj_; } |
|
82 |
|
83 RegExpObject *build(HandleAtom source, RegExpFlag flags); |
|
84 RegExpObject *build(HandleAtom source, RegExpShared &shared); |
|
85 |
|
86 /* Perform a VM-internal clone. */ |
|
87 RegExpObject *clone(Handle<RegExpObject*> other); |
|
88 }; |
|
89 |
|
90 JSObject * |
|
91 CloneRegExpObject(JSContext *cx, JSObject *obj); |
|
92 |
|
93 /* |
|
94 * A RegExpShared is the compiled representation of a regexp. A RegExpShared is |
|
95 * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may |
|
96 * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a |
|
97 * cache so that they can be reused when compiling the same regex string. |
|
98 * |
|
99 * During a GC, the trace hook for RegExpObject clears any pointers to |
|
100 * RegExpShareds so that there will be no dangling pointers when they are |
|
101 * deleted. However, some RegExpShareds are not deleted: |
|
102 * |
|
103 * 1. Any RegExpShared with pointers from the C++ stack is not deleted. |
|
104 * 2. Any RegExpShared which has been embedded into jitcode is not deleted. |
|
105 * This rarely comes into play, as jitcode is usually purged before the |
|
106 * RegExpShared are sweeped. |
|
107 * 3. Any RegExpShared that was installed in a RegExpObject during an |
|
108 * incremental GC is not deleted. This is because the RegExpObject may have |
|
109 * been traced through before the new RegExpShared was installed, in which |
|
110 * case deleting the RegExpShared would turn the RegExpObject's reference |
|
111 * into a dangling pointer |
|
112 * |
|
113 * The activeUseCount and gcNumberWhenUsed fields are used to track these |
|
114 * conditions. |
|
115 * |
|
116 * There are two tables used to track RegExpShareds. map_ implements the cache |
|
117 * and is cleared on every GC. inUse_ logically owns all RegExpShareds in the |
|
118 * compartment and attempts to delete all RegExpShareds that aren't kept alive |
|
119 * by the above conditions on every GC sweep phase. It is necessary to use two |
|
120 * separate tables since map_ *must* be fully cleared on each GC since the Key |
|
121 * points to a JSAtom that can become garbage. |
|
122 */ |
|
123 class RegExpShared |
|
124 { |
|
125 friend class RegExpCompartment; |
|
126 friend class RegExpStatics; |
|
127 friend class RegExpGuard; |
|
128 |
|
129 typedef frontend::TokenStream TokenStream; |
|
130 typedef JSC::Yarr::BytecodePattern BytecodePattern; |
|
131 typedef JSC::Yarr::ErrorCode ErrorCode; |
|
132 typedef JSC::Yarr::YarrPattern YarrPattern; |
|
133 #if ENABLE_YARR_JIT |
|
134 typedef JSC::Yarr::JSGlobalData JSGlobalData; |
|
135 typedef JSC::Yarr::YarrCodeBlock YarrCodeBlock; |
|
136 typedef JSC::Yarr::YarrJITCompileMode YarrJITCompileMode; |
|
137 #endif |
|
138 |
|
139 /* |
|
140 * Source to the RegExp, for lazy compilation. |
|
141 * The source must be rooted while activeUseCount is non-zero |
|
142 * via RegExpGuard or explicit calls to trace(). |
|
143 */ |
|
144 JSAtom * source; |
|
145 |
|
146 RegExpFlag flags; |
|
147 unsigned parenCount; |
|
148 |
|
149 #if ENABLE_YARR_JIT |
|
150 /* Note: Native code is valid only if |codeBlock.isFallBack() == false|. */ |
|
151 YarrCodeBlock codeBlock; |
|
152 #endif |
|
153 BytecodePattern *bytecode; |
|
154 |
|
155 /* Lifetime-preserving variables: see class-level comment above. */ |
|
156 size_t activeUseCount; |
|
157 uint64_t gcNumberWhenUsed; |
|
158 |
|
159 /* Internal functions. */ |
|
160 bool compile(JSContext *cx, bool matchOnly); |
|
161 bool compile(JSContext *cx, JSLinearString &pattern, bool matchOnly); |
|
162 |
|
163 bool compileIfNecessary(JSContext *cx); |
|
164 bool compileMatchOnlyIfNecessary(JSContext *cx); |
|
165 |
|
166 public: |
|
167 RegExpShared(JSAtom *source, RegExpFlag flags, uint64_t gcNumber); |
|
168 ~RegExpShared(); |
|
169 |
|
170 /* Explicit trace function for use by the RegExpStatics and JITs. */ |
|
171 void trace(JSTracer *trc) { |
|
172 MarkStringUnbarriered(trc, &source, "regexpshared source"); |
|
173 } |
|
174 |
|
175 /* Static functions to expose some Yarr logic. */ |
|
176 |
|
177 // This function should be deleted once bad Android platforms phase out. See bug 604774. |
|
178 static bool isJITRuntimeEnabled(JSContext *cx) { |
|
179 #if ENABLE_YARR_JIT |
|
180 # if defined(ANDROID) |
|
181 return !cx->jitIsBroken; |
|
182 # else |
|
183 return true; |
|
184 # endif |
|
185 #else |
|
186 return false; |
|
187 #endif |
|
188 } |
|
189 static void reportYarrError(ExclusiveContext *cx, TokenStream *ts, ErrorCode error); |
|
190 static bool checkSyntax(ExclusiveContext *cx, TokenStream *tokenStream, JSLinearString *source); |
|
191 |
|
192 /* Called when a RegExpShared is installed into a RegExpObject. */ |
|
193 void prepareForUse(ExclusiveContext *cx) { |
|
194 gcNumberWhenUsed = cx->zone()->gcNumber(); |
|
195 } |
|
196 |
|
197 /* Primary interface: run this regular expression on the given string. */ |
|
198 RegExpRunStatus execute(JSContext *cx, const jschar *chars, size_t length, |
|
199 size_t *lastIndex, MatchPairs &matches); |
|
200 |
|
201 /* Run the regular expression without collecting matches, for test(). */ |
|
202 RegExpRunStatus executeMatchOnly(JSContext *cx, const jschar *chars, size_t length, |
|
203 size_t *lastIndex, MatchPair &match); |
|
204 |
|
205 /* Accessors */ |
|
206 |
|
207 size_t getParenCount() const { JS_ASSERT(isCompiled()); return parenCount; } |
|
208 void incRef() { activeUseCount++; } |
|
209 void decRef() { JS_ASSERT(activeUseCount > 0); activeUseCount--; } |
|
210 |
|
211 /* Accounts for the "0" (whole match) pair. */ |
|
212 size_t pairCount() const { return getParenCount() + 1; } |
|
213 |
|
214 RegExpFlag getFlags() const { return flags; } |
|
215 bool ignoreCase() const { return flags & IgnoreCaseFlag; } |
|
216 bool global() const { return flags & GlobalFlag; } |
|
217 bool multiline() const { return flags & MultilineFlag; } |
|
218 bool sticky() const { return flags & StickyFlag; } |
|
219 |
|
220 #ifdef ENABLE_YARR_JIT |
|
221 bool hasCode() const { return codeBlock.has16BitCode(); } |
|
222 bool hasMatchOnlyCode() const { return codeBlock.has16BitCodeMatchOnly(); } |
|
223 #else |
|
224 bool hasCode() const { return false; } |
|
225 bool hasMatchOnlyCode() const { return false; } |
|
226 #endif |
|
227 bool hasBytecode() const { return bytecode != nullptr; } |
|
228 bool isCompiled() const { return hasBytecode() || hasCode() || hasMatchOnlyCode(); } |
|
229 }; |
|
230 |
|
231 /* |
|
232 * Extend the lifetime of a given RegExpShared to at least the lifetime of |
|
233 * the guard object. See Regular Expression comment at the top. |
|
234 */ |
|
235 class RegExpGuard |
|
236 { |
|
237 RegExpShared *re_; |
|
238 |
|
239 /* |
|
240 * Prevent the RegExp source from being collected: |
|
241 * because RegExpShared objects compile at execution time, the source |
|
242 * must remain rooted for the active lifetime of the RegExpShared. |
|
243 */ |
|
244 RootedAtom source_; |
|
245 |
|
246 RegExpGuard(const RegExpGuard &) MOZ_DELETE; |
|
247 void operator=(const RegExpGuard &) MOZ_DELETE; |
|
248 |
|
249 public: |
|
250 RegExpGuard(ExclusiveContext *cx) |
|
251 : re_(nullptr), source_(cx) |
|
252 {} |
|
253 |
|
254 RegExpGuard(ExclusiveContext *cx, RegExpShared &re) |
|
255 : re_(&re), source_(cx, re.source) |
|
256 { |
|
257 re_->incRef(); |
|
258 } |
|
259 |
|
260 ~RegExpGuard() { |
|
261 release(); |
|
262 } |
|
263 |
|
264 public: |
|
265 void init(RegExpShared &re) { |
|
266 JS_ASSERT(!initialized()); |
|
267 re_ = &re; |
|
268 re_->incRef(); |
|
269 source_ = re_->source; |
|
270 } |
|
271 |
|
272 void release() { |
|
273 if (re_) { |
|
274 re_->decRef(); |
|
275 re_ = nullptr; |
|
276 source_ = nullptr; |
|
277 } |
|
278 } |
|
279 |
|
280 bool initialized() const { return !!re_; } |
|
281 RegExpShared *re() const { JS_ASSERT(initialized()); return re_; } |
|
282 RegExpShared *operator->() { return re(); } |
|
283 RegExpShared &operator*() { return *re(); } |
|
284 }; |
|
285 |
|
286 class RegExpCompartment |
|
287 { |
|
288 struct Key { |
|
289 JSAtom *atom; |
|
290 uint16_t flag; |
|
291 |
|
292 Key() {} |
|
293 Key(JSAtom *atom, RegExpFlag flag) |
|
294 : atom(atom), flag(flag) |
|
295 { } |
|
296 |
|
297 typedef Key Lookup; |
|
298 static HashNumber hash(const Lookup &l) { |
|
299 return DefaultHasher<JSAtom *>::hash(l.atom) ^ (l.flag << 1); |
|
300 } |
|
301 static bool match(Key l, Key r) { |
|
302 return l.atom == r.atom && l.flag == r.flag; |
|
303 } |
|
304 }; |
|
305 |
|
306 /* |
|
307 * Cache to reuse RegExpShareds with the same source/flags/etc. The cache |
|
308 * is entirely cleared on each GC. |
|
309 */ |
|
310 typedef HashMap<Key, RegExpShared *, Key, RuntimeAllocPolicy> Map; |
|
311 Map map_; |
|
312 |
|
313 /* |
|
314 * The set of all RegExpShareds in the compartment. On every GC, every |
|
315 * RegExpShared that is not actively being used is deleted and removed from |
|
316 * the set. |
|
317 */ |
|
318 typedef HashSet<RegExpShared *, DefaultHasher<RegExpShared*>, RuntimeAllocPolicy> PendingSet; |
|
319 PendingSet inUse_; |
|
320 |
|
321 /* |
|
322 * This is the template object where the result of re.exec() is based on, |
|
323 * if there is a result. This is used in CreateRegExpMatchResult to set |
|
324 * the input/index properties faster. |
|
325 */ |
|
326 ReadBarriered<JSObject> matchResultTemplateObject_; |
|
327 |
|
328 JSObject *createMatchResultTemplateObject(JSContext *cx); |
|
329 |
|
330 public: |
|
331 RegExpCompartment(JSRuntime *rt); |
|
332 ~RegExpCompartment(); |
|
333 |
|
334 bool init(JSContext *cx); |
|
335 void sweep(JSRuntime *rt); |
|
336 void clearTables(); |
|
337 |
|
338 bool get(ExclusiveContext *cx, JSAtom *source, RegExpFlag flags, RegExpGuard *g); |
|
339 |
|
340 /* Like 'get', but compile 'maybeOpt' (if non-null). */ |
|
341 bool get(JSContext *cx, HandleAtom source, JSString *maybeOpt, RegExpGuard *g); |
|
342 |
|
343 /* Get or create template object used to base the result of .exec() on. */ |
|
344 JSObject *getOrCreateMatchResultTemplateObject(JSContext *cx) { |
|
345 if (matchResultTemplateObject_) |
|
346 return matchResultTemplateObject_; |
|
347 return createMatchResultTemplateObject(cx); |
|
348 } |
|
349 |
|
350 size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf); |
|
351 }; |
|
352 |
|
353 class RegExpObject : public JSObject |
|
354 { |
|
355 static const unsigned LAST_INDEX_SLOT = 0; |
|
356 static const unsigned SOURCE_SLOT = 1; |
|
357 static const unsigned GLOBAL_FLAG_SLOT = 2; |
|
358 static const unsigned IGNORE_CASE_FLAG_SLOT = 3; |
|
359 static const unsigned MULTILINE_FLAG_SLOT = 4; |
|
360 static const unsigned STICKY_FLAG_SLOT = 5; |
|
361 |
|
362 public: |
|
363 static const unsigned RESERVED_SLOTS = 6; |
|
364 |
|
365 static const Class class_; |
|
366 |
|
367 /* |
|
368 * Note: The regexp statics flags are OR'd into the provided flags, |
|
369 * so this function is really meant for object creation during code |
|
370 * execution, as opposed to during something like XDR. |
|
371 */ |
|
372 static RegExpObject * |
|
373 create(ExclusiveContext *cx, RegExpStatics *res, const jschar *chars, size_t length, |
|
374 RegExpFlag flags, frontend::TokenStream *ts); |
|
375 |
|
376 static RegExpObject * |
|
377 createNoStatics(ExclusiveContext *cx, const jschar *chars, size_t length, RegExpFlag flags, |
|
378 frontend::TokenStream *ts); |
|
379 |
|
380 static RegExpObject * |
|
381 createNoStatics(ExclusiveContext *cx, HandleAtom atom, RegExpFlag flags, frontend::TokenStream *ts); |
|
382 |
|
383 /* Accessors. */ |
|
384 |
|
385 static unsigned lastIndexSlot() { return LAST_INDEX_SLOT; } |
|
386 |
|
387 const Value &getLastIndex() const { return getSlot(LAST_INDEX_SLOT); } |
|
388 |
|
389 void setLastIndex(double d) { |
|
390 setSlot(LAST_INDEX_SLOT, NumberValue(d)); |
|
391 } |
|
392 |
|
393 void zeroLastIndex() { |
|
394 setSlot(LAST_INDEX_SLOT, Int32Value(0)); |
|
395 } |
|
396 |
|
397 JSFlatString *toString(JSContext *cx) const; |
|
398 |
|
399 JSAtom *getSource() const { return &getSlot(SOURCE_SLOT).toString()->asAtom(); } |
|
400 |
|
401 void setSource(JSAtom *source) { |
|
402 setSlot(SOURCE_SLOT, StringValue(source)); |
|
403 } |
|
404 |
|
405 RegExpFlag getFlags() const { |
|
406 unsigned flags = 0; |
|
407 flags |= global() ? GlobalFlag : 0; |
|
408 flags |= ignoreCase() ? IgnoreCaseFlag : 0; |
|
409 flags |= multiline() ? MultilineFlag : 0; |
|
410 flags |= sticky() ? StickyFlag : 0; |
|
411 return RegExpFlag(flags); |
|
412 } |
|
413 |
|
414 /* Flags. */ |
|
415 |
|
416 void setIgnoreCase(bool enabled) { |
|
417 setSlot(IGNORE_CASE_FLAG_SLOT, BooleanValue(enabled)); |
|
418 } |
|
419 |
|
420 void setGlobal(bool enabled) { |
|
421 setSlot(GLOBAL_FLAG_SLOT, BooleanValue(enabled)); |
|
422 } |
|
423 |
|
424 void setMultiline(bool enabled) { |
|
425 setSlot(MULTILINE_FLAG_SLOT, BooleanValue(enabled)); |
|
426 } |
|
427 |
|
428 void setSticky(bool enabled) { |
|
429 setSlot(STICKY_FLAG_SLOT, BooleanValue(enabled)); |
|
430 } |
|
431 |
|
432 bool ignoreCase() const { return getFixedSlot(IGNORE_CASE_FLAG_SLOT).toBoolean(); } |
|
433 bool global() const { return getFixedSlot(GLOBAL_FLAG_SLOT).toBoolean(); } |
|
434 bool multiline() const { return getFixedSlot(MULTILINE_FLAG_SLOT).toBoolean(); } |
|
435 bool sticky() const { return getFixedSlot(STICKY_FLAG_SLOT).toBoolean(); } |
|
436 |
|
437 void shared(RegExpGuard *g) const { |
|
438 JS_ASSERT(maybeShared() != nullptr); |
|
439 g->init(*maybeShared()); |
|
440 } |
|
441 |
|
442 bool getShared(ExclusiveContext *cx, RegExpGuard *g) { |
|
443 if (RegExpShared *shared = maybeShared()) { |
|
444 g->init(*shared); |
|
445 return true; |
|
446 } |
|
447 return createShared(cx, g); |
|
448 } |
|
449 |
|
450 void setShared(ExclusiveContext *cx, RegExpShared &shared) { |
|
451 shared.prepareForUse(cx); |
|
452 JSObject::setPrivate(&shared); |
|
453 } |
|
454 |
|
455 private: |
|
456 friend class RegExpObjectBuilder; |
|
457 |
|
458 /* For access to assignInitialShape. */ |
|
459 friend bool |
|
460 EmptyShape::ensureInitialCustomShape<RegExpObject>(ExclusiveContext *cx, |
|
461 Handle<RegExpObject*> obj); |
|
462 |
|
463 /* |
|
464 * Compute the initial shape to associate with fresh RegExp objects, |
|
465 * encoding their initial properties. Return the shape after |
|
466 * changing |obj|'s last property to it. |
|
467 */ |
|
468 static Shape * |
|
469 assignInitialShape(ExclusiveContext *cx, Handle<RegExpObject*> obj); |
|
470 |
|
471 bool init(ExclusiveContext *cx, HandleAtom source, RegExpFlag flags); |
|
472 |
|
473 /* |
|
474 * Precondition: the syntax for |source| has already been validated. |
|
475 * Side effect: sets the private field. |
|
476 */ |
|
477 bool createShared(ExclusiveContext *cx, RegExpGuard *g); |
|
478 RegExpShared *maybeShared() const { |
|
479 return static_cast<RegExpShared *>(JSObject::getPrivate()); |
|
480 } |
|
481 |
|
482 /* Call setShared in preference to setPrivate. */ |
|
483 void setPrivate(void *priv) MOZ_DELETE; |
|
484 }; |
|
485 |
|
486 /* |
|
487 * Parse regexp flags. Report an error and return false if an invalid |
|
488 * sequence of flags is encountered (repeat/invalid flag). |
|
489 * |
|
490 * N.B. flagStr must be rooted. |
|
491 */ |
|
492 bool |
|
493 ParseRegExpFlags(JSContext *cx, JSString *flagStr, RegExpFlag *flagsOut); |
|
494 |
|
495 /* |
|
496 * Assuming ObjectClassIs(obj, ESClass_RegExp), return obj's RegExpShared. |
|
497 * |
|
498 * Beware: this RegExpShared can be owned by a compartment other than |
|
499 * cx->compartment. Normal RegExpGuard (which is necessary anyways) |
|
500 * will protect the object but it is important not to assign the return value |
|
501 * to be the private of any RegExpObject. |
|
502 */ |
|
503 inline bool |
|
504 RegExpToShared(JSContext *cx, HandleObject obj, RegExpGuard *g) |
|
505 { |
|
506 if (obj->is<RegExpObject>()) |
|
507 return obj->as<RegExpObject>().getShared(cx, g); |
|
508 return Proxy::regexp_toShared(cx, obj, g); |
|
509 } |
|
510 |
|
511 template<XDRMode mode> |
|
512 bool |
|
513 XDRScriptRegExpObject(XDRState<mode> *xdr, HeapPtrObject *objp); |
|
514 |
|
515 extern JSObject * |
|
516 CloneScriptRegExpObject(JSContext *cx, RegExpObject &re); |
|
517 |
|
518 } /* namespace js */ |
|
519 |
|
520 #endif /* vm_RegExpObject_h */ |