js/src/vm/RegExpObject.h

changeset 0
6474c204b198
equal deleted inserted replaced
-1:000000000000 0:738a0880e2ba
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #ifndef vm_RegExpObject_h
8 #define vm_RegExpObject_h
9
10 #include "mozilla/Attributes.h"
11 #include "mozilla/MemoryReporting.h"
12
13 #include "jscntxt.h"
14 #include "jsproxy.h"
15
16 #include "gc/Marking.h"
17 #include "gc/Zone.h"
18 #include "vm/Shape.h"
19 #if ENABLE_YARR_JIT
20 #include "yarr/YarrJIT.h"
21 #else
22 #include "yarr/YarrInterpreter.h"
23 #endif
24
25 /*
26 * JavaScript Regular Expressions
27 *
28 * There are several engine concepts associated with a single logical regexp:
29 *
30 * RegExpObject - The JS-visible object whose .[[Class]] equals "RegExp"
31 *
32 * RegExpShared - The compiled representation of the regexp.
33 *
34 * RegExpCompartment - Owns all RegExpShared instances in a compartment.
35 *
36 * To save memory, a RegExpShared is not created for a RegExpObject until it is
37 * needed for execution. When a RegExpShared needs to be created, it is looked
38 * up in a per-compartment table to allow reuse between objects. Lastly, on
39 * GC, every RegExpShared (that is not active on the callstack) is discarded.
40 * Because of the last point, any code using a RegExpShared (viz., by executing
41 * a regexp) must indicate the RegExpShared is active via RegExpGuard.
42 */
43 namespace js {
44
/*
 * Forward declarations. RegExpShared is defined later in this header; the
 * other types are defined elsewhere.
 */
class MatchConduit;
class MatchPair;
class MatchPairs;
class RegExpShared;

namespace frontend {
class TokenStream;
} /* namespace frontend */
51
/*
 * Bit flags describing a regexp's modifiers. Each individual flag occupies
 * its own bit, so flags can be OR'd together; AllFlags is the union of the
 * four individual flags and NoFlags is the empty set.
 */
enum RegExpFlag
{
    NoFlags        = 0x00,

    IgnoreCaseFlag = 0x01,
    GlobalFlag     = 0x02,
    MultilineFlag  = 0x04,
    StickyFlag     = 0x08,

    AllFlags       = IgnoreCaseFlag | GlobalFlag | MultilineFlag | StickyFlag
};
62
/*
 * Result code returned by RegExpShared::execute and
 * RegExpShared::executeMatchOnly. Judging by the names, Success_NotFound
 * presumably means the run completed without error but found no match.
 */
enum RegExpRunStatus
{
    RegExpRunStatus_Error            = 0,
    RegExpRunStatus_Success          = 1,
    RegExpRunStatus_Success_NotFound = 2
};
69
70 class RegExpObjectBuilder
71 {
72 ExclusiveContext *cx;
73 Rooted<RegExpObject*> reobj_;
74
75 bool getOrCreate();
76 bool getOrCreateClone(HandleTypeObject type);
77
78 public:
79 RegExpObjectBuilder(ExclusiveContext *cx, RegExpObject *reobj = nullptr);
80
81 RegExpObject *reobj() { return reobj_; }
82
83 RegExpObject *build(HandleAtom source, RegExpFlag flags);
84 RegExpObject *build(HandleAtom source, RegExpShared &shared);
85
86 /* Perform a VM-internal clone. */
87 RegExpObject *clone(Handle<RegExpObject*> other);
88 };
89
90 JSObject *
91 CloneRegExpObject(JSContext *cx, JSObject *obj);
92
93 /*
94 * A RegExpShared is the compiled representation of a regexp. A RegExpShared is
95 * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may
96 * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a
97 * cache so that they can be reused when compiling the same regex string.
98 *
99 * During a GC, the trace hook for RegExpObject clears any pointers to
100 * RegExpShareds so that there will be no dangling pointers when they are
101 * deleted. However, some RegExpShareds are not deleted:
102 *
103 * 1. Any RegExpShared with pointers from the C++ stack is not deleted.
104 * 2. Any RegExpShared which has been embedded into jitcode is not deleted.
105 * This rarely comes into play, as jitcode is usually purged before the
106 * RegExpShared are sweeped.
107 * 3. Any RegExpShared that was installed in a RegExpObject during an
108 * incremental GC is not deleted. This is because the RegExpObject may have
109 * been traced through before the new RegExpShared was installed, in which
110 * case deleting the RegExpShared would turn the RegExpObject's reference
111 * into a dangling pointer
112 *
113 * The activeUseCount and gcNumberWhenUsed fields are used to track these
114 * conditions.
115 *
116 * There are two tables used to track RegExpShareds. map_ implements the cache
117 * and is cleared on every GC. inUse_ logically owns all RegExpShareds in the
118 * compartment and attempts to delete all RegExpShareds that aren't kept alive
119 * by the above conditions on every GC sweep phase. It is necessary to use two
120 * separate tables since map_ *must* be fully cleared on each GC since the Key
121 * points to a JSAtom that can become garbage.
122 */
123 class RegExpShared
124 {
125 friend class RegExpCompartment;
126 friend class RegExpStatics;
127 friend class RegExpGuard;
128
129 typedef frontend::TokenStream TokenStream;
130 typedef JSC::Yarr::BytecodePattern BytecodePattern;
131 typedef JSC::Yarr::ErrorCode ErrorCode;
132 typedef JSC::Yarr::YarrPattern YarrPattern;
133 #if ENABLE_YARR_JIT
134 typedef JSC::Yarr::JSGlobalData JSGlobalData;
135 typedef JSC::Yarr::YarrCodeBlock YarrCodeBlock;
136 typedef JSC::Yarr::YarrJITCompileMode YarrJITCompileMode;
137 #endif
138
139 /*
140 * Source to the RegExp, for lazy compilation.
141 * The source must be rooted while activeUseCount is non-zero
142 * via RegExpGuard or explicit calls to trace().
143 */
144 JSAtom * source;
145
146 RegExpFlag flags;
147 unsigned parenCount;
148
149 #if ENABLE_YARR_JIT
150 /* Note: Native code is valid only if |codeBlock.isFallBack() == false|. */
151 YarrCodeBlock codeBlock;
152 #endif
153 BytecodePattern *bytecode;
154
155 /* Lifetime-preserving variables: see class-level comment above. */
156 size_t activeUseCount;
157 uint64_t gcNumberWhenUsed;
158
159 /* Internal functions. */
160 bool compile(JSContext *cx, bool matchOnly);
161 bool compile(JSContext *cx, JSLinearString &pattern, bool matchOnly);
162
163 bool compileIfNecessary(JSContext *cx);
164 bool compileMatchOnlyIfNecessary(JSContext *cx);
165
166 public:
167 RegExpShared(JSAtom *source, RegExpFlag flags, uint64_t gcNumber);
168 ~RegExpShared();
169
170 /* Explicit trace function for use by the RegExpStatics and JITs. */
171 void trace(JSTracer *trc) {
172 MarkStringUnbarriered(trc, &source, "regexpshared source");
173 }
174
175 /* Static functions to expose some Yarr logic. */
176
177 // This function should be deleted once bad Android platforms phase out. See bug 604774.
178 static bool isJITRuntimeEnabled(JSContext *cx) {
179 #if ENABLE_YARR_JIT
180 # if defined(ANDROID)
181 return !cx->jitIsBroken;
182 # else
183 return true;
184 # endif
185 #else
186 return false;
187 #endif
188 }
189 static void reportYarrError(ExclusiveContext *cx, TokenStream *ts, ErrorCode error);
190 static bool checkSyntax(ExclusiveContext *cx, TokenStream *tokenStream, JSLinearString *source);
191
192 /* Called when a RegExpShared is installed into a RegExpObject. */
193 void prepareForUse(ExclusiveContext *cx) {
194 gcNumberWhenUsed = cx->zone()->gcNumber();
195 }
196
197 /* Primary interface: run this regular expression on the given string. */
198 RegExpRunStatus execute(JSContext *cx, const jschar *chars, size_t length,
199 size_t *lastIndex, MatchPairs &matches);
200
201 /* Run the regular expression without collecting matches, for test(). */
202 RegExpRunStatus executeMatchOnly(JSContext *cx, const jschar *chars, size_t length,
203 size_t *lastIndex, MatchPair &match);
204
205 /* Accessors */
206
207 size_t getParenCount() const { JS_ASSERT(isCompiled()); return parenCount; }
208 void incRef() { activeUseCount++; }
209 void decRef() { JS_ASSERT(activeUseCount > 0); activeUseCount--; }
210
211 /* Accounts for the "0" (whole match) pair. */
212 size_t pairCount() const { return getParenCount() + 1; }
213
214 RegExpFlag getFlags() const { return flags; }
215 bool ignoreCase() const { return flags & IgnoreCaseFlag; }
216 bool global() const { return flags & GlobalFlag; }
217 bool multiline() const { return flags & MultilineFlag; }
218 bool sticky() const { return flags & StickyFlag; }
219
220 #ifdef ENABLE_YARR_JIT
221 bool hasCode() const { return codeBlock.has16BitCode(); }
222 bool hasMatchOnlyCode() const { return codeBlock.has16BitCodeMatchOnly(); }
223 #else
224 bool hasCode() const { return false; }
225 bool hasMatchOnlyCode() const { return false; }
226 #endif
227 bool hasBytecode() const { return bytecode != nullptr; }
228 bool isCompiled() const { return hasBytecode() || hasCode() || hasMatchOnlyCode(); }
229 };
230
231 /*
232 * Extend the lifetime of a given RegExpShared to at least the lifetime of
233 * the guard object. See Regular Expression comment at the top.
234 */
235 class RegExpGuard
236 {
237 RegExpShared *re_;
238
239 /*
240 * Prevent the RegExp source from being collected:
241 * because RegExpShared objects compile at execution time, the source
242 * must remain rooted for the active lifetime of the RegExpShared.
243 */
244 RootedAtom source_;
245
246 RegExpGuard(const RegExpGuard &) MOZ_DELETE;
247 void operator=(const RegExpGuard &) MOZ_DELETE;
248
249 public:
250 RegExpGuard(ExclusiveContext *cx)
251 : re_(nullptr), source_(cx)
252 {}
253
254 RegExpGuard(ExclusiveContext *cx, RegExpShared &re)
255 : re_(&re), source_(cx, re.source)
256 {
257 re_->incRef();
258 }
259
260 ~RegExpGuard() {
261 release();
262 }
263
264 public:
265 void init(RegExpShared &re) {
266 JS_ASSERT(!initialized());
267 re_ = &re;
268 re_->incRef();
269 source_ = re_->source;
270 }
271
272 void release() {
273 if (re_) {
274 re_->decRef();
275 re_ = nullptr;
276 source_ = nullptr;
277 }
278 }
279
280 bool initialized() const { return !!re_; }
281 RegExpShared *re() const { JS_ASSERT(initialized()); return re_; }
282 RegExpShared *operator->() { return re(); }
283 RegExpShared &operator*() { return *re(); }
284 };
285
286 class RegExpCompartment
287 {
288 struct Key {
289 JSAtom *atom;
290 uint16_t flag;
291
292 Key() {}
293 Key(JSAtom *atom, RegExpFlag flag)
294 : atom(atom), flag(flag)
295 { }
296
297 typedef Key Lookup;
298 static HashNumber hash(const Lookup &l) {
299 return DefaultHasher<JSAtom *>::hash(l.atom) ^ (l.flag << 1);
300 }
301 static bool match(Key l, Key r) {
302 return l.atom == r.atom && l.flag == r.flag;
303 }
304 };
305
306 /*
307 * Cache to reuse RegExpShareds with the same source/flags/etc. The cache
308 * is entirely cleared on each GC.
309 */
310 typedef HashMap<Key, RegExpShared *, Key, RuntimeAllocPolicy> Map;
311 Map map_;
312
313 /*
314 * The set of all RegExpShareds in the compartment. On every GC, every
315 * RegExpShared that is not actively being used is deleted and removed from
316 * the set.
317 */
318 typedef HashSet<RegExpShared *, DefaultHasher<RegExpShared*>, RuntimeAllocPolicy> PendingSet;
319 PendingSet inUse_;
320
321 /*
322 * This is the template object where the result of re.exec() is based on,
323 * if there is a result. This is used in CreateRegExpMatchResult to set
324 * the input/index properties faster.
325 */
326 ReadBarriered<JSObject> matchResultTemplateObject_;
327
328 JSObject *createMatchResultTemplateObject(JSContext *cx);
329
330 public:
331 RegExpCompartment(JSRuntime *rt);
332 ~RegExpCompartment();
333
334 bool init(JSContext *cx);
335 void sweep(JSRuntime *rt);
336 void clearTables();
337
338 bool get(ExclusiveContext *cx, JSAtom *source, RegExpFlag flags, RegExpGuard *g);
339
340 /* Like 'get', but compile 'maybeOpt' (if non-null). */
341 bool get(JSContext *cx, HandleAtom source, JSString *maybeOpt, RegExpGuard *g);
342
343 /* Get or create template object used to base the result of .exec() on. */
344 JSObject *getOrCreateMatchResultTemplateObject(JSContext *cx) {
345 if (matchResultTemplateObject_)
346 return matchResultTemplateObject_;
347 return createMatchResultTemplateObject(cx);
348 }
349
350 size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
351 };
352
353 class RegExpObject : public JSObject
354 {
355 static const unsigned LAST_INDEX_SLOT = 0;
356 static const unsigned SOURCE_SLOT = 1;
357 static const unsigned GLOBAL_FLAG_SLOT = 2;
358 static const unsigned IGNORE_CASE_FLAG_SLOT = 3;
359 static const unsigned MULTILINE_FLAG_SLOT = 4;
360 static const unsigned STICKY_FLAG_SLOT = 5;
361
362 public:
363 static const unsigned RESERVED_SLOTS = 6;
364
365 static const Class class_;
366
367 /*
368 * Note: The regexp statics flags are OR'd into the provided flags,
369 * so this function is really meant for object creation during code
370 * execution, as opposed to during something like XDR.
371 */
372 static RegExpObject *
373 create(ExclusiveContext *cx, RegExpStatics *res, const jschar *chars, size_t length,
374 RegExpFlag flags, frontend::TokenStream *ts);
375
376 static RegExpObject *
377 createNoStatics(ExclusiveContext *cx, const jschar *chars, size_t length, RegExpFlag flags,
378 frontend::TokenStream *ts);
379
380 static RegExpObject *
381 createNoStatics(ExclusiveContext *cx, HandleAtom atom, RegExpFlag flags, frontend::TokenStream *ts);
382
383 /* Accessors. */
384
385 static unsigned lastIndexSlot() { return LAST_INDEX_SLOT; }
386
387 const Value &getLastIndex() const { return getSlot(LAST_INDEX_SLOT); }
388
389 void setLastIndex(double d) {
390 setSlot(LAST_INDEX_SLOT, NumberValue(d));
391 }
392
393 void zeroLastIndex() {
394 setSlot(LAST_INDEX_SLOT, Int32Value(0));
395 }
396
397 JSFlatString *toString(JSContext *cx) const;
398
399 JSAtom *getSource() const { return &getSlot(SOURCE_SLOT).toString()->asAtom(); }
400
401 void setSource(JSAtom *source) {
402 setSlot(SOURCE_SLOT, StringValue(source));
403 }
404
405 RegExpFlag getFlags() const {
406 unsigned flags = 0;
407 flags |= global() ? GlobalFlag : 0;
408 flags |= ignoreCase() ? IgnoreCaseFlag : 0;
409 flags |= multiline() ? MultilineFlag : 0;
410 flags |= sticky() ? StickyFlag : 0;
411 return RegExpFlag(flags);
412 }
413
414 /* Flags. */
415
416 void setIgnoreCase(bool enabled) {
417 setSlot(IGNORE_CASE_FLAG_SLOT, BooleanValue(enabled));
418 }
419
420 void setGlobal(bool enabled) {
421 setSlot(GLOBAL_FLAG_SLOT, BooleanValue(enabled));
422 }
423
424 void setMultiline(bool enabled) {
425 setSlot(MULTILINE_FLAG_SLOT, BooleanValue(enabled));
426 }
427
428 void setSticky(bool enabled) {
429 setSlot(STICKY_FLAG_SLOT, BooleanValue(enabled));
430 }
431
432 bool ignoreCase() const { return getFixedSlot(IGNORE_CASE_FLAG_SLOT).toBoolean(); }
433 bool global() const { return getFixedSlot(GLOBAL_FLAG_SLOT).toBoolean(); }
434 bool multiline() const { return getFixedSlot(MULTILINE_FLAG_SLOT).toBoolean(); }
435 bool sticky() const { return getFixedSlot(STICKY_FLAG_SLOT).toBoolean(); }
436
437 void shared(RegExpGuard *g) const {
438 JS_ASSERT(maybeShared() != nullptr);
439 g->init(*maybeShared());
440 }
441
442 bool getShared(ExclusiveContext *cx, RegExpGuard *g) {
443 if (RegExpShared *shared = maybeShared()) {
444 g->init(*shared);
445 return true;
446 }
447 return createShared(cx, g);
448 }
449
450 void setShared(ExclusiveContext *cx, RegExpShared &shared) {
451 shared.prepareForUse(cx);
452 JSObject::setPrivate(&shared);
453 }
454
455 private:
456 friend class RegExpObjectBuilder;
457
458 /* For access to assignInitialShape. */
459 friend bool
460 EmptyShape::ensureInitialCustomShape<RegExpObject>(ExclusiveContext *cx,
461 Handle<RegExpObject*> obj);
462
463 /*
464 * Compute the initial shape to associate with fresh RegExp objects,
465 * encoding their initial properties. Return the shape after
466 * changing |obj|'s last property to it.
467 */
468 static Shape *
469 assignInitialShape(ExclusiveContext *cx, Handle<RegExpObject*> obj);
470
471 bool init(ExclusiveContext *cx, HandleAtom source, RegExpFlag flags);
472
473 /*
474 * Precondition: the syntax for |source| has already been validated.
475 * Side effect: sets the private field.
476 */
477 bool createShared(ExclusiveContext *cx, RegExpGuard *g);
478 RegExpShared *maybeShared() const {
479 return static_cast<RegExpShared *>(JSObject::getPrivate());
480 }
481
482 /* Call setShared in preference to setPrivate. */
483 void setPrivate(void *priv) MOZ_DELETE;
484 };
485
486 /*
487 * Parse regexp flags. Report an error and return false if an invalid
488 * sequence of flags is encountered (repeat/invalid flag).
489 *
490 * N.B. flagStr must be rooted.
491 */
492 bool
493 ParseRegExpFlags(JSContext *cx, JSString *flagStr, RegExpFlag *flagsOut);
494
495 /*
496 * Assuming ObjectClassIs(obj, ESClass_RegExp), return obj's RegExpShared.
497 *
498 * Beware: this RegExpShared can be owned by a compartment other than
499 * cx->compartment. Normal RegExpGuard (which is necessary anyways)
500 * will protect the object but it is important not to assign the return value
501 * to be the private of any RegExpObject.
502 */
503 inline bool
504 RegExpToShared(JSContext *cx, HandleObject obj, RegExpGuard *g)
505 {
506 if (obj->is<RegExpObject>())
507 return obj->as<RegExpObject>().getShared(cx, g);
508 return Proxy::regexp_toShared(cx, obj, g);
509 }
510
511 template<XDRMode mode>
512 bool
513 XDRScriptRegExpObject(XDRState<mode> *xdr, HeapPtrObject *objp);
514
515 extern JSObject *
516 CloneScriptRegExpObject(JSContext *cx, RegExpObject &re);
517
518 } /* namespace js */
519
520 #endif /* vm_RegExpObject_h */

mercurial