1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/js/src/vm/RegExpObject.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,520 @@ 1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- 1.5 + * vim: set ts=8 sts=4 et sw=4 tw=99: 1.6 + * This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +#ifndef vm_RegExpObject_h 1.11 +#define vm_RegExpObject_h 1.12 + 1.13 +#include "mozilla/Attributes.h" 1.14 +#include "mozilla/MemoryReporting.h" 1.15 + 1.16 +#include "jscntxt.h" 1.17 +#include "jsproxy.h" 1.18 + 1.19 +#include "gc/Marking.h" 1.20 +#include "gc/Zone.h" 1.21 +#include "vm/Shape.h" 1.22 +#if ENABLE_YARR_JIT 1.23 +#include "yarr/YarrJIT.h" 1.24 +#else 1.25 +#include "yarr/YarrInterpreter.h" 1.26 +#endif 1.27 + 1.28 +/* 1.29 + * JavaScript Regular Expressions 1.30 + * 1.31 + * There are several engine concepts associated with a single logical regexp: 1.32 + * 1.33 + * RegExpObject - The JS-visible object whose .[[Class]] equals "RegExp" 1.34 + * 1.35 + * RegExpShared - The compiled representation of the regexp. 1.36 + * 1.37 + * RegExpCompartment - Owns all RegExpShared instances in a compartment. 1.38 + * 1.39 + * To save memory, a RegExpShared is not created for a RegExpObject until it is 1.40 + * needed for execution. When a RegExpShared needs to be created, it is looked 1.41 + * up in a per-compartment table to allow reuse between objects. Lastly, on 1.42 + * GC, every RegExpShared (that is not active on the callstack) is discarded. 1.43 + * Because of the last point, any code using a RegExpShared (viz., by executing 1.44 + * a regexp) must indicate the RegExpShared is active via RegExpGuard. 1.45 + */ 1.46 +namespace js { 1.47 + 1.48 +class MatchConduit; 1.49 +class MatchPair; 1.50 +class MatchPairs; 1.51 +class RegExpShared; 1.52 + 1.53 +namespace frontend { class TokenStream; } 1.54 + 1.55 +enum RegExpFlag 1.56 +{ 1.57 + IgnoreCaseFlag = 0x01, 1.58 + GlobalFlag = 0x02, 1.59 + MultilineFlag = 0x04, 1.60 + StickyFlag = 0x08, 1.61 + 1.62 + NoFlags = 0x00, 1.63 + AllFlags = 0x0f 1.64 +}; 1.65 + 1.66 +enum RegExpRunStatus 1.67 +{ 1.68 + RegExpRunStatus_Error, 1.69 + RegExpRunStatus_Success, 1.70 + RegExpRunStatus_Success_NotFound 1.71 +}; 1.72 + 1.73 +class RegExpObjectBuilder 1.74 +{ 1.75 + ExclusiveContext *cx; 1.76 + Rooted<RegExpObject*> reobj_; 1.77 + 1.78 + bool getOrCreate(); 1.79 + bool getOrCreateClone(HandleTypeObject type); 1.80 + 1.81 + public: 1.82 + RegExpObjectBuilder(ExclusiveContext *cx, RegExpObject *reobj = nullptr); 1.83 + 1.84 + RegExpObject *reobj() { return reobj_; } 1.85 + 1.86 + RegExpObject *build(HandleAtom source, RegExpFlag flags); 1.87 + RegExpObject *build(HandleAtom source, RegExpShared &shared); 1.88 + 1.89 + /* Perform a VM-internal clone. */ 1.90 + RegExpObject *clone(Handle<RegExpObject*> other); 1.91 +}; 1.92 + 1.93 +JSObject * 1.94 +CloneRegExpObject(JSContext *cx, JSObject *obj); 1.95 + 1.96 +/* 1.97 + * A RegExpShared is the compiled representation of a regexp. A RegExpShared is 1.98 + * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may 1.99 + * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a 1.100 + * cache so that they can be reused when compiling the same regex string. 1.101 + * 1.102 + * During a GC, the trace hook for RegExpObject clears any pointers to 1.103 + * RegExpShareds so that there will be no dangling pointers when they are 1.104 + * deleted. However, some RegExpShareds are not deleted: 1.105 + * 1.106 + * 1. Any RegExpShared with pointers from the C++ stack is not deleted. 1.107 + * 2. Any RegExpShared which has been embedded into jitcode is not deleted. 1.108 + * This rarely comes into play, as jitcode is usually purged before the 1.109 + * RegExpShared are sweeped. 1.110 + * 3. Any RegExpShared that was installed in a RegExpObject during an 1.111 + * incremental GC is not deleted. This is because the RegExpObject may have 1.112 + * been traced through before the new RegExpShared was installed, in which 1.113 + * case deleting the RegExpShared would turn the RegExpObject's reference 1.114 + * into a dangling pointer 1.115 + * 1.116 + * The activeUseCount and gcNumberWhenUsed fields are used to track these 1.117 + * conditions. 1.118 + * 1.119 + * There are two tables used to track RegExpShareds. map_ implements the cache 1.120 + * and is cleared on every GC. inUse_ logically owns all RegExpShareds in the 1.121 + * compartment and attempts to delete all RegExpShareds that aren't kept alive 1.122 + * by the above conditions on every GC sweep phase. It is necessary to use two 1.123 + * separate tables since map_ *must* be fully cleared on each GC since the Key 1.124 + * points to a JSAtom that can become garbage. 1.125 + */ 1.126 +class RegExpShared 1.127 +{ 1.128 + friend class RegExpCompartment; 1.129 + friend class RegExpStatics; 1.130 + friend class RegExpGuard; 1.131 + 1.132 + typedef frontend::TokenStream TokenStream; 1.133 + typedef JSC::Yarr::BytecodePattern BytecodePattern; 1.134 + typedef JSC::Yarr::ErrorCode ErrorCode; 1.135 + typedef JSC::Yarr::YarrPattern YarrPattern; 1.136 +#if ENABLE_YARR_JIT 1.137 + typedef JSC::Yarr::JSGlobalData JSGlobalData; 1.138 + typedef JSC::Yarr::YarrCodeBlock YarrCodeBlock; 1.139 + typedef JSC::Yarr::YarrJITCompileMode YarrJITCompileMode; 1.140 +#endif 1.141 + 1.142 + /* 1.143 + * Source to the RegExp, for lazy compilation. 1.144 + * The source must be rooted while activeUseCount is non-zero 1.145 + * via RegExpGuard or explicit calls to trace(). 1.146 + */ 1.147 + JSAtom * source; 1.148 + 1.149 + RegExpFlag flags; 1.150 + unsigned parenCount; 1.151 + 1.152 +#if ENABLE_YARR_JIT 1.153 + /* Note: Native code is valid only if |codeBlock.isFallBack() == false|. */ 1.154 + YarrCodeBlock codeBlock; 1.155 +#endif 1.156 + BytecodePattern *bytecode; 1.157 + 1.158 + /* Lifetime-preserving variables: see class-level comment above. */ 1.159 + size_t activeUseCount; 1.160 + uint64_t gcNumberWhenUsed; 1.161 + 1.162 + /* Internal functions. */ 1.163 + bool compile(JSContext *cx, bool matchOnly); 1.164 + bool compile(JSContext *cx, JSLinearString &pattern, bool matchOnly); 1.165 + 1.166 + bool compileIfNecessary(JSContext *cx); 1.167 + bool compileMatchOnlyIfNecessary(JSContext *cx); 1.168 + 1.169 + public: 1.170 + RegExpShared(JSAtom *source, RegExpFlag flags, uint64_t gcNumber); 1.171 + ~RegExpShared(); 1.172 + 1.173 + /* Explicit trace function for use by the RegExpStatics and JITs. */ 1.174 + void trace(JSTracer *trc) { 1.175 + MarkStringUnbarriered(trc, &source, "regexpshared source"); 1.176 + } 1.177 + 1.178 + /* Static functions to expose some Yarr logic. */ 1.179 + 1.180 + // This function should be deleted once bad Android platforms phase out. See bug 604774. 1.181 + static bool isJITRuntimeEnabled(JSContext *cx) { 1.182 + #if ENABLE_YARR_JIT 1.183 + # if defined(ANDROID) 1.184 + return !cx->jitIsBroken; 1.185 + # else 1.186 + return true; 1.187 + # endif 1.188 + #else 1.189 + return false; 1.190 + #endif 1.191 + } 1.192 + static void reportYarrError(ExclusiveContext *cx, TokenStream *ts, ErrorCode error); 1.193 + static bool checkSyntax(ExclusiveContext *cx, TokenStream *tokenStream, JSLinearString *source); 1.194 + 1.195 + /* Called when a RegExpShared is installed into a RegExpObject. */ 1.196 + void prepareForUse(ExclusiveContext *cx) { 1.197 + gcNumberWhenUsed = cx->zone()->gcNumber(); 1.198 + } 1.199 + 1.200 + /* Primary interface: run this regular expression on the given string. */ 1.201 + RegExpRunStatus execute(JSContext *cx, const jschar *chars, size_t length, 1.202 + size_t *lastIndex, MatchPairs &matches); 1.203 + 1.204 + /* Run the regular expression without collecting matches, for test(). */ 1.205 + RegExpRunStatus executeMatchOnly(JSContext *cx, const jschar *chars, size_t length, 1.206 + size_t *lastIndex, MatchPair &match); 1.207 + 1.208 + /* Accessors */ 1.209 + 1.210 + size_t getParenCount() const { JS_ASSERT(isCompiled()); return parenCount; } 1.211 + void incRef() { activeUseCount++; } 1.212 + void decRef() { JS_ASSERT(activeUseCount > 0); activeUseCount--; } 1.213 + 1.214 + /* Accounts for the "0" (whole match) pair. */ 1.215 + size_t pairCount() const { return getParenCount() + 1; } 1.216 + 1.217 + RegExpFlag getFlags() const { return flags; } 1.218 + bool ignoreCase() const { return flags & IgnoreCaseFlag; } 1.219 + bool global() const { return flags & GlobalFlag; } 1.220 + bool multiline() const { return flags & MultilineFlag; } 1.221 + bool sticky() const { return flags & StickyFlag; } 1.222 + 1.223 +#ifdef ENABLE_YARR_JIT 1.224 + bool hasCode() const { return codeBlock.has16BitCode(); } 1.225 + bool hasMatchOnlyCode() const { return codeBlock.has16BitCodeMatchOnly(); } 1.226 +#else 1.227 + bool hasCode() const { return false; } 1.228 + bool hasMatchOnlyCode() const { return false; } 1.229 +#endif 1.230 + bool hasBytecode() const { return bytecode != nullptr; } 1.231 + bool isCompiled() const { return hasBytecode() || hasCode() || hasMatchOnlyCode(); } 1.232 +}; 1.233 + 1.234 +/* 1.235 + * Extend the lifetime of a given RegExpShared to at least the lifetime of 1.236 + * the guard object. See Regular Expression comment at the top. 1.237 + */ 1.238 +class RegExpGuard 1.239 +{ 1.240 + RegExpShared *re_; 1.241 + 1.242 + /* 1.243 + * Prevent the RegExp source from being collected: 1.244 + * because RegExpShared objects compile at execution time, the source 1.245 + * must remain rooted for the active lifetime of the RegExpShared. 1.246 + */ 1.247 + RootedAtom source_; 1.248 + 1.249 + RegExpGuard(const RegExpGuard &) MOZ_DELETE; 1.250 + void operator=(const RegExpGuard &) MOZ_DELETE; 1.251 + 1.252 + public: 1.253 + RegExpGuard(ExclusiveContext *cx) 1.254 + : re_(nullptr), source_(cx) 1.255 + {} 1.256 + 1.257 + RegExpGuard(ExclusiveContext *cx, RegExpShared &re) 1.258 + : re_(&re), source_(cx, re.source) 1.259 + { 1.260 + re_->incRef(); 1.261 + } 1.262 + 1.263 + ~RegExpGuard() { 1.264 + release(); 1.265 + } 1.266 + 1.267 + public: 1.268 + void init(RegExpShared &re) { 1.269 + JS_ASSERT(!initialized()); 1.270 + re_ = &re; 1.271 + re_->incRef(); 1.272 + source_ = re_->source; 1.273 + } 1.274 + 1.275 + void release() { 1.276 + if (re_) { 1.277 + re_->decRef(); 1.278 + re_ = nullptr; 1.279 + source_ = nullptr; 1.280 + } 1.281 + } 1.282 + 1.283 + bool initialized() const { return !!re_; } 1.284 + RegExpShared *re() const { JS_ASSERT(initialized()); return re_; } 1.285 + RegExpShared *operator->() { return re(); } 1.286 + RegExpShared &operator*() { return *re(); } 1.287 +}; 1.288 + 1.289 +class RegExpCompartment 1.290 +{ 1.291 + struct Key { 1.292 + JSAtom *atom; 1.293 + uint16_t flag; 1.294 + 1.295 + Key() {} 1.296 + Key(JSAtom *atom, RegExpFlag flag) 1.297 + : atom(atom), flag(flag) 1.298 + { } 1.299 + 1.300 + typedef Key Lookup; 1.301 + static HashNumber hash(const Lookup &l) { 1.302 + return DefaultHasher<JSAtom *>::hash(l.atom) ^ (l.flag << 1); 1.303 + } 1.304 + static bool match(Key l, Key r) { 1.305 + return l.atom == r.atom && l.flag == r.flag; 1.306 + } 1.307 + }; 1.308 + 1.309 + /* 1.310 + * Cache to reuse RegExpShareds with the same source/flags/etc. The cache 1.311 + * is entirely cleared on each GC. 1.312 + */ 1.313 + typedef HashMap<Key, RegExpShared *, Key, RuntimeAllocPolicy> Map; 1.314 + Map map_; 1.315 + 1.316 + /* 1.317 + * The set of all RegExpShareds in the compartment. On every GC, every 1.318 + * RegExpShared that is not actively being used is deleted and removed from 1.319 + * the set. 1.320 + */ 1.321 + typedef HashSet<RegExpShared *, DefaultHasher<RegExpShared*>, RuntimeAllocPolicy> PendingSet; 1.322 + PendingSet inUse_; 1.323 + 1.324 + /* 1.325 + * This is the template object where the result of re.exec() is based on, 1.326 + * if there is a result. This is used in CreateRegExpMatchResult to set 1.327 + * the input/index properties faster. 1.328 + */ 1.329 + ReadBarriered<JSObject> matchResultTemplateObject_; 1.330 + 1.331 + JSObject *createMatchResultTemplateObject(JSContext *cx); 1.332 + 1.333 + public: 1.334 + RegExpCompartment(JSRuntime *rt); 1.335 + ~RegExpCompartment(); 1.336 + 1.337 + bool init(JSContext *cx); 1.338 + void sweep(JSRuntime *rt); 1.339 + void clearTables(); 1.340 + 1.341 + bool get(ExclusiveContext *cx, JSAtom *source, RegExpFlag flags, RegExpGuard *g); 1.342 + 1.343 + /* Like 'get', but compile 'maybeOpt' (if non-null). */ 1.344 + bool get(JSContext *cx, HandleAtom source, JSString *maybeOpt, RegExpGuard *g); 1.345 + 1.346 + /* Get or create template object used to base the result of .exec() on. */ 1.347 + JSObject *getOrCreateMatchResultTemplateObject(JSContext *cx) { 1.348 + if (matchResultTemplateObject_) 1.349 + return matchResultTemplateObject_; 1.350 + return createMatchResultTemplateObject(cx); 1.351 + } 1.352 + 1.353 + size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf); 1.354 +}; 1.355 + 1.356 +class RegExpObject : public JSObject 1.357 +{ 1.358 + static const unsigned LAST_INDEX_SLOT = 0; 1.359 + static const unsigned SOURCE_SLOT = 1; 1.360 + static const unsigned GLOBAL_FLAG_SLOT = 2; 1.361 + static const unsigned IGNORE_CASE_FLAG_SLOT = 3; 1.362 + static const unsigned MULTILINE_FLAG_SLOT = 4; 1.363 + static const unsigned STICKY_FLAG_SLOT = 5; 1.364 + 1.365 + public: 1.366 + static const unsigned RESERVED_SLOTS = 6; 1.367 + 1.368 + static const Class class_; 1.369 + 1.370 + /* 1.371 + * Note: The regexp statics flags are OR'd into the provided flags, 1.372 + * so this function is really meant for object creation during code 1.373 + * execution, as opposed to during something like XDR. 1.374 + */ 1.375 + static RegExpObject * 1.376 + create(ExclusiveContext *cx, RegExpStatics *res, const jschar *chars, size_t length, 1.377 + RegExpFlag flags, frontend::TokenStream *ts); 1.378 + 1.379 + static RegExpObject * 1.380 + createNoStatics(ExclusiveContext *cx, const jschar *chars, size_t length, RegExpFlag flags, 1.381 + frontend::TokenStream *ts); 1.382 + 1.383 + static RegExpObject * 1.384 + createNoStatics(ExclusiveContext *cx, HandleAtom atom, RegExpFlag flags, frontend::TokenStream *ts); 1.385 + 1.386 + /* Accessors. */ 1.387 + 1.388 + static unsigned lastIndexSlot() { return LAST_INDEX_SLOT; } 1.389 + 1.390 + const Value &getLastIndex() const { return getSlot(LAST_INDEX_SLOT); } 1.391 + 1.392 + void setLastIndex(double d) { 1.393 + setSlot(LAST_INDEX_SLOT, NumberValue(d)); 1.394 + } 1.395 + 1.396 + void zeroLastIndex() { 1.397 + setSlot(LAST_INDEX_SLOT, Int32Value(0)); 1.398 + } 1.399 + 1.400 + JSFlatString *toString(JSContext *cx) const; 1.401 + 1.402 + JSAtom *getSource() const { return &getSlot(SOURCE_SLOT).toString()->asAtom(); } 1.403 + 1.404 + void setSource(JSAtom *source) { 1.405 + setSlot(SOURCE_SLOT, StringValue(source)); 1.406 + } 1.407 + 1.408 + RegExpFlag getFlags() const { 1.409 + unsigned flags = 0; 1.410 + flags |= global() ? GlobalFlag : 0; 1.411 + flags |= ignoreCase() ? IgnoreCaseFlag : 0; 1.412 + flags |= multiline() ? MultilineFlag : 0; 1.413 + flags |= sticky() ? StickyFlag : 0; 1.414 + return RegExpFlag(flags); 1.415 + } 1.416 + 1.417 + /* Flags. */ 1.418 + 1.419 + void setIgnoreCase(bool enabled) { 1.420 + setSlot(IGNORE_CASE_FLAG_SLOT, BooleanValue(enabled)); 1.421 + } 1.422 + 1.423 + void setGlobal(bool enabled) { 1.424 + setSlot(GLOBAL_FLAG_SLOT, BooleanValue(enabled)); 1.425 + } 1.426 + 1.427 + void setMultiline(bool enabled) { 1.428 + setSlot(MULTILINE_FLAG_SLOT, BooleanValue(enabled)); 1.429 + } 1.430 + 1.431 + void setSticky(bool enabled) { 1.432 + setSlot(STICKY_FLAG_SLOT, BooleanValue(enabled)); 1.433 + } 1.434 + 1.435 + bool ignoreCase() const { return getFixedSlot(IGNORE_CASE_FLAG_SLOT).toBoolean(); } 1.436 + bool global() const { return getFixedSlot(GLOBAL_FLAG_SLOT).toBoolean(); } 1.437 + bool multiline() const { return getFixedSlot(MULTILINE_FLAG_SLOT).toBoolean(); } 1.438 + bool sticky() const { return getFixedSlot(STICKY_FLAG_SLOT).toBoolean(); } 1.439 + 1.440 + void shared(RegExpGuard *g) const { 1.441 + JS_ASSERT(maybeShared() != nullptr); 1.442 + g->init(*maybeShared()); 1.443 + } 1.444 + 1.445 + bool getShared(ExclusiveContext *cx, RegExpGuard *g) { 1.446 + if (RegExpShared *shared = maybeShared()) { 1.447 + g->init(*shared); 1.448 + return true; 1.449 + } 1.450 + return createShared(cx, g); 1.451 + } 1.452 + 1.453 + void setShared(ExclusiveContext *cx, RegExpShared &shared) { 1.454 + shared.prepareForUse(cx); 1.455 + JSObject::setPrivate(&shared); 1.456 + } 1.457 + 1.458 + private: 1.459 + friend class RegExpObjectBuilder; 1.460 + 1.461 + /* For access to assignInitialShape. */ 1.462 + friend bool 1.463 + EmptyShape::ensureInitialCustomShape<RegExpObject>(ExclusiveContext *cx, 1.464 + Handle<RegExpObject*> obj); 1.465 + 1.466 + /* 1.467 + * Compute the initial shape to associate with fresh RegExp objects, 1.468 + * encoding their initial properties. Return the shape after 1.469 + * changing |obj|'s last property to it. 1.470 + */ 1.471 + static Shape * 1.472 + assignInitialShape(ExclusiveContext *cx, Handle<RegExpObject*> obj); 1.473 + 1.474 + bool init(ExclusiveContext *cx, HandleAtom source, RegExpFlag flags); 1.475 + 1.476 + /* 1.477 + * Precondition: the syntax for |source| has already been validated. 1.478 + * Side effect: sets the private field. 1.479 + */ 1.480 + bool createShared(ExclusiveContext *cx, RegExpGuard *g); 1.481 + RegExpShared *maybeShared() const { 1.482 + return static_cast<RegExpShared *>(JSObject::getPrivate()); 1.483 + } 1.484 + 1.485 + /* Call setShared in preference to setPrivate. */ 1.486 + void setPrivate(void *priv) MOZ_DELETE; 1.487 +}; 1.488 + 1.489 +/* 1.490 + * Parse regexp flags. Report an error and return false if an invalid 1.491 + * sequence of flags is encountered (repeat/invalid flag). 1.492 + * 1.493 + * N.B. flagStr must be rooted. 1.494 + */ 1.495 +bool 1.496 +ParseRegExpFlags(JSContext *cx, JSString *flagStr, RegExpFlag *flagsOut); 1.497 + 1.498 +/* 1.499 + * Assuming ObjectClassIs(obj, ESClass_RegExp), return obj's RegExpShared. 1.500 + * 1.501 + * Beware: this RegExpShared can be owned by a compartment other than 1.502 + * cx->compartment. Normal RegExpGuard (which is necessary anyways) 1.503 + * will protect the object but it is important not to assign the return value 1.504 + * to be the private of any RegExpObject. 1.505 + */ 1.506 +inline bool 1.507 +RegExpToShared(JSContext *cx, HandleObject obj, RegExpGuard *g) 1.508 +{ 1.509 + if (obj->is<RegExpObject>()) 1.510 + return obj->as<RegExpObject>().getShared(cx, g); 1.511 + return Proxy::regexp_toShared(cx, obj, g); 1.512 +} 1.513 + 1.514 +template<XDRMode mode> 1.515 +bool 1.516 +XDRScriptRegExpObject(XDRState<mode> *xdr, HeapPtrObject *objp); 1.517 + 1.518 +extern JSObject * 1.519 +CloneScriptRegExpObject(JSContext *cx, RegExpObject &re); 1.520 + 1.521 +} /* namespace js */ 1.522 + 1.523 +#endif /* vm_RegExpObject_h */