michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- michael@0: * vim: set ts=8 sts=4 et sw=4 tw=99: michael@0: * This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #ifndef vm_RegExpObject_h michael@0: #define vm_RegExpObject_h michael@0: michael@0: #include "mozilla/Attributes.h" michael@0: #include "mozilla/MemoryReporting.h" michael@0: michael@0: #include "jscntxt.h" michael@0: #include "jsproxy.h" michael@0: michael@0: #include "gc/Marking.h" michael@0: #include "gc/Zone.h" michael@0: #include "vm/Shape.h" michael@0: #if ENABLE_YARR_JIT michael@0: #include "yarr/YarrJIT.h" michael@0: #else michael@0: #include "yarr/YarrInterpreter.h" michael@0: #endif michael@0: michael@0: /* michael@0: * JavaScript Regular Expressions michael@0: * michael@0: * There are several engine concepts associated with a single logical regexp: michael@0: * michael@0: * RegExpObject - The JS-visible object whose .[[Class]] equals "RegExp" michael@0: * michael@0: * RegExpShared - The compiled representation of the regexp. michael@0: * michael@0: * RegExpCompartment - Owns all RegExpShared instances in a compartment. michael@0: * michael@0: * To save memory, a RegExpShared is not created for a RegExpObject until it is michael@0: * needed for execution. When a RegExpShared needs to be created, it is looked michael@0: * up in a per-compartment table to allow reuse between objects. Lastly, on michael@0: * GC, every RegExpShared (that is not active on the callstack) is discarded. michael@0: * Because of the last point, any code using a RegExpShared (viz., by executing michael@0: * a regexp) must indicate the RegExpShared is active via RegExpGuard. 
michael@0: */ michael@0: namespace js { michael@0: michael@0: class MatchConduit; michael@0: class MatchPair; michael@0: class MatchPairs; michael@0: class RegExpShared; michael@0: michael@0: namespace frontend { class TokenStream; } michael@0: michael@0: /* Regexp flag bits. The four individual flags occupy distinct bits so they can be OR-ed together; AllFlags (0x0f) is the union of all four and NoFlags is the empty set. */ enum RegExpFlag michael@0: { michael@0: IgnoreCaseFlag = 0x01, michael@0: GlobalFlag = 0x02, michael@0: MultilineFlag = 0x04, michael@0: StickyFlag = 0x08, michael@0: michael@0: NoFlags = 0x00, michael@0: AllFlags = 0x0f michael@0: }; michael@0: michael@0: /* Outcome of running a regexp. NOTE(review): judging by the enumerator names, Success_NotFound means the run completed without error but found no match - confirm against the execute() implementation. */ enum RegExpRunStatus michael@0: { michael@0: RegExpRunStatus_Error, michael@0: RegExpRunStatus_Success, michael@0: RegExpRunStatus_Success_NotFound michael@0: }; michael@0: michael@0: /* Helper for building or cloning a RegExpObject. It keeps the context and a rooted reference to the object under construction; the constructor optionally takes an existing object to work on (defaults to nullptr). */ class RegExpObjectBuilder michael@0: { michael@0: ExclusiveContext *cx; michael@0: Rooted reobj_; michael@0: michael@0: bool getOrCreate(); michael@0: bool getOrCreateClone(HandleTypeObject type); michael@0: michael@0: public: michael@0: RegExpObjectBuilder(ExclusiveContext *cx, RegExpObject *reobj = nullptr); michael@0: michael@0: /* The object currently held by the builder. */ RegExpObject *reobj() { return reobj_; } michael@0: michael@0: /* Two ways to build: from a source atom plus flags, or from a source atom plus an existing RegExpShared. */ RegExpObject *build(HandleAtom source, RegExpFlag flags); michael@0: RegExpObject *build(HandleAtom source, RegExpShared &shared); michael@0: michael@0: /* Perform a VM-internal clone. */ michael@0: RegExpObject *clone(Handle other); michael@0: }; michael@0: michael@0: JSObject * michael@0: CloneRegExpObject(JSContext *cx, JSObject *obj); michael@0: michael@0: /* michael@0: * A RegExpShared is the compiled representation of a regexp. A RegExpShared is michael@0: * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may michael@0: * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a michael@0: * cache so that they can be reused when compiling the same regex string. 
michael@0: * michael@0: * During a GC, the trace hook for RegExpObject clears any pointers to michael@0: * RegExpShareds so that there will be no dangling pointers when they are michael@0: * deleted. However, some RegExpShareds are not deleted: michael@0: * michael@0: * 1. Any RegExpShared with pointers from the C++ stack is not deleted. michael@0: * 2. Any RegExpShared which has been embedded into jitcode is not deleted. michael@0: * This rarely comes into play, as jitcode is usually purged before the michael@0: * RegExpShareds are swept. michael@0: * 3. Any RegExpShared that was installed in a RegExpObject during an michael@0: * incremental GC is not deleted. This is because the RegExpObject may have michael@0: * been traced through before the new RegExpShared was installed, in which michael@0: * case deleting the RegExpShared would turn the RegExpObject's reference michael@0: * into a dangling pointer. michael@0: * michael@0: * The activeUseCount and gcNumberWhenUsed fields are used to track these michael@0: * conditions. michael@0: * michael@0: * There are two tables used to track RegExpShareds. map_ implements the cache michael@0: * and is cleared on every GC. inUse_ logically owns all RegExpShareds in the michael@0: * compartment and attempts to delete all RegExpShareds that aren't kept alive michael@0: * by the above conditions on every GC sweep phase. It is necessary to use two michael@0: * separate tables because map_ *must* be fully cleared on each GC, since the Key michael@0: * points to a JSAtom that can become garbage. 
michael@0: */ michael@0: class RegExpShared michael@0: { michael@0: friend class RegExpCompartment; michael@0: friend class RegExpStatics; michael@0: friend class RegExpGuard; michael@0: michael@0: /* Local shorthand for the token-stream and Yarr types used by this class. */ typedef frontend::TokenStream TokenStream; michael@0: typedef JSC::Yarr::BytecodePattern BytecodePattern; michael@0: typedef JSC::Yarr::ErrorCode ErrorCode; michael@0: typedef JSC::Yarr::YarrPattern YarrPattern; michael@0: #if ENABLE_YARR_JIT michael@0: typedef JSC::Yarr::JSGlobalData JSGlobalData; michael@0: typedef JSC::Yarr::YarrCodeBlock YarrCodeBlock; michael@0: typedef JSC::Yarr::YarrJITCompileMode YarrJITCompileMode; michael@0: #endif michael@0: michael@0: /* michael@0: * Source to the RegExp, for lazy compilation. michael@0: * The source must be rooted while activeUseCount is non-zero michael@0: * via RegExpGuard or explicit calls to trace(). michael@0: */ michael@0: JSAtom * source; michael@0: michael@0: RegExpFlag flags; michael@0: unsigned parenCount; michael@0: michael@0: #if ENABLE_YARR_JIT michael@0: /* Note: Native code is valid only if |codeBlock.isFallBack() == false|. */ michael@0: YarrCodeBlock codeBlock; michael@0: #endif michael@0: BytecodePattern *bytecode; michael@0: michael@0: /* Lifetime-preserving variables: see class-level comment above. */ michael@0: size_t activeUseCount; michael@0: uint64_t gcNumberWhenUsed; michael@0: michael@0: /* Internal functions. */ michael@0: bool compile(JSContext *cx, bool matchOnly); michael@0: bool compile(JSContext *cx, JSLinearString &pattern, bool matchOnly); michael@0: michael@0: bool compileIfNecessary(JSContext *cx); michael@0: bool compileMatchOnlyIfNecessary(JSContext *cx); michael@0: michael@0: public: michael@0: RegExpShared(JSAtom *source, RegExpFlag flags, uint64_t gcNumber); michael@0: ~RegExpShared(); michael@0: michael@0: /* Explicit trace function for use by the RegExpStatics and JITs. 
*/ michael@0: void trace(JSTracer *trc) { michael@0: MarkStringUnbarriered(trc, &source, "regexpshared source"); michael@0: } michael@0: michael@0: /* Static functions to expose some Yarr logic. */ michael@0: michael@0: // This function should be deleted once bad Android platforms phase out. See bug 604774. michael@0: static bool isJITRuntimeEnabled(JSContext *cx) { michael@0: #if ENABLE_YARR_JIT michael@0: # if defined(ANDROID) michael@0: return !cx->jitIsBroken; michael@0: # else michael@0: return true; michael@0: # endif michael@0: #else michael@0: return false; michael@0: #endif michael@0: } michael@0: static void reportYarrError(ExclusiveContext *cx, TokenStream *ts, ErrorCode error); michael@0: static bool checkSyntax(ExclusiveContext *cx, TokenStream *tokenStream, JSLinearString *source); michael@0: michael@0: /* Called when a RegExpShared is installed into a RegExpObject. */ michael@0: void prepareForUse(ExclusiveContext *cx) { michael@0: gcNumberWhenUsed = cx->zone()->gcNumber(); michael@0: } michael@0: michael@0: /* Primary interface: run this regular expression on the given string. */ michael@0: RegExpRunStatus execute(JSContext *cx, const jschar *chars, size_t length, michael@0: size_t *lastIndex, MatchPairs &matches); michael@0: michael@0: /* Run the regular expression without collecting matches, for test(). */ michael@0: RegExpRunStatus executeMatchOnly(JSContext *cx, const jschar *chars, size_t length, michael@0: size_t *lastIndex, MatchPair &match); michael@0: michael@0: /* Accessors */ michael@0: michael@0: size_t getParenCount() const { JS_ASSERT(isCompiled()); return parenCount; } michael@0: void incRef() { activeUseCount++; } michael@0: void decRef() { JS_ASSERT(activeUseCount > 0); activeUseCount--; } michael@0: michael@0: /* Accounts for the "0" (whole match) pair. 
*/ michael@0: size_t pairCount() const { return getParenCount() + 1; } michael@0: michael@0: /* Flag queries: getFlags returns the whole mask, the others each test one RegExpFlag bit of |flags|. */ RegExpFlag getFlags() const { return flags; } michael@0: bool ignoreCase() const { return flags & IgnoreCaseFlag; } michael@0: bool global() const { return flags & GlobalFlag; } michael@0: bool multiline() const { return flags & MultilineFlag; } michael@0: bool sticky() const { return flags & StickyFlag; } michael@0: michael@0: /* NOTE(review): this guard tests whether ENABLE_YARR_JIT is defined (ifdef), while the other guards in this class test its value (if). The two disagree when the macro is defined to 0, in which case codeBlock is not declared but is still referenced below - confirm how the build defines the macro. */ #ifdef ENABLE_YARR_JIT michael@0: bool hasCode() const { return codeBlock.has16BitCode(); } michael@0: bool hasMatchOnlyCode() const { return codeBlock.has16BitCodeMatchOnly(); } michael@0: #else michael@0: bool hasCode() const { return false; } michael@0: bool hasMatchOnlyCode() const { return false; } michael@0: #endif michael@0: bool hasBytecode() const { return bytecode != nullptr; } michael@0: /* Compiled means any of: bytecode present, JIT code present, or match-only JIT code present. */ bool isCompiled() const { return hasBytecode() || hasCode() || hasMatchOnlyCode(); } michael@0: }; michael@0: michael@0: /* michael@0: * Extend the lifetime of a given RegExpShared to at least the lifetime of michael@0: * the guard object. See Regular Expression comment at the top. michael@0: */ michael@0: class RegExpGuard michael@0: { michael@0: RegExpShared *re_; michael@0: michael@0: /* michael@0: * Prevent the RegExp source from being collected: michael@0: * because RegExpShared objects compile at execution time, the source michael@0: * must remain rooted for the active lifetime of the RegExpShared. 
michael@0: */ michael@0: RootedAtom source_; michael@0: michael@0: /* Copying is disabled: the copy constructor and assignment operator are marked MOZ_DELETE. */ RegExpGuard(const RegExpGuard &) MOZ_DELETE; michael@0: void operator=(const RegExpGuard &) MOZ_DELETE; michael@0: michael@0: public: michael@0: /* Construct an empty guard; attach a RegExpShared later with init(). */ RegExpGuard(ExclusiveContext *cx) michael@0: : re_(nullptr), source_(cx) michael@0: {} michael@0: michael@0: /* Construct a guard already attached to |re|: roots its source and increments its use count. */ RegExpGuard(ExclusiveContext *cx, RegExpShared &re) michael@0: : re_(&re), source_(cx, re.source) michael@0: { michael@0: re_->incRef(); michael@0: } michael@0: michael@0: ~RegExpGuard() { michael@0: release(); michael@0: } michael@0: michael@0: public: michael@0: /* Attach this guard to |re|, incrementing its use count and rooting its source. Asserts that the guard is not already initialized. */ void init(RegExpShared &re) { michael@0: JS_ASSERT(!initialized()); michael@0: re_ = &re; michael@0: re_->incRef(); michael@0: source_ = re_->source; michael@0: } michael@0: michael@0: /* Detach from the guarded RegExpShared, if any: decrement its use count and clear both the pointer and the rooted source. Does nothing on an empty guard. */ void release() { michael@0: if (re_) { michael@0: re_->decRef(); michael@0: re_ = nullptr; michael@0: source_ = nullptr; michael@0: } michael@0: } michael@0: michael@0: bool initialized() const { return !!re_; } michael@0: /* Access to the guarded RegExpShared; re() asserts that the guard is initialized, and both operators go through re(). 
*/ RegExpShared *re() const { JS_ASSERT(initialized()); return re_; } michael@0: RegExpShared *operator->() { return re(); } michael@0: RegExpShared &operator*() { return *re(); } michael@0: }; michael@0: michael@0: class RegExpCompartment michael@0: { michael@0: /* Cache key: a source atom paired with the regexp flags. Also serves as its own hash policy (Lookup, hash, match). */ struct Key { michael@0: JSAtom *atom; michael@0: uint16_t flag; michael@0: michael@0: Key() {} michael@0: Key(JSAtom *atom, RegExpFlag flag) michael@0: : atom(atom), flag(flag) michael@0: { } michael@0: michael@0: typedef Key Lookup; michael@0: /* Hash: the atom hash XOR-ed with the flag bits shifted left by one. */ static HashNumber hash(const Lookup &l) { michael@0: return DefaultHasher::hash(l.atom) ^ (l.flag << 1); michael@0: } michael@0: /* Keys match only when both the atom pointer and the flags are equal. */ static bool match(Key l, Key r) { michael@0: return l.atom == r.atom && l.flag == r.flag; michael@0: } michael@0: }; michael@0: michael@0: /* michael@0: * Cache to reuse RegExpShareds with the same source/flags/etc. The cache michael@0: * is entirely cleared on each GC. 
michael@0: */ michael@0: typedef HashMap Map; michael@0: Map map_; michael@0: michael@0: /* michael@0: * The set of all RegExpShareds in the compartment. On every GC, every michael@0: * RegExpShared that is not actively being used is deleted and removed from michael@0: * the set. michael@0: */ michael@0: typedef HashSet, RuntimeAllocPolicy> PendingSet; michael@0: PendingSet inUse_; michael@0: michael@0: /* michael@0: * This is the template object on which the result of re.exec() is based, michael@0: * if there is a result. This is used in CreateRegExpMatchResult to set michael@0: * the input/index properties faster. michael@0: */ michael@0: ReadBarriered matchResultTemplateObject_; michael@0: michael@0: JSObject *createMatchResultTemplateObject(JSContext *cx); michael@0: michael@0: public: michael@0: RegExpCompartment(JSRuntime *rt); michael@0: ~RegExpCompartment(); michael@0: michael@0: bool init(JSContext *cx); michael@0: void sweep(JSRuntime *rt); michael@0: void clearTables(); michael@0: michael@0: /* Hands back the RegExpShared for |source| and |flags| through the guard |g|. NOTE(review): presumably looks the pair up in the cache and creates an entry on a miss, as the file overview describes - confirm in the implementation. */ bool get(ExclusiveContext *cx, JSAtom *source, RegExpFlag flags, RegExpGuard *g); michael@0: michael@0: /* Like 'get', but compile 'maybeOpt' (if non-null). */ michael@0: bool get(JSContext *cx, HandleAtom source, JSString *maybeOpt, RegExpGuard *g); michael@0: michael@0: /* Get or create template object used to base the result of .exec() on. 
*/ michael@0: JSObject *getOrCreateMatchResultTemplateObject(JSContext *cx) { michael@0: if (matchResultTemplateObject_) michael@0: return matchResultTemplateObject_; michael@0: return createMatchResultTemplateObject(cx); michael@0: } michael@0: michael@0: size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf); michael@0: }; michael@0: michael@0: class RegExpObject : public JSObject michael@0: { michael@0: /* Indices of the reserved slots: lastIndex, the source atom, and one boolean per flag. RESERVED_SLOTS below is their count. */ static const unsigned LAST_INDEX_SLOT = 0; michael@0: static const unsigned SOURCE_SLOT = 1; michael@0: static const unsigned GLOBAL_FLAG_SLOT = 2; michael@0: static const unsigned IGNORE_CASE_FLAG_SLOT = 3; michael@0: static const unsigned MULTILINE_FLAG_SLOT = 4; michael@0: static const unsigned STICKY_FLAG_SLOT = 5; michael@0: michael@0: public: michael@0: static const unsigned RESERVED_SLOTS = 6; michael@0: michael@0: static const Class class_; michael@0: michael@0: /* michael@0: * Note: The regexp statics flags are OR'd into the provided flags, michael@0: * so this function is really meant for object creation during code michael@0: * execution, as opposed to during something like XDR. michael@0: */ michael@0: static RegExpObject * michael@0: create(ExclusiveContext *cx, RegExpStatics *res, const jschar *chars, size_t length, michael@0: RegExpFlag flags, frontend::TokenStream *ts); michael@0: michael@0: static RegExpObject * michael@0: createNoStatics(ExclusiveContext *cx, const jschar *chars, size_t length, RegExpFlag flags, michael@0: frontend::TokenStream *ts); michael@0: michael@0: static RegExpObject * michael@0: createNoStatics(ExclusiveContext *cx, HandleAtom atom, RegExpFlag flags, frontend::TokenStream *ts); michael@0: michael@0: /* Accessors. 
*/ michael@0: michael@0: static unsigned lastIndexSlot() { return LAST_INDEX_SLOT; } michael@0: michael@0: const Value &getLastIndex() const { return getSlot(LAST_INDEX_SLOT); } michael@0: michael@0: /* Store lastIndex as a number value. */ void setLastIndex(double d) { michael@0: setSlot(LAST_INDEX_SLOT, NumberValue(d)); michael@0: } michael@0: michael@0: /* Reset lastIndex to the int32 value 0. */ void zeroLastIndex() { michael@0: setSlot(LAST_INDEX_SLOT, Int32Value(0)); michael@0: } michael@0: michael@0: JSFlatString *toString(JSContext *cx) const; michael@0: michael@0: /* The source slot holds a string value that is read back as an atom. */ JSAtom *getSource() const { return &getSlot(SOURCE_SLOT).toString()->asAtom(); } michael@0: michael@0: void setSource(JSAtom *source) { michael@0: setSlot(SOURCE_SLOT, StringValue(source)); michael@0: } michael@0: michael@0: /* Rebuild the RegExpFlag bitmask from the four per-flag boolean slots. */ RegExpFlag getFlags() const { michael@0: unsigned flags = 0; michael@0: flags |= global() ? GlobalFlag : 0; michael@0: flags |= ignoreCase() ? IgnoreCaseFlag : 0; michael@0: flags |= multiline() ? MultilineFlag : 0; michael@0: flags |= sticky() ? StickyFlag : 0; michael@0: return RegExpFlag(flags); michael@0: } michael@0: michael@0: /* Flags. 
*/ michael@0: michael@0: void setIgnoreCase(bool enabled) { michael@0: setSlot(IGNORE_CASE_FLAG_SLOT, BooleanValue(enabled)); michael@0: } michael@0: michael@0: void setGlobal(bool enabled) { michael@0: setSlot(GLOBAL_FLAG_SLOT, BooleanValue(enabled)); michael@0: } michael@0: michael@0: void setMultiline(bool enabled) { michael@0: setSlot(MULTILINE_FLAG_SLOT, BooleanValue(enabled)); michael@0: } michael@0: michael@0: void setSticky(bool enabled) { michael@0: setSlot(STICKY_FLAG_SLOT, BooleanValue(enabled)); michael@0: } michael@0: michael@0: /* Flag getters read the boolean stored in the matching reserved slot. */ bool ignoreCase() const { return getFixedSlot(IGNORE_CASE_FLAG_SLOT).toBoolean(); } michael@0: bool global() const { return getFixedSlot(GLOBAL_FLAG_SLOT).toBoolean(); } michael@0: bool multiline() const { return getFixedSlot(MULTILINE_FLAG_SLOT).toBoolean(); } michael@0: bool sticky() const { return getFixedSlot(STICKY_FLAG_SLOT).toBoolean(); } michael@0: michael@0: /* Initialize |g| with the RegExpShared already installed on this object; asserts that one is installed. */ void shared(RegExpGuard *g) const { michael@0: JS_ASSERT(maybeShared() != nullptr); michael@0: g->init(*maybeShared()); michael@0: } michael@0: michael@0: /* Initialize |g| with the installed RegExpShared if there is one; otherwise fall back to createShared() and return its result. */ bool getShared(ExclusiveContext *cx, RegExpGuard *g) { michael@0: if (RegExpShared *shared = maybeShared()) { michael@0: g->init(*shared); michael@0: return true; michael@0: } michael@0: return createShared(cx, g); michael@0: } michael@0: michael@0: /* Install |shared| as the private pointer of this object, after prepareForUse() has recorded the current GC number of the zone on it. */ void setShared(ExclusiveContext *cx, RegExpShared &shared) { michael@0: shared.prepareForUse(cx); michael@0: JSObject::setPrivate(&shared); michael@0: } michael@0: michael@0: private: michael@0: friend class RegExpObjectBuilder; michael@0: michael@0: /* For access to assignInitialShape. */ michael@0: friend bool michael@0: EmptyShape::ensureInitialCustomShape(ExclusiveContext *cx, michael@0: Handle obj); michael@0: michael@0: /* michael@0: * Compute the initial shape to associate with fresh RegExp objects, michael@0: * encoding their initial properties. Return the shape after michael@0: * changing |obj|'s last property to it. 
michael@0: */ michael@0: static Shape * michael@0: assignInitialShape(ExclusiveContext *cx, Handle obj); michael@0: michael@0: bool init(ExclusiveContext *cx, HandleAtom source, RegExpFlag flags); michael@0: michael@0: /* michael@0: * Precondition: the syntax for |source| has already been validated. michael@0: * Side effect: sets the private field. michael@0: */ michael@0: bool createShared(ExclusiveContext *cx, RegExpGuard *g); michael@0: /* The private pointer reinterpreted as a RegExpShared; null when none has been installed. */ RegExpShared *maybeShared() const { michael@0: return static_cast(JSObject::getPrivate()); michael@0: } michael@0: michael@0: /* Call setShared in preference to setPrivate. */ michael@0: void setPrivate(void *priv) MOZ_DELETE; michael@0: }; michael@0: michael@0: /* michael@0: * Parse regexp flags. Report an error and return false if an invalid michael@0: * sequence of flags is encountered (repeat/invalid flag). michael@0: * michael@0: * N.B. flagStr must be rooted. michael@0: */ michael@0: bool michael@0: ParseRegExpFlags(JSContext *cx, JSString *flagStr, RegExpFlag *flagsOut); michael@0: michael@0: /* michael@0: * Assuming ObjectClassIs(obj, ESClass_RegExp), return obj's RegExpShared. michael@0: * michael@0: * Beware: this RegExpShared can be owned by a compartment other than michael@0: * cx->compartment. Normal RegExpGuard (which is necessary anyway) michael@0: * will protect the object but it is important not to assign the return value michael@0: * to be the private of any RegExpObject. 
michael@0: */ michael@0: inline bool michael@0: RegExpToShared(JSContext *cx, HandleObject obj, RegExpGuard *g) michael@0: { michael@0: /* A native RegExpObject hands out its RegExpShared directly through getShared(). */ if (obj->is()) michael@0: return obj->as().getShared(cx, g); michael@0: /* Anything else is forwarded to Proxy::regexp_toShared. */ return Proxy::regexp_toShared(cx, obj, g); michael@0: } michael@0: michael@0: /* NOTE(review): template parameter lists and template arguments appear to have been stripped from this copy of the header (here and in the is/as calls above) - confirm against the upstream file before relying on these signatures. */ template michael@0: bool michael@0: XDRScriptRegExpObject(XDRState *xdr, HeapPtrObject *objp); michael@0: michael@0: /* Declaration only; no definition appears in this header. */ extern JSObject * michael@0: CloneScriptRegExpObject(JSContext *cx, RegExpObject &re); michael@0: michael@0: } /* namespace js */ michael@0: michael@0: #endif /* vm_RegExpObject_h */