michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- michael@0: * vim: set ts=8 sts=4 et sw=4 tw=99: michael@0: * This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #ifndef vm_String_h michael@0: #define vm_String_h michael@0: michael@0: #include "mozilla/MemoryReporting.h" michael@0: #include "mozilla/PodOperations.h" michael@0: michael@0: #include "jsapi.h" michael@0: #include "jsfriendapi.h" michael@0: #include "jsstr.h" michael@0: michael@0: #include "gc/Barrier.h" michael@0: #include "gc/Heap.h" michael@0: #include "gc/Marking.h" michael@0: #include "gc/Rooting.h" michael@0: #include "js/CharacterEncoding.h" michael@0: #include "js/RootingAPI.h" michael@0: michael@0: class JSDependentString; michael@0: class JSExtensibleString; michael@0: class JSExternalString; michael@0: class JSInlineString; michael@0: class JSRope; michael@0: michael@0: namespace js { michael@0: michael@0: class StaticStrings; michael@0: class PropertyName; michael@0: michael@0: /* The buffer length required to contain any unsigned 32-bit integer. */ michael@0: static const size_t UINT32_CHAR_BUFFER_LENGTH = sizeof("4294967295") - 1; michael@0: michael@0: } /* namespace js */ michael@0: michael@0: /* michael@0: * JavaScript strings michael@0: * michael@0: * Conceptually, a JS string is just an array of chars and a length. This array michael@0: * of chars may or may not be null-terminated and, if it is, the null character michael@0: * is not included in the length. 
michael@0: * michael@0: * To improve performance of common operations, the following optimizations are michael@0: * made which affect the engine's representation of strings: michael@0: * michael@0: * - The plain vanilla representation is a "flat" string which consists of a michael@0: * string header in the GC heap and a malloc'd null terminated char array. michael@0: * michael@0: * - To avoid copying a substring of an existing "base" string, a "dependent" michael@0: * string (JSDependentString) can be created which points into the base michael@0: * string's char array. michael@0: * michael@0: * - To avoid O(n^2) char buffer copying, a "rope" node (JSRope) can be created michael@0: * to represent a delayed string concatenation. Concatenation (called michael@0: * flattening) is performed if and when a linear char array is requested. In michael@0: * general, ropes form a binary dag whose internal nodes are JSRope string michael@0: * headers with no associated char array and whose leaf nodes are either flat michael@0: * or dependent strings. michael@0: * michael@0: * - To avoid copying the left-hand side when flattening, the left-hand side's michael@0: * buffer may be grown to make space for a copy of the right-hand side (see michael@0: * comment in JSRope::flatten). This optimization requires that there are michael@0: * no external pointers into the char array. We conservatively maintain this michael@0: * property via a flat string's "extensible" property. michael@0: * michael@0: * - To avoid allocating small char arrays, short strings can be stored inline michael@0: * in the string header (JSInlineString). To increase the max size of such michael@0: * inline strings, larger string headers can be used (JSFatInlineString). michael@0: * michael@0: * - To avoid O(n) string equality comparisons, strings can be michael@0: * canonicalized to "atoms" (JSAtom) such that there is a single atom with a michael@0: * given (length,chars).
michael@0: * michael@0: * - To avoid copying all strings created through the JSAPI, an "external" michael@0: * string (JSExternalString) can be created whose chars are managed by the michael@0: * JSAPI client. michael@0: * michael@0: * Although all strings share the same basic memory layout, we can conceptually michael@0: * arrange them into a hierarchy of operations/invariants and represent this michael@0: * hierarchy in C++ with classes: michael@0: * michael@0: * C++ type operations+fields / invariants+properties michael@0: * ========================== ========================================= michael@0: * JSString (abstract) getCharsZ, getChars, length / - michael@0: * | \ michael@0: * | JSRope leftChild, rightChild / - michael@0: * | michael@0: * JSLinearString (abstract) chars / might be null-terminated michael@0: * | \ michael@0: * | JSDependentString base / - michael@0: * | michael@0: * JSFlatString - / null terminated michael@0: * | | michael@0: * | +-- JSExternalString - / char array memory managed by embedding michael@0: * | | michael@0: * | +-- JSExtensibleString capacity / no external pointers into char array michael@0: * | | michael@0: * | +-- JSUndependedString original dependent base / - michael@0: * | | michael@0: * | +-- JSInlineString - / chars stored in header michael@0: * | \ michael@0: * | JSFatInlineString - / header is fat michael@0: * | michael@0: * JSAtom - / string equality === pointer equality michael@0: * | michael@0: * js::PropertyName - / chars don't contain an index (uint32_t) michael@0: * michael@0: * Classes marked with (abstract) above are not literally C++ Abstract Base michael@0: * Classes (since there are no virtual functions, pure or not, in this michael@0: * hierarchy), but have the same meaning: there are no strings with this type as michael@0: * its most-derived type. michael@0: * michael@0: * Atoms can additionally be permanent, i.e. 
unable to be collected, and can michael@0: * be combined with other string types to create additional most-derived types michael@0: * that satisfy the invariants of more than one of the abovementioned michael@0: * most-derived types: michael@0: * - InlineAtom = JSInlineString + JSAtom (atom with inline chars) michael@0: * - FatInlineAtom = JSFatInlineString + JSAtom (atom with (more) inline chars) michael@0: * michael@0: * Derived string types can be queried from ancestor types via isX() and michael@0: * retrieved with asX() debug-only-checked casts. michael@0: * michael@0: * The ensureX() operations mutate 'this' in place to effectively change the type to be michael@0: * at least X (e.g., ensureLinear will change a JSRope to be a JSFlatString). michael@0: */ michael@0: michael@0: class JSString : public js::gc::BarrieredCell michael@0: { michael@0: protected: michael@0: static const size_t NUM_INLINE_CHARS = 2 * sizeof(void *) / sizeof(jschar); michael@0: michael@0: /* Fields only apply to string types commented on the right.
*/ michael@0: struct Data michael@0: { michael@0: size_t lengthAndFlags; /* JSString */ michael@0: union { michael@0: const jschar *chars; /* JSLinearString */ michael@0: JSString *left; /* JSRope */ michael@0: } u1; michael@0: union { michael@0: jschar inlineStorage[NUM_INLINE_CHARS]; /* JS(Inline|FatInline)String */ michael@0: struct { michael@0: union { michael@0: JSLinearString *base; /* JS(Dependent|Undepended)String */ michael@0: JSString *right; /* JSRope */ michael@0: size_t capacity; /* JSFlatString (extensible) */ michael@0: const JSStringFinalizer *externalFinalizer;/* JSExternalString */ michael@0: } u2; michael@0: union { michael@0: JSString *parent; /* JSRope (temporary) */ michael@0: size_t reserved; /* may use for bug 615290 */ michael@0: } u3; michael@0: } s; michael@0: }; michael@0: } d; michael@0: michael@0: public: michael@0: /* Flags exposed only for jits */ michael@0: michael@0: /* michael@0: * The low LENGTH_SHIFT bits of lengthAndFlags are used to encode the type michael@0: * of the string. The remaining bits store the string length (which must be michael@0: * less than or equal to MAX_LENGTH). michael@0: * michael@0: * Instead of using a dense index to represent the most-derived type, string michael@0: * types are encoded to allow single-op tests for hot queries (isRope, michael@0: * isDependent, isFlat, isAtom) which, in view of subtyping, would require michael@0: * slower (isX() || isY() || isZ()). michael@0: * michael@0: * The string type encoding can be summarized as follows. The "instance michael@0: * encoding" entry for a type specifies the flag bits used to create a michael@0: * string instance of that type. Abstract types have no instances and thus michael@0: * have no such entry. The "subtype predicate" entry for a type specifies michael@0: * the predicate used to query whether a JSString instance is a subtype michael@0: * (reflexively) of that type.
michael@0: * michael@0: * Rope 0000 0000 michael@0: * Linear - !0000 michael@0: * HasBase - xxx1 michael@0: * Dependent 0001 0001 michael@0: * Flat - isLinear && !isDependent michael@0: * Undepended 0011 0011 michael@0: * Extensible 0010 0010 michael@0: * Inline 0100 isFlat && !isExtensible && (u1.chars == inlineStorage) michael@0: * FatInline 0100 isInline && header in FINALIZE_FAT_INLINE_STRING arena michael@0: * External 0100 header in FINALIZE_EXTERNAL_STRING arena michael@0: * Atom - 1xxx michael@0: * PermanentAtom 1100 1100 michael@0: * InlineAtom - isAtom && isInline michael@0: * FatInlineAtom - isAtom && isFatInline michael@0: * michael@0: * "HasBase" here refers to the two string types that have a 'base' field: michael@0: * JSDependentString and JSUndependedString. michael@0: * A JSUndependedString is a JSDependentString which has been 'fixed' (by michael@0: * JSDependentString::undepend) to be null-terminated. In such cases, the string must keep marking its base since michael@0: * there may be any number of *other* JSDependentStrings transitively depending on it.
michael@0: * michael@0: */ michael@0: michael@0: static const size_t LENGTH_SHIFT = 4; michael@0: static const size_t FLAGS_MASK = JS_BITMASK(LENGTH_SHIFT); michael@0: michael@0: static const size_t ROPE_FLAGS = 0; michael@0: static const size_t DEPENDENT_FLAGS = JS_BIT(0); michael@0: static const size_t UNDEPENDED_FLAGS = JS_BIT(0) | JS_BIT(1); michael@0: static const size_t EXTENSIBLE_FLAGS = JS_BIT(1); michael@0: static const size_t FIXED_FLAGS = JS_BIT(2); michael@0: michael@0: static const size_t INT32_MASK = JS_BITMASK(3); michael@0: static const size_t INT32_FLAGS = JS_BIT(1) | JS_BIT(2); michael@0: michael@0: static const size_t HAS_BASE_BIT = JS_BIT(0); michael@0: static const size_t PERMANENT_BIT = JS_BIT(2); michael@0: static const size_t ATOM_BIT = JS_BIT(3); michael@0: michael@0: static const size_t PERMANENT_ATOM_FLAGS = JS_BIT(2) | JS_BIT(3); michael@0: michael@0: static const size_t MAX_LENGTH = JS_BIT(32 - LENGTH_SHIFT) - 1; michael@0: michael@0: size_t buildLengthAndFlags(size_t length, size_t flags) { michael@0: JS_ASSERT(length <= MAX_LENGTH); michael@0: JS_ASSERT(flags <= FLAGS_MASK); michael@0: return (length << LENGTH_SHIFT) | flags; michael@0: } michael@0: michael@0: /* michael@0: * Helper function to validate that a string of a given length is michael@0: * representable by a JSString. An allocation overflow is reported if false michael@0: * is returned. 
michael@0: */ michael@0: static inline bool validateLength(js::ThreadSafeContext *maybecx, size_t length); michael@0: michael@0: static void staticAsserts() { michael@0: JS_STATIC_ASSERT(JS_BITS_PER_WORD >= 32); michael@0: JS_STATIC_ASSERT(((JSString::MAX_LENGTH << JSString::LENGTH_SHIFT) >> michael@0: JSString::LENGTH_SHIFT) == JSString::MAX_LENGTH); michael@0: JS_STATIC_ASSERT(sizeof(JSString) == michael@0: offsetof(JSString, d.inlineStorage) + NUM_INLINE_CHARS * sizeof(jschar)); michael@0: JS_STATIC_ASSERT(offsetof(JSString, d.u1.chars) == michael@0: offsetof(js::shadow::Atom, chars)); michael@0: } michael@0: michael@0: /* Avoid lame compile errors in JSRope::flatten */ michael@0: friend class JSRope; michael@0: michael@0: public: michael@0: /* All strings have length. */ michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: size_t length() const { michael@0: return d.lengthAndFlags >> LENGTH_SHIFT; michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: bool empty() const { michael@0: /* The length is stored above the flag bits, so a zero length leaves nothing set beyond FLAGS_MASK. */ return d.lengthAndFlags <= FLAGS_MASK; michael@0: } michael@0: michael@0: /* michael@0: * All strings have a fallible operation to get an array of chars. michael@0: * getCharsZ additionally ensures the array is null terminated. michael@0: */ michael@0: michael@0: inline const jschar *getChars(js::ExclusiveContext *cx); michael@0: inline const jschar *getCharsZ(js::ExclusiveContext *cx); michael@0: inline bool getChar(js::ExclusiveContext *cx, size_t index, jschar *code); michael@0: michael@0: /* michael@0: * A string has "pure" chars if it can return a pointer to its chars michael@0: * infallibly without mutating anything so they are safe to be used from off the michael@0: * main thread. If a string does not have pure chars, the caller can call michael@0: * copyNonPureChars to allocate a copy of the chars which is also a michael@0: * non-mutating threadsafe operation. Beware, this is an O(n) operation michael@0: * (involving a DAG traversal for ropes).
michael@0: */ michael@0: bool hasPureChars() const { return isLinear(); } michael@0: bool hasPureCharsZ() const { return isFlat(); } michael@0: inline const jschar *pureChars() const; michael@0: inline const jschar *pureCharsZ() const; michael@0: inline bool copyNonPureChars(js::ThreadSafeContext *cx, michael@0: js::ScopedJSFreePtr &out) const; michael@0: inline bool copyNonPureCharsZ(js::ThreadSafeContext *cx, michael@0: js::ScopedJSFreePtr &out) const; michael@0: michael@0: /* Fallible conversions to more-derived string types. */ michael@0: michael@0: inline JSLinearString *ensureLinear(js::ExclusiveContext *cx); michael@0: inline JSFlatString *ensureFlat(js::ExclusiveContext *cx); michael@0: michael@0: static bool ensureLinear(js::ExclusiveContext *cx, JSString *str) { michael@0: return str->ensureLinear(cx) != nullptr; michael@0: } michael@0: michael@0: /* Type query and debug-checked casts */ michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: bool isRope() const { michael@0: return (d.lengthAndFlags & FLAGS_MASK) == ROPE_FLAGS; michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: JSRope &asRope() const { michael@0: JS_ASSERT(isRope()); michael@0: return *(JSRope *)this; michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: bool isLinear() const { michael@0: return !isRope(); michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: JSLinearString &asLinear() const { michael@0: JS_ASSERT(JSString::isLinear()); michael@0: return *(JSLinearString *)this; michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: bool isDependent() const { michael@0: return (d.lengthAndFlags & FLAGS_MASK) == DEPENDENT_FLAGS; michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: JSDependentString &asDependent() const { michael@0: JS_ASSERT(isDependent()); michael@0: return *(JSDependentString *)this; michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: bool isFlat() const { michael@0: return isLinear() && !isDependent(); 
michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: JSFlatString &asFlat() const { michael@0: JS_ASSERT(isFlat()); michael@0: return *(JSFlatString *)this; michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: bool isExtensible() const { michael@0: return (d.lengthAndFlags & FLAGS_MASK) == EXTENSIBLE_FLAGS; michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: JSExtensibleString &asExtensible() const { michael@0: JS_ASSERT(isExtensible()); michael@0: return *(JSExtensibleString *)this; michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: bool isInline() const { michael@0: return isFlat() && !isExtensible() && (d.u1.chars == d.inlineStorage); michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: JSInlineString &asInline() const { michael@0: JS_ASSERT(isInline()); michael@0: return *(JSInlineString *)this; michael@0: } michael@0: michael@0: bool isFatInline() const; michael@0: michael@0: /* For hot code, prefer other type queries. */ michael@0: bool isExternal() const; michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: JSExternalString &asExternal() const { michael@0: JS_ASSERT(isExternal()); michael@0: return *(JSExternalString *)this; michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: bool isUndepended() const { michael@0: return (d.lengthAndFlags & FLAGS_MASK) == UNDEPENDED_FLAGS; michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: bool isAtom() const { michael@0: return d.lengthAndFlags & ATOM_BIT; michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: bool isPermanentAtom() const { michael@0: return (d.lengthAndFlags & FLAGS_MASK) == PERMANENT_ATOM_FLAGS; michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: JSAtom &asAtom() const { michael@0: JS_ASSERT(isAtom()); michael@0: return *(JSAtom *)this; michael@0: } michael@0: michael@0: /* Only called by the GC for dependent or undepended strings. 
*/ michael@0: michael@0: inline bool hasBase() const { michael@0: JS_STATIC_ASSERT((DEPENDENT_FLAGS | JS_BIT(1)) == UNDEPENDED_FLAGS); michael@0: return d.lengthAndFlags & HAS_BASE_BIT; michael@0: } michael@0: michael@0: inline JSLinearString *base() const; michael@0: michael@0: inline void markBase(JSTracer *trc); michael@0: michael@0: /* Only called by the GC for strings with the FINALIZE_STRING kind. */ michael@0: michael@0: inline void finalize(js::FreeOp *fop); michael@0: michael@0: /* Gets the number of bytes that the chars take on the heap. */ michael@0: michael@0: size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf); michael@0: michael@0: /* Offsets for direct field from jit code. */ michael@0: michael@0: static size_t offsetOfLengthAndFlags() { michael@0: return offsetof(JSString, d.lengthAndFlags); michael@0: } michael@0: michael@0: static size_t offsetOfChars() { michael@0: return offsetof(JSString, d.u1.chars); michael@0: } michael@0: michael@0: js::gc::AllocKind getAllocKind() const { return tenuredGetAllocKind(); } michael@0: michael@0: static inline js::ThingRootKind rootKind() { return js::THING_ROOT_STRING; } michael@0: michael@0: #ifdef DEBUG michael@0: void dump(); michael@0: static void dumpChars(const jschar *s, size_t len); michael@0: bool equals(const char *s); michael@0: #endif michael@0: michael@0: static MOZ_ALWAYS_INLINE void readBarrier(JSString *thing) { michael@0: #ifdef JSGC_INCREMENTAL michael@0: if (thing->isPermanentAtom()) michael@0: return; michael@0: michael@0: js::gc::BarrieredCell::readBarrier(thing); michael@0: #endif michael@0: } michael@0: michael@0: static MOZ_ALWAYS_INLINE void writeBarrierPre(JSString *thing) { michael@0: #ifdef JSGC_INCREMENTAL michael@0: if (isNullLike(thing) || thing->isPermanentAtom()) michael@0: return; michael@0: michael@0: js::gc::BarrieredCell::writeBarrierPre(thing); michael@0: #endif michael@0: } michael@0: michael@0: private: michael@0: JSString() MOZ_DELETE; michael@0: 
JSString(const JSString &other) MOZ_DELETE; michael@0: void operator=(const JSString &other) MOZ_DELETE; michael@0: }; michael@0: michael@0: class JSRope : public JSString michael@0: { michael@0: bool copyNonPureCharsInternal(js::ThreadSafeContext *cx, michael@0: js::ScopedJSFreePtr &out, michael@0: bool nullTerminate) const; michael@0: bool copyNonPureChars(js::ThreadSafeContext *cx, js::ScopedJSFreePtr &out) const; michael@0: bool copyNonPureCharsZ(js::ThreadSafeContext *cx, js::ScopedJSFreePtr &out) const; michael@0: michael@0: enum UsingBarrier { WithIncrementalBarrier, NoBarrier }; michael@0: template michael@0: JSFlatString *flattenInternal(js::ExclusiveContext *cx); michael@0: michael@0: friend class JSString; michael@0: JSFlatString *flatten(js::ExclusiveContext *cx); michael@0: michael@0: void init(js::ThreadSafeContext *cx, JSString *left, JSString *right, size_t length); michael@0: michael@0: public: michael@0: template michael@0: static inline JSRope *new_(js::ThreadSafeContext *cx, michael@0: typename js::MaybeRooted::HandleType left, michael@0: typename js::MaybeRooted::HandleType right, michael@0: size_t length); michael@0: michael@0: inline JSString *leftChild() const { michael@0: JS_ASSERT(isRope()); michael@0: return d.u1.left; michael@0: } michael@0: michael@0: inline JSString *rightChild() const { michael@0: JS_ASSERT(isRope()); michael@0: return d.s.u2.right; michael@0: } michael@0: michael@0: inline void markChildren(JSTracer *trc); michael@0: michael@0: inline static size_t offsetOfLeft() { michael@0: return offsetof(JSRope, d.u1.left); michael@0: } michael@0: inline static size_t offsetOfRight() { michael@0: return offsetof(JSRope, d.s.u2.right); michael@0: } michael@0: }; michael@0: michael@0: JS_STATIC_ASSERT(sizeof(JSRope) == sizeof(JSString)); michael@0: michael@0: class JSLinearString : public JSString michael@0: { michael@0: friend class JSString; michael@0: michael@0: /* Vacuous and therefore unimplemented. 
*/ michael@0: JSLinearString *ensureLinear(JSContext *cx) MOZ_DELETE; michael@0: bool isLinear() const MOZ_DELETE; michael@0: JSLinearString &asLinear() const MOZ_DELETE; michael@0: michael@0: public: michael@0: MOZ_ALWAYS_INLINE michael@0: const jschar *chars() const { michael@0: JS_ASSERT(JSString::isLinear()); michael@0: return d.u1.chars; michael@0: } michael@0: michael@0: JS::TwoByteChars range() const { michael@0: JS_ASSERT(JSString::isLinear()); michael@0: return JS::TwoByteChars(d.u1.chars, length()); michael@0: } michael@0: }; michael@0: michael@0: JS_STATIC_ASSERT(sizeof(JSLinearString) == sizeof(JSString)); michael@0: michael@0: class JSDependentString : public JSLinearString michael@0: { michael@0: bool copyNonPureCharsZ(js::ThreadSafeContext *cx, js::ScopedJSFreePtr &out) const; michael@0: michael@0: friend class JSString; michael@0: JSFlatString *undepend(js::ExclusiveContext *cx); michael@0: michael@0: void init(js::ThreadSafeContext *cx, JSLinearString *base, const jschar *chars, michael@0: size_t length); michael@0: michael@0: /* Vacuous and therefore unimplemented. */ michael@0: bool isDependent() const MOZ_DELETE; michael@0: JSDependentString &asDependent() const MOZ_DELETE; michael@0: michael@0: public: michael@0: static inline JSLinearString *new_(js::ExclusiveContext *cx, JSLinearString *base, michael@0: const jschar *chars, size_t length); michael@0: }; michael@0: michael@0: JS_STATIC_ASSERT(sizeof(JSDependentString) == sizeof(JSString)); michael@0: michael@0: class JSFlatString : public JSLinearString michael@0: { michael@0: /* Vacuous and therefore unimplemented. 
*/ michael@0: JSFlatString *ensureFlat(JSContext *cx) MOZ_DELETE; michael@0: bool isFlat() const MOZ_DELETE; michael@0: JSFlatString &asFlat() const MOZ_DELETE; michael@0: michael@0: bool isIndexSlow(uint32_t *indexp) const; michael@0: michael@0: void init(const jschar *chars, size_t length); michael@0: michael@0: public: michael@0: template michael@0: static inline JSFlatString *new_(js::ThreadSafeContext *cx, michael@0: const jschar *chars, size_t length); michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: const jschar *charsZ() const { michael@0: JS_ASSERT(JSString::isFlat()); michael@0: return chars(); michael@0: } michael@0: michael@0: /* michael@0: * Returns true if this string's characters store an unsigned 32-bit michael@0: * integer value, initializing *indexp to that value if so. (Thus if michael@0: * calling isIndex returns true, js::IndexToString(cx, *indexp) will be a michael@0: * string equal to this string.) michael@0: */ michael@0: inline bool isIndex(uint32_t *indexp) const { michael@0: const jschar *s = chars(); michael@0: return JS7_ISDEC(*s) && isIndexSlow(indexp); michael@0: } michael@0: michael@0: /* michael@0: * Returns a property name represented by this string, or null on failure. michael@0: * You must verify that this is not an index per isIndex before calling michael@0: * this method. michael@0: */ michael@0: inline js::PropertyName *toPropertyName(JSContext *cx); michael@0: michael@0: /* michael@0: * Once a JSFlatString sub-class has been added to the atom state, this michael@0: * operation changes the string to the JSAtom type, in place. 
michael@0: */ michael@0: MOZ_ALWAYS_INLINE JSAtom *morphAtomizedStringIntoAtom() { michael@0: d.lengthAndFlags = buildLengthAndFlags(length(), ATOM_BIT); michael@0: return &asAtom(); michael@0: } michael@0: MOZ_ALWAYS_INLINE JSAtom *morphAtomizedStringIntoPermanentAtom() { michael@0: d.lengthAndFlags = buildLengthAndFlags(length(), PERMANENT_ATOM_FLAGS); michael@0: return &asAtom(); michael@0: } michael@0: michael@0: inline void finalize(js::FreeOp *fop); michael@0: }; michael@0: michael@0: JS_STATIC_ASSERT(sizeof(JSFlatString) == sizeof(JSString)); michael@0: michael@0: class JSExtensibleString : public JSFlatString michael@0: { michael@0: /* Vacuous and therefore unimplemented. */ michael@0: bool isExtensible() const MOZ_DELETE; michael@0: JSExtensibleString &asExtensible() const MOZ_DELETE; michael@0: michael@0: public: michael@0: MOZ_ALWAYS_INLINE michael@0: size_t capacity() const { michael@0: JS_ASSERT(JSString::isExtensible()); michael@0: return d.s.u2.capacity; michael@0: } michael@0: }; michael@0: michael@0: JS_STATIC_ASSERT(sizeof(JSExtensibleString) == sizeof(JSString)); michael@0: michael@0: /* On 32-bit platforms, MAX_INLINE_LENGTH (NUM_INLINE_CHARS - 1) is 3. On 64-bit platforms it is 7.
*/ michael@0: class JSInlineString : public JSFlatString michael@0: { michael@0: static const size_t MAX_INLINE_LENGTH = NUM_INLINE_CHARS - 1; michael@0: michael@0: public: michael@0: template michael@0: static inline JSInlineString *new_(js::ThreadSafeContext *cx); michael@0: michael@0: inline jschar *init(size_t length); michael@0: michael@0: inline void resetLength(size_t length); michael@0: michael@0: static bool lengthFits(size_t length) { michael@0: return length <= MAX_INLINE_LENGTH; michael@0: } michael@0: michael@0: static size_t offsetOfInlineStorage() { michael@0: return offsetof(JSInlineString, d.inlineStorage); michael@0: } michael@0: }; michael@0: michael@0: JS_STATIC_ASSERT(sizeof(JSInlineString) == sizeof(JSString)); michael@0: michael@0: /* michael@0: * On both 32-bit and 64-bit platforms, the total inline capacity michael@0: * (NUM_INLINE_CHARS + INLINE_EXTENSION_CHARS) is 12 chars. This is michael@0: * deliberate, in order to minimize potential performance differences between michael@0: * 32-bit and 64-bit platforms. michael@0: * michael@0: * There are still some differences due to NUM_INLINE_CHARS being different. michael@0: * E.g. strings of length 4--7 will be JSFatInlineStrings on 32-bit platforms michael@0: * and JSInlineStrings on 64-bit platforms. But the more significant transition michael@0: * from inline strings to non-inline strings occurs at length 12 on both 32-bit michael@0: * and 64-bit platforms.
michael@0: */ michael@0: class JSFatInlineString : public JSInlineString michael@0: { michael@0: static const size_t INLINE_EXTENSION_CHARS = 12 - NUM_INLINE_CHARS; michael@0: michael@0: static void staticAsserts() { michael@0: JS_STATIC_ASSERT((INLINE_EXTENSION_CHARS * sizeof(jschar)) % js::gc::CellSize == 0); michael@0: JS_STATIC_ASSERT(MAX_FAT_INLINE_LENGTH + 1 == michael@0: (sizeof(JSFatInlineString) - michael@0: offsetof(JSFatInlineString, d.inlineStorage)) / sizeof(jschar)); michael@0: } michael@0: michael@0: protected: /* to fool clang into not warning this is unused */ michael@0: jschar inlineStorageExtension[INLINE_EXTENSION_CHARS]; michael@0: michael@0: public: michael@0: template michael@0: static inline JSFatInlineString *new_(js::ThreadSafeContext *cx); michael@0: michael@0: static const size_t MAX_FAT_INLINE_LENGTH = JSString::NUM_INLINE_CHARS + michael@0: INLINE_EXTENSION_CHARS michael@0: -1 /* null terminator */; michael@0: michael@0: static bool lengthFits(size_t length) { michael@0: return length <= MAX_FAT_INLINE_LENGTH; michael@0: } michael@0: michael@0: /* Only called by the GC for strings with the FINALIZE_FAT_INLINE_STRING kind. */ michael@0: michael@0: MOZ_ALWAYS_INLINE void finalize(js::FreeOp *fop); michael@0: }; michael@0: michael@0: JS_STATIC_ASSERT(sizeof(JSFatInlineString) % js::gc::CellSize == 0); michael@0: michael@0: class JSExternalString : public JSFlatString michael@0: { michael@0: void init(const jschar *chars, size_t length, const JSStringFinalizer *fin); michael@0: michael@0: /* Vacuous and therefore unimplemented. 
*/ michael@0: bool isExternal() const MOZ_DELETE; michael@0: JSExternalString &asExternal() const MOZ_DELETE; michael@0: michael@0: public: michael@0: static inline JSExternalString *new_(JSContext *cx, const jschar *chars, size_t length, michael@0: const JSStringFinalizer *fin); michael@0: michael@0: const JSStringFinalizer *externalFinalizer() const { michael@0: JS_ASSERT(JSString::isExternal()); michael@0: return d.s.u2.externalFinalizer; michael@0: } michael@0: michael@0: /* Only called by the GC for strings with the FINALIZE_EXTERNAL_STRING kind. */ michael@0: michael@0: inline void finalize(js::FreeOp *fop); michael@0: }; michael@0: michael@0: JS_STATIC_ASSERT(sizeof(JSExternalString) == sizeof(JSString)); michael@0: michael@0: class JSUndependedString : public JSFlatString michael@0: { michael@0: /* michael@0: * JSUndependedString is not explicitly used and is only present for michael@0: * consistency. See JSDependentString::undepend for how a JSDependentString michael@0: * gets morphed into a JSUndependedString. michael@0: */ michael@0: }; michael@0: michael@0: JS_STATIC_ASSERT(sizeof(JSUndependedString) == sizeof(JSString)); michael@0: michael@0: class JSAtom : public JSFlatString michael@0: { michael@0: /* Vacuous and therefore unimplemented. */ michael@0: bool isAtom() const MOZ_DELETE; michael@0: JSAtom &asAtom() const MOZ_DELETE; michael@0: michael@0: public: michael@0: /* Returns the PropertyName for this. isIndex() must be false. */ michael@0: inline js::PropertyName *asPropertyName(); michael@0: michael@0: inline void finalize(js::FreeOp *fop); michael@0: michael@0: MOZ_ALWAYS_INLINE michael@0: bool isPermanent() const { michael@0: return d.lengthAndFlags & PERMANENT_BIT; michael@0: } michael@0: michael@0: // Transform this atom into a permanent atom. This is only done during michael@0: // initialization of the runtime. 
michael@0: MOZ_ALWAYS_INLINE void morphIntoPermanentAtom() { michael@0: d.lengthAndFlags = buildLengthAndFlags(length(), PERMANENT_ATOM_FLAGS); michael@0: } michael@0: michael@0: #ifdef DEBUG michael@0: void dump(); michael@0: #endif michael@0: }; michael@0: michael@0: JS_STATIC_ASSERT(sizeof(JSAtom) == sizeof(JSString)); michael@0: michael@0: namespace js { michael@0: michael@0: /* michael@0: * Thread safe RAII wrapper for inspecting the contents of JSStrings. The michael@0: * thread safe operations such as |copyNonPureChars| require allocation michael@0: * of a char array. This allocation is not always required, such as when the michael@0: * string is already linear. This wrapper makes dealing with this detail more michael@0: * convenient by encapsulating the allocation logic. michael@0: * michael@0: * As the name suggests, this class is scoped. Return values from chars() and michael@0: * range() may not be valid after the inspector goes out of scope. michael@0: */ michael@0: michael@0: class ScopedThreadSafeStringInspector michael@0: { michael@0: private: michael@0: JSString *str_; michael@0: ScopedJSFreePtr scopedChars_; michael@0: const jschar *chars_; michael@0: michael@0: public: michael@0: ScopedThreadSafeStringInspector(JSString *str) michael@0: : str_(str), michael@0: chars_(nullptr) michael@0: { } michael@0: michael@0: bool ensureChars(ThreadSafeContext *cx); michael@0: michael@0: const jschar *chars() { michael@0: JS_ASSERT(chars_); michael@0: return chars_; michael@0: } michael@0: michael@0: JS::TwoByteChars range() { michael@0: JS_ASSERT(chars_); michael@0: return JS::TwoByteChars(chars_, str_->length()); michael@0: } michael@0: }; michael@0: michael@0: class StaticStrings michael@0: { michael@0: private: michael@0: /* Bigger chars cannot be in a length-2 string.
*/ michael@0: static const size_t SMALL_CHAR_LIMIT = 128U; michael@0: static const size_t NUM_SMALL_CHARS = 64U; michael@0: michael@0: JSAtom *length2StaticTable[NUM_SMALL_CHARS * NUM_SMALL_CHARS]; michael@0: michael@0: public: michael@0: /* We keep these public for the JITs. */ michael@0: static const size_t UNIT_STATIC_LIMIT = 256U; michael@0: JSAtom *unitStaticTable[UNIT_STATIC_LIMIT]; michael@0: michael@0: static const size_t INT_STATIC_LIMIT = 256U; michael@0: JSAtom *intStaticTable[INT_STATIC_LIMIT]; michael@0: michael@0: StaticStrings() { michael@0: mozilla::PodZero(this); michael@0: } michael@0: michael@0: bool init(JSContext *cx); michael@0: void trace(JSTracer *trc); michael@0: michael@0: static bool hasUint(uint32_t u) { return u < INT_STATIC_LIMIT; } michael@0: michael@0: JSAtom *getUint(uint32_t u) { michael@0: JS_ASSERT(hasUint(u)); michael@0: return intStaticTable[u]; michael@0: } michael@0: michael@0: static bool hasInt(int32_t i) { michael@0: return uint32_t(i) < INT_STATIC_LIMIT; michael@0: } michael@0: michael@0: JSAtom *getInt(int32_t i) { michael@0: JS_ASSERT(hasInt(i)); michael@0: return getUint(uint32_t(i)); michael@0: } michael@0: michael@0: static bool hasUnit(jschar c) { return c < UNIT_STATIC_LIMIT; } michael@0: michael@0: JSAtom *getUnit(jschar c) { michael@0: JS_ASSERT(hasUnit(c)); michael@0: return unitStaticTable[c]; michael@0: } michael@0: michael@0: /* May not return atom, returns null on (reported) failure. */ michael@0: inline JSLinearString *getUnitStringForElement(JSContext *cx, JSString *str, size_t index); michael@0: michael@0: static bool isStatic(JSAtom *atom); michael@0: michael@0: /* Return null if no static atom exists for the given (chars, length). 
*/ michael@0: JSAtom *lookup(const jschar *chars, size_t length) { michael@0: switch (length) { michael@0: case 1: michael@0: if (chars[0] < UNIT_STATIC_LIMIT) michael@0: return getUnit(chars[0]); michael@0: return nullptr; michael@0: case 2: michael@0: if (fitsInSmallChar(chars[0]) && fitsInSmallChar(chars[1])) michael@0: return getLength2(chars[0], chars[1]); michael@0: return nullptr; michael@0: case 3: michael@0: /* michael@0: * Here we know that JSString::intStringTable covers only 256 (or at least michael@0: * not 1000 or more) chars. We rely on order here to resolve the unit vs. michael@0: * int string/length-2 string atom identity issue by giving priority to unit michael@0: * strings for "0" through "9" and length-2 strings for "10" through "99". michael@0: */ michael@0: JS_STATIC_ASSERT(INT_STATIC_LIMIT <= 999); michael@0: if ('1' <= chars[0] && chars[0] <= '9' && michael@0: '0' <= chars[1] && chars[1] <= '9' && michael@0: '0' <= chars[2] && chars[2] <= '9') { michael@0: int i = (chars[0] - '0') * 100 + michael@0: (chars[1] - '0') * 10 + michael@0: (chars[2] - '0'); michael@0: michael@0: if (unsigned(i) < INT_STATIC_LIMIT) michael@0: return getInt(i); michael@0: } michael@0: return nullptr; michael@0: } michael@0: michael@0: return nullptr; michael@0: } michael@0: michael@0: private: michael@0: typedef uint8_t SmallChar; michael@0: static const SmallChar INVALID_SMALL_CHAR = -1; michael@0: michael@0: static bool fitsInSmallChar(jschar c) { michael@0: return c < SMALL_CHAR_LIMIT && toSmallChar[c] != INVALID_SMALL_CHAR; michael@0: } michael@0: michael@0: static const SmallChar toSmallChar[]; michael@0: michael@0: JSAtom *getLength2(jschar c1, jschar c2); michael@0: JSAtom *getLength2(uint32_t u) { michael@0: JS_ASSERT(u < 100); michael@0: return getLength2('0' + u / 10, '0' + u % 10); michael@0: } michael@0: }; michael@0: michael@0: /* michael@0: * Represents an atomized string which does not contain an index (that is, an michael@0: * unsigned 32-bit value). 
Thus for any PropertyName propname, michael@0: * ToString(ToUint32(propname)) never equals propname. michael@0: * michael@0: * To more concretely illustrate the utility of PropertyName, consider that it michael@0: * is used to partition, in a type-safe manner, the ways to refer to a michael@0: * property, as follows: michael@0: * michael@0: * - uint32_t indexes, michael@0: * - PropertyName strings which don't encode uint32_t indexes, and michael@0: * - jsspecial special properties (non-ES5 properties like object-valued michael@0: * jsids, JSID_EMPTY, JSID_VOID, and maybe in the future Harmony-proposed michael@0: * private names). michael@0: */ michael@0: class PropertyName : public JSAtom michael@0: {}; michael@0: michael@0: JS_STATIC_ASSERT(sizeof(PropertyName) == sizeof(JSString)); michael@0: michael@0: static MOZ_ALWAYS_INLINE jsid michael@0: NameToId(PropertyName *name) michael@0: { michael@0: return NON_INTEGER_ATOM_TO_JSID(name); michael@0: } michael@0: michael@0: typedef HeapPtr HeapPtrAtom; michael@0: michael@0: class AutoNameVector : public AutoVectorRooter michael@0: { michael@0: typedef AutoVectorRooter BaseType; michael@0: public: michael@0: explicit AutoNameVector(JSContext *cx michael@0: MOZ_GUARD_OBJECT_NOTIFIER_PARAM) michael@0: : AutoVectorRooter(cx, NAMEVECTOR) michael@0: { michael@0: MOZ_GUARD_OBJECT_NOTIFIER_INIT; michael@0: } michael@0: michael@0: HandlePropertyName operator[](size_t i) const { michael@0: return HandlePropertyName::fromMarkedLocation(&begin()[i]); michael@0: } michael@0: michael@0: MOZ_DECL_USE_GUARD_OBJECT_NOTIFIER michael@0: }; michael@0: michael@0: } /* namespace js */ michael@0: michael@0: /* Avoid requiring vm/String-inl.h just to call getChars. 
*/ michael@0: michael@0: MOZ_ALWAYS_INLINE const jschar * michael@0: JSString::getChars(js::ExclusiveContext *cx) michael@0: { michael@0: if (JSLinearString *str = ensureLinear(cx)) michael@0: return str->chars(); michael@0: return nullptr; michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE bool michael@0: JSString::getChar(js::ExclusiveContext *cx, size_t index, jschar *code) michael@0: { michael@0: JS_ASSERT(index < length()); michael@0: michael@0: /* michael@0: * Optimization for one level deep ropes. michael@0: * This is common for the following pattern: michael@0: * michael@0: * while() { michael@0: * text = text.substr(0, x) + "bla" + text.substr(x) michael@0: * test.charCodeAt(x + 1) michael@0: * } michael@0: */ michael@0: const jschar *chars; michael@0: if (isRope()) { michael@0: JSRope *rope = &asRope(); michael@0: if (uint32_t(index) < rope->leftChild()->length()) { michael@0: chars = rope->leftChild()->getChars(cx); michael@0: } else { michael@0: chars = rope->rightChild()->getChars(cx); michael@0: index -= rope->leftChild()->length(); michael@0: } michael@0: } else { michael@0: chars = getChars(cx); michael@0: } michael@0: michael@0: if (!chars) michael@0: return false; michael@0: michael@0: *code = chars[index]; michael@0: return true; michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE const jschar * michael@0: JSString::getCharsZ(js::ExclusiveContext *cx) michael@0: { michael@0: if (JSFlatString *str = ensureFlat(cx)) michael@0: return str->chars(); michael@0: return nullptr; michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE const jschar * michael@0: JSString::pureChars() const michael@0: { michael@0: JS_ASSERT(hasPureChars()); michael@0: return asLinear().chars(); michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE const jschar * michael@0: JSString::pureCharsZ() const michael@0: { michael@0: JS_ASSERT(hasPureCharsZ()); michael@0: return asFlat().charsZ(); michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE bool michael@0: 
JSString::copyNonPureChars(js::ThreadSafeContext *cx, js::ScopedJSFreePtr &out) const michael@0: { michael@0: JS_ASSERT(!hasPureChars()); michael@0: return asRope().copyNonPureChars(cx, out); michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE bool michael@0: JSString::copyNonPureCharsZ(js::ThreadSafeContext *cx, js::ScopedJSFreePtr &out) const michael@0: { michael@0: JS_ASSERT(!hasPureChars()); michael@0: if (isDependent()) michael@0: return asDependent().copyNonPureCharsZ(cx, out); michael@0: return asRope().copyNonPureCharsZ(cx, out); michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE JSLinearString * michael@0: JSString::ensureLinear(js::ExclusiveContext *cx) michael@0: { michael@0: return isLinear() michael@0: ? &asLinear() michael@0: : asRope().flatten(cx); michael@0: } michael@0: michael@0: MOZ_ALWAYS_INLINE JSFlatString * michael@0: JSString::ensureFlat(js::ExclusiveContext *cx) michael@0: { michael@0: return isFlat() michael@0: ? &asFlat() michael@0: : isDependent() michael@0: ? asDependent().undepend(cx) michael@0: : asRope().flatten(cx); michael@0: } michael@0: michael@0: inline JSLinearString * michael@0: JSString::base() const michael@0: { michael@0: JS_ASSERT(hasBase()); michael@0: JS_ASSERT(!d.s.u2.base->isInline()); michael@0: return d.s.u2.base; michael@0: } michael@0: michael@0: inline js::PropertyName * michael@0: JSAtom::asPropertyName() michael@0: { michael@0: #ifdef DEBUG michael@0: uint32_t dummy; michael@0: JS_ASSERT(!isIndex(&dummy)); michael@0: #endif michael@0: return static_cast(this); michael@0: } michael@0: michael@0: #endif /* vm_String_h */