michael@0: /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim: set ts=8 sts=2 et sw=2 tw=80: */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: /* michael@0: * Implements (almost always) lock-free atomic operations. The operations here michael@0: * are a subset of that which can be found in C++11's <atomic> header, with a michael@0: * different API to enforce consistent memory ordering constraints. michael@0: * michael@0: * Anyone caught using |volatile| for inter-thread memory safety needs to be michael@0: * sent a copy of this header and the C++11 standard. michael@0: */ michael@0: michael@0: #ifndef mozilla_Atomics_h michael@0: #define mozilla_Atomics_h michael@0: michael@0: #include "mozilla/Assertions.h" michael@0: #include "mozilla/Attributes.h" michael@0: #include "mozilla/Compiler.h" michael@0: #include "mozilla/TypeTraits.h" michael@0: michael@0: #include michael@0: michael@0: /* michael@0: * Our minimum deployment target on clang/OS X is OS X 10.6, whose SDK michael@0: * does not have <atomic>. So be sure to check for <atomic> support michael@0: * along with C++0x support. michael@0: */ michael@0: #if defined(__clang__) || defined(__GNUC__) michael@0: /* michael@0: * Clang doesn't like <atomic> from libstdc++ before 4.7 due to the michael@0: * loose typing of the atomic builtins. GCC 4.5 and 4.6 lack inline michael@0: * definitions for unspecialized std::atomic and cause linking errors. michael@0: * Therefore, we require at least 4.7.0 for using libstdc++. 
michael@0: */ michael@0: # if MOZ_USING_LIBSTDCXX && MOZ_LIBSTDCXX_VERSION_AT_LEAST(4, 7, 0) michael@0: # define MOZ_HAVE_CXX11_ATOMICS michael@0: # elif MOZ_USING_LIBCXX michael@0: # define MOZ_HAVE_CXX11_ATOMICS michael@0: # endif michael@0: #elif defined(_MSC_VER) && _MSC_VER >= 1700 michael@0: # if defined(DEBUG) michael@0: /* michael@0: * Provide our own failure code since we're having trouble linking to michael@0: * std::_Debug_message (bug 982310). michael@0: */ michael@0: # define _INVALID_MEMORY_ORDER MOZ_CRASH("Invalid memory order") michael@0: # endif michael@0: # define MOZ_HAVE_CXX11_ATOMICS michael@0: #endif michael@0: michael@0: namespace mozilla { michael@0: michael@0: /** michael@0: * An enum of memory ordering possibilities for atomics. michael@0: * michael@0: * Memory ordering is the observable state of distinct values in memory. michael@0: * (It's a separate concept from atomicity, which concerns whether an michael@0: * operation can ever be observed in an intermediate state. Don't michael@0: * conflate the two!) Given a sequence of operations in source code on michael@0: * memory, it is *not* always the case that, at all times and on all michael@0: * cores, those operations will appear to have occurred in that exact michael@0: * sequence. First, the compiler might reorder that sequence, if it michael@0: * thinks another ordering will be more efficient. Second, the CPU may michael@0: * not expose so consistent a view of memory. CPUs will often perform michael@0: * their own instruction reordering, above and beyond that performed by michael@0: * the compiler. And each core has its own memory caches, and accesses michael@0: * (reads and writes both) to "memory" may only resolve to out-of-date michael@0: * cache entries -- not to the "most recently" performed operation in michael@0: * some global sense. 
Any access to a value that may be used by michael@0: * multiple threads, potentially across multiple cores, must therefore michael@0: * have a memory ordering imposed on it, for all code on all michael@0: * threads/cores to have a sufficiently coherent worldview. michael@0: * michael@0: * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and michael@0: * http://en.cppreference.com/w/cpp/atomic/memory_order go into more michael@0: * detail on all this, including examples of how each mode works. michael@0: * michael@0: * Note that for simplicity and practicality, not all of the modes in michael@0: * C++11 are supported. The missing C++11 modes are either subsumed by michael@0: * the modes we provide below, or not relevant for the CPUs we support michael@0: * in Gecko. These three modes are confusing enough as it is! michael@0: */ michael@0: enum MemoryOrdering { michael@0: /* michael@0: * Relaxed ordering is the simplest memory ordering: none at all. michael@0: * When the result of a write is observed, nothing may be inferred michael@0: * about other memory. Writes ostensibly performed "before" on the michael@0: * writing thread may not yet be visible. Writes performed "after" on michael@0: * the writing thread may already be visible, if the compiler or CPU michael@0: * reordered them. (The latter can happen if reads and/or writes get michael@0: * held up in per-processor caches.) Relaxed ordering means michael@0: * operations can always use cached values (as long as the actual michael@0: * updates to atomic values actually occur, correctly, eventually), so michael@0: * it's usually the fastest sort of atomic access. For this reason, michael@0: * *it's also the most dangerous kind of access*. michael@0: * michael@0: * Relaxed ordering is good for things like process-wide statistics michael@0: * counters that don't need to be consistent with anything else, so michael@0: * long as updates themselves are atomic. 
(And so long as any michael@0: * observations of that value can tolerate being out-of-date -- if you michael@0: * need some sort of up-to-date value, you need some sort of other michael@0: * synchronizing operation.) It's *not* good for locks, mutexes, michael@0: * reference counts, etc. that mediate access to other memory, or must michael@0: * be observably consistent with other memory. michael@0: * michael@0: * x86 architectures don't take advantage of the optimization michael@0: * opportunities that relaxed ordering permits. Thus it's possible michael@0: * that using relaxed ordering will "work" on x86 but fail elsewhere michael@0: * (ARM, say, which *does* implement non-sequentially-consistent michael@0: * relaxed ordering semantics). Be extra-careful using relaxed michael@0: * ordering if you can't easily test non-x86 architectures! michael@0: */ michael@0: Relaxed, michael@0: /* michael@0: * When an atomic value is updated with ReleaseAcquire ordering, and michael@0: * that new value is observed with ReleaseAcquire ordering, prior michael@0: * writes (atomic or not) are also observable. What ReleaseAcquire michael@0: * *doesn't* give you is any observable ordering guarantees for michael@0: * ReleaseAcquire-ordered operations on different objects. For michael@0: * example, if there are two cores that each perform ReleaseAcquire michael@0: * operations on separate objects, each core may or may not observe michael@0: * the operations made by the other core. The only way the cores can michael@0: * be synchronized with ReleaseAcquire is if they both michael@0: * ReleaseAcquire-access the same object. This implies that you can't michael@0: * necessarily describe some global total ordering of ReleaseAcquire michael@0: * operations. michael@0: * michael@0: * ReleaseAcquire ordering is good for (as the name implies) atomic michael@0: * operations on values controlling ownership of things: reference michael@0: * counts, mutexes, and the like. 
However, if you are thinking about michael@0: * using these to implement your own locks or mutexes, you should take michael@0: * a good, hard look at actual lock or mutex primitives first. michael@0: */ michael@0: ReleaseAcquire, michael@0: /* michael@0: * When an atomic value is updated with SequentiallyConsistent michael@0: * ordering, all writes are observable when the update is observed, just michael@0: * as with ReleaseAcquire ordering. But, furthermore, a global total michael@0: * ordering of SequentiallyConsistent operations *can* be described. michael@0: * For example, if two cores perform SequentiallyConsistent operations michael@0: * on separate objects, one core will observably perform its update michael@0: * (and all previous operations will have completed), then the other michael@0: * core will observably perform its update (and all previous michael@0: * operations will have completed). (Although those previous michael@0: * operations aren't themselves ordered -- they could be intermixed, michael@0: * or ordered if they occur on atomic values with ordering michael@0: * requirements.) SequentiallyConsistent is the *simplest and safest* michael@0: * ordering of atomic operations -- it's always as if one operation michael@0: * happens, then another, then another, in some order -- and every michael@0: * core observes updates to happen in that single order. Because it michael@0: * has the most synchronization requirements, operations ordered this michael@0: * way also tend to be slowest. michael@0: * michael@0: * SequentiallyConsistent ordering can be desirable when multiple michael@0: * threads observe objects, and they all have to agree on the michael@0: * observable order of changes to them. People expect michael@0: * SequentiallyConsistent ordering, even if they shouldn't, when michael@0: * writing code, atomic or otherwise. SequentiallyConsistent is also michael@0: * the ordering of choice when designing lockless data structures. 
If michael@0: * you don't know what order to use, use this one. michael@0: */ michael@0: SequentiallyConsistent, michael@0: }; michael@0: michael@0: } // namespace mozilla michael@0: michael@0: // Build up the underlying intrinsics. michael@0: #ifdef MOZ_HAVE_CXX11_ATOMICS michael@0: michael@0: # include michael@0: michael@0: namespace mozilla { michael@0: namespace detail { michael@0: michael@0: /* michael@0: * We provide CompareExchangeFailureOrder to work around a bug in some michael@0: * versions of GCC's header. See bug 898491. michael@0: */ michael@0: template struct AtomicOrderConstraints; michael@0: michael@0: template<> michael@0: struct AtomicOrderConstraints michael@0: { michael@0: static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed; michael@0: static const std::memory_order LoadOrder = std::memory_order_relaxed; michael@0: static const std::memory_order StoreOrder = std::memory_order_relaxed; michael@0: static const std::memory_order CompareExchangeFailureOrder = michael@0: std::memory_order_relaxed; michael@0: }; michael@0: michael@0: template<> michael@0: struct AtomicOrderConstraints michael@0: { michael@0: static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel; michael@0: static const std::memory_order LoadOrder = std::memory_order_acquire; michael@0: static const std::memory_order StoreOrder = std::memory_order_release; michael@0: static const std::memory_order CompareExchangeFailureOrder = michael@0: std::memory_order_acquire; michael@0: }; michael@0: michael@0: template<> michael@0: struct AtomicOrderConstraints michael@0: { michael@0: static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst; michael@0: static const std::memory_order LoadOrder = std::memory_order_seq_cst; michael@0: static const std::memory_order StoreOrder = std::memory_order_seq_cst; michael@0: static const std::memory_order CompareExchangeFailureOrder = michael@0: std::memory_order_seq_cst; michael@0: }; michael@0: 
michael@0: template michael@0: struct IntrinsicBase michael@0: { michael@0: typedef std::atomic ValueType; michael@0: typedef AtomicOrderConstraints OrderedOp; michael@0: }; michael@0: michael@0: template michael@0: struct IntrinsicMemoryOps : public IntrinsicBase michael@0: { michael@0: typedef IntrinsicBase Base; michael@0: static T load(const typename Base::ValueType& ptr) { michael@0: return ptr.load(Base::OrderedOp::LoadOrder); michael@0: } michael@0: static void store(typename Base::ValueType& ptr, T val) { michael@0: ptr.store(val, Base::OrderedOp::StoreOrder); michael@0: } michael@0: static T exchange(typename Base::ValueType& ptr, T val) { michael@0: return ptr.exchange(val, Base::OrderedOp::AtomicRMWOrder); michael@0: } michael@0: static bool compareExchange(typename Base::ValueType& ptr, T oldVal, T newVal) { michael@0: return ptr.compare_exchange_strong(oldVal, newVal, michael@0: Base::OrderedOp::AtomicRMWOrder, michael@0: Base::OrderedOp::CompareExchangeFailureOrder); michael@0: } michael@0: }; michael@0: michael@0: template michael@0: struct IntrinsicAddSub : public IntrinsicBase michael@0: { michael@0: typedef IntrinsicBase Base; michael@0: static T add(typename Base::ValueType& ptr, T val) { michael@0: return ptr.fetch_add(val, Base::OrderedOp::AtomicRMWOrder); michael@0: } michael@0: static T sub(typename Base::ValueType& ptr, T val) { michael@0: return ptr.fetch_sub(val, Base::OrderedOp::AtomicRMWOrder); michael@0: } michael@0: }; michael@0: michael@0: template michael@0: struct IntrinsicAddSub : public IntrinsicBase michael@0: { michael@0: typedef IntrinsicBase Base; michael@0: static T* add(typename Base::ValueType& ptr, ptrdiff_t val) { michael@0: return ptr.fetch_add(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder); michael@0: } michael@0: static T* sub(typename Base::ValueType& ptr, ptrdiff_t val) { michael@0: return ptr.fetch_sub(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder); michael@0: } michael@0: private: michael@0: /* michael@0: * 
GCC 4.6's header has a bug where adding X to an michael@0: * atomic is not the same as adding X to a T*. Hence the need michael@0: * for this function to provide the correct addend. michael@0: */ michael@0: static ptrdiff_t fixupAddend(ptrdiff_t val) { michael@0: #if defined(__clang__) || defined(_MSC_VER) michael@0: return val; michael@0: #elif defined(__GNUC__) && MOZ_GCC_VERSION_AT_LEAST(4, 6, 0) && \ michael@0: !MOZ_GCC_VERSION_AT_LEAST(4, 7, 0) michael@0: return val * sizeof(T); michael@0: #else michael@0: return val; michael@0: #endif michael@0: } michael@0: }; michael@0: michael@0: template michael@0: struct IntrinsicIncDec : public IntrinsicAddSub michael@0: { michael@0: typedef IntrinsicBase Base; michael@0: static T inc(typename Base::ValueType& ptr) { michael@0: return IntrinsicAddSub::add(ptr, 1); michael@0: } michael@0: static T dec(typename Base::ValueType& ptr) { michael@0: return IntrinsicAddSub::sub(ptr, 1); michael@0: } michael@0: }; michael@0: michael@0: template michael@0: struct AtomicIntrinsics : public IntrinsicMemoryOps, michael@0: public IntrinsicIncDec michael@0: { michael@0: typedef IntrinsicBase Base; michael@0: static T or_(typename Base::ValueType& ptr, T val) { michael@0: return ptr.fetch_or(val, Base::OrderedOp::AtomicRMWOrder); michael@0: } michael@0: static T xor_(typename Base::ValueType& ptr, T val) { michael@0: return ptr.fetch_xor(val, Base::OrderedOp::AtomicRMWOrder); michael@0: } michael@0: static T and_(typename Base::ValueType& ptr, T val) { michael@0: return ptr.fetch_and(val, Base::OrderedOp::AtomicRMWOrder); michael@0: } michael@0: }; michael@0: michael@0: template michael@0: struct AtomicIntrinsics michael@0: : public IntrinsicMemoryOps, public IntrinsicIncDec michael@0: { michael@0: }; michael@0: michael@0: } // namespace detail michael@0: } // namespace mozilla michael@0: michael@0: #elif defined(__GNUC__) michael@0: michael@0: namespace mozilla { michael@0: namespace detail { michael@0: michael@0: /* michael@0: * The 
__sync_* family of intrinsics is documented here: michael@0: * michael@0: * http://gcc.gnu.org/onlinedocs/gcc-4.6.4/gcc/Atomic-Builtins.html michael@0: * michael@0: * While these intrinsics are deprecated in favor of the newer __atomic_* michael@0: * family of intrinsics: michael@0: * michael@0: * http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/_005f_005fatomic-Builtins.html michael@0: * michael@0: * any GCC version that supports the __atomic_* intrinsics will also support michael@0: * the <atomic> header and so will be handled above. We provide a version of michael@0: * atomics using the __sync_* intrinsics to support older versions of GCC. michael@0: * michael@0: * All __sync_* intrinsics that we use below act as full memory barriers, for michael@0: * both compiler and hardware reordering, except for __sync_lock_test_and_set, michael@0: * which is only an acquire barrier. When we call __sync_lock_test_and_set, michael@0: * we add a barrier above it as appropriate. michael@0: */ michael@0: michael@0: template struct Barrier; michael@0: michael@0: /* michael@0: * Some processors (in particular, x86) don't require quite so many calls to michael@0: * __sync_synchronize as our specializations of Barrier produce. If michael@0: * performance turns out to be an issue, defining these specializations michael@0: * on a per-processor basis would be a good first tuning step. 
michael@0: */ michael@0: michael@0: template<> michael@0: struct Barrier michael@0: { michael@0: static void beforeLoad() {} michael@0: static void afterLoad() {} michael@0: static void beforeStore() {} michael@0: static void afterStore() {} michael@0: }; michael@0: michael@0: template<> michael@0: struct Barrier michael@0: { michael@0: static void beforeLoad() {} michael@0: static void afterLoad() { __sync_synchronize(); } michael@0: static void beforeStore() { __sync_synchronize(); } michael@0: static void afterStore() {} michael@0: }; michael@0: michael@0: template<> michael@0: struct Barrier michael@0: { michael@0: static void beforeLoad() { __sync_synchronize(); } michael@0: static void afterLoad() { __sync_synchronize(); } michael@0: static void beforeStore() { __sync_synchronize(); } michael@0: static void afterStore() { __sync_synchronize(); } michael@0: }; michael@0: michael@0: template michael@0: struct IntrinsicMemoryOps michael@0: { michael@0: static T load(const T& ptr) { michael@0: Barrier::beforeLoad(); michael@0: T val = ptr; michael@0: Barrier::afterLoad(); michael@0: return val; michael@0: } michael@0: static void store(T& ptr, T val) { michael@0: Barrier::beforeStore(); michael@0: ptr = val; michael@0: Barrier::afterStore(); michael@0: } michael@0: static T exchange(T& ptr, T val) { michael@0: // __sync_lock_test_and_set is only an acquire barrier; loads and stores michael@0: // can't be moved up from after to before it, but they can be moved down michael@0: // from before to after it. We may want a stricter ordering, so we need michael@0: // an explicit barrier. 
michael@0: michael@0: Barrier::beforeStore(); michael@0: return __sync_lock_test_and_set(&ptr, val); michael@0: } michael@0: static bool compareExchange(T& ptr, T oldVal, T newVal) { michael@0: return __sync_bool_compare_and_swap(&ptr, oldVal, newVal); michael@0: } michael@0: }; michael@0: michael@0: template michael@0: struct IntrinsicAddSub michael@0: { michael@0: typedef T ValueType; michael@0: static T add(T& ptr, T val) { michael@0: return __sync_fetch_and_add(&ptr, val); michael@0: } michael@0: static T sub(T& ptr, T val) { michael@0: return __sync_fetch_and_sub(&ptr, val); michael@0: } michael@0: }; michael@0: michael@0: template michael@0: struct IntrinsicAddSub michael@0: { michael@0: typedef T* ValueType; michael@0: /* michael@0: * The reinterpret_casts are needed so that michael@0: * __sync_fetch_and_{add,sub} will properly type-check. michael@0: * michael@0: * Also, these functions do not provide standard semantics for michael@0: * pointer types, so we need to adjust the addend. 
michael@0: */ michael@0: static ValueType add(ValueType& ptr, ptrdiff_t val) { michael@0: ValueType amount = reinterpret_cast(val * sizeof(T)); michael@0: return __sync_fetch_and_add(&ptr, amount); michael@0: } michael@0: static ValueType sub(ValueType& ptr, ptrdiff_t val) { michael@0: ValueType amount = reinterpret_cast(val * sizeof(T)); michael@0: return __sync_fetch_and_sub(&ptr, amount); michael@0: } michael@0: }; michael@0: michael@0: template michael@0: struct IntrinsicIncDec : public IntrinsicAddSub michael@0: { michael@0: static T inc(T& ptr) { return IntrinsicAddSub::add(ptr, 1); } michael@0: static T dec(T& ptr) { return IntrinsicAddSub::sub(ptr, 1); } michael@0: }; michael@0: michael@0: template michael@0: struct AtomicIntrinsics : public IntrinsicMemoryOps, michael@0: public IntrinsicIncDec michael@0: { michael@0: static T or_(T& ptr, T val) { michael@0: return __sync_fetch_and_or(&ptr, val); michael@0: } michael@0: static T xor_(T& ptr, T val) { michael@0: return __sync_fetch_and_xor(&ptr, val); michael@0: } michael@0: static T and_(T& ptr, T val) { michael@0: return __sync_fetch_and_and(&ptr, val); michael@0: } michael@0: }; michael@0: michael@0: template michael@0: struct AtomicIntrinsics : public IntrinsicMemoryOps, michael@0: public IntrinsicIncDec michael@0: { michael@0: }; michael@0: michael@0: } // namespace detail michael@0: } // namespace mozilla michael@0: michael@0: #elif defined(_MSC_VER) michael@0: michael@0: /* michael@0: * Windows comes with a full complement of atomic operations. michael@0: * Unfortunately, most of those aren't available for Windows XP (even if michael@0: * the compiler supports intrinsics for them), which is the oldest michael@0: * version of Windows we support. Therefore, we only provide operations michael@0: * on 32-bit datatypes for 32-bit Windows versions; for 64-bit Windows michael@0: * versions, we support 64-bit datatypes as well. 
michael@0: * michael@0: * To avoid namespace pollution issues, we declare whatever functions we michael@0: * need ourselves. michael@0: */ michael@0: michael@0: extern "C" { michael@0: long __cdecl _InterlockedExchangeAdd(long volatile* dst, long value); michael@0: long __cdecl _InterlockedOr(long volatile* dst, long value); michael@0: long __cdecl _InterlockedXor(long volatile* dst, long value); michael@0: long __cdecl _InterlockedAnd(long volatile* dst, long value); michael@0: long __cdecl _InterlockedExchange(long volatile *dst, long value); michael@0: long __cdecl _InterlockedCompareExchange(long volatile *dst, long newVal, long oldVal); michael@0: } michael@0: michael@0: # pragma intrinsic(_InterlockedExchangeAdd) michael@0: # pragma intrinsic(_InterlockedOr) michael@0: # pragma intrinsic(_InterlockedXor) michael@0: # pragma intrinsic(_InterlockedAnd) michael@0: # pragma intrinsic(_InterlockedExchange) michael@0: # pragma intrinsic(_InterlockedCompareExchange) michael@0: michael@0: namespace mozilla { michael@0: namespace detail { michael@0: michael@0: # if !defined(_M_IX86) && !defined(_M_X64) michael@0: /* michael@0: * The implementations below are optimized for x86ish systems. You michael@0: * will have to modify them if you are porting to Windows on a michael@0: * different architecture. michael@0: */ michael@0: # error "Unknown CPU type" michael@0: # endif michael@0: michael@0: /* michael@0: * The PrimitiveIntrinsics template should define |Type|, the datatype of size michael@0: * DataSize upon which we operate, and the following eight functions. 
michael@0: * michael@0: * static Type add(Type* ptr, Type val); michael@0: * static Type sub(Type* ptr, Type val); michael@0: * static Type or_(Type* ptr, Type val); michael@0: * static Type xor_(Type* ptr, Type val); michael@0: * static Type and_(Type* ptr, Type val); michael@0: * michael@0: * These functions perform the obvious operation on the value contained in michael@0: * |*ptr| combined with |val| and return the value previously stored in michael@0: * |*ptr|. michael@0: * michael@0: * static void store(Type* ptr, Type val); michael@0: * michael@0: * This function atomically stores |val| into |*ptr| and must provide a full michael@0: * memory fence after the store to prevent compiler and hardware instruction michael@0: * reordering. It should also act as a compiler barrier to prevent reads and michael@0: * writes from moving to after the store. michael@0: * michael@0: * static Type exchange(Type* ptr, Type val); michael@0: * michael@0: * This function atomically stores |val| into |*ptr| and returns the previous michael@0: * contents of *ptr; michael@0: * michael@0: * static bool compareExchange(Type* ptr, Type oldVal, Type newVal); michael@0: * michael@0: * This function atomically performs the following operation: michael@0: * michael@0: * if (*ptr == oldVal) { michael@0: * *ptr = newVal; michael@0: * return true; michael@0: * } else { michael@0: * return false; michael@0: * } michael@0: * michael@0: */ michael@0: template struct PrimitiveIntrinsics; michael@0: michael@0: template<> michael@0: struct PrimitiveIntrinsics<4> michael@0: { michael@0: typedef long Type; michael@0: michael@0: static Type add(Type* ptr, Type val) { michael@0: return _InterlockedExchangeAdd(ptr, val); michael@0: } michael@0: static Type sub(Type* ptr, Type val) { michael@0: /* michael@0: * _InterlockedExchangeSubtract isn't available before Windows 7, michael@0: * and we must support Windows XP. 
michael@0: */ michael@0: return _InterlockedExchangeAdd(ptr, -val); michael@0: } michael@0: static Type or_(Type* ptr, Type val) { michael@0: return _InterlockedOr(ptr, val); michael@0: } michael@0: static Type xor_(Type* ptr, Type val) { michael@0: return _InterlockedXor(ptr, val); michael@0: } michael@0: static Type and_(Type* ptr, Type val) { michael@0: return _InterlockedAnd(ptr, val); michael@0: } michael@0: static void store(Type* ptr, Type val) { michael@0: _InterlockedExchange(ptr, val); michael@0: } michael@0: static Type exchange(Type* ptr, Type val) { michael@0: return _InterlockedExchange(ptr, val); michael@0: } michael@0: static bool compareExchange(Type* ptr, Type oldVal, Type newVal) { michael@0: return _InterlockedCompareExchange(ptr, newVal, oldVal) == oldVal; michael@0: } michael@0: }; michael@0: michael@0: # if defined(_M_X64) michael@0: michael@0: extern "C" { michael@0: long long __cdecl _InterlockedExchangeAdd64(long long volatile* dst, michael@0: long long value); michael@0: long long __cdecl _InterlockedOr64(long long volatile* dst, michael@0: long long value); michael@0: long long __cdecl _InterlockedXor64(long long volatile* dst, michael@0: long long value); michael@0: long long __cdecl _InterlockedAnd64(long long volatile* dst, michael@0: long long value); michael@0: long long __cdecl _InterlockedExchange64(long long volatile* dst, michael@0: long long value); michael@0: long long __cdecl _InterlockedCompareExchange64(long long volatile* dst, michael@0: long long newVal, michael@0: long long oldVal); michael@0: } michael@0: michael@0: # pragma intrinsic(_InterlockedExchangeAdd64) michael@0: # pragma intrinsic(_InterlockedOr64) michael@0: # pragma intrinsic(_InterlockedXor64) michael@0: # pragma intrinsic(_InterlockedAnd64) michael@0: # pragma intrinsic(_InterlockedExchange64) michael@0: # pragma intrinsic(_InterlockedCompareExchange64) michael@0: michael@0: template <> michael@0: struct PrimitiveIntrinsics<8> michael@0: { michael@0: 
typedef __int64 Type; michael@0: michael@0: static Type add(Type* ptr, Type val) { michael@0: return _InterlockedExchangeAdd64(ptr, val); michael@0: } michael@0: static Type sub(Type* ptr, Type val) { michael@0: /* michael@0: * There is no _InterlockedExchangeSubtract64. michael@0: */ michael@0: return _InterlockedExchangeAdd64(ptr, -val); michael@0: } michael@0: static Type or_(Type* ptr, Type val) { michael@0: return _InterlockedOr64(ptr, val); michael@0: } michael@0: static Type xor_(Type* ptr, Type val) { michael@0: return _InterlockedXor64(ptr, val); michael@0: } michael@0: static Type and_(Type* ptr, Type val) { michael@0: return _InterlockedAnd64(ptr, val); michael@0: } michael@0: static void store(Type* ptr, Type val) { michael@0: _InterlockedExchange64(ptr, val); michael@0: } michael@0: static Type exchange(Type* ptr, Type val) { michael@0: return _InterlockedExchange64(ptr, val); michael@0: } michael@0: static bool compareExchange(Type* ptr, Type oldVal, Type newVal) { michael@0: return _InterlockedCompareExchange64(ptr, newVal, oldVal) == oldVal; michael@0: } michael@0: }; michael@0: michael@0: # endif michael@0: michael@0: extern "C" { void _ReadWriteBarrier(); } michael@0: michael@0: # pragma intrinsic(_ReadWriteBarrier) michael@0: michael@0: template struct Barrier; michael@0: michael@0: /* michael@0: * We do not provide an afterStore method in Barrier, as Relaxed and michael@0: * ReleaseAcquire orderings do not require one, and the required barrier michael@0: * for SequentiallyConsistent is handled by PrimitiveIntrinsics. 
michael@0: */ michael@0: michael@0: template<> michael@0: struct Barrier michael@0: { michael@0: static void beforeLoad() {} michael@0: static void afterLoad() {} michael@0: static void beforeStore() {} michael@0: }; michael@0: michael@0: template<> michael@0: struct Barrier michael@0: { michael@0: static void beforeLoad() {} michael@0: static void afterLoad() { _ReadWriteBarrier(); } michael@0: static void beforeStore() { _ReadWriteBarrier(); } michael@0: }; michael@0: michael@0: template<> michael@0: struct Barrier michael@0: { michael@0: static void beforeLoad() { _ReadWriteBarrier(); } michael@0: static void afterLoad() { _ReadWriteBarrier(); } michael@0: static void beforeStore() { _ReadWriteBarrier(); } michael@0: }; michael@0: michael@0: template michael@0: struct CastHelper michael@0: { michael@0: static PrimType toPrimType(T val) { return static_cast(val); } michael@0: static T fromPrimType(PrimType val) { return static_cast(val); } michael@0: }; michael@0: michael@0: template michael@0: struct CastHelper michael@0: { michael@0: static PrimType toPrimType(T* val) { return reinterpret_cast(val); } michael@0: static T* fromPrimType(PrimType val) { return reinterpret_cast(val); } michael@0: }; michael@0: michael@0: template michael@0: struct IntrinsicBase michael@0: { michael@0: typedef T ValueType; michael@0: typedef PrimitiveIntrinsics Primitives; michael@0: typedef typename Primitives::Type PrimType; michael@0: static_assert(sizeof(PrimType) == sizeof(T), michael@0: "Selection of PrimitiveIntrinsics was wrong"); michael@0: typedef CastHelper Cast; michael@0: }; michael@0: michael@0: template michael@0: struct IntrinsicMemoryOps : public IntrinsicBase michael@0: { michael@0: typedef typename IntrinsicBase::ValueType ValueType; michael@0: typedef typename IntrinsicBase::Primitives Primitives; michael@0: typedef typename IntrinsicBase::PrimType PrimType; michael@0: typedef typename IntrinsicBase::Cast Cast; michael@0: static ValueType load(const ValueType& 
ptr) { michael@0: Barrier::beforeLoad(); michael@0: ValueType val = ptr; michael@0: Barrier::afterLoad(); michael@0: return val; michael@0: } michael@0: static void store(ValueType& ptr, ValueType val) { michael@0: // For SequentiallyConsistent, Primitives::store() will generate the michael@0: // proper memory fence. Everything else just needs a barrier before michael@0: // the store. michael@0: if (Order == SequentiallyConsistent) { michael@0: Primitives::store(reinterpret_cast(&ptr), michael@0: Cast::toPrimType(val)); michael@0: } else { michael@0: Barrier::beforeStore(); michael@0: ptr = val; michael@0: } michael@0: } michael@0: static ValueType exchange(ValueType& ptr, ValueType val) { michael@0: PrimType oldval = michael@0: Primitives::exchange(reinterpret_cast(&ptr), michael@0: Cast::toPrimType(val)); michael@0: return Cast::fromPrimType(oldval); michael@0: } michael@0: static bool compareExchange(ValueType& ptr, ValueType oldVal, ValueType newVal) { michael@0: return Primitives::compareExchange(reinterpret_cast(&ptr), michael@0: Cast::toPrimType(oldVal), michael@0: Cast::toPrimType(newVal)); michael@0: } michael@0: }; michael@0: michael@0: template michael@0: struct IntrinsicApplyHelper : public IntrinsicBase michael@0: { michael@0: typedef typename IntrinsicBase::ValueType ValueType; michael@0: typedef typename IntrinsicBase::PrimType PrimType; michael@0: typedef typename IntrinsicBase::Cast Cast; michael@0: typedef PrimType (*BinaryOp)(PrimType*, PrimType); michael@0: typedef PrimType (*UnaryOp)(PrimType*); michael@0: michael@0: static ValueType applyBinaryFunction(BinaryOp op, ValueType& ptr, michael@0: ValueType val) { michael@0: PrimType* primTypePtr = reinterpret_cast(&ptr); michael@0: PrimType primTypeVal = Cast::toPrimType(val); michael@0: return Cast::fromPrimType(op(primTypePtr, primTypeVal)); michael@0: } michael@0: michael@0: static ValueType applyUnaryFunction(UnaryOp op, ValueType& ptr) { michael@0: PrimType* primTypePtr = 
reinterpret_cast(&ptr); michael@0: return Cast::fromPrimType(op(primTypePtr)); michael@0: } michael@0: }; michael@0: michael@0: template michael@0: struct IntrinsicAddSub : public IntrinsicApplyHelper michael@0: { michael@0: typedef typename IntrinsicApplyHelper::ValueType ValueType; michael@0: typedef typename IntrinsicBase::Primitives Primitives; michael@0: static ValueType add(ValueType& ptr, ValueType val) { michael@0: return applyBinaryFunction(&Primitives::add, ptr, val); michael@0: } michael@0: static ValueType sub(ValueType& ptr, ValueType val) { michael@0: return applyBinaryFunction(&Primitives::sub, ptr, val); michael@0: } michael@0: }; michael@0: michael@0: template michael@0: struct IntrinsicAddSub : public IntrinsicApplyHelper michael@0: { michael@0: typedef typename IntrinsicApplyHelper::ValueType ValueType; michael@0: static ValueType add(ValueType& ptr, ptrdiff_t amount) { michael@0: return applyBinaryFunction(&Primitives::add, ptr, michael@0: (ValueType)(amount * sizeof(ValueType))); michael@0: } michael@0: static ValueType sub(ValueType& ptr, ptrdiff_t amount) { michael@0: return applyBinaryFunction(&Primitives::sub, ptr, michael@0: (ValueType)(amount * sizeof(ValueType))); michael@0: } michael@0: }; michael@0: michael@0: template michael@0: struct IntrinsicIncDec : public IntrinsicAddSub michael@0: { michael@0: typedef typename IntrinsicAddSub::ValueType ValueType; michael@0: static ValueType inc(ValueType& ptr) { return add(ptr, 1); } michael@0: static ValueType dec(ValueType& ptr) { return sub(ptr, 1); } michael@0: }; michael@0: michael@0: template michael@0: struct AtomicIntrinsics : public IntrinsicMemoryOps, michael@0: public IntrinsicIncDec michael@0: { michael@0: typedef typename IntrinsicIncDec::ValueType ValueType; michael@0: static ValueType or_(ValueType& ptr, T val) { michael@0: return applyBinaryFunction(&Primitives::or_, ptr, val); michael@0: } michael@0: static ValueType xor_(ValueType& ptr, T val) { michael@0: return 
applyBinaryFunction(&Primitives::xor_, ptr, val); michael@0: } michael@0: static ValueType and_(ValueType& ptr, T val) { michael@0: return applyBinaryFunction(&Primitives::and_, ptr, val); michael@0: } michael@0: }; michael@0: michael@0: template michael@0: struct AtomicIntrinsics : public IntrinsicMemoryOps, michael@0: public IntrinsicIncDec michael@0: { michael@0: typedef typename IntrinsicMemoryOps::ValueType ValueType; michael@0: }; michael@0: michael@0: } // namespace detail michael@0: } // namespace mozilla michael@0: michael@0: #else michael@0: # error "Atomic compiler intrinsics are not supported on your platform" michael@0: #endif michael@0: michael@0: namespace mozilla { michael@0: michael@0: namespace detail { michael@0: michael@0: template michael@0: class AtomicBase michael@0: { michael@0: // We only support 32-bit types on 32-bit Windows, which constrains our michael@0: // implementation elsewhere. But we support pointer-sized types everywhere. michael@0: static_assert(sizeof(T) == 4 || (sizeof(uintptr_t) == 8 && sizeof(T) == 8), michael@0: "mozilla/Atomics.h only supports 32-bit and pointer-sized types"); michael@0: michael@0: protected: michael@0: typedef typename detail::AtomicIntrinsics Intrinsics; michael@0: typename Intrinsics::ValueType mValue; michael@0: michael@0: public: michael@0: MOZ_CONSTEXPR AtomicBase() : mValue() {} michael@0: MOZ_CONSTEXPR AtomicBase(T aInit) : mValue(aInit) {} michael@0: michael@0: // Note: we can't provide operator T() here because Atomic inherits michael@0: // from AtomcBase with T=uint32_t and not T=bool. If we implemented michael@0: // operator T() here, it would cause errors when comparing Atomic with michael@0: // a regular bool. michael@0: michael@0: T operator=(T aValue) { michael@0: Intrinsics::store(mValue, aValue); michael@0: return aValue; michael@0: } michael@0: michael@0: /** michael@0: * Performs an atomic swap operation. 
aValue is stored and the previous michael@0: * value of this variable is returned. michael@0: */ michael@0: T exchange(T aValue) { michael@0: return Intrinsics::exchange(mValue, aValue); michael@0: } michael@0: michael@0: /** michael@0: * Performs an atomic compare-and-swap operation and returns true if it michael@0: * succeeded. This is equivalent to atomically doing michael@0: * michael@0: * if (mValue == aOldValue) { michael@0: * mValue = aNewValue; michael@0: * return true; michael@0: * } else { michael@0: * return false; michael@0: * } michael@0: */ michael@0: bool compareExchange(T aOldValue, T aNewValue) { michael@0: return Intrinsics::compareExchange(mValue, aOldValue, aNewValue); michael@0: } michael@0: michael@0: private: michael@0: template michael@0: AtomicBase(const AtomicBase& aCopy) MOZ_DELETE; michael@0: }; michael@0: michael@0: template michael@0: class AtomicBaseIncDec : public AtomicBase michael@0: { michael@0: typedef typename detail::AtomicBase Base; michael@0: michael@0: public: michael@0: MOZ_CONSTEXPR AtomicBaseIncDec() : Base() {} michael@0: MOZ_CONSTEXPR AtomicBaseIncDec(T aInit) : Base(aInit) {} michael@0: michael@0: using Base::operator=; michael@0: michael@0: operator T() const { return Base::Intrinsics::load(Base::mValue); } michael@0: T operator++(int) { return Base::Intrinsics::inc(Base::mValue); } michael@0: T operator--(int) { return Base::Intrinsics::dec(Base::mValue); } michael@0: T operator++() { return Base::Intrinsics::inc(Base::mValue) + 1; } michael@0: T operator--() { return Base::Intrinsics::dec(Base::mValue) - 1; } michael@0: michael@0: private: michael@0: template michael@0: AtomicBaseIncDec(const AtomicBaseIncDec& aCopy) MOZ_DELETE; michael@0: }; michael@0: michael@0: } // namespace detail michael@0: michael@0: /** michael@0: * A wrapper for a type that enforces that all memory accesses are atomic. michael@0: * michael@0: * In general, where a variable |T foo| exists, |Atomic foo| can be used in michael@0: * its place. 
Implementations for integral and pointer types are provided michael@0: * below. michael@0: * michael@0: * Atomic accesses are sequentially consistent by default. You should michael@0: * use the default unless you are tall enough to ride the michael@0: * memory-ordering roller coaster (if you're not sure, you aren't) and michael@0: * you have a compelling reason to do otherwise. michael@0: * michael@0: * There is one exception to the case of atomic memory accesses: providing an michael@0: * initial value of the atomic value is not guaranteed to be atomic. This is a michael@0: * deliberate design choice that enables static atomic variables to be declared michael@0: * without introducing extra static constructors. michael@0: */ michael@0: template michael@0: class Atomic; michael@0: michael@0: /** michael@0: * Atomic implementation for integral types. michael@0: * michael@0: * In addition to atomic store and load operations, compound assignment and michael@0: * increment/decrement operators are implemented which perform the michael@0: * corresponding read-modify-write operation atomically. Finally, an atomic michael@0: * swap method is provided. 
michael@0: */ michael@0: template michael@0: class Atomic::value && !IsSame::value>::Type> michael@0: : public detail::AtomicBaseIncDec michael@0: { michael@0: typedef typename detail::AtomicBaseIncDec Base; michael@0: michael@0: public: michael@0: MOZ_CONSTEXPR Atomic() : Base() {} michael@0: MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {} michael@0: michael@0: using Base::operator=; michael@0: michael@0: T operator+=(T delta) { return Base::Intrinsics::add(Base::mValue, delta) + delta; } michael@0: T operator-=(T delta) { return Base::Intrinsics::sub(Base::mValue, delta) - delta; } michael@0: T operator|=(T val) { return Base::Intrinsics::or_(Base::mValue, val) | val; } michael@0: T operator^=(T val) { return Base::Intrinsics::xor_(Base::mValue, val) ^ val; } michael@0: T operator&=(T val) { return Base::Intrinsics::and_(Base::mValue, val) & val; } michael@0: michael@0: private: michael@0: Atomic(Atomic& aOther) MOZ_DELETE; michael@0: }; michael@0: michael@0: /** michael@0: * Atomic implementation for pointer types. michael@0: * michael@0: * An atomic compare-and-swap primitive for pointer variables is provided, as michael@0: * are atomic increment and decement operators. Also provided are the compound michael@0: * assignment operators for addition and subtraction. Atomic swap (via michael@0: * exchange()) is included as well. 
michael@0: */ michael@0: template michael@0: class Atomic : public detail::AtomicBaseIncDec michael@0: { michael@0: typedef typename detail::AtomicBaseIncDec Base; michael@0: michael@0: public: michael@0: MOZ_CONSTEXPR Atomic() : Base() {} michael@0: MOZ_CONSTEXPR Atomic(T* aInit) : Base(aInit) {} michael@0: michael@0: using Base::operator=; michael@0: michael@0: T* operator+=(ptrdiff_t delta) { michael@0: return Base::Intrinsics::add(Base::mValue, delta) + delta; michael@0: } michael@0: T* operator-=(ptrdiff_t delta) { michael@0: return Base::Intrinsics::sub(Base::mValue, delta) - delta; michael@0: } michael@0: michael@0: private: michael@0: Atomic(Atomic& aOther) MOZ_DELETE; michael@0: }; michael@0: michael@0: /** michael@0: * Atomic implementation for enum types. michael@0: * michael@0: * The atomic store and load operations and the atomic swap method is provided. michael@0: */ michael@0: template michael@0: class Atomic::value>::Type> michael@0: : public detail::AtomicBase michael@0: { michael@0: typedef typename detail::AtomicBase Base; michael@0: michael@0: public: michael@0: MOZ_CONSTEXPR Atomic() : Base() {} michael@0: MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {} michael@0: michael@0: operator T() const { return Base::Intrinsics::load(Base::mValue); } michael@0: michael@0: using Base::operator=; michael@0: michael@0: private: michael@0: Atomic(Atomic& aOther) MOZ_DELETE; michael@0: }; michael@0: michael@0: /** michael@0: * Atomic implementation for boolean types. michael@0: * michael@0: * The atomic store and load operations and the atomic swap method is provided. michael@0: * michael@0: * Note: michael@0: * michael@0: * - sizeof(Atomic) != sizeof(bool) for some implementations of michael@0: * bool and/or some implementations of std::atomic. This is allowed in michael@0: * [atomic.types.generic]p9. michael@0: * michael@0: * - It's not obvious whether the 8-bit atomic functions on Windows are always michael@0: * inlined or not. 
If they are not inlined, the corresponding functions in the michael@0: * runtime library are not available on Windows XP. This is why we implement michael@0: * Atomic with an underlying type of uint32_t. michael@0: */ michael@0: template michael@0: class Atomic michael@0: : protected detail::AtomicBase michael@0: { michael@0: typedef typename detail::AtomicBase Base; michael@0: michael@0: public: michael@0: MOZ_CONSTEXPR Atomic() : Base() {} michael@0: MOZ_CONSTEXPR Atomic(bool aInit) : Base(aInit) {} michael@0: michael@0: // We provide boolean wrappers for the underlying AtomicBase methods. michael@0: operator bool() const { return Base::Intrinsics::load(Base::mValue); } michael@0: bool operator=(bool aValue) { return Base::operator=(aValue); } michael@0: bool exchange(bool aValue) { return Base::exchange(aValue); } michael@0: bool compareExchange(bool aOldValue, bool aNewValue) { michael@0: return Base::compareExchange(aOldValue, aNewValue); michael@0: } michael@0: michael@0: private: michael@0: Atomic(Atomic& aOther) MOZ_DELETE; michael@0: }; michael@0: michael@0: } // namespace mozilla michael@0: michael@0: #endif /* mozilla_Atomics_h */