mfbt/Atomics.h

author       Michael Schloh von Bennewitz <michael@schloh.com>
date         Tue, 06 Jan 2015 21:39:09 +0100
branch       TOR_BUG_9701
changeset    8:97036ab72558
permissions  -rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
     3 /* This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 /*
     8  * Implements (almost always) lock-free atomic operations. The operations here
     9  * are a subset of that which can be found in C++11's <atomic> header, with a
    10  * different API to enforce consistent memory ordering constraints.
    11  *
    12  * Anyone caught using |volatile| for inter-thread memory safety needs to be
    13  * sent a copy of this header and the C++11 standard.
    14  */
    16 #ifndef mozilla_Atomics_h
    17 #define mozilla_Atomics_h
    19 #include "mozilla/Assertions.h"
    20 #include "mozilla/Attributes.h"
    21 #include "mozilla/Compiler.h"
    22 #include "mozilla/TypeTraits.h"
    24 #include <stdint.h>
    26 /*
    27  * Our minimum deployment target on clang/OS X is OS X 10.6, whose SDK
    28  * does not have <atomic>.  So be sure to check for <atomic> support
    29  * along with C++0x support.
    30  */
    31 #if defined(__clang__) || defined(__GNUC__)
    32    /*
    33     * Clang doesn't like <atomic> from libstdc++ before 4.7 due to the
    34     * loose typing of the atomic builtins. GCC 4.5 and 4.6 lack inline
    35     * definitions for unspecialized std::atomic and cause linking errors.
    36     * Therefore, we require at least 4.7.0 for using libstdc++.
    37     */
    38 #  if MOZ_USING_LIBSTDCXX && MOZ_LIBSTDCXX_VERSION_AT_LEAST(4, 7, 0)
    39 #    define MOZ_HAVE_CXX11_ATOMICS
    40 #  elif MOZ_USING_LIBCXX
    41 #    define MOZ_HAVE_CXX11_ATOMICS
    42 #  endif
    43 #elif defined(_MSC_VER) && _MSC_VER >= 1700
    44 #  if defined(DEBUG)
    45      /*
    46       * Provide our own failure code since we're having trouble linking to
    47       * std::_Debug_message (bug 982310).
    48       */
    49 #    define _INVALID_MEMORY_ORDER MOZ_CRASH("Invalid memory order")
    50 #  endif
    51 #  define MOZ_HAVE_CXX11_ATOMICS
    52 #endif
    54 namespace mozilla {
    56 /**
    57  * An enum of memory ordering possibilities for atomics.
    58  *
    59  * Memory ordering is the observable state of distinct values in memory.
    60  * (It's a separate concept from atomicity, which concerns whether an
    61  * operation can ever be observed in an intermediate state.  Don't
    62  * conflate the two!)  Given a sequence of operations in source code on
    63  * memory, it is *not* always the case that, at all times and on all
    64  * cores, those operations will appear to have occurred in that exact
    65  * sequence.  First, the compiler might reorder that sequence, if it
    66  * thinks another ordering will be more efficient.  Second, the CPU may
    67  * not expose so consistent a view of memory.  CPUs will often perform
    68  * their own instruction reordering, above and beyond that performed by
    69  * the compiler.  And each core has its own memory caches, and accesses
    70  * (reads and writes both) to "memory" may only resolve to out-of-date
    71  * cache entries -- not to the "most recently" performed operation in
    72  * some global sense.  Any access to a value that may be used by
    73  * multiple threads, potentially across multiple cores, must therefore
    74  * have a memory ordering imposed on it, for all code on all
    75  * threads/cores to have a sufficiently coherent worldview.
    76  *
    77  * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
    78  * http://en.cppreference.com/w/cpp/atomic/memory_order go into more
    79  * detail on all this, including examples of how each mode works.
    80  *
    81  * Note that for simplicity and practicality, not all of the modes in
    82  * C++11 are supported.  The missing C++11 modes are either subsumed by
    83  * the modes we provide below, or not relevant for the CPUs we support
    84  * in Gecko.  These three modes are confusing enough as it is!
    85  */
    86 enum MemoryOrdering {
    87   /*
    88    * Relaxed ordering is the simplest memory ordering: none at all.
    89    * When the result of a write is observed, nothing may be inferred
    90    * about other memory.  Writes ostensibly performed "before" on the
    91    * writing thread may not yet be visible.  Writes performed "after" on
    92    * the writing thread may already be visible, if the compiler or CPU
    93    * reordered them.  (The latter can happen if reads and/or writes get
    94    * held up in per-processor caches.)  Relaxed ordering means
    95    * operations can always use cached values (as long as the actual
    96    * updates to atomic values actually occur, correctly, eventually), so
    97    * it's usually the fastest sort of atomic access.  For this reason,
    98    * *it's also the most dangerous kind of access*.
    99    *
   100    * Relaxed ordering is good for things like process-wide statistics
   101    * counters that don't need to be consistent with anything else, so
   102    * long as updates themselves are atomic.  (And so long as any
   103    * observations of that value can tolerate being out-of-date -- if you
   104    * need some sort of up-to-date value, you need some sort of other
   105    * synchronizing operation.)  It's *not* good for locks, mutexes,
   106    * reference counts, etc. that mediate access to other memory, or must
   107    * be observably consistent with other memory.
   108    *
   109    * x86 architectures don't take advantage of the optimization
   110    * opportunities that relaxed ordering permits.  Thus it's possible
   111    * that using relaxed ordering will "work" on x86 but fail elsewhere
   112    * (ARM, say, which *does* implement non-sequentially-consistent
   113    * relaxed ordering semantics).  Be extra-careful using relaxed
   114    * ordering if you can't easily test non-x86 architectures!
   115    */
   116   Relaxed,
   117   /*
   118    * When an atomic value is updated with ReleaseAcquire ordering, and
   119    * that new value is observed with ReleaseAcquire ordering, prior
   120    * writes (atomic or not) are also observable.  What ReleaseAcquire
   121    * *doesn't* give you is any observable ordering guarantees for
   122    * ReleaseAcquire-ordered operations on different objects.  For
   123    * example, if there are two cores that each perform ReleaseAcquire
   124    * operations on separate objects, each core may or may not observe
   125    * the operations made by the other core.  The only way the cores can
   126    * be synchronized with ReleaseAcquire is if they both
   127    * ReleaseAcquire-access the same object.  This implies that you can't
   128    * necessarily describe some global total ordering of ReleaseAcquire
   129    * operations.
   130    *
   131    * ReleaseAcquire ordering is good for (as the name implies) atomic
   132    * operations on values controlling ownership of things: reference
   133    * counts, mutexes, and the like.  However, if you are thinking about
   134    * using these to implement your own locks or mutexes, you should take
   135    * a good, hard look at actual lock or mutex primitives first.
   136    */
   137   ReleaseAcquire,
   138   /*
   139    * When an atomic value is updated with SequentiallyConsistent
    140    * ordering, all writes are observable when the update is observed, just
   141    * as with ReleaseAcquire ordering.  But, furthermore, a global total
   142    * ordering of SequentiallyConsistent operations *can* be described.
   143    * For example, if two cores perform SequentiallyConsistent operations
   144    * on separate objects, one core will observably perform its update
   145    * (and all previous operations will have completed), then the other
   146    * core will observably perform its update (and all previous
   147    * operations will have completed).  (Although those previous
   148    * operations aren't themselves ordered -- they could be intermixed,
   149    * or ordered if they occur on atomic values with ordering
   150    * requirements.)  SequentiallyConsistent is the *simplest and safest*
   151    * ordering of atomic operations -- it's always as if one operation
   152    * happens, then another, then another, in some order -- and every
   153    * core observes updates to happen in that single order.  Because it
   154    * has the most synchronization requirements, operations ordered this
   155    * way also tend to be slowest.
   156    *
   157    * SequentiallyConsistent ordering can be desirable when multiple
   158    * threads observe objects, and they all have to agree on the
   159    * observable order of changes to them.  People expect
   160    * SequentiallyConsistent ordering, even if they shouldn't, when
   161    * writing code, atomic or otherwise.  SequentiallyConsistent is also
   162    * the ordering of choice when designing lockless data structures.  If
   163    * you don't know what order to use, use this one.
   164    */
   165   SequentiallyConsistent,
   166 };
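/*
 * Usage sketch (editorial illustration; the variable names are hypothetical,
 * and Atomic<> is the wrapper class defined later in this header):
 *
 *   // A process-wide statistics counter: updates must be atomic, but nothing
 *   // else is synchronized through it.
 *   static Atomic<uint32_t, Relaxed> sBytesWritten(0);
 *   sBytesWritten += aLen;
 *
 *   // Publishing data with ReleaseAcquire: writes made before the store of
 *   // |true| are visible to any thread that observes |true|.
 *   static Atomic<bool, ReleaseAcquire> sDataReady(false);
 *   // writer: fill in the shared structure, then
 *   sDataReady = true;
 *   // reader: if (sDataReady) { ...the shared structure is safe to read... }
 *
 *   // When in doubt, take the default, SequentiallyConsistent ordering:
 *   static Atomic<int> sState(0);
 */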
   168 } // namespace mozilla
   170 // Build up the underlying intrinsics.
   171 #ifdef MOZ_HAVE_CXX11_ATOMICS
   173 #  include <atomic>
   175 namespace mozilla {
   176 namespace detail {
   178 /*
   179  * We provide CompareExchangeFailureOrder to work around a bug in some
   180  * versions of GCC's <atomic> header.  See bug 898491.
   181  */
   182 template<MemoryOrdering Order> struct AtomicOrderConstraints;
   184 template<>
   185 struct AtomicOrderConstraints<Relaxed>
   186 {
   187     static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed;
   188     static const std::memory_order LoadOrder = std::memory_order_relaxed;
   189     static const std::memory_order StoreOrder = std::memory_order_relaxed;
   190     static const std::memory_order CompareExchangeFailureOrder =
   191       std::memory_order_relaxed;
   192 };
   194 template<>
   195 struct AtomicOrderConstraints<ReleaseAcquire>
   196 {
   197     static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel;
   198     static const std::memory_order LoadOrder = std::memory_order_acquire;
   199     static const std::memory_order StoreOrder = std::memory_order_release;
   200     static const std::memory_order CompareExchangeFailureOrder =
   201       std::memory_order_acquire;
   202 };
   204 template<>
   205 struct AtomicOrderConstraints<SequentiallyConsistent>
   206 {
   207     static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst;
   208     static const std::memory_order LoadOrder = std::memory_order_seq_cst;
   209     static const std::memory_order StoreOrder = std::memory_order_seq_cst;
   210     static const std::memory_order CompareExchangeFailureOrder =
   211       std::memory_order_seq_cst;
   212 };
   214 template<typename T, MemoryOrdering Order>
   215 struct IntrinsicBase
   216 {
   217     typedef std::atomic<T> ValueType;
   218     typedef AtomicOrderConstraints<Order> OrderedOp;
   219 };
   221 template<typename T, MemoryOrdering Order>
   222 struct IntrinsicMemoryOps : public IntrinsicBase<T, Order>
   223 {
   224     typedef IntrinsicBase<T, Order> Base;
   225     static T load(const typename Base::ValueType& ptr) {
   226       return ptr.load(Base::OrderedOp::LoadOrder);
   227     }
   228     static void store(typename Base::ValueType& ptr, T val) {
   229       ptr.store(val, Base::OrderedOp::StoreOrder);
   230     }
   231     static T exchange(typename Base::ValueType& ptr, T val) {
   232       return ptr.exchange(val, Base::OrderedOp::AtomicRMWOrder);
   233     }
   234     static bool compareExchange(typename Base::ValueType& ptr, T oldVal, T newVal) {
   235       return ptr.compare_exchange_strong(oldVal, newVal,
   236                                          Base::OrderedOp::AtomicRMWOrder,
   237                                          Base::OrderedOp::CompareExchangeFailureOrder);
   238     }
   239 };
   241 template<typename T, MemoryOrdering Order>
   242 struct IntrinsicAddSub : public IntrinsicBase<T, Order>
   243 {
   244     typedef IntrinsicBase<T, Order> Base;
   245     static T add(typename Base::ValueType& ptr, T val) {
   246       return ptr.fetch_add(val, Base::OrderedOp::AtomicRMWOrder);
   247     }
   248     static T sub(typename Base::ValueType& ptr, T val) {
   249       return ptr.fetch_sub(val, Base::OrderedOp::AtomicRMWOrder);
   250     }
   251 };
   253 template<typename T, MemoryOrdering Order>
   254 struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order>
   255 {
   256     typedef IntrinsicBase<T*, Order> Base;
   257     static T* add(typename Base::ValueType& ptr, ptrdiff_t val) {
   258       return ptr.fetch_add(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder);
   259     }
   260     static T* sub(typename Base::ValueType& ptr, ptrdiff_t val) {
   261       return ptr.fetch_sub(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder);
   262     }
   263   private:
   264     /*
   265      * GCC 4.6's <atomic> header has a bug where adding X to an
   266      * atomic<T*> is not the same as adding X to a T*.  Hence the need
   267      * for this function to provide the correct addend.
   268      */
   269     static ptrdiff_t fixupAddend(ptrdiff_t val) {
   270 #if defined(__clang__) || defined(_MSC_VER)
   271       return val;
   272 #elif defined(__GNUC__) && MOZ_GCC_VERSION_AT_LEAST(4, 6, 0) && \
   273       !MOZ_GCC_VERSION_AT_LEAST(4, 7, 0)
   274       return val * sizeof(T);
   275 #else
   276       return val;
   277 #endif
   278     }
   279 };
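/*
 * Worked example (editorial note): the workaround above implies that the buggy
 * libstdc++ 4.6 fetch_add/fetch_sub treat the addend as a raw byte count.  With
 * T = uint32_t, fetch_add(1) would then advance the stored pointer by one byte
 * instead of one element, so fixupAddend() scales the addend by sizeof(T)
 * (here, 4) on that toolchain and leaves it untouched everywhere else, keeping
 * Atomic<T*> arithmetic in whole elements.
 */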
   281 template<typename T, MemoryOrdering Order>
   282 struct IntrinsicIncDec : public IntrinsicAddSub<T, Order>
   283 {
   284     typedef IntrinsicBase<T, Order> Base;
   285     static T inc(typename Base::ValueType& ptr) {
   286       return IntrinsicAddSub<T, Order>::add(ptr, 1);
   287     }
   288     static T dec(typename Base::ValueType& ptr) {
   289       return IntrinsicAddSub<T, Order>::sub(ptr, 1);
   290     }
   291 };
   293 template<typename T, MemoryOrdering Order>
   294 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
   295                           public IntrinsicIncDec<T, Order>
   296 {
   297     typedef IntrinsicBase<T, Order> Base;
   298     static T or_(typename Base::ValueType& ptr, T val) {
   299       return ptr.fetch_or(val, Base::OrderedOp::AtomicRMWOrder);
   300     }
   301     static T xor_(typename Base::ValueType& ptr, T val) {
   302       return ptr.fetch_xor(val, Base::OrderedOp::AtomicRMWOrder);
   303     }
   304     static T and_(typename Base::ValueType& ptr, T val) {
   305       return ptr.fetch_and(val, Base::OrderedOp::AtomicRMWOrder);
   306     }
   307 };
   309 template<typename T, MemoryOrdering Order>
   310 struct AtomicIntrinsics<T*, Order>
   311   : public IntrinsicMemoryOps<T*, Order>, public IntrinsicIncDec<T*, Order>
   312 {
   313 };
   315 } // namespace detail
   316 } // namespace mozilla
   318 #elif defined(__GNUC__)
   320 namespace mozilla {
   321 namespace detail {
   323 /*
   324  * The __sync_* family of intrinsics is documented here:
   325  *
   326  * http://gcc.gnu.org/onlinedocs/gcc-4.6.4/gcc/Atomic-Builtins.html
   327  *
   328  * While these intrinsics are deprecated in favor of the newer __atomic_*
    329  * family of intrinsics:
   330  *
   331  * http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/_005f_005fatomic-Builtins.html
   332  *
   333  * any GCC version that supports the __atomic_* intrinsics will also support
   334  * the <atomic> header and so will be handled above.  We provide a version of
   335  * atomics using the __sync_* intrinsics to support older versions of GCC.
   336  *
   337  * All __sync_* intrinsics that we use below act as full memory barriers, for
   338  * both compiler and hardware reordering, except for __sync_lock_test_and_set,
    339  * which is only an acquire barrier.  When we call __sync_lock_test_and_set,
   340  * we add a barrier above it as appropriate.
   341  */
   343 template<MemoryOrdering Order> struct Barrier;
   345 /*
   346  * Some processors (in particular, x86) don't require quite so many calls to
    347  * __sync_synchronize as our specializations of Barrier produce.  If
   348  * performance turns out to be an issue, defining these specializations
   349  * on a per-processor basis would be a good first tuning step.
   350  */
   352 template<>
   353 struct Barrier<Relaxed>
   354 {
   355     static void beforeLoad() {}
   356     static void afterLoad() {}
   357     static void beforeStore() {}
   358     static void afterStore() {}
   359 };
   361 template<>
   362 struct Barrier<ReleaseAcquire>
   363 {
   364     static void beforeLoad() {}
   365     static void afterLoad() { __sync_synchronize(); }
   366     static void beforeStore() { __sync_synchronize(); }
   367     static void afterStore() {}
   368 };
   370 template<>
   371 struct Barrier<SequentiallyConsistent>
   372 {
   373     static void beforeLoad() { __sync_synchronize(); }
   374     static void afterLoad() { __sync_synchronize(); }
   375     static void beforeStore() { __sync_synchronize(); }
   376     static void afterStore() { __sync_synchronize(); }
   377 };
   379 template<typename T, MemoryOrdering Order>
   380 struct IntrinsicMemoryOps
   381 {
   382     static T load(const T& ptr) {
   383       Barrier<Order>::beforeLoad();
   384       T val = ptr;
   385       Barrier<Order>::afterLoad();
   386       return val;
   387     }
   388     static void store(T& ptr, T val) {
   389       Barrier<Order>::beforeStore();
   390       ptr = val;
   391       Barrier<Order>::afterStore();
   392     }
   393     static T exchange(T& ptr, T val) {
   394       // __sync_lock_test_and_set is only an acquire barrier; loads and stores
   395       // can't be moved up from after to before it, but they can be moved down
   396       // from before to after it.  We may want a stricter ordering, so we need
   397       // an explicit barrier.
   399       Barrier<Order>::beforeStore();
   400       return __sync_lock_test_and_set(&ptr, val);
   401     }
   402     static bool compareExchange(T& ptr, T oldVal, T newVal) {
   403       return __sync_bool_compare_and_swap(&ptr, oldVal, newVal);
   404     }
   405 };
   407 template<typename T>
   408 struct IntrinsicAddSub
   409 {
   410     typedef T ValueType;
   411     static T add(T& ptr, T val) {
   412       return __sync_fetch_and_add(&ptr, val);
   413     }
   414     static T sub(T& ptr, T val) {
   415       return __sync_fetch_and_sub(&ptr, val);
   416     }
   417 };
   419 template<typename T>
   420 struct IntrinsicAddSub<T*>
   421 {
   422     typedef T* ValueType;
   423     /*
   424      * The reinterpret_casts are needed so that
   425      * __sync_fetch_and_{add,sub} will properly type-check.
   426      *
   427      * Also, these functions do not provide standard semantics for
   428      * pointer types, so we need to adjust the addend.
   429      */
   430     static ValueType add(ValueType& ptr, ptrdiff_t val) {
   431       ValueType amount = reinterpret_cast<ValueType>(val * sizeof(T));
   432       return __sync_fetch_and_add(&ptr, amount);
   433     }
   434     static ValueType sub(ValueType& ptr, ptrdiff_t val) {
   435       ValueType amount = reinterpret_cast<ValueType>(val * sizeof(T));
   436       return __sync_fetch_and_sub(&ptr, amount);
   437     }
   438 };
   440 template<typename T>
   441 struct IntrinsicIncDec : public IntrinsicAddSub<T>
   442 {
   443     static T inc(T& ptr) { return IntrinsicAddSub<T>::add(ptr, 1); }
   444     static T dec(T& ptr) { return IntrinsicAddSub<T>::sub(ptr, 1); }
   445 };
   447 template<typename T, MemoryOrdering Order>
   448 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
   449                           public IntrinsicIncDec<T>
   450 {
   451     static T or_(T& ptr, T val) {
   452       return __sync_fetch_and_or(&ptr, val);
   453     }
   454     static T xor_(T& ptr, T val) {
   455       return __sync_fetch_and_xor(&ptr, val);
   456     }
   457     static T and_(T& ptr, T val) {
   458       return __sync_fetch_and_and(&ptr, val);
   459     }
   460 };
   462 template<typename T, MemoryOrdering Order>
   463 struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
   464                                      public IntrinsicIncDec<T*>
   465 {
   466 };
   468 } // namespace detail
   469 } // namespace mozilla
   471 #elif defined(_MSC_VER)
   473 /*
   474  * Windows comes with a full complement of atomic operations.
   475  * Unfortunately, most of those aren't available for Windows XP (even if
   476  * the compiler supports intrinsics for them), which is the oldest
   477  * version of Windows we support.  Therefore, we only provide operations
   478  * on 32-bit datatypes for 32-bit Windows versions; for 64-bit Windows
   479  * versions, we support 64-bit datatypes as well.
   480  *
   481  * To avoid namespace pollution issues, we declare whatever functions we
   482  * need ourselves.
   483  */
   485 extern "C" {
   486 long __cdecl _InterlockedExchangeAdd(long volatile* dst, long value);
   487 long __cdecl _InterlockedOr(long volatile* dst, long value);
   488 long __cdecl _InterlockedXor(long volatile* dst, long value);
   489 long __cdecl _InterlockedAnd(long volatile* dst, long value);
   490 long __cdecl _InterlockedExchange(long volatile *dst, long value);
   491 long __cdecl _InterlockedCompareExchange(long volatile *dst, long newVal, long oldVal);
   492 }
   494 #  pragma intrinsic(_InterlockedExchangeAdd)
   495 #  pragma intrinsic(_InterlockedOr)
   496 #  pragma intrinsic(_InterlockedXor)
   497 #  pragma intrinsic(_InterlockedAnd)
   498 #  pragma intrinsic(_InterlockedExchange)
   499 #  pragma intrinsic(_InterlockedCompareExchange)
   501 namespace mozilla {
   502 namespace detail {
   504 #  if !defined(_M_IX86) && !defined(_M_X64)
   505      /*
   506       * The implementations below are optimized for x86ish systems.  You
   507       * will have to modify them if you are porting to Windows on a
   508       * different architecture.
   509       */
   510 #    error "Unknown CPU type"
   511 #  endif
   513 /*
   514  * The PrimitiveIntrinsics template should define |Type|, the datatype of size
   515  * DataSize upon which we operate, and the following eight functions.
   516  *
   517  * static Type add(Type* ptr, Type val);
   518  * static Type sub(Type* ptr, Type val);
   519  * static Type or_(Type* ptr, Type val);
   520  * static Type xor_(Type* ptr, Type val);
   521  * static Type and_(Type* ptr, Type val);
   522  *
   523  *   These functions perform the obvious operation on the value contained in
   524  *   |*ptr| combined with |val| and return the value previously stored in
   525  *   |*ptr|.
   526  *
   527  * static void store(Type* ptr, Type val);
   528  *
   529  *   This function atomically stores |val| into |*ptr| and must provide a full
   530  *   memory fence after the store to prevent compiler and hardware instruction
   531  *   reordering.  It should also act as a compiler barrier to prevent reads and
   532  *   writes from moving to after the store.
   533  *
   534  * static Type exchange(Type* ptr, Type val);
   535  *
   536  *   This function atomically stores |val| into |*ptr| and returns the previous
    537  *   contents of |*ptr|.
   538  *
   539  * static bool compareExchange(Type* ptr, Type oldVal, Type newVal);
   540  *
   541  *   This function atomically performs the following operation:
   542  *
   543  *     if (*ptr == oldVal) {
   544  *       *ptr = newVal;
   545  *       return true;
   546  *     } else {
   547  *       return false;
   548  *     }
   549  *
   550  */
   551 template<size_t DataSize> struct PrimitiveIntrinsics;
   553 template<>
   554 struct PrimitiveIntrinsics<4>
   555 {
   556     typedef long Type;
   558     static Type add(Type* ptr, Type val) {
   559       return _InterlockedExchangeAdd(ptr, val);
   560     }
   561     static Type sub(Type* ptr, Type val) {
   562       /*
   563        * _InterlockedExchangeSubtract isn't available before Windows 7,
   564        * and we must support Windows XP.
   565        */
   566       return _InterlockedExchangeAdd(ptr, -val);
   567     }
   568     static Type or_(Type* ptr, Type val) {
   569       return _InterlockedOr(ptr, val);
   570     }
   571     static Type xor_(Type* ptr, Type val) {
   572       return _InterlockedXor(ptr, val);
   573     }
   574     static Type and_(Type* ptr, Type val) {
   575       return _InterlockedAnd(ptr, val);
   576     }
   577     static void store(Type* ptr, Type val) {
   578       _InterlockedExchange(ptr, val);
   579     }
   580     static Type exchange(Type* ptr, Type val) {
   581       return _InterlockedExchange(ptr, val);
   582     }
   583     static bool compareExchange(Type* ptr, Type oldVal, Type newVal) {
   584       return _InterlockedCompareExchange(ptr, newVal, oldVal) == oldVal;
   585     }
   586 };
   588 #  if defined(_M_X64)
   590 extern "C" {
   591 long long __cdecl _InterlockedExchangeAdd64(long long volatile* dst,
   592                                             long long value);
   593 long long __cdecl _InterlockedOr64(long long volatile* dst,
   594                                    long long value);
   595 long long __cdecl _InterlockedXor64(long long volatile* dst,
   596                                     long long value);
   597 long long __cdecl _InterlockedAnd64(long long volatile* dst,
   598                                     long long value);
   599 long long __cdecl _InterlockedExchange64(long long volatile* dst,
   600                                          long long value);
   601 long long __cdecl _InterlockedCompareExchange64(long long volatile* dst,
   602                                                 long long newVal,
   603                                                 long long oldVal);
   604 }
   606 #    pragma intrinsic(_InterlockedExchangeAdd64)
   607 #    pragma intrinsic(_InterlockedOr64)
   608 #    pragma intrinsic(_InterlockedXor64)
   609 #    pragma intrinsic(_InterlockedAnd64)
   610 #    pragma intrinsic(_InterlockedExchange64)
   611 #    pragma intrinsic(_InterlockedCompareExchange64)
   613 template <>
   614 struct PrimitiveIntrinsics<8>
   615 {
   616     typedef __int64 Type;
   618     static Type add(Type* ptr, Type val) {
   619       return _InterlockedExchangeAdd64(ptr, val);
   620     }
   621     static Type sub(Type* ptr, Type val) {
   622       /*
   623        * There is no _InterlockedExchangeSubtract64.
   624        */
   625       return _InterlockedExchangeAdd64(ptr, -val);
   626     }
   627     static Type or_(Type* ptr, Type val) {
   628       return _InterlockedOr64(ptr, val);
   629     }
   630     static Type xor_(Type* ptr, Type val) {
   631       return _InterlockedXor64(ptr, val);
   632     }
   633     static Type and_(Type* ptr, Type val) {
   634       return _InterlockedAnd64(ptr, val);
   635     }
   636     static void store(Type* ptr, Type val) {
   637       _InterlockedExchange64(ptr, val);
   638     }
   639     static Type exchange(Type* ptr, Type val) {
   640       return _InterlockedExchange64(ptr, val);
   641     }
   642     static bool compareExchange(Type* ptr, Type oldVal, Type newVal) {
   643       return _InterlockedCompareExchange64(ptr, newVal, oldVal) == oldVal;
   644     }
   645 };
   647 #  endif
   649 extern "C" { void _ReadWriteBarrier(); }
   651 #  pragma intrinsic(_ReadWriteBarrier)
   653 template<MemoryOrdering Order> struct Barrier;
   655 /*
   656  * We do not provide an afterStore method in Barrier, as Relaxed and
   657  * ReleaseAcquire orderings do not require one, and the required barrier
   658  * for SequentiallyConsistent is handled by PrimitiveIntrinsics.
   659  */
   661 template<>
   662 struct Barrier<Relaxed>
   663 {
   664     static void beforeLoad() {}
   665     static void afterLoad() {}
   666     static void beforeStore() {}
   667 };
   669 template<>
   670 struct Barrier<ReleaseAcquire>
   671 {
   672     static void beforeLoad() {}
   673     static void afterLoad() { _ReadWriteBarrier(); }
   674     static void beforeStore() { _ReadWriteBarrier(); }
   675 };
   677 template<>
   678 struct Barrier<SequentiallyConsistent>
   679 {
   680     static void beforeLoad() { _ReadWriteBarrier(); }
   681     static void afterLoad() { _ReadWriteBarrier(); }
   682     static void beforeStore() { _ReadWriteBarrier(); }
   683 };
   685 template<typename PrimType, typename T>
   686 struct CastHelper
   687 {
   688   static PrimType toPrimType(T val) { return static_cast<PrimType>(val); }
   689   static T fromPrimType(PrimType val) { return static_cast<T>(val); }
   690 };
   692 template<typename PrimType, typename T>
   693 struct CastHelper<PrimType, T*>
   694 {
   695   static PrimType toPrimType(T* val) { return reinterpret_cast<PrimType>(val); }
   696   static T* fromPrimType(PrimType val) { return reinterpret_cast<T*>(val); }
   697 };
   699 template<typename T>
   700 struct IntrinsicBase
   701 {
   702     typedef T ValueType;
   703     typedef PrimitiveIntrinsics<sizeof(T)> Primitives;
   704     typedef typename Primitives::Type PrimType;
   705     static_assert(sizeof(PrimType) == sizeof(T),
   706                   "Selection of PrimitiveIntrinsics was wrong");
   707     typedef CastHelper<PrimType, T> Cast;
   708 };
   710 template<typename T, MemoryOrdering Order>
   711 struct IntrinsicMemoryOps : public IntrinsicBase<T>
   712 {
   713     typedef typename IntrinsicBase<T>::ValueType ValueType;
   714     typedef typename IntrinsicBase<T>::Primitives Primitives;
   715     typedef typename IntrinsicBase<T>::PrimType PrimType;
   716     typedef typename IntrinsicBase<T>::Cast Cast;
   717     static ValueType load(const ValueType& ptr) {
   718       Barrier<Order>::beforeLoad();
   719       ValueType val = ptr;
   720       Barrier<Order>::afterLoad();
   721       return val;
   722     }
   723     static void store(ValueType& ptr, ValueType val) {
   724       // For SequentiallyConsistent, Primitives::store() will generate the
   725       // proper memory fence.  Everything else just needs a barrier before
   726       // the store.
   727       if (Order == SequentiallyConsistent) {
   728         Primitives::store(reinterpret_cast<PrimType*>(&ptr),
   729                           Cast::toPrimType(val));
   730       } else {
   731         Barrier<Order>::beforeStore();
   732         ptr = val;
   733       }
   734     }
   735     static ValueType exchange(ValueType& ptr, ValueType val) {
   736       PrimType oldval =
   737         Primitives::exchange(reinterpret_cast<PrimType*>(&ptr),
   738                              Cast::toPrimType(val));
   739       return Cast::fromPrimType(oldval);
   740     }
   741     static bool compareExchange(ValueType& ptr, ValueType oldVal, ValueType newVal) {
   742       return Primitives::compareExchange(reinterpret_cast<PrimType*>(&ptr),
   743                                          Cast::toPrimType(oldVal),
   744                                          Cast::toPrimType(newVal));
   745     }
   746 };
   748 template<typename T>
   749 struct IntrinsicApplyHelper : public IntrinsicBase<T>
   750 {
   751     typedef typename IntrinsicBase<T>::ValueType ValueType;
   752     typedef typename IntrinsicBase<T>::PrimType PrimType;
   753     typedef typename IntrinsicBase<T>::Cast Cast;
   754     typedef PrimType (*BinaryOp)(PrimType*, PrimType);
   755     typedef PrimType (*UnaryOp)(PrimType*);
   757     static ValueType applyBinaryFunction(BinaryOp op, ValueType& ptr,
   758                                          ValueType val) {
   759       PrimType* primTypePtr = reinterpret_cast<PrimType*>(&ptr);
   760       PrimType primTypeVal = Cast::toPrimType(val);
   761       return Cast::fromPrimType(op(primTypePtr, primTypeVal));
   762     }
   764     static ValueType applyUnaryFunction(UnaryOp op, ValueType& ptr) {
   765       PrimType* primTypePtr = reinterpret_cast<PrimType*>(&ptr);
   766       return Cast::fromPrimType(op(primTypePtr));
   767     }
   768 };
   770 template<typename T>
   771 struct IntrinsicAddSub : public IntrinsicApplyHelper<T>
   772 {
   773     typedef typename IntrinsicApplyHelper<T>::ValueType ValueType;
   774     typedef typename IntrinsicBase<T>::Primitives Primitives;
   775     static ValueType add(ValueType& ptr, ValueType val) {
   776       return applyBinaryFunction(&Primitives::add, ptr, val);
   777     }
   778     static ValueType sub(ValueType& ptr, ValueType val) {
   779       return applyBinaryFunction(&Primitives::sub, ptr, val);
   780     }
   781 };
   783 template<typename T>
   784 struct IntrinsicAddSub<T*> : public IntrinsicApplyHelper<T*>
   785 {
   786     typedef typename IntrinsicApplyHelper<T*>::ValueType ValueType;
   787     static ValueType add(ValueType& ptr, ptrdiff_t amount) {
   788       return applyBinaryFunction(&Primitives::add, ptr,
   789                                  (ValueType)(amount * sizeof(ValueType)));
   790     }
   791     static ValueType sub(ValueType& ptr, ptrdiff_t amount) {
   792       return applyBinaryFunction(&Primitives::sub, ptr,
   793                                  (ValueType)(amount * sizeof(ValueType)));
   794     }
   795 };
   797 template<typename T>
   798 struct IntrinsicIncDec : public IntrinsicAddSub<T>
   799 {
   800     typedef typename IntrinsicAddSub<T>::ValueType ValueType;
   801     static ValueType inc(ValueType& ptr) { return add(ptr, 1); }
   802     static ValueType dec(ValueType& ptr) { return sub(ptr, 1); }
   803 };
   805 template<typename T, MemoryOrdering Order>
   806 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
   807                           public IntrinsicIncDec<T>
   808 {
   809     typedef typename IntrinsicIncDec<T>::ValueType ValueType;
   810     static ValueType or_(ValueType& ptr, T val) {
   811       return applyBinaryFunction(&Primitives::or_, ptr, val);
   812     }
   813     static ValueType xor_(ValueType& ptr, T val) {
   814       return applyBinaryFunction(&Primitives::xor_, ptr, val);
   815     }
   816     static ValueType and_(ValueType& ptr, T val) {
   817       return applyBinaryFunction(&Primitives::and_, ptr, val);
   818     }
   819 };
   821 template<typename T, MemoryOrdering Order>
   822 struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
   823                                      public IntrinsicIncDec<T*>
   824 {
   825     typedef typename IntrinsicMemoryOps<T*, Order>::ValueType ValueType;
   826 };
   828 } // namespace detail
   829 } // namespace mozilla
   831 #else
   832 # error "Atomic compiler intrinsics are not supported on your platform"
   833 #endif
   835 namespace mozilla {
   837 namespace detail {
   839 template<typename T, MemoryOrdering Order>
   840 class AtomicBase
   841 {
   842     // We only support 32-bit types on 32-bit Windows, which constrains our
   843     // implementation elsewhere.  But we support pointer-sized types everywhere.
   844     static_assert(sizeof(T) == 4 || (sizeof(uintptr_t) == 8 && sizeof(T) == 8),
   845                   "mozilla/Atomics.h only supports 32-bit and pointer-sized types");
   847   protected:
   848     typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics;
   849     typename Intrinsics::ValueType mValue;
   851   public:
   852     MOZ_CONSTEXPR AtomicBase() : mValue() {}
   853     MOZ_CONSTEXPR AtomicBase(T aInit) : mValue(aInit) {}
   855     // Note: we can't provide operator T() here because Atomic<bool> inherits
    856    // from AtomicBase with T=uint32_t and not T=bool. If we implemented
   857     // operator T() here, it would cause errors when comparing Atomic<bool> with
   858     // a regular bool.
   860     T operator=(T aValue) {
   861       Intrinsics::store(mValue, aValue);
   862       return aValue;
   863     }
   865     /**
   866      * Performs an atomic swap operation.  aValue is stored and the previous
   867      * value of this variable is returned.
   868      */
   869     T exchange(T aValue) {
   870       return Intrinsics::exchange(mValue, aValue);
   871     }
   873     /**
   874      * Performs an atomic compare-and-swap operation and returns true if it
   875      * succeeded. This is equivalent to atomically doing
   876      *
   877      *   if (mValue == aOldValue) {
   878      *     mValue = aNewValue;
   879      *     return true;
   880      *   } else {
   881      *     return false;
   882      *   }
   883      */
   884     bool compareExchange(T aOldValue, T aNewValue) {
   885       return Intrinsics::compareExchange(mValue, aOldValue, aNewValue);
   886     }
   888   private:
   889     template<MemoryOrdering AnyOrder>
   890     AtomicBase(const AtomicBase<T, AnyOrder>& aCopy) MOZ_DELETE;
   891 };
   893 template<typename T, MemoryOrdering Order>
   894 class AtomicBaseIncDec : public AtomicBase<T, Order>
   895 {
   896     typedef typename detail::AtomicBase<T, Order> Base;
   898   public:
   899     MOZ_CONSTEXPR AtomicBaseIncDec() : Base() {}
   900     MOZ_CONSTEXPR AtomicBaseIncDec(T aInit) : Base(aInit) {}
   902     using Base::operator=;
   904     operator T() const { return Base::Intrinsics::load(Base::mValue); }
   905     T operator++(int) { return Base::Intrinsics::inc(Base::mValue); }
   906     T operator--(int) { return Base::Intrinsics::dec(Base::mValue); }
   907     T operator++() { return Base::Intrinsics::inc(Base::mValue) + 1; }
   908     T operator--() { return Base::Intrinsics::dec(Base::mValue) - 1; }
   910   private:
   911     template<MemoryOrdering AnyOrder>
   912     AtomicBaseIncDec(const AtomicBaseIncDec<T, AnyOrder>& aCopy) MOZ_DELETE;
   913 };
   915 } // namespace detail
   917 /**
   918  * A wrapper for a type that enforces that all memory accesses are atomic.
   919  *
   920  * In general, where a variable |T foo| exists, |Atomic<T> foo| can be used in
   921  * its place.  Implementations for integral and pointer types are provided
   922  * below.
   923  *
   924  * Atomic accesses are sequentially consistent by default.  You should
   925  * use the default unless you are tall enough to ride the
   926  * memory-ordering roller coaster (if you're not sure, you aren't) and
   927  * you have a compelling reason to do otherwise.
   928  *
   929  * There is one exception to the case of atomic memory accesses: providing an
   930  * initial value of the atomic value is not guaranteed to be atomic.  This is a
   931  * deliberate design choice that enables static atomic variables to be declared
   932  * without introducing extra static constructors.
   933  */
   934 template<typename T,
   935          MemoryOrdering Order = SequentiallyConsistent,
   936          typename Enable = void>
   937 class Atomic;
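/*
 * Basic usage sketch (editorial illustration; names are hypothetical):
 *
 *   static Atomic<uint32_t> sLiveObjects(0);     // SequentiallyConsistent default
 *
 *   void Track()     { sLiveObjects++; }         // atomic read-modify-write
 *   void Untrack()   { sLiveObjects--; }
 *   uint32_t Count() { return sLiveObjects; }    // atomic load via operator T()
 *
 * Because initialization is not guaranteed to be atomic (see above), static
 * instances like this should be fully constructed before other threads can
 * reach them.
 */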
   939 /**
   940  * Atomic<T> implementation for integral types.
   941  *
   942  * In addition to atomic store and load operations, compound assignment and
   943  * increment/decrement operators are implemented which perform the
   944  * corresponding read-modify-write operation atomically.  Finally, an atomic
   945  * swap method is provided.
   946  */
   947 template<typename T, MemoryOrdering Order>
   948 class Atomic<T, Order, typename EnableIf<IsIntegral<T>::value && !IsSame<T, bool>::value>::Type>
   949   : public detail::AtomicBaseIncDec<T, Order>
   950 {
   951     typedef typename detail::AtomicBaseIncDec<T, Order> Base;
   953   public:
   954     MOZ_CONSTEXPR Atomic() : Base() {}
   955     MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}
   957     using Base::operator=;
   959     T operator+=(T delta) { return Base::Intrinsics::add(Base::mValue, delta) + delta; }
   960     T operator-=(T delta) { return Base::Intrinsics::sub(Base::mValue, delta) - delta; }
   961     T operator|=(T val) { return Base::Intrinsics::or_(Base::mValue, val) | val; }
   962     T operator^=(T val) { return Base::Intrinsics::xor_(Base::mValue, val) ^ val; }
   963     T operator&=(T val) { return Base::Intrinsics::and_(Base::mValue, val) & val; }
   965   private:
   966     Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
   967 };
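/*
 * Sketch of the integral operations (editorial illustration; hypothetical names):
 *
 *   static Atomic<uint32_t> sFlags(0);
 *   uint32_t now = (sFlags |= 0x4);      // compound operators return the new value
 *   sFlags &= ~0x4u;
 *
 *   // A typical compare-and-swap loop built on compareExchange(): keep a
 *   // stored maximum up to date without a lock.
 *   static Atomic<uint32_t> sHighWater(0);
 *   void RecordSize(uint32_t aSize) {
 *     uint32_t old = sHighWater;
 *     while (old < aSize && !sHighWater.compareExchange(old, aSize)) {
 *       old = sHighWater;                // lost a race; reload and retry
 *     }
 *   }
 */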
   969 /**
   970  * Atomic<T> implementation for pointer types.
   971  *
   972  * An atomic compare-and-swap primitive for pointer variables is provided, as
    973  * are atomic increment and decrement operators.  Also provided are the compound
   974  * assignment operators for addition and subtraction. Atomic swap (via
   975  * exchange()) is included as well.
   976  */
   977 template<typename T, MemoryOrdering Order>
   978 class Atomic<T*, Order> : public detail::AtomicBaseIncDec<T*, Order>
   979 {
   980     typedef typename detail::AtomicBaseIncDec<T*, Order> Base;
   982   public:
   983     MOZ_CONSTEXPR Atomic() : Base() {}
   984     MOZ_CONSTEXPR Atomic(T* aInit) : Base(aInit) {}
   986     using Base::operator=;
   988     T* operator+=(ptrdiff_t delta) {
   989       return Base::Intrinsics::add(Base::mValue, delta) + delta;
   990     }
   991     T* operator-=(ptrdiff_t delta) {
   992       return Base::Intrinsics::sub(Base::mValue, delta) - delta;
   993     }
   995   private:
   996     Atomic(Atomic<T*, Order>& aOther) MOZ_DELETE;
   997 };
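/*
 * Sketch for the pointer specialization (editorial illustration; hypothetical names):
 *
 *   static Atomic<uint32_t*> sCursor(nullptr);
 *
 *   sCursor = aBase;                               // atomic store
 *   sCursor += 4;                                  // atomically advance by 4 elements
 *   uint32_t* taken = sCursor.exchange(nullptr);   // atomic swap, returns old pointer
 *   bool restored = sCursor.compareExchange(nullptr, taken);  // CAS it back if still null
 */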
   999 /**
  1000  * Atomic<T> implementation for enum types.
   1002  * The atomic store and load operations and the atomic swap method are provided.
  1003  */
  1004 template<typename T, MemoryOrdering Order>
  1005 class Atomic<T, Order, typename EnableIf<IsEnum<T>::value>::Type>
   1006   : public detail::AtomicBase<T, Order>
   1007 {
   1008     typedef typename detail::AtomicBase<T, Order> Base;
  1010   public:
  1011     MOZ_CONSTEXPR Atomic() : Base() {}
  1012     MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}
  1014     operator T() const { return Base::Intrinsics::load(Base::mValue); }
  1016     using Base::operator=;
  1018   private:
  1019     Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
  1020 };
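/*
 * Sketch for the enum specialization (editorial illustration; the enum and the
 * names are hypothetical):
 *
 *   enum ConnectionState { NotConnected, Connecting, Connected };
 *   static Atomic<ConnectionState> sState(NotConnected);
 *
 *   void Disconnect() {
 *     ConnectionState prev = sState.exchange(NotConnected);   // atomic swap
 *     if (prev == Connected) {
 *       // tear down the connection that was live
 *     }
 *   }
 */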
  1022 /**
  1023  * Atomic<T> implementation for boolean types.
   1025  * The atomic store and load operations and the atomic swap method are provided.
  1027  * Note:
  1029  * - sizeof(Atomic<bool>) != sizeof(bool) for some implementations of
  1030  *   bool and/or some implementations of std::atomic. This is allowed in
  1031  *   [atomic.types.generic]p9.
  1033  * - It's not obvious whether the 8-bit atomic functions on Windows are always
  1034  *   inlined or not. If they are not inlined, the corresponding functions in the
  1035  *   runtime library are not available on Windows XP. This is why we implement
  1036  *   Atomic<bool> with an underlying type of uint32_t.
  1037  */
  1038 template<MemoryOrdering Order>
  1039 class Atomic<bool, Order>
   1040   : protected detail::AtomicBase<uint32_t, Order>
   1041 {
   1042     typedef typename detail::AtomicBase<uint32_t, Order> Base;
  1044   public:
  1045     MOZ_CONSTEXPR Atomic() : Base() {}
  1046     MOZ_CONSTEXPR Atomic(bool aInit) : Base(aInit) {}
  1048     // We provide boolean wrappers for the underlying AtomicBase methods.
  1049     operator bool() const { return Base::Intrinsics::load(Base::mValue); }
  1050     bool operator=(bool aValue) { return Base::operator=(aValue); }
  1051     bool exchange(bool aValue) { return Base::exchange(aValue); }
  1052     bool compareExchange(bool aOldValue, bool aNewValue) {
   1053       return Base::compareExchange(aOldValue, aNewValue);
   1054     }
   1056   private:
  1057     Atomic(Atomic<bool, Order>& aOther) MOZ_DELETE;
  1058 };
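/*
 * Sketch for Atomic<bool> (editorial illustration; hypothetical names):
 *
 *   static Atomic<bool> sInitialized(false);
 *
 *   void EnsureInit() {
 *     // exchange() returns the previous value, so exactly one caller sees
 *     // false and performs the one-time setup.
 *     if (!sInitialized.exchange(true)) {
 *       // ... one-time initialization ...
 *     }
 *   }
 */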
  1060 } // namespace mozilla
  1062 #endif /* mozilla_Atomics_h */
