mfbt/Atomics.h

author       Michael Schloh von Bennewitz <michael@schloh.com>
date         Tue, 06 Jan 2015 21:39:09 +0100
branch       TOR_BUG_9701
changeset    8:97036ab72558
permissions  -rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
     3 /* This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 /*
     8  * Implements (almost always) lock-free atomic operations. The operations here
     9  * are a subset of that which can be found in C++11's <atomic> header, with a
    10  * different API to enforce consistent memory ordering constraints.
    11  *
    12  * Anyone caught using |volatile| for inter-thread memory safety needs to be
    13  * sent a copy of this header and the C++11 standard.
    14  */
    16 #ifndef mozilla_Atomics_h
    17 #define mozilla_Atomics_h
    19 #include "mozilla/Assertions.h"
    20 #include "mozilla/Attributes.h"
    21 #include "mozilla/Compiler.h"
    22 #include "mozilla/TypeTraits.h"
    24 #include <stdint.h>
    26 /*
    27  * Our minimum deployment target on clang/OS X is OS X 10.6, whose SDK
    28  * does not have <atomic>.  So be sure to check for <atomic> support
    29  * along with C++0x support.
    30  */
    31 #if defined(__clang__) || defined(__GNUC__)
    32    /*
    33     * Clang doesn't like <atomic> from libstdc++ before 4.7 due to the
    34     * loose typing of the atomic builtins. GCC 4.5 and 4.6 lack inline
    35     * definitions for unspecialized std::atomic and cause linking errors.
    36     * Therefore, we require at least 4.7.0 for using libstdc++.
    37     */
    38 #  if MOZ_USING_LIBSTDCXX && MOZ_LIBSTDCXX_VERSION_AT_LEAST(4, 7, 0)
    39 #    define MOZ_HAVE_CXX11_ATOMICS
    40 #  elif MOZ_USING_LIBCXX
    41 #    define MOZ_HAVE_CXX11_ATOMICS
    42 #  endif
    43 #elif defined(_MSC_VER) && _MSC_VER >= 1700
    44 #  if defined(DEBUG)
    45      /*
    46       * Provide our own failure code since we're having trouble linking to
    47       * std::_Debug_message (bug 982310).
    48       */
    49 #    define _INVALID_MEMORY_ORDER MOZ_CRASH("Invalid memory order")
    50 #  endif
    51 #  define MOZ_HAVE_CXX11_ATOMICS
    52 #endif
    54 namespace mozilla {
    56 /**
    57  * An enum of memory ordering possibilities for atomics.
    58  *
    59  * Memory ordering is the observable state of distinct values in memory.
    60  * (It's a separate concept from atomicity, which concerns whether an
    61  * operation can ever be observed in an intermediate state.  Don't
    62  * conflate the two!)  Given a sequence of operations in source code on
    63  * memory, it is *not* always the case that, at all times and on all
    64  * cores, those operations will appear to have occurred in that exact
    65  * sequence.  First, the compiler might reorder that sequence, if it
    66  * thinks another ordering will be more efficient.  Second, the CPU may
    67  * not expose so consistent a view of memory.  CPUs will often perform
    68  * their own instruction reordering, above and beyond that performed by
    69  * the compiler.  And each core has its own memory caches, and accesses
    70  * (reads and writes both) to "memory" may only resolve to out-of-date
    71  * cache entries -- not to the "most recently" performed operation in
    72  * some global sense.  Any access to a value that may be used by
    73  * multiple threads, potentially across multiple cores, must therefore
    74  * have a memory ordering imposed on it, for all code on all
    75  * threads/cores to have a sufficiently coherent worldview.
    76  *
    77  * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
    78  * http://en.cppreference.com/w/cpp/atomic/memory_order go into more
    79  * detail on all this, including examples of how each mode works.
    80  *
    81  * Note that for simplicity and practicality, not all of the modes in
    82  * C++11 are supported.  The missing C++11 modes are either subsumed by
    83  * the modes we provide below, or not relevant for the CPUs we support
    84  * in Gecko.  These three modes are confusing enough as it is!
    85  */
    86 enum MemoryOrdering {
    87   /*
    88    * Relaxed ordering is the simplest memory ordering: none at all.
    89    * When the result of a write is observed, nothing may be inferred
    90    * about other memory.  Writes ostensibly performed "before" on the
    91    * writing thread may not yet be visible.  Writes performed "after" on
    92    * the writing thread may already be visible, if the compiler or CPU
    93    * reordered them.  (The latter can happen if reads and/or writes get
    94    * held up in per-processor caches.)  Relaxed ordering means
    95    * operations can always use cached values (as long as the actual
    96    * updates to atomic values actually occur, correctly, eventually), so
    97    * it's usually the fastest sort of atomic access.  For this reason,
    98    * *it's also the most dangerous kind of access*.
    99    *
   100    * Relaxed ordering is good for things like process-wide statistics
   101    * counters that don't need to be consistent with anything else, so
   102    * long as updates themselves are atomic.  (And so long as any
   103    * observations of that value can tolerate being out-of-date -- if you
   104    * need some sort of up-to-date value, you need some sort of other
   105    * synchronizing operation.)  It's *not* good for locks, mutexes,
   106    * reference counts, etc. that mediate access to other memory, or must
   107    * be observably consistent with other memory.
   108    *
   109    * x86 architectures don't take advantage of the optimization
   110    * opportunities that relaxed ordering permits.  Thus it's possible
   111    * that using relaxed ordering will "work" on x86 but fail elsewhere
   112    * (ARM, say, which *does* implement non-sequentially-consistent
   113    * relaxed ordering semantics).  Be extra-careful using relaxed
   114    * ordering if you can't easily test non-x86 architectures!
   115    */
   116   Relaxed,
   117   /*
   118    * When an atomic value is updated with ReleaseAcquire ordering, and
   119    * that new value is observed with ReleaseAcquire ordering, prior
   120    * writes (atomic or not) are also observable.  What ReleaseAcquire
   121    * *doesn't* give you is any observable ordering guarantees for
   122    * ReleaseAcquire-ordered operations on different objects.  For
   123    * example, if there are two cores that each perform ReleaseAcquire
   124    * operations on separate objects, each core may or may not observe
   125    * the operations made by the other core.  The only way the cores can
   126    * be synchronized with ReleaseAcquire is if they both
   127    * ReleaseAcquire-access the same object.  This implies that you can't
   128    * necessarily describe some global total ordering of ReleaseAcquire
   129    * operations.
   130    *
   131    * ReleaseAcquire ordering is good for (as the name implies) atomic
   132    * operations on values controlling ownership of things: reference
   133    * counts, mutexes, and the like.  However, if you are thinking about
   134    * using these to implement your own locks or mutexes, you should take
   135    * a good, hard look at actual lock or mutex primitives first.
   136    */
   137   ReleaseAcquire,
   138   /*
   139    * When an atomic value is updated with SequentiallyConsistent
    140    * ordering, all writes are observable when the update is observed, just
   141    * as with ReleaseAcquire ordering.  But, furthermore, a global total
   142    * ordering of SequentiallyConsistent operations *can* be described.
   143    * For example, if two cores perform SequentiallyConsistent operations
   144    * on separate objects, one core will observably perform its update
   145    * (and all previous operations will have completed), then the other
   146    * core will observably perform its update (and all previous
   147    * operations will have completed).  (Although those previous
   148    * operations aren't themselves ordered -- they could be intermixed,
   149    * or ordered if they occur on atomic values with ordering
   150    * requirements.)  SequentiallyConsistent is the *simplest and safest*
   151    * ordering of atomic operations -- it's always as if one operation
   152    * happens, then another, then another, in some order -- and every
   153    * core observes updates to happen in that single order.  Because it
   154    * has the most synchronization requirements, operations ordered this
   155    * way also tend to be slowest.
   156    *
   157    * SequentiallyConsistent ordering can be desirable when multiple
   158    * threads observe objects, and they all have to agree on the
   159    * observable order of changes to them.  People expect
   160    * SequentiallyConsistent ordering, even if they shouldn't, when
   161    * writing code, atomic or otherwise.  SequentiallyConsistent is also
   162    * the ordering of choice when designing lockless data structures.  If
   163    * you don't know what order to use, use this one.
   164    */
   165   SequentiallyConsistent,
   166 };
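/*
 * Usage sketch (editorial illustration; the variable names are hypothetical,
 * and Atomic<> is the wrapper class defined later in this header):
 *
 *   // A process-wide statistics counter: updates must be atomic, but nothing
 *   // else is synchronized through it.
 *   static Atomic<uint32_t, Relaxed> sBytesWritten(0);
 *   sBytesWritten += aLen;
 *
 *   // Publishing data with ReleaseAcquire: writes made before the store of
 *   // |true| are visible to any thread that observes |true|.
 *   static Atomic<bool, ReleaseAcquire> sDataReady(false);
 *   // writer: fill in the shared structure, then
 *   sDataReady = true;
 *   // reader: if (sDataReady) { ...the shared structure is safe to read... }
 *
 *   // When in doubt, take the default, SequentiallyConsistent ordering:
 *   static Atomic<int> sState(0);
 */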
   168 } // namespace mozilla
   170 // Build up the underlying intrinsics.
   171 #ifdef MOZ_HAVE_CXX11_ATOMICS
   173 #  include <atomic>
   175 namespace mozilla {
   176 namespace detail {
   178 /*
   179  * We provide CompareExchangeFailureOrder to work around a bug in some
   180  * versions of GCC's <atomic> header.  See bug 898491.
   181  */
   182 template<MemoryOrdering Order> struct AtomicOrderConstraints;
   184 template<>
   185 struct AtomicOrderConstraints<Relaxed>
   186 {
   187     static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed;
   188     static const std::memory_order LoadOrder = std::memory_order_relaxed;
   189     static const std::memory_order StoreOrder = std::memory_order_relaxed;
   190     static const std::memory_order CompareExchangeFailureOrder =
   191       std::memory_order_relaxed;
   192 };
   194 template<>
   195 struct AtomicOrderConstraints<ReleaseAcquire>
   196 {
   197     static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel;
   198     static const std::memory_order LoadOrder = std::memory_order_acquire;
   199     static const std::memory_order StoreOrder = std::memory_order_release;
   200     static const std::memory_order CompareExchangeFailureOrder =
   201       std::memory_order_acquire;
   202 };
   204 template<>
   205 struct AtomicOrderConstraints<SequentiallyConsistent>
   206 {
   207     static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst;
   208     static const std::memory_order LoadOrder = std::memory_order_seq_cst;
   209     static const std::memory_order StoreOrder = std::memory_order_seq_cst;
   210     static const std::memory_order CompareExchangeFailureOrder =
   211       std::memory_order_seq_cst;
   212 };
   214 template<typename T, MemoryOrdering Order>
   215 struct IntrinsicBase
   216 {
   217     typedef std::atomic<T> ValueType;
   218     typedef AtomicOrderConstraints<Order> OrderedOp;
   219 };
   221 template<typename T, MemoryOrdering Order>
   222 struct IntrinsicMemoryOps : public IntrinsicBase<T, Order>
   223 {
   224     typedef IntrinsicBase<T, Order> Base;
   225     static T load(const typename Base::ValueType& ptr) {
   226       return ptr.load(Base::OrderedOp::LoadOrder);
   227     }
   228     static void store(typename Base::ValueType& ptr, T val) {
   229       ptr.store(val, Base::OrderedOp::StoreOrder);
   230     }
   231     static T exchange(typename Base::ValueType& ptr, T val) {
   232       return ptr.exchange(val, Base::OrderedOp::AtomicRMWOrder);
   233     }
   234     static bool compareExchange(typename Base::ValueType& ptr, T oldVal, T newVal) {
   235       return ptr.compare_exchange_strong(oldVal, newVal,
   236                                          Base::OrderedOp::AtomicRMWOrder,
   237                                          Base::OrderedOp::CompareExchangeFailureOrder);
   238     }
   239 };
   241 template<typename T, MemoryOrdering Order>
   242 struct IntrinsicAddSub : public IntrinsicBase<T, Order>
   243 {
   244     typedef IntrinsicBase<T, Order> Base;
   245     static T add(typename Base::ValueType& ptr, T val) {
   246       return ptr.fetch_add(val, Base::OrderedOp::AtomicRMWOrder);
   247     }
   248     static T sub(typename Base::ValueType& ptr, T val) {
   249       return ptr.fetch_sub(val, Base::OrderedOp::AtomicRMWOrder);
   250     }
   251 };
   253 template<typename T, MemoryOrdering Order>
   254 struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order>
   255 {
   256     typedef IntrinsicBase<T*, Order> Base;
   257     static T* add(typename Base::ValueType& ptr, ptrdiff_t val) {
   258       return ptr.fetch_add(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder);
   259     }
   260     static T* sub(typename Base::ValueType& ptr, ptrdiff_t val) {
   261       return ptr.fetch_sub(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder);
   262     }
   263   private:
   264     /*
   265      * GCC 4.6's <atomic> header has a bug where adding X to an
   266      * atomic<T*> is not the same as adding X to a T*.  Hence the need
   267      * for this function to provide the correct addend.
   268      */
   269     static ptrdiff_t fixupAddend(ptrdiff_t val) {
   270 #if defined(__clang__) || defined(_MSC_VER)
   271       return val;
   272 #elif defined(__GNUC__) && MOZ_GCC_VERSION_AT_LEAST(4, 6, 0) && \
   273       !MOZ_GCC_VERSION_AT_LEAST(4, 7, 0)
   274       return val * sizeof(T);
   275 #else
   276       return val;
   277 #endif
   278     }
   279 };
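/*
 * Worked example (editorial note): the workaround above implies that the buggy
 * libstdc++ 4.6 fetch_add/fetch_sub treat the addend as a raw byte count.  With
 * T = uint32_t, fetch_add(1) would then advance the stored pointer by one byte
 * instead of one element, so fixupAddend() scales the addend by sizeof(T)
 * (here, 4) on that toolchain and leaves it untouched everywhere else, keeping
 * Atomic<T*> arithmetic in whole elements.
 */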
   281 template<typename T, MemoryOrdering Order>
   282 struct IntrinsicIncDec : public IntrinsicAddSub<T, Order>
   283 {
   284     typedef IntrinsicBase<T, Order> Base;
   285     static T inc(typename Base::ValueType& ptr) {
   286       return IntrinsicAddSub<T, Order>::add(ptr, 1);
   287     }
   288     static T dec(typename Base::ValueType& ptr) {
   289       return IntrinsicAddSub<T, Order>::sub(ptr, 1);
   290     }
   291 };
   293 template<typename T, MemoryOrdering Order>
   294 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
   295                           public IntrinsicIncDec<T, Order>
   296 {
   297     typedef IntrinsicBase<T, Order> Base;
   298     static T or_(typename Base::ValueType& ptr, T val) {
   299       return ptr.fetch_or(val, Base::OrderedOp::AtomicRMWOrder);
   300     }
   301     static T xor_(typename Base::ValueType& ptr, T val) {
   302       return ptr.fetch_xor(val, Base::OrderedOp::AtomicRMWOrder);
   303     }
   304     static T and_(typename Base::ValueType& ptr, T val) {
   305       return ptr.fetch_and(val, Base::OrderedOp::AtomicRMWOrder);
   306     }
   307 };
   309 template<typename T, MemoryOrdering Order>
   310 struct AtomicIntrinsics<T*, Order>
   311   : public IntrinsicMemoryOps<T*, Order>, public IntrinsicIncDec<T*, Order>
   312 {
   313 };
   315 } // namespace detail
   316 } // namespace mozilla
   318 #elif defined(__GNUC__)
   320 namespace mozilla {
   321 namespace detail {
   323 /*
   324  * The __sync_* family of intrinsics is documented here:
   325  *
   326  * http://gcc.gnu.org/onlinedocs/gcc-4.6.4/gcc/Atomic-Builtins.html
   327  *
   328  * While these intrinsics are deprecated in favor of the newer __atomic_*
    329  * family of intrinsics:
   330  *
   331  * http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/_005f_005fatomic-Builtins.html
   332  *
   333  * any GCC version that supports the __atomic_* intrinsics will also support
   334  * the <atomic> header and so will be handled above.  We provide a version of
   335  * atomics using the __sync_* intrinsics to support older versions of GCC.
   336  *
   337  * All __sync_* intrinsics that we use below act as full memory barriers, for
   338  * both compiler and hardware reordering, except for __sync_lock_test_and_set,
    339  * which is only an acquire barrier.  When we call __sync_lock_test_and_set,
   340  * we add a barrier above it as appropriate.
   341  */
   343 template<MemoryOrdering Order> struct Barrier;
   345 /*
   346  * Some processors (in particular, x86) don't require quite so many calls to
    347  * __sync_synchronize as our specializations of Barrier produce.  If
   348  * performance turns out to be an issue, defining these specializations
   349  * on a per-processor basis would be a good first tuning step.
   350  */
   352 template<>
   353 struct Barrier<Relaxed>
   354 {
   355     static void beforeLoad() {}
   356     static void afterLoad() {}
   357     static void beforeStore() {}
   358     static void afterStore() {}
   359 };
   361 template<>
   362 struct Barrier<ReleaseAcquire>
   363 {
   364     static void beforeLoad() {}
   365     static void afterLoad() { __sync_synchronize(); }
   366     static void beforeStore() { __sync_synchronize(); }
   367     static void afterStore() {}
   368 };
   370 template<>
   371 struct Barrier<SequentiallyConsistent>
   372 {
   373     static void beforeLoad() { __sync_synchronize(); }
   374     static void afterLoad() { __sync_synchronize(); }
   375     static void beforeStore() { __sync_synchronize(); }
   376     static void afterStore() { __sync_synchronize(); }
   377 };
   379 template<typename T, MemoryOrdering Order>
   380 struct IntrinsicMemoryOps
   381 {
   382     static T load(const T& ptr) {
   383       Barrier<Order>::beforeLoad();
   384       T val = ptr;
   385       Barrier<Order>::afterLoad();
   386       return val;
   387     }
   388     static void store(T& ptr, T val) {
   389       Barrier<Order>::beforeStore();
   390       ptr = val;
   391       Barrier<Order>::afterStore();
   392     }
   393     static T exchange(T& ptr, T val) {
   394       // __sync_lock_test_and_set is only an acquire barrier; loads and stores
   395       // can't be moved up from after to before it, but they can be moved down
   396       // from before to after it.  We may want a stricter ordering, so we need
   397       // an explicit barrier.
   399       Barrier<Order>::beforeStore();
   400       return __sync_lock_test_and_set(&ptr, val);
   401     }
   402     static bool compareExchange(T& ptr, T oldVal, T newVal) {
   403       return __sync_bool_compare_and_swap(&ptr, oldVal, newVal);
   404     }
   405 };
   407 template<typename T>
   408 struct IntrinsicAddSub
   409 {
   410     typedef T ValueType;
   411     static T add(T& ptr, T val) {
   412       return __sync_fetch_and_add(&ptr, val);
   413     }
   414     static T sub(T& ptr, T val) {
   415       return __sync_fetch_and_sub(&ptr, val);
   416     }
   417 };
   419 template<typename T>
   420 struct IntrinsicAddSub<T*>
   421 {
   422     typedef T* ValueType;
   423     /*
   424      * The reinterpret_casts are needed so that
   425      * __sync_fetch_and_{add,sub} will properly type-check.
   426      *
   427      * Also, these functions do not provide standard semantics for
   428      * pointer types, so we need to adjust the addend.
   429      */
   430     static ValueType add(ValueType& ptr, ptrdiff_t val) {
   431       ValueType amount = reinterpret_cast<ValueType>(val * sizeof(T));
   432       return __sync_fetch_and_add(&ptr, amount);
   433     }
   434     static ValueType sub(ValueType& ptr, ptrdiff_t val) {
   435       ValueType amount = reinterpret_cast<ValueType>(val * sizeof(T));
   436       return __sync_fetch_and_sub(&ptr, amount);
   437     }
   438 };
   440 template<typename T>
   441 struct IntrinsicIncDec : public IntrinsicAddSub<T>
   442 {
   443     static T inc(T& ptr) { return IntrinsicAddSub<T>::add(ptr, 1); }
   444     static T dec(T& ptr) { return IntrinsicAddSub<T>::sub(ptr, 1); }
   445 };
   447 template<typename T, MemoryOrdering Order>
   448 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
   449                           public IntrinsicIncDec<T>
   450 {
   451     static T or_(T& ptr, T val) {
   452       return __sync_fetch_and_or(&ptr, val);
   453     }
   454     static T xor_(T& ptr, T val) {
   455       return __sync_fetch_and_xor(&ptr, val);
   456     }
   457     static T and_(T& ptr, T val) {
   458       return __sync_fetch_and_and(&ptr, val);
   459     }
   460 };
   462 template<typename T, MemoryOrdering Order>
   463 struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
   464                                      public IntrinsicIncDec<T*>
   465 {
   466 };
   468 } // namespace detail
   469 } // namespace mozilla
   471 #elif defined(_MSC_VER)
   473 /*
   474  * Windows comes with a full complement of atomic operations.
   475  * Unfortunately, most of those aren't available for Windows XP (even if
   476  * the compiler supports intrinsics for them), which is the oldest
   477  * version of Windows we support.  Therefore, we only provide operations
   478  * on 32-bit datatypes for 32-bit Windows versions; for 64-bit Windows
   479  * versions, we support 64-bit datatypes as well.
   480  *
   481  * To avoid namespace pollution issues, we declare whatever functions we
   482  * need ourselves.
   483  */
   485 extern "C" {
   486 long __cdecl _InterlockedExchangeAdd(long volatile* dst, long value);
   487 long __cdecl _InterlockedOr(long volatile* dst, long value);
   488 long __cdecl _InterlockedXor(long volatile* dst, long value);
   489 long __cdecl _InterlockedAnd(long volatile* dst, long value);
   490 long __cdecl _InterlockedExchange(long volatile *dst, long value);
   491 long __cdecl _InterlockedCompareExchange(long volatile *dst, long newVal, long oldVal);
   492 }
   494 #  pragma intrinsic(_InterlockedExchangeAdd)
   495 #  pragma intrinsic(_InterlockedOr)
   496 #  pragma intrinsic(_InterlockedXor)
   497 #  pragma intrinsic(_InterlockedAnd)
   498 #  pragma intrinsic(_InterlockedExchange)
   499 #  pragma intrinsic(_InterlockedCompareExchange)
   501 namespace mozilla {
   502 namespace detail {
   504 #  if !defined(_M_IX86) && !defined(_M_X64)
   505      /*
   506       * The implementations below are optimized for x86ish systems.  You
   507       * will have to modify them if you are porting to Windows on a
   508       * different architecture.
   509       */
   510 #    error "Unknown CPU type"
   511 #  endif
   513 /*
   514  * The PrimitiveIntrinsics template should define |Type|, the datatype of size
   515  * DataSize upon which we operate, and the following eight functions.
   516  *
   517  * static Type add(Type* ptr, Type val);
   518  * static Type sub(Type* ptr, Type val);
   519  * static Type or_(Type* ptr, Type val);
   520  * static Type xor_(Type* ptr, Type val);
   521  * static Type and_(Type* ptr, Type val);
   522  *
   523  *   These functions perform the obvious operation on the value contained in
   524  *   |*ptr| combined with |val| and return the value previously stored in
   525  *   |*ptr|.
   526  *
   527  * static void store(Type* ptr, Type val);
   528  *
   529  *   This function atomically stores |val| into |*ptr| and must provide a full
   530  *   memory fence after the store to prevent compiler and hardware instruction
   531  *   reordering.  It should also act as a compiler barrier to prevent reads and
   532  *   writes from moving to after the store.
   533  *
   534  * static Type exchange(Type* ptr, Type val);
   535  *
   536  *   This function atomically stores |val| into |*ptr| and returns the previous
    537  *   contents of |*ptr|.
   538  *
   539  * static bool compareExchange(Type* ptr, Type oldVal, Type newVal);
   540  *
   541  *   This function atomically performs the following operation:
   542  *
   543  *     if (*ptr == oldVal) {
   544  *       *ptr = newVal;
   545  *       return true;
   546  *     } else {
   547  *       return false;
   548  *     }
   549  *
   550  */
   551 template<size_t DataSize> struct PrimitiveIntrinsics;
   553 template<>
   554 struct PrimitiveIntrinsics<4>
   555 {
   556     typedef long Type;
   558     static Type add(Type* ptr, Type val) {
   559       return _InterlockedExchangeAdd(ptr, val);
   560     }
   561     static Type sub(Type* ptr, Type val) {
   562       /*
   563        * _InterlockedExchangeSubtract isn't available before Windows 7,
   564        * and we must support Windows XP.
   565        */
   566       return _InterlockedExchangeAdd(ptr, -val);
   567     }
   568     static Type or_(Type* ptr, Type val) {
   569       return _InterlockedOr(ptr, val);
   570     }
   571     static Type xor_(Type* ptr, Type val) {
   572       return _InterlockedXor(ptr, val);
   573     }
   574     static Type and_(Type* ptr, Type val) {
   575       return _InterlockedAnd(ptr, val);
   576     }
   577     static void store(Type* ptr, Type val) {
   578       _InterlockedExchange(ptr, val);
   579     }
   580     static Type exchange(Type* ptr, Type val) {
   581       return _InterlockedExchange(ptr, val);
   582     }
   583     static bool compareExchange(Type* ptr, Type oldVal, Type newVal) {
   584       return _InterlockedCompareExchange(ptr, newVal, oldVal) == oldVal;
   585     }
   586 };
   588 #  if defined(_M_X64)
   590 extern "C" {
   591 long long __cdecl _InterlockedExchangeAdd64(long long volatile* dst,
   592                                             long long value);
   593 long long __cdecl _InterlockedOr64(long long volatile* dst,
   594                                    long long value);
   595 long long __cdecl _InterlockedXor64(long long volatile* dst,
   596                                     long long value);
   597 long long __cdecl _InterlockedAnd64(long long volatile* dst,
   598                                     long long value);
   599 long long __cdecl _InterlockedExchange64(long long volatile* dst,
   600                                          long long value);
   601 long long __cdecl _InterlockedCompareExchange64(long long volatile* dst,
   602                                                 long long newVal,
   603                                                 long long oldVal);
   604 }
   606 #    pragma intrinsic(_InterlockedExchangeAdd64)
   607 #    pragma intrinsic(_InterlockedOr64)
   608 #    pragma intrinsic(_InterlockedXor64)
   609 #    pragma intrinsic(_InterlockedAnd64)
   610 #    pragma intrinsic(_InterlockedExchange64)
   611 #    pragma intrinsic(_InterlockedCompareExchange64)
   613 template <>
   614 struct PrimitiveIntrinsics<8>
   615 {
   616     typedef __int64 Type;
   618     static Type add(Type* ptr, Type val) {
   619       return _InterlockedExchangeAdd64(ptr, val);
   620     }
   621     static Type sub(Type* ptr, Type val) {
   622       /*
   623        * There is no _InterlockedExchangeSubtract64.
   624        */
   625       return _InterlockedExchangeAdd64(ptr, -val);
   626     }
   627     static Type or_(Type* ptr, Type val) {
   628       return _InterlockedOr64(ptr, val);
   629     }
   630     static Type xor_(Type* ptr, Type val) {
   631       return _InterlockedXor64(ptr, val);
   632     }
   633     static Type and_(Type* ptr, Type val) {
   634       return _InterlockedAnd64(ptr, val);
   635     }
   636     static void store(Type* ptr, Type val) {
   637       _InterlockedExchange64(ptr, val);
   638     }
   639     static Type exchange(Type* ptr, Type val) {
   640       return _InterlockedExchange64(ptr, val);
   641     }
   642     static bool compareExchange(Type* ptr, Type oldVal, Type newVal) {
   643       return _InterlockedCompareExchange64(ptr, newVal, oldVal) == oldVal;
   644     }
   645 };
   647 #  endif
   649 extern "C" { void _ReadWriteBarrier(); }
   651 #  pragma intrinsic(_ReadWriteBarrier)
   653 template<MemoryOrdering Order> struct Barrier;
   655 /*
   656  * We do not provide an afterStore method in Barrier, as Relaxed and
   657  * ReleaseAcquire orderings do not require one, and the required barrier
   658  * for SequentiallyConsistent is handled by PrimitiveIntrinsics.
   659  */
   661 template<>
   662 struct Barrier<Relaxed>
   663 {
   664     static void beforeLoad() {}
   665     static void afterLoad() {}
   666     static void beforeStore() {}
   667 };
   669 template<>
   670 struct Barrier<ReleaseAcquire>
   671 {
   672     static void beforeLoad() {}
   673     static void afterLoad() { _ReadWriteBarrier(); }
   674     static void beforeStore() { _ReadWriteBarrier(); }
   675 };
   677 template<>
   678 struct Barrier<SequentiallyConsistent>
   679 {
   680     static void beforeLoad() { _ReadWriteBarrier(); }
   681     static void afterLoad() { _ReadWriteBarrier(); }
   682     static void beforeStore() { _ReadWriteBarrier(); }
   683 };
   685 template<typename PrimType, typename T>
   686 struct CastHelper
   687 {
   688   static PrimType toPrimType(T val) { return static_cast<PrimType>(val); }
   689   static T fromPrimType(PrimType val) { return static_cast<T>(val); }
   690 };
   692 template<typename PrimType, typename T>
   693 struct CastHelper<PrimType, T*>
   694 {
   695   static PrimType toPrimType(T* val) { return reinterpret_cast<PrimType>(val); }
   696   static T* fromPrimType(PrimType val) { return reinterpret_cast<T*>(val); }
   697 };
   699 template<typename T>
   700 struct IntrinsicBase
   701 {
   702     typedef T ValueType;
   703     typedef PrimitiveIntrinsics<sizeof(T)> Primitives;
   704     typedef typename Primitives::Type PrimType;
   705     static_assert(sizeof(PrimType) == sizeof(T),
   706                   "Selection of PrimitiveIntrinsics was wrong");
   707     typedef CastHelper<PrimType, T> Cast;
   708 };
   710 template<typename T, MemoryOrdering Order>
   711 struct IntrinsicMemoryOps : public IntrinsicBase<T>
   712 {
   713     typedef typename IntrinsicBase<T>::ValueType ValueType;
   714     typedef typename IntrinsicBase<T>::Primitives Primitives;
   715     typedef typename IntrinsicBase<T>::PrimType PrimType;
   716     typedef typename IntrinsicBase<T>::Cast Cast;
   717     static ValueType load(const ValueType& ptr) {
   718       Barrier<Order>::beforeLoad();
   719       ValueType val = ptr;
   720       Barrier<Order>::afterLoad();
   721       return val;
   722     }
   723     static void store(ValueType& ptr, ValueType val) {
   724       // For SequentiallyConsistent, Primitives::store() will generate the
   725       // proper memory fence.  Everything else just needs a barrier before
   726       // the store.
   727       if (Order == SequentiallyConsistent) {
   728         Primitives::store(reinterpret_cast<PrimType*>(&ptr),
   729                           Cast::toPrimType(val));
   730       } else {
   731         Barrier<Order>::beforeStore();
   732         ptr = val;
   733       }
   734     }
   735     static ValueType exchange(ValueType& ptr, ValueType val) {
   736       PrimType oldval =
   737         Primitives::exchange(reinterpret_cast<PrimType*>(&ptr),
   738                              Cast::toPrimType(val));
   739       return Cast::fromPrimType(oldval);
   740     }
   741     static bool compareExchange(ValueType& ptr, ValueType oldVal, ValueType newVal) {
   742       return Primitives::compareExchange(reinterpret_cast<PrimType*>(&ptr),
   743                                          Cast::toPrimType(oldVal),
   744                                          Cast::toPrimType(newVal));
   745     }
   746 };
   748 template<typename T>
   749 struct IntrinsicApplyHelper : public IntrinsicBase<T>
   750 {
   751     typedef typename IntrinsicBase<T>::ValueType ValueType;
   752     typedef typename IntrinsicBase<T>::PrimType PrimType;
   753     typedef typename IntrinsicBase<T>::Cast Cast;
   754     typedef PrimType (*BinaryOp)(PrimType*, PrimType);
   755     typedef PrimType (*UnaryOp)(PrimType*);
   757     static ValueType applyBinaryFunction(BinaryOp op, ValueType& ptr,
   758                                          ValueType val) {
   759       PrimType* primTypePtr = reinterpret_cast<PrimType*>(&ptr);
   760       PrimType primTypeVal = Cast::toPrimType(val);
   761       return Cast::fromPrimType(op(primTypePtr, primTypeVal));
   762     }
   764     static ValueType applyUnaryFunction(UnaryOp op, ValueType& ptr) {
   765       PrimType* primTypePtr = reinterpret_cast<PrimType*>(&ptr);
   766       return Cast::fromPrimType(op(primTypePtr));
   767     }
   768 };
   770 template<typename T>
   771 struct IntrinsicAddSub : public IntrinsicApplyHelper<T>
   772 {
   773     typedef typename IntrinsicApplyHelper<T>::ValueType ValueType;
   774     typedef typename IntrinsicBase<T>::Primitives Primitives;
   775     static ValueType add(ValueType& ptr, ValueType val) {
   776       return applyBinaryFunction(&Primitives::add, ptr, val);
   777     }
   778     static ValueType sub(ValueType& ptr, ValueType val) {
   779       return applyBinaryFunction(&Primitives::sub, ptr, val);
   780     }
   781 };
   783 template<typename T>
   784 struct IntrinsicAddSub<T*> : public IntrinsicApplyHelper<T*>
   785 {
   786     typedef typename IntrinsicApplyHelper<T*>::ValueType ValueType;
   787     static ValueType add(ValueType& ptr, ptrdiff_t amount) {
   788       return applyBinaryFunction(&Primitives::add, ptr,
   789                                  (ValueType)(amount * sizeof(ValueType)));
   790     }
   791     static ValueType sub(ValueType& ptr, ptrdiff_t amount) {
   792       return applyBinaryFunction(&Primitives::sub, ptr,
   793                                  (ValueType)(amount * sizeof(ValueType)));
   794     }
   795 };
   797 template<typename T>
   798 struct IntrinsicIncDec : public IntrinsicAddSub<T>
   799 {
   800     typedef typename IntrinsicAddSub<T>::ValueType ValueType;
   801     static ValueType inc(ValueType& ptr) { return add(ptr, 1); }
   802     static ValueType dec(ValueType& ptr) { return sub(ptr, 1); }
   803 };
   805 template<typename T, MemoryOrdering Order>
   806 struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
   807                           public IntrinsicIncDec<T>
   808 {
   809     typedef typename IntrinsicIncDec<T>::ValueType ValueType;
   810     static ValueType or_(ValueType& ptr, T val) {
   811       return applyBinaryFunction(&Primitives::or_, ptr, val);
   812     }
   813     static ValueType xor_(ValueType& ptr, T val) {
   814       return applyBinaryFunction(&Primitives::xor_, ptr, val);
   815     }
   816     static ValueType and_(ValueType& ptr, T val) {
   817       return applyBinaryFunction(&Primitives::and_, ptr, val);
   818     }
   819 };
   821 template<typename T, MemoryOrdering Order>
   822 struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
   823                                      public IntrinsicIncDec<T*>
   824 {
   825     typedef typename IntrinsicMemoryOps<T*, Order>::ValueType ValueType;
   826 };
   828 } // namespace detail
   829 } // namespace mozilla
   831 #else
   832 # error "Atomic compiler intrinsics are not supported on your platform"
   833 #endif
   835 namespace mozilla {
   837 namespace detail {
   839 template<typename T, MemoryOrdering Order>
   840 class AtomicBase
   841 {
   842     // We only support 32-bit types on 32-bit Windows, which constrains our
   843     // implementation elsewhere.  But we support pointer-sized types everywhere.
   844     static_assert(sizeof(T) == 4 || (sizeof(uintptr_t) == 8 && sizeof(T) == 8),
   845                   "mozilla/Atomics.h only supports 32-bit and pointer-sized types");
   847   protected:
   848     typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics;
   849     typename Intrinsics::ValueType mValue;
   851   public:
   852     MOZ_CONSTEXPR AtomicBase() : mValue() {}
   853     MOZ_CONSTEXPR AtomicBase(T aInit) : mValue(aInit) {}
   855     // Note: we can't provide operator T() here because Atomic<bool> inherits
    856    // from AtomicBase with T=uint32_t and not T=bool. If we implemented
   857     // operator T() here, it would cause errors when comparing Atomic<bool> with
   858     // a regular bool.
   860     T operator=(T aValue) {
   861       Intrinsics::store(mValue, aValue);
   862       return aValue;
   863     }
   865     /**
   866      * Performs an atomic swap operation.  aValue is stored and the previous
   867      * value of this variable is returned.
   868      */
   869     T exchange(T aValue) {
   870       return Intrinsics::exchange(mValue, aValue);
   871     }
   873     /**
   874      * Performs an atomic compare-and-swap operation and returns true if it
   875      * succeeded. This is equivalent to atomically doing
   876      *
   877      *   if (mValue == aOldValue) {
   878      *     mValue = aNewValue;
   879      *     return true;
   880      *   } else {
   881      *     return false;
   882      *   }
   883      */
   884     bool compareExchange(T aOldValue, T aNewValue) {
   885       return Intrinsics::compareExchange(mValue, aOldValue, aNewValue);
   886     }
   888   private:
   889     template<MemoryOrdering AnyOrder>
   890     AtomicBase(const AtomicBase<T, AnyOrder>& aCopy) MOZ_DELETE;
   891 };
   893 template<typename T, MemoryOrdering Order>
   894 class AtomicBaseIncDec : public AtomicBase<T, Order>
   895 {
   896     typedef typename detail::AtomicBase<T, Order> Base;
   898   public:
   899     MOZ_CONSTEXPR AtomicBaseIncDec() : Base() {}
   900     MOZ_CONSTEXPR AtomicBaseIncDec(T aInit) : Base(aInit) {}
   902     using Base::operator=;
   904     operator T() const { return Base::Intrinsics::load(Base::mValue); }
   905     T operator++(int) { return Base::Intrinsics::inc(Base::mValue); }
   906     T operator--(int) { return Base::Intrinsics::dec(Base::mValue); }
   907     T operator++() { return Base::Intrinsics::inc(Base::mValue) + 1; }
   908     T operator--() { return Base::Intrinsics::dec(Base::mValue) - 1; }
   910   private:
   911     template<MemoryOrdering AnyOrder>
   912     AtomicBaseIncDec(const AtomicBaseIncDec<T, AnyOrder>& aCopy) MOZ_DELETE;
   913 };
   915 } // namespace detail
   917 /**
   918  * A wrapper for a type that enforces that all memory accesses are atomic.
   919  *
   920  * In general, where a variable |T foo| exists, |Atomic<T> foo| can be used in
   921  * its place.  Implementations for integral and pointer types are provided
   922  * below.
   923  *
   924  * Atomic accesses are sequentially consistent by default.  You should
   925  * use the default unless you are tall enough to ride the
   926  * memory-ordering roller coaster (if you're not sure, you aren't) and
   927  * you have a compelling reason to do otherwise.
   928  *
   929  * There is one exception to the case of atomic memory accesses: providing an
   930  * initial value of the atomic value is not guaranteed to be atomic.  This is a
   931  * deliberate design choice that enables static atomic variables to be declared
   932  * without introducing extra static constructors.
   933  */
   934 template<typename T,
   935          MemoryOrdering Order = SequentiallyConsistent,
   936          typename Enable = void>
   937 class Atomic;
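/*
 * Basic usage sketch (editorial illustration; names are hypothetical):
 *
 *   static Atomic<uint32_t> sLiveObjects(0);     // SequentiallyConsistent default
 *
 *   void Track()     { sLiveObjects++; }         // atomic read-modify-write
 *   void Untrack()   { sLiveObjects--; }
 *   uint32_t Count() { return sLiveObjects; }    // atomic load via operator T()
 *
 * Because initialization is not guaranteed to be atomic (see above), static
 * instances like this should be fully constructed before other threads can
 * reach them.
 */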
   939 /**
   940  * Atomic<T> implementation for integral types.
   941  *
   942  * In addition to atomic store and load operations, compound assignment and
   943  * increment/decrement operators are implemented which perform the
   944  * corresponding read-modify-write operation atomically.  Finally, an atomic
   945  * swap method is provided.
   946  */
   947 template<typename T, MemoryOrdering Order>
   948 class Atomic<T, Order, typename EnableIf<IsIntegral<T>::value && !IsSame<T, bool>::value>::Type>
   949   : public detail::AtomicBaseIncDec<T, Order>
   950 {
   951     typedef typename detail::AtomicBaseIncDec<T, Order> Base;
   953   public:
   954     MOZ_CONSTEXPR Atomic() : Base() {}
   955     MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}
   957     using Base::operator=;
   959     T operator+=(T delta) { return Base::Intrinsics::add(Base::mValue, delta) + delta; }
   960     T operator-=(T delta) { return Base::Intrinsics::sub(Base::mValue, delta) - delta; }
   961     T operator|=(T val) { return Base::Intrinsics::or_(Base::mValue, val) | val; }
   962     T operator^=(T val) { return Base::Intrinsics::xor_(Base::mValue, val) ^ val; }
   963     T operator&=(T val) { return Base::Intrinsics::and_(Base::mValue, val) & val; }
   965   private:
   966     Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
   967 };
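/*
 * Sketch of the integral operations (editorial illustration; hypothetical names):
 *
 *   static Atomic<uint32_t> sFlags(0);
 *   uint32_t now = (sFlags |= 0x4);      // compound operators return the new value
 *   sFlags &= ~0x4u;
 *
 *   // A typical compare-and-swap loop built on compareExchange(): keep a
 *   // stored maximum up to date without a lock.
 *   static Atomic<uint32_t> sHighWater(0);
 *   void RecordSize(uint32_t aSize) {
 *     uint32_t old = sHighWater;
 *     while (old < aSize && !sHighWater.compareExchange(old, aSize)) {
 *       old = sHighWater;                // lost a race; reload and retry
 *     }
 *   }
 */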
   969 /**
   970  * Atomic<T> implementation for pointer types.
   971  *
   972  * An atomic compare-and-swap primitive for pointer variables is provided, as
    973  * are atomic increment and decrement operators.  Also provided are the compound
   974  * assignment operators for addition and subtraction. Atomic swap (via
   975  * exchange()) is included as well.
   976  */
   977 template<typename T, MemoryOrdering Order>
   978 class Atomic<T*, Order> : public detail::AtomicBaseIncDec<T*, Order>
   979 {
   980     typedef typename detail::AtomicBaseIncDec<T*, Order> Base;
   982   public:
   983     MOZ_CONSTEXPR Atomic() : Base() {}
   984     MOZ_CONSTEXPR Atomic(T* aInit) : Base(aInit) {}
   986     using Base::operator=;
   988     T* operator+=(ptrdiff_t delta) {
   989       return Base::Intrinsics::add(Base::mValue, delta) + delta;
   990     }
   991     T* operator-=(ptrdiff_t delta) {
   992       return Base::Intrinsics::sub(Base::mValue, delta) - delta;
   993     }
   995   private:
   996     Atomic(Atomic<T*, Order>& aOther) MOZ_DELETE;
   997 };
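/*
 * Sketch for the pointer specialization (editorial illustration; hypothetical names):
 *
 *   static Atomic<uint32_t*> sCursor(nullptr);
 *
 *   sCursor = aBase;                               // atomic store
 *   sCursor += 4;                                  // atomically advance by 4 elements
 *   uint32_t* taken = sCursor.exchange(nullptr);   // atomic swap, returns old pointer
 *   bool restored = sCursor.compareExchange(nullptr, taken);  // CAS it back if still null
 */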
   999 /**
  1000  * Atomic<T> implementation for enum types.
   1002  * The atomic store and load operations and the atomic swap method are provided.
  1003  */
  1004 template<typename T, MemoryOrdering Order>
  1005 class Atomic<T, Order, typename EnableIf<IsEnum<T>::value>::Type>
   1006   : public detail::AtomicBase<T, Order>
   1007 {
   1008     typedef typename detail::AtomicBase<T, Order> Base;
  1010   public:
  1011     MOZ_CONSTEXPR Atomic() : Base() {}
  1012     MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}
  1014     operator T() const { return Base::Intrinsics::load(Base::mValue); }
  1016     using Base::operator=;
  1018   private:
  1019     Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
  1020 };
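/*
 * Sketch for the enum specialization (editorial illustration; the enum and the
 * names are hypothetical):
 *
 *   enum ConnectionState { NotConnected, Connecting, Connected };
 *   static Atomic<ConnectionState> sState(NotConnected);
 *
 *   void Disconnect() {
 *     ConnectionState prev = sState.exchange(NotConnected);   // atomic swap
 *     if (prev == Connected) {
 *       // tear down the connection that was live
 *     }
 *   }
 */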
  1022 /**
  1023  * Atomic<T> implementation for boolean types.
   1025  * The atomic store and load operations and the atomic swap method are provided.
  1027  * Note:
  1029  * - sizeof(Atomic<bool>) != sizeof(bool) for some implementations of
  1030  *   bool and/or some implementations of std::atomic. This is allowed in
  1031  *   [atomic.types.generic]p9.
  1033  * - It's not obvious whether the 8-bit atomic functions on Windows are always
  1034  *   inlined or not. If they are not inlined, the corresponding functions in the
  1035  *   runtime library are not available on Windows XP. This is why we implement
  1036  *   Atomic<bool> with an underlying type of uint32_t.
  1037  */
  1038 template<MemoryOrdering Order>
  1039 class Atomic<bool, Order>
   1040   : protected detail::AtomicBase<uint32_t, Order>
   1041 {
   1042     typedef typename detail::AtomicBase<uint32_t, Order> Base;
  1044   public:
  1045     MOZ_CONSTEXPR Atomic() : Base() {}
  1046     MOZ_CONSTEXPR Atomic(bool aInit) : Base(aInit) {}
  1048     // We provide boolean wrappers for the underlying AtomicBase methods.
  1049     operator bool() const { return Base::Intrinsics::load(Base::mValue); }
  1050     bool operator=(bool aValue) { return Base::operator=(aValue); }
  1051     bool exchange(bool aValue) { return Base::exchange(aValue); }
  1052     bool compareExchange(bool aOldValue, bool aNewValue) {
   1053       return Base::compareExchange(aOldValue, aNewValue);
   1054     }
   1056   private:
  1057     Atomic(Atomic<bool, Order>& aOther) MOZ_DELETE;
  1058 };
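/*
 * Sketch for Atomic<bool> (editorial illustration; hypothetical names):
 *
 *   static Atomic<bool> sInitialized(false);
 *
 *   void EnsureInit() {
 *     // exchange() returns the previous value, so exactly one caller sees
 *     // false and performs the one-time setup.
 *     if (!sInitialized.exchange(true)) {
 *       // ... one-time initialization ...
 *     }
 *   }
 */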
  1060 } // namespace mozilla
  1062 #endif /* mozilla_Atomics_h */
