/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * Implements (almost always) lock-free atomic operations. The operations here
 * are a subset of that which can be found in C++11's <atomic> header, with a
 * different API to enforce consistent memory ordering constraints.
 *
 * Anyone caught using |volatile| for inter-thread memory safety needs to be
 * sent a copy of this header and the C++11 standard.
 */

#ifndef mozilla_Atomics_h
#define mozilla_Atomics_h

#include "mozilla/Assertions.h"
#include "mozilla/Attributes.h"
#include "mozilla/Compiler.h"
#include "mozilla/TypeTraits.h"

#include <stdint.h>

/*
 * Our minimum deployment target on clang/OS X is OS X 10.6, whose SDK
 * does not have <atomic>. So be sure to check for <atomic> support
 * along with C++0x support.
 */
#if defined(__clang__) || defined(__GNUC__)
/*
 * Clang doesn't like <atomic> from libstdc++ before 4.7 due to the
 * loose typing of the atomic builtins. GCC 4.5 and 4.6 lack inline
 * definitions for unspecialized std::atomic, which causes linking errors.
 * Therefore, we require at least 4.7.0 for using libstdc++.
 */
# if MOZ_USING_LIBSTDCXX && MOZ_LIBSTDCXX_VERSION_AT_LEAST(4, 7, 0)
#  define MOZ_HAVE_CXX11_ATOMICS
# elif MOZ_USING_LIBCXX
#  define MOZ_HAVE_CXX11_ATOMICS
# endif
#elif defined(_MSC_VER) && _MSC_VER >= 1700
# if defined(DEBUG)
/*
 * Provide our own failure code since we're having trouble linking to
 * std::_Debug_message (bug 982310).
 */
#  define _INVALID_MEMORY_ORDER MOZ_CRASH("Invalid memory order")
# endif
# define MOZ_HAVE_CXX11_ATOMICS
#endif

namespace mozilla {

/**
 * An enum of memory ordering possibilities for atomics.
 *
 * Memory ordering is the observable state of distinct values in memory.
 * (It's a separate concept from atomicity, which concerns whether an
 * operation can ever be observed in an intermediate state. Don't
 * conflate the two!) Given a sequence of operations in source code on
 * memory, it is *not* always the case that, at all times and on all
 * cores, those operations will appear to have occurred in that exact
 * sequence. First, the compiler might reorder that sequence, if it
 * thinks another ordering will be more efficient. Second, the CPU may
 * not expose so consistent a view of memory. CPUs will often perform
 * their own instruction reordering, above and beyond that performed by
 * the compiler. And each core has its own memory caches, and accesses
 * (reads and writes both) to "memory" may only resolve to out-of-date
 * cache entries -- not to the "most recently" performed operation in
 * some global sense. Any access to a value that may be used by
 * multiple threads, potentially across multiple cores, must therefore
 * have a memory ordering imposed on it, for all code on all
 * threads/cores to have a sufficiently coherent worldview.
 *
 * http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync and
 * http://en.cppreference.com/w/cpp/atomic/memory_order go into more
 * detail on all this, including examples of how each mode works.
 *
 * Note that for simplicity and practicality, not all of the modes in
 * C++11 are supported. The missing C++11 modes are either subsumed by
 * the modes we provide below, or not relevant for the CPUs we support
 * in Gecko. These three modes are confusing enough as it is!
 */
enum MemoryOrdering {
  /*
   * Relaxed ordering is the simplest memory ordering: none at all.
   * When the result of a write is observed, nothing may be inferred
   * about other memory. Writes ostensibly performed "before" on the
   * writing thread may not yet be visible. Writes performed "after" on
   * the writing thread may already be visible, if the compiler or CPU
   * reordered them. (The latter can happen if reads and/or writes get
   * held up in per-processor caches.) Relaxed ordering means
   * operations can always use cached values (as long as the actual
   * updates to atomic values actually occur, correctly, eventually), so
   * it's usually the fastest sort of atomic access. For this reason,
   * *it's also the most dangerous kind of access*.
   *
   * Relaxed ordering is good for things like process-wide statistics
   * counters that don't need to be consistent with anything else, so
   * long as updates themselves are atomic. (And so long as any
   * observations of that value can tolerate being out-of-date -- if you
   * need some sort of up-to-date value, you need some sort of other
   * synchronizing operation.) It's *not* good for locks, mutexes,
   * reference counts, etc. that mediate access to other memory, or must
   * be observably consistent with other memory.
   *
   * x86 architectures don't take advantage of the optimization
   * opportunities that relaxed ordering permits. Thus it's possible
   * that using relaxed ordering will "work" on x86 but fail elsewhere
   * (ARM, say, which *does* implement non-sequentially-consistent
   * relaxed ordering semantics). Be extra-careful using relaxed
   * ordering if you can't easily test non-x86 architectures!
   */
  Relaxed,
  /*
   * When an atomic value is updated with ReleaseAcquire ordering, and
   * that new value is observed with ReleaseAcquire ordering, prior
   * writes (atomic or not) are also observable. What ReleaseAcquire
   * *doesn't* give you is any observable ordering guarantees for
   * ReleaseAcquire-ordered operations on different objects. For
   * example, if there are two cores that each perform ReleaseAcquire
   * operations on separate objects, each core may or may not observe
   * the operations made by the other core. The only way the cores can
   * be synchronized with ReleaseAcquire is if they both
   * ReleaseAcquire-access the same object. This implies that you can't
   * necessarily describe some global total ordering of ReleaseAcquire
   * operations.
   *
   * ReleaseAcquire ordering is good for (as the name implies) atomic
   * operations on values controlling ownership of things: reference
   * counts, mutexes, and the like. However, if you are thinking about
   * using these to implement your own locks or mutexes, you should take
   * a good, hard look at actual lock or mutex primitives first.
   */
  ReleaseAcquire,
  /*
   * When an atomic value is updated with SequentiallyConsistent
   * ordering, all writes are observable when the update is observed, just
   * as with ReleaseAcquire ordering. But, furthermore, a global total
   * ordering of SequentiallyConsistent operations *can* be described.
   * For example, if two cores perform SequentiallyConsistent operations
   * on separate objects, one core will observably perform its update
   * (and all previous operations will have completed), then the other
   * core will observably perform its update (and all previous
   * operations will have completed). (Although those previous
   * operations aren't themselves ordered -- they could be intermixed,
   * or ordered if they occur on atomic values with ordering
   * requirements.) SequentiallyConsistent is the *simplest and safest*
   * ordering of atomic operations -- it's always as if one operation
   * happens, then another, then another, in some order -- and every
   * core observes updates to happen in that single order. Because it
   * has the most synchronization requirements, operations ordered this
   * way also tend to be slowest.
   *
   * SequentiallyConsistent ordering can be desirable when multiple
   * threads observe objects, and they all have to agree on the
   * observable order of changes to them. People expect
   * SequentiallyConsistent ordering, even if they shouldn't, when
   * writing code, atomic or otherwise. SequentiallyConsistent is also
   * the ordering of choice when designing lockless data structures. If
   * you don't know what order to use, use this one.
   */
  SequentiallyConsistent,
};
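
/*
 * Illustrative sketch: how these orderings are typically chosen with the
 * Atomic<> wrapper defined at the bottom of this header. The variable names
 * are hypothetical.
 *
 *   // A process-wide statistics counter; nothing else depends on it.
 *   static mozilla::Atomic<uint32_t, mozilla::Relaxed> sBytesRead(0);
 *
 *   // A flag that publishes previously written (non-atomic) data to another
 *   // thread, which reads the flag with the same ordering.
 *   static mozilla::Atomic<bool, mozilla::ReleaseAcquire> sDataReady(false);
 *
 *   // When in doubt, use the default, SequentiallyConsistent.
 *   static mozilla::Atomic<uint32_t> sState(0);
 */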

} // namespace mozilla

// Build up the underlying intrinsics.
#ifdef MOZ_HAVE_CXX11_ATOMICS

# include <atomic>

namespace mozilla {
namespace detail {

/*
 * We provide CompareExchangeFailureOrder to work around a bug in some
 * versions of GCC's <atomic> header. See bug 898491.
 */
template<MemoryOrdering Order> struct AtomicOrderConstraints;

template<>
struct AtomicOrderConstraints<Relaxed>
{
  static const std::memory_order AtomicRMWOrder = std::memory_order_relaxed;
  static const std::memory_order LoadOrder = std::memory_order_relaxed;
  static const std::memory_order StoreOrder = std::memory_order_relaxed;
  static const std::memory_order CompareExchangeFailureOrder =
    std::memory_order_relaxed;
};

template<>
struct AtomicOrderConstraints<ReleaseAcquire>
{
  static const std::memory_order AtomicRMWOrder = std::memory_order_acq_rel;
  static const std::memory_order LoadOrder = std::memory_order_acquire;
  static const std::memory_order StoreOrder = std::memory_order_release;
  static const std::memory_order CompareExchangeFailureOrder =
    std::memory_order_acquire;
};

template<>
struct AtomicOrderConstraints<SequentiallyConsistent>
{
  static const std::memory_order AtomicRMWOrder = std::memory_order_seq_cst;
  static const std::memory_order LoadOrder = std::memory_order_seq_cst;
  static const std::memory_order StoreOrder = std::memory_order_seq_cst;
  static const std::memory_order CompareExchangeFailureOrder =
    std::memory_order_seq_cst;
};
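
/*
 * For reference, a minimal sketch of what these constraints select when used
 * directly against std::atomic (hypothetical variable; ReleaseAcquire shown):
 *
 *   std::atomic<uint32_t> v(0);
 *   uint32_t x = v.load(std::memory_order_acquire);   // LoadOrder
 *   v.store(1, std::memory_order_release);            // StoreOrder
 *   v.fetch_add(1, std::memory_order_acq_rel);        // AtomicRMWOrder
 */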

template<typename T, MemoryOrdering Order>
struct IntrinsicBase
{
  typedef std::atomic<T> ValueType;
  typedef AtomicOrderConstraints<Order> OrderedOp;
};

template<typename T, MemoryOrdering Order>
struct IntrinsicMemoryOps : public IntrinsicBase<T, Order>
{
  typedef IntrinsicBase<T, Order> Base;
  static T load(const typename Base::ValueType& ptr) {
    return ptr.load(Base::OrderedOp::LoadOrder);
  }
  static void store(typename Base::ValueType& ptr, T val) {
    ptr.store(val, Base::OrderedOp::StoreOrder);
  }
  static T exchange(typename Base::ValueType& ptr, T val) {
    return ptr.exchange(val, Base::OrderedOp::AtomicRMWOrder);
  }
  static bool compareExchange(typename Base::ValueType& ptr, T oldVal, T newVal) {
    return ptr.compare_exchange_strong(oldVal, newVal,
                                       Base::OrderedOp::AtomicRMWOrder,
                                       Base::OrderedOp::CompareExchangeFailureOrder);
  }
};
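
/*
 * Note on compareExchange above: std::atomic's compare_exchange_strong takes
 * separate success and failure orderings, and the failure ordering may not be
 * stronger than the success ordering. A standalone sketch of the equivalent
 * call for the ReleaseAcquire constraints (hypothetical variable):
 *
 *   std::atomic<uint32_t> v(0);
 *   uint32_t expected = 0;
 *   bool ok = v.compare_exchange_strong(expected, 1,
 *                                       std::memory_order_acq_rel,
 *                                       std::memory_order_acquire);
 */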

template<typename T, MemoryOrdering Order>
struct IntrinsicAddSub : public IntrinsicBase<T, Order>
{
  typedef IntrinsicBase<T, Order> Base;
  static T add(typename Base::ValueType& ptr, T val) {
    return ptr.fetch_add(val, Base::OrderedOp::AtomicRMWOrder);
  }
  static T sub(typename Base::ValueType& ptr, T val) {
    return ptr.fetch_sub(val, Base::OrderedOp::AtomicRMWOrder);
  }
};

template<typename T, MemoryOrdering Order>
struct IntrinsicAddSub<T*, Order> : public IntrinsicBase<T*, Order>
{
  typedef IntrinsicBase<T*, Order> Base;
  static T* add(typename Base::ValueType& ptr, ptrdiff_t val) {
    return ptr.fetch_add(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder);
  }
  static T* sub(typename Base::ValueType& ptr, ptrdiff_t val) {
    return ptr.fetch_sub(fixupAddend(val), Base::OrderedOp::AtomicRMWOrder);
  }
private:
  /*
   * GCC 4.6's <atomic> header has a bug where adding X to an
   * atomic<T*> is not the same as adding X to a T*. Hence the need
   * for this function to provide the correct addend.
   */
  static ptrdiff_t fixupAddend(ptrdiff_t val) {
#if defined(__clang__) || defined(_MSC_VER)
    return val;
#elif defined(__GNUC__) && MOZ_GCC_VERSION_AT_LEAST(4, 6, 0) && \
      !MOZ_GCC_VERSION_AT_LEAST(4, 7, 0)
    return val * sizeof(T);
#else
    return val;
#endif
  }
};
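
/*
 * For clarity, with a conforming <atomic> the fetch_add on std::atomic<T*>
 * already advances by whole elements, so no scaling is needed (sketch):
 *
 *   int buffer[8];
 *   std::atomic<int*> p(buffer);
 *   p.fetch_add(2);   // p now points at buffer + 2, i.e. two ints further
 *
 * The affected GCC 4.6 <atomic> apparently applied the addend as a raw byte
 * count, which is why fixupAddend() pre-multiplies by sizeof(T) only there.
 */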

template<typename T, MemoryOrdering Order>
struct IntrinsicIncDec : public IntrinsicAddSub<T, Order>
{
  typedef IntrinsicBase<T, Order> Base;
  static T inc(typename Base::ValueType& ptr) {
    return IntrinsicAddSub<T, Order>::add(ptr, 1);
  }
  static T dec(typename Base::ValueType& ptr) {
    return IntrinsicAddSub<T, Order>::sub(ptr, 1);
  }
};

template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
                          public IntrinsicIncDec<T, Order>
{
  typedef IntrinsicBase<T, Order> Base;
  static T or_(typename Base::ValueType& ptr, T val) {
    return ptr.fetch_or(val, Base::OrderedOp::AtomicRMWOrder);
  }
  static T xor_(typename Base::ValueType& ptr, T val) {
    return ptr.fetch_xor(val, Base::OrderedOp::AtomicRMWOrder);
  }
  static T and_(typename Base::ValueType& ptr, T val) {
    return ptr.fetch_and(val, Base::OrderedOp::AtomicRMWOrder);
  }
};

template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics<T*, Order>
  : public IntrinsicMemoryOps<T*, Order>, public IntrinsicIncDec<T*, Order>
{
};

} // namespace detail
} // namespace mozilla

#elif defined(__GNUC__)

namespace mozilla {
namespace detail {

/*
 * The __sync_* family of intrinsics is documented here:
 *
 * http://gcc.gnu.org/onlinedocs/gcc-4.6.4/gcc/Atomic-Builtins.html
 *
 * While these intrinsics are deprecated in favor of the newer __atomic_*
 * family of intrinsics:
 *
 * http://gcc.gnu.org/onlinedocs/gcc-4.7.3/gcc/_005f_005fatomic-Builtins.html
 *
 * any GCC version that supports the __atomic_* intrinsics will also support
 * the <atomic> header and so will be handled above. We provide a version of
 * atomics using the __sync_* intrinsics to support older versions of GCC.
 *
 * All __sync_* intrinsics that we use below act as full memory barriers, for
 * both compiler and hardware reordering, except for __sync_lock_test_and_set,
 * which is only an acquire barrier. When we call __sync_lock_test_and_set,
 * we add a barrier above it as appropriate.
 */
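
/*
 * As a minimal sketch of that pattern, an exchange that should be stronger
 * than acquire-only gets a full barrier placed in front of the intrinsic:
 *
 *   long exchangeSeqCst(long& loc, long val) {
 *     __sync_synchronize();                        // full barrier first
 *     return __sync_lock_test_and_set(&loc, val);  // acquire-only RMW
 *   }
 */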

template<MemoryOrdering Order> struct Barrier;

/*
 * Some processors (in particular, x86) don't require quite so many calls to
 * __sync_synchronize as our specializations of Barrier produce. If
 * performance turns out to be an issue, defining these specializations
 * on a per-processor basis would be a good first tuning step.
 */

template<>
struct Barrier<Relaxed>
{
  static void beforeLoad() {}
  static void afterLoad() {}
  static void beforeStore() {}
  static void afterStore() {}
};

template<>
struct Barrier<ReleaseAcquire>
{
  static void beforeLoad() {}
  static void afterLoad() { __sync_synchronize(); }
  static void beforeStore() { __sync_synchronize(); }
  static void afterStore() {}
};

template<>
struct Barrier<SequentiallyConsistent>
{
  static void beforeLoad() { __sync_synchronize(); }
  static void afterLoad() { __sync_synchronize(); }
  static void beforeStore() { __sync_synchronize(); }
  static void afterStore() { __sync_synchronize(); }
};

template<typename T, MemoryOrdering Order>
struct IntrinsicMemoryOps
{
  static T load(const T& ptr) {
    Barrier<Order>::beforeLoad();
    T val = ptr;
    Barrier<Order>::afterLoad();
    return val;
  }
  static void store(T& ptr, T val) {
    Barrier<Order>::beforeStore();
    ptr = val;
    Barrier<Order>::afterStore();
  }
  static T exchange(T& ptr, T val) {
    // __sync_lock_test_and_set is only an acquire barrier; loads and stores
    // can't be moved up from after to before it, but they can be moved down
    // from before to after it. We may want a stricter ordering, so we need
    // an explicit barrier.

    Barrier<Order>::beforeStore();
    return __sync_lock_test_and_set(&ptr, val);
  }
  static bool compareExchange(T& ptr, T oldVal, T newVal) {
    return __sync_bool_compare_and_swap(&ptr, oldVal, newVal);
  }
};

template<typename T>
struct IntrinsicAddSub
{
  typedef T ValueType;
  static T add(T& ptr, T val) {
    return __sync_fetch_and_add(&ptr, val);
  }
  static T sub(T& ptr, T val) {
    return __sync_fetch_and_sub(&ptr, val);
  }
};

template<typename T>
struct IntrinsicAddSub<T*>
{
  typedef T* ValueType;
  /*
   * The reinterpret_casts are needed so that
   * __sync_fetch_and_{add,sub} will properly type-check.
   *
   * Also, these functions do not provide standard semantics for
   * pointer types, so we need to adjust the addend.
   */
  static ValueType add(ValueType& ptr, ptrdiff_t val) {
    ValueType amount = reinterpret_cast<ValueType>(val * sizeof(T));
    return __sync_fetch_and_add(&ptr, amount);
  }
  static ValueType sub(ValueType& ptr, ptrdiff_t val) {
    ValueType amount = reinterpret_cast<ValueType>(val * sizeof(T));
    return __sync_fetch_and_sub(&ptr, amount);
  }
};

template<typename T>
struct IntrinsicIncDec : public IntrinsicAddSub<T>
{
  static T inc(T& ptr) { return IntrinsicAddSub<T>::add(ptr, 1); }
  static T dec(T& ptr) { return IntrinsicAddSub<T>::sub(ptr, 1); }
};

template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
                          public IntrinsicIncDec<T>
{
  static T or_(T& ptr, T val) {
    return __sync_fetch_and_or(&ptr, val);
  }
  static T xor_(T& ptr, T val) {
    return __sync_fetch_and_xor(&ptr, val);
  }
  static T and_(T& ptr, T val) {
    return __sync_fetch_and_and(&ptr, val);
  }
};

template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
                                     public IntrinsicIncDec<T*>
{
};

} // namespace detail
} // namespace mozilla

#elif defined(_MSC_VER)

/*
 * Windows comes with a full complement of atomic operations.
 * Unfortunately, most of those aren't available for Windows XP (even if
 * the compiler supports intrinsics for them), which is the oldest
 * version of Windows we support. Therefore, we only provide operations
 * on 32-bit datatypes for 32-bit Windows versions; for 64-bit Windows
 * versions, we support 64-bit datatypes as well.
 *
 * To avoid namespace pollution issues, we declare whatever functions we
 * need ourselves.
 */

extern "C" {
long __cdecl _InterlockedExchangeAdd(long volatile* dst, long value);
long __cdecl _InterlockedOr(long volatile* dst, long value);
long __cdecl _InterlockedXor(long volatile* dst, long value);
long __cdecl _InterlockedAnd(long volatile* dst, long value);
long __cdecl _InterlockedExchange(long volatile* dst, long value);
long __cdecl _InterlockedCompareExchange(long volatile* dst, long newVal,
                                         long oldVal);
}

# pragma intrinsic(_InterlockedExchangeAdd)
# pragma intrinsic(_InterlockedOr)
# pragma intrinsic(_InterlockedXor)
# pragma intrinsic(_InterlockedAnd)
# pragma intrinsic(_InterlockedExchange)
# pragma intrinsic(_InterlockedCompareExchange)

namespace mozilla {
namespace detail {

# if !defined(_M_IX86) && !defined(_M_X64)
/*
 * The implementations below are optimized for x86ish systems. You
 * will have to modify them if you are porting to Windows on a
 * different architecture.
 */
#  error "Unknown CPU type"
# endif

/*
 * The PrimitiveIntrinsics template should define |Type|, the datatype of size
 * DataSize upon which we operate, and the following eight functions.
 *
 * static Type add(Type* ptr, Type val);
 * static Type sub(Type* ptr, Type val);
 * static Type or_(Type* ptr, Type val);
 * static Type xor_(Type* ptr, Type val);
 * static Type and_(Type* ptr, Type val);
 *
 * These functions perform the obvious operation on the value contained in
 * |*ptr| combined with |val| and return the value previously stored in
 * |*ptr|.
 *
 * static void store(Type* ptr, Type val);
 *
 * This function atomically stores |val| into |*ptr| and must provide a full
 * memory fence after the store to prevent compiler and hardware instruction
 * reordering. It should also act as a compiler barrier to prevent reads and
 * writes from moving to after the store.
 *
 * static Type exchange(Type* ptr, Type val);
 *
 * This function atomically stores |val| into |*ptr| and returns the previous
 * contents of |*ptr|.
 *
 * static bool compareExchange(Type* ptr, Type oldVal, Type newVal);
 *
 * This function atomically performs the following operation:
 *
 *     if (*ptr == oldVal) {
 *       *ptr = newVal;
 *       return true;
 *     } else {
 *       return false;
 *     }
 *
 */
template<size_t DataSize> struct PrimitiveIntrinsics;

template<>
struct PrimitiveIntrinsics<4>
{
  typedef long Type;

  static Type add(Type* ptr, Type val) {
    return _InterlockedExchangeAdd(ptr, val);
  }
  static Type sub(Type* ptr, Type val) {
    /*
     * _InterlockedExchangeSubtract isn't available before Windows 7,
     * and we must support Windows XP.
     */
    return _InterlockedExchangeAdd(ptr, -val);
  }
  static Type or_(Type* ptr, Type val) {
    return _InterlockedOr(ptr, val);
  }
  static Type xor_(Type* ptr, Type val) {
    return _InterlockedXor(ptr, val);
  }
  static Type and_(Type* ptr, Type val) {
    return _InterlockedAnd(ptr, val);
  }
  static void store(Type* ptr, Type val) {
    _InterlockedExchange(ptr, val);
  }
  static Type exchange(Type* ptr, Type val) {
    return _InterlockedExchange(ptr, val);
  }
  static bool compareExchange(Type* ptr, Type oldVal, Type newVal) {
    return _InterlockedCompareExchange(ptr, newVal, oldVal) == oldVal;
  }
};

# if defined(_M_X64)

extern "C" {
long long __cdecl _InterlockedExchangeAdd64(long long volatile* dst,
                                            long long value);
long long __cdecl _InterlockedOr64(long long volatile* dst,
                                   long long value);
long long __cdecl _InterlockedXor64(long long volatile* dst,
                                    long long value);
long long __cdecl _InterlockedAnd64(long long volatile* dst,
                                    long long value);
long long __cdecl _InterlockedExchange64(long long volatile* dst,
                                         long long value);
long long __cdecl _InterlockedCompareExchange64(long long volatile* dst,
                                                long long newVal,
                                                long long oldVal);
}

#  pragma intrinsic(_InterlockedExchangeAdd64)
#  pragma intrinsic(_InterlockedOr64)
#  pragma intrinsic(_InterlockedXor64)
#  pragma intrinsic(_InterlockedAnd64)
#  pragma intrinsic(_InterlockedExchange64)
#  pragma intrinsic(_InterlockedCompareExchange64)

template <>
struct PrimitiveIntrinsics<8>
{
  typedef __int64 Type;

  static Type add(Type* ptr, Type val) {
    return _InterlockedExchangeAdd64(ptr, val);
  }
  static Type sub(Type* ptr, Type val) {
    /*
     * There is no _InterlockedExchangeSubtract64.
     */
    return _InterlockedExchangeAdd64(ptr, -val);
  }
  static Type or_(Type* ptr, Type val) {
    return _InterlockedOr64(ptr, val);
  }
  static Type xor_(Type* ptr, Type val) {
    return _InterlockedXor64(ptr, val);
  }
  static Type and_(Type* ptr, Type val) {
    return _InterlockedAnd64(ptr, val);
  }
  static void store(Type* ptr, Type val) {
    _InterlockedExchange64(ptr, val);
  }
  static Type exchange(Type* ptr, Type val) {
    return _InterlockedExchange64(ptr, val);
  }
  static bool compareExchange(Type* ptr, Type oldVal, Type newVal) {
    return _InterlockedCompareExchange64(ptr, newVal, oldVal) == oldVal;
  }
};

# endif

extern "C" { void _ReadWriteBarrier(); }

# pragma intrinsic(_ReadWriteBarrier)

template<MemoryOrdering Order> struct Barrier;

/*
 * We do not provide an afterStore method in Barrier, as Relaxed and
 * ReleaseAcquire orderings do not require one, and the required barrier
 * for SequentiallyConsistent is handled by PrimitiveIntrinsics.
 */

template<>
struct Barrier<Relaxed>
{
  static void beforeLoad() {}
  static void afterLoad() {}
  static void beforeStore() {}
};

template<>
struct Barrier<ReleaseAcquire>
{
  static void beforeLoad() {}
  static void afterLoad() { _ReadWriteBarrier(); }
  static void beforeStore() { _ReadWriteBarrier(); }
};

template<>
struct Barrier<SequentiallyConsistent>
{
  static void beforeLoad() { _ReadWriteBarrier(); }
  static void afterLoad() { _ReadWriteBarrier(); }
  static void beforeStore() { _ReadWriteBarrier(); }
};

template<typename PrimType, typename T>
struct CastHelper
{
  static PrimType toPrimType(T val) { return static_cast<PrimType>(val); }
  static T fromPrimType(PrimType val) { return static_cast<T>(val); }
};

template<typename PrimType, typename T>
struct CastHelper<PrimType, T*>
{
  static PrimType toPrimType(T* val) { return reinterpret_cast<PrimType>(val); }
  static T* fromPrimType(PrimType val) { return reinterpret_cast<T*>(val); }
};

template<typename T>
struct IntrinsicBase
{
  typedef T ValueType;
  typedef PrimitiveIntrinsics<sizeof(T)> Primitives;
  typedef typename Primitives::Type PrimType;
  static_assert(sizeof(PrimType) == sizeof(T),
                "Selection of PrimitiveIntrinsics was wrong");
  typedef CastHelper<PrimType, T> Cast;
};

template<typename T, MemoryOrdering Order>
struct IntrinsicMemoryOps : public IntrinsicBase<T>
{
  typedef typename IntrinsicBase<T>::ValueType ValueType;
  typedef typename IntrinsicBase<T>::Primitives Primitives;
  typedef typename IntrinsicBase<T>::PrimType PrimType;
  typedef typename IntrinsicBase<T>::Cast Cast;
  static ValueType load(const ValueType& ptr) {
    Barrier<Order>::beforeLoad();
    ValueType val = ptr;
    Barrier<Order>::afterLoad();
    return val;
  }
  static void store(ValueType& ptr, ValueType val) {
    // For SequentiallyConsistent, Primitives::store() will generate the
    // proper memory fence. Everything else just needs a barrier before
    // the store.
    if (Order == SequentiallyConsistent) {
      Primitives::store(reinterpret_cast<PrimType*>(&ptr),
                        Cast::toPrimType(val));
    } else {
      Barrier<Order>::beforeStore();
      ptr = val;
    }
  }
  static ValueType exchange(ValueType& ptr, ValueType val) {
    PrimType oldval =
      Primitives::exchange(reinterpret_cast<PrimType*>(&ptr),
                           Cast::toPrimType(val));
    return Cast::fromPrimType(oldval);
  }
  static bool compareExchange(ValueType& ptr, ValueType oldVal, ValueType newVal) {
    return Primitives::compareExchange(reinterpret_cast<PrimType*>(&ptr),
                                       Cast::toPrimType(oldVal),
                                       Cast::toPrimType(newVal));
  }
};

template<typename T>
struct IntrinsicApplyHelper : public IntrinsicBase<T>
{
  typedef typename IntrinsicBase<T>::ValueType ValueType;
  typedef typename IntrinsicBase<T>::PrimType PrimType;
  typedef typename IntrinsicBase<T>::Cast Cast;
  typedef PrimType (*BinaryOp)(PrimType*, PrimType);
  typedef PrimType (*UnaryOp)(PrimType*);

  static ValueType applyBinaryFunction(BinaryOp op, ValueType& ptr,
                                       ValueType val) {
    PrimType* primTypePtr = reinterpret_cast<PrimType*>(&ptr);
    PrimType primTypeVal = Cast::toPrimType(val);
    return Cast::fromPrimType(op(primTypePtr, primTypeVal));
  }

  static ValueType applyUnaryFunction(UnaryOp op, ValueType& ptr) {
    PrimType* primTypePtr = reinterpret_cast<PrimType*>(&ptr);
    return Cast::fromPrimType(op(primTypePtr));
  }
};

template<typename T>
struct IntrinsicAddSub : public IntrinsicApplyHelper<T>
{
  typedef typename IntrinsicApplyHelper<T>::ValueType ValueType;
  typedef typename IntrinsicBase<T>::Primitives Primitives;
  static ValueType add(ValueType& ptr, ValueType val) {
    return applyBinaryFunction(&Primitives::add, ptr, val);
  }
  static ValueType sub(ValueType& ptr, ValueType val) {
    return applyBinaryFunction(&Primitives::sub, ptr, val);
  }
};

template<typename T>
struct IntrinsicAddSub<T*> : public IntrinsicApplyHelper<T*>
{
  typedef typename IntrinsicApplyHelper<T*>::ValueType ValueType;
  typedef typename IntrinsicBase<T*>::Primitives Primitives;
  static ValueType add(ValueType& ptr, ptrdiff_t amount) {
    return applyBinaryFunction(&Primitives::add, ptr,
                               (ValueType)(amount * sizeof(T)));
  }
  static ValueType sub(ValueType& ptr, ptrdiff_t amount) {
    return applyBinaryFunction(&Primitives::sub, ptr,
                               (ValueType)(amount * sizeof(T)));
  }
};

template<typename T>
struct IntrinsicIncDec : public IntrinsicAddSub<T>
{
  typedef typename IntrinsicAddSub<T>::ValueType ValueType;
  static ValueType inc(ValueType& ptr) { return add(ptr, 1); }
  static ValueType dec(ValueType& ptr) { return sub(ptr, 1); }
};

template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics : public IntrinsicMemoryOps<T, Order>,
                          public IntrinsicIncDec<T>
{
  typedef typename IntrinsicIncDec<T>::ValueType ValueType;
  static ValueType or_(ValueType& ptr, T val) {
    return applyBinaryFunction(&Primitives::or_, ptr, val);
  }
  static ValueType xor_(ValueType& ptr, T val) {
    return applyBinaryFunction(&Primitives::xor_, ptr, val);
  }
  static ValueType and_(ValueType& ptr, T val) {
    return applyBinaryFunction(&Primitives::and_, ptr, val);
  }
};

template<typename T, MemoryOrdering Order>
struct AtomicIntrinsics<T*, Order> : public IntrinsicMemoryOps<T*, Order>,
                                     public IntrinsicIncDec<T*>
{
  typedef typename IntrinsicMemoryOps<T*, Order>::ValueType ValueType;
};

} // namespace detail
} // namespace mozilla

#else
# error "Atomic compiler intrinsics are not supported on your platform"
#endif

namespace mozilla {

namespace detail {

template<typename T, MemoryOrdering Order>
class AtomicBase
{
  // We only support 32-bit types on 32-bit Windows, which constrains our
  // implementation elsewhere. But we support pointer-sized types everywhere.
  static_assert(sizeof(T) == 4 || (sizeof(uintptr_t) == 8 && sizeof(T) == 8),
                "mozilla/Atomics.h only supports 32-bit and pointer-sized types");

protected:
  typedef typename detail::AtomicIntrinsics<T, Order> Intrinsics;
  typename Intrinsics::ValueType mValue;

public:
  MOZ_CONSTEXPR AtomicBase() : mValue() {}
  MOZ_CONSTEXPR AtomicBase(T aInit) : mValue(aInit) {}

  // Note: we can't provide operator T() here because Atomic<bool> inherits
  // from AtomicBase with T=uint32_t and not T=bool. If we implemented
  // operator T() here, it would cause errors when comparing Atomic<bool> with
  // a regular bool.

  T operator=(T aValue) {
    Intrinsics::store(mValue, aValue);
    return aValue;
  }

  /**
   * Performs an atomic swap operation. aValue is stored and the previous
   * value of this variable is returned.
   */
  T exchange(T aValue) {
    return Intrinsics::exchange(mValue, aValue);
  }

  /**
   * Performs an atomic compare-and-swap operation and returns true if it
   * succeeded. This is equivalent to atomically doing
   *
   *   if (mValue == aOldValue) {
   *     mValue = aNewValue;
   *     return true;
   *   } else {
   *     return false;
   *   }
   */
  bool compareExchange(T aOldValue, T aNewValue) {
    return Intrinsics::compareExchange(mValue, aOldValue, aNewValue);
  }

private:
  template<MemoryOrdering AnyOrder>
  AtomicBase(const AtomicBase<T, AnyOrder>& aCopy) MOZ_DELETE;
};
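
/*
 * Illustrative sketch of a typical compare-and-swap retry loop built on
 * compareExchange(), here maintaining a maximum. The names are hypothetical.
 *
 *   mozilla::Atomic<uint32_t> sHighWaterMark(0);
 *
 *   void UpdateHighWaterMark(uint32_t aSample) {
 *     uint32_t old = sHighWaterMark;
 *     while (aSample > old && !sHighWaterMark.compareExchange(old, aSample)) {
 *       old = sHighWaterMark;   // lost the race; reload and retry
 *     }
 *   }
 */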

template<typename T, MemoryOrdering Order>
class AtomicBaseIncDec : public AtomicBase<T, Order>
{
  typedef typename detail::AtomicBase<T, Order> Base;

public:
  MOZ_CONSTEXPR AtomicBaseIncDec() : Base() {}
  MOZ_CONSTEXPR AtomicBaseIncDec(T aInit) : Base(aInit) {}

  using Base::operator=;

  operator T() const { return Base::Intrinsics::load(Base::mValue); }
  T operator++(int) { return Base::Intrinsics::inc(Base::mValue); }
  T operator--(int) { return Base::Intrinsics::dec(Base::mValue); }
  T operator++() { return Base::Intrinsics::inc(Base::mValue) + 1; }
  T operator--() { return Base::Intrinsics::dec(Base::mValue) - 1; }

private:
  template<MemoryOrdering AnyOrder>
  AtomicBaseIncDec(const AtomicBaseIncDec<T, AnyOrder>& aCopy) MOZ_DELETE;
};

} // namespace detail

/**
 * A wrapper for a type that enforces that all memory accesses are atomic.
 *
 * In general, where a variable |T foo| exists, |Atomic<T> foo| can be used in
 * its place. Implementations for integral and pointer types are provided
 * below.
 *
 * Atomic accesses are sequentially consistent by default. You should
 * use the default unless you are tall enough to ride the
 * memory-ordering roller coaster (if you're not sure, you aren't) and
 * you have a compelling reason to do otherwise.
 *
 * There is one exception to the case of atomic memory accesses: providing an
 * initial value of the atomic value is not guaranteed to be atomic. This is a
 * deliberate design choice that enables static atomic variables to be declared
 * without introducing extra static constructors.
 */
template<typename T,
         MemoryOrdering Order = SequentiallyConsistent,
         typename Enable = void>
class Atomic;
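
/*
 * Because initialization is not required to be atomic and the constructors
 * are MOZ_CONSTEXPR, Atomic<> works well for static/global variables without
 * adding a static constructor. A hypothetical sketch:
 *
 *   static mozilla::Atomic<uint32_t> sLiveObjectCount(0);
 *
 *   void NoteLiveObject() { sLiveObjectCount++; }
 *   void NoteDeadObject() { sLiveObjectCount--; }
 */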

/**
 * Atomic<T> implementation for integral types.
 *
 * In addition to atomic store and load operations, compound assignment and
 * increment/decrement operators are implemented which perform the
 * corresponding read-modify-write operation atomically. Finally, an atomic
 * swap method is provided.
 */
template<typename T, MemoryOrdering Order>
class Atomic<T, Order, typename EnableIf<IsIntegral<T>::value &&
                                         !IsSame<T, bool>::value>::Type>
  : public detail::AtomicBaseIncDec<T, Order>
{
  typedef typename detail::AtomicBaseIncDec<T, Order> Base;

public:
  MOZ_CONSTEXPR Atomic() : Base() {}
  MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}

  using Base::operator=;

  T operator+=(T delta) { return Base::Intrinsics::add(Base::mValue, delta) + delta; }
  T operator-=(T delta) { return Base::Intrinsics::sub(Base::mValue, delta) - delta; }
  T operator|=(T val) { return Base::Intrinsics::or_(Base::mValue, val) | val; }
  T operator^=(T val) { return Base::Intrinsics::xor_(Base::mValue, val) ^ val; }
  T operator&=(T val) { return Base::Intrinsics::and_(Base::mValue, val) & val; }

private:
  Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
};
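
/*
 * Usage sketch (hypothetical variable): the compound operators atomically
 * perform the read-modify-write and return the new value, mirroring the
 * builtin operators.
 *
 *   mozilla::Atomic<int32_t, mozilla::Relaxed> sCounter(0);
 *   int32_t now = (sCounter += 5);   // 'now' holds the value after the add
 */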

/**
 * Atomic<T> implementation for pointer types.
 *
 * An atomic compare-and-swap primitive for pointer variables is provided, as
 * are atomic increment and decrement operators. Also provided are the compound
 * assignment operators for addition and subtraction. Atomic swap (via
 * exchange()) is included as well.
 */
template<typename T, MemoryOrdering Order>
class Atomic<T*, Order> : public detail::AtomicBaseIncDec<T*, Order>
{
  typedef typename detail::AtomicBaseIncDec<T*, Order> Base;

public:
  MOZ_CONSTEXPR Atomic() : Base() {}
  MOZ_CONSTEXPR Atomic(T* aInit) : Base(aInit) {}

  using Base::operator=;

  T* operator+=(ptrdiff_t delta) {
    return Base::Intrinsics::add(Base::mValue, delta) + delta;
  }
  T* operator-=(ptrdiff_t delta) {
    return Base::Intrinsics::sub(Base::mValue, delta) - delta;
  }

private:
  Atomic(Atomic<T*, Order>& aOther) MOZ_DELETE;
};
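
/*
 * Usage sketch (hypothetical variables): pointer arithmetic is performed in
 * whole elements, exactly as for a plain T*.
 *
 *   static int sBuffer[16];
 *   mozilla::Atomic<int*> sCursor(sBuffer);
 *   int* p = (sCursor += 4);   // advances by four ints and returns the result
 */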

/**
 * Atomic<T> implementation for enum types.
 *
 * The atomic store and load operations and the atomic swap method are provided.
 */
template<typename T, MemoryOrdering Order>
class Atomic<T, Order, typename EnableIf<IsEnum<T>::value>::Type>
  : public detail::AtomicBase<T, Order>
{
  typedef typename detail::AtomicBase<T, Order> Base;

public:
  MOZ_CONSTEXPR Atomic() : Base() {}
  MOZ_CONSTEXPR Atomic(T aInit) : Base(aInit) {}

  operator T() const { return Base::Intrinsics::load(Base::mValue); }

  using Base::operator=;

private:
  Atomic(Atomic<T, Order>& aOther) MOZ_DELETE;
};
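
/*
 * Usage sketch (hypothetical enum): loads and stores of the enum value are
 * atomic, with the usual ordering guarantees.
 *
 *   enum FooState { eUninitialized, eReady, eClosed };
 *   mozilla::Atomic<FooState> sFooState(eUninitialized);
 *   sFooState = eReady;            // atomic store
 *   FooState state = sFooState;    // atomic load
 */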

/**
 * Atomic<T> implementation for boolean types.
 *
 * The atomic store and load operations and the atomic swap method are provided.
 *
 * Note:
 *
 * - sizeof(Atomic<bool>) != sizeof(bool) for some implementations of
 *   bool and/or some implementations of std::atomic. This is allowed in
 *   [atomic.types.generic]p9.
 *
 * - It's not obvious whether the 8-bit atomic functions on Windows are always
 *   inlined or not. If they are not inlined, the corresponding functions in the
 *   runtime library are not available on Windows XP. This is why we implement
 *   Atomic<bool> with an underlying type of uint32_t.
 */
template<MemoryOrdering Order>
class Atomic<bool, Order>
  : protected detail::AtomicBase<uint32_t, Order>
{
  typedef typename detail::AtomicBase<uint32_t, Order> Base;

public:
  MOZ_CONSTEXPR Atomic() : Base() {}
  MOZ_CONSTEXPR Atomic(bool aInit) : Base(aInit) {}

  // We provide boolean wrappers for the underlying AtomicBase methods.
  operator bool() const { return Base::Intrinsics::load(Base::mValue); }
  bool operator=(bool aValue) { return Base::operator=(aValue); }
  bool exchange(bool aValue) { return Base::exchange(aValue); }
  bool compareExchange(bool aOldValue, bool aNewValue) {
    return Base::compareExchange(aOldValue, aNewValue);
  }

private:
  Atomic(Atomic<bool, Order>& aOther) MOZ_DELETE;
};
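
/*
 * Usage sketch (hypothetical variable): note that because the underlying type
 * is uint32_t, sizeof(Atomic<bool>) is sizeof(uint32_t), not sizeof(bool).
 *
 *   static mozilla::Atomic<bool> sShutdownRequested(false);
 *
 *   void RequestShutdown() {
 *     if (sShutdownRequested.exchange(true)) {
 *       return;   // somebody else already requested it
 *     }
 *     // first requester: do one-time teardown work here
 *   }
 */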

} // namespace mozilla

#endif /* mozilla_Atomics_h */