// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is an internal atomic implementation, use base/atomicops.h instead.

#ifndef BASE_ATOMICOPS_INTERNALS_X86_GCC_H_
#define BASE_ATOMICOPS_INTERNALS_X86_GCC_H_

// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86. Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug;  // Processor has AMD memory-barrier bug; do lfence
                             // after acquire compare-and-swap.
  bool has_sse2;             // Processor has SSE2.
};
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;

#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")

namespace base {
namespace subtle {

// 32-bit low-level operations on any platform.

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}
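
// The following is an illustrative sketch only (the Example_* name is not
// part of this header or of base/atomicops.h): it shows the retry-loop
// pattern clients typically build on NoBarrier_CompareAndSwap above, here
// used to atomically raise *ptr to at least |candidate|.
inline Atomic32 Example_NoBarrier_AtomicMax(volatile Atomic32* ptr,
                                            Atomic32 candidate) {
  Atomic32 observed;
  do {
    observed = *ptr;
    if (observed >= candidate)
      return observed;  // Already large enough; no update needed.
    // Retry until the CAS succeeds, i.e. until the value it observes is the
    // value we read above.
  } while (NoBarrier_CompareAndSwap(ptr, observed, candidate) != observed);
  return candidate;
}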

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

#else

inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else {  // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);
    // acts as a barrier on PIII
  }
}
#endif

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value;  // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}
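
// Illustrative sketch only (the Example_* names are not part of this
// header): the intended pairing of Release_Store and Acquire_Load.  A
// producer writes its payload and then release-stores a flag; a consumer
// that acquire-loads the flag and sees it set is also guaranteed to see the
// payload written before the flag.
inline void Example_Publish(Atomic32* payload, volatile Atomic32* ready) {
  *payload = 42;             // Plain store of the data being published.
  Release_Store(ready, 1);   // Payload store cannot be reordered below this.
}

inline bool Example_TryConsume(const Atomic32* payload,
                               volatile const Atomic32* ready,
                               Atomic32* out) {
  if (Acquire_Load(ready) == 0)  // Later loads cannot be hoisted above this.
    return false;
  *out = *payload;  // Safe: the producer's payload store is visible here.
  return true;
}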

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  /* XXX/cjones: no idea if this is necessary... */
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value;  // An x86 store acts as a release barrier
                 // for current AMD/Intel chips as of Jan 2008.
                 // See also Acquire_Load(), below.

  // When new chips come out, check:
  //   IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //   System Programming Guide, Chapter 7: Multiple-processor management,
  //   Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}
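
// Illustrative sketch only (Example_ReleaseRef is not part of this header):
// Barrier_AtomicIncrement is the variant to reach for when the modification
// itself must act as a full barrier, e.g. dropping a reference count where
// the thread that takes the count to zero must observe all writes made by
// other threads before they released their references.
inline bool Example_ReleaseRef(volatile Atomic64* refcount) {
  // Returns true iff this call dropped the count to zero, i.e. the caller is
  // now responsible for destroying the object.
  return Barrier_AtomicIncrement(refcount, -1) == 0;
}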

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;  // An x86 load acts as an acquire barrier,
                          // for current AMD/Intel chips as of Jan 2008.
                          // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}
#endif  // defined(__x86_64__)

}  // namespace base::subtle
}  // namespace base

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // BASE_ATOMICOPS_INTERNALS_X86_GCC_H_