// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is an internal atomic implementation, use base/atomicops.h instead.

#ifndef BASE_ATOMICOPS_INTERNALS_X86_GCC_H_
#define BASE_ATOMICOPS_INTERNALS_X86_GCC_H_

#include "base/base_export.h"

// This struct is not part of the public API of this module; clients may not
// use it. (However, it's exported via BASE_EXPORT because clients implicitly
// do use it at link time by inlining these functions.)
// Features of this x86. Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug;  // Processor has AMD memory-barrier bug; do lfence
                             // after acquire compare-and-swap.
  bool has_sse2;             // Processor has SSE2.
};
BASE_EXPORT extern struct AtomicOps_x86CPUFeatureStruct
    AtomicOps_Internalx86CPUFeatures;

#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")

namespace base {
namespace subtle {

// 32-bit low-level operations on any platform.

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}
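
// Usage sketch (illustrative only, not part of this header; real callers go
// through base/atomicops.h): a compare-and-swap reports success by returning
// the value it observed, so retry loops compare the return value against the
// expected one. The helper below is hypothetical.
//
//   Atomic32 AddViaCompareAndSwap(volatile Atomic32* p, Atomic32 delta) {
//     Atomic32 observed;
//     do {
//       observed = NoBarrier_Load(p);
//     } while (NoBarrier_CompareAndSwap(p, observed, observed + delta) !=
//              observed);
//     return observed + delta;  // The value we installed.
//   }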

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}
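
// Usage sketch (illustrative only, hypothetical helper): the Barrier_ variant
// is intended for cases such as reference counting, where the decrement that
// may release an object must be ordered with respect to later accesses.
//
//   bool DropReference(volatile Atomic32* refcount) {
//     // Returns true when the caller held the last reference.
//     return Barrier_AtomicIncrement(refcount, -1) == 0;
//   }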

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because the
// "mfence" instruction is guaranteed to exist.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

#else

inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else {  // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);
    // acts as a barrier on PIII
  }
}
#endif

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value;  // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
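
// Usage sketch (illustrative only, hypothetical names): Release_Store and
// Acquire_Load are meant to be paired, so that data written before the
// release store is visible to a thread that observes the flag with an
// acquire load.
//
//   int g_payload = 0;     // plain data published between threads
//   Atomic32 g_ready = 0;  // publication flag
//
//   void Publish() {  // producer thread
//     g_payload = 42;
//     Release_Store(&g_ready, 1);  // writes above are visible before the flag.
//   }
//
//   bool TryConsume(int* out) {  // consumer thread
//     if (Acquire_Load(&g_ready) == 0)
//       return false;
//     *out = g_payload;  // ordered after the acquire load.
//     return true;
//   }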

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value;  // An x86 store acts as a release barrier
                 // for current AMD/Intel chips as of Jan 2008.
                 // See also Acquire_Load(), below.

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare. Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;  // An x86 load acts as an acquire barrier,
                          // for current AMD/Intel chips as of Jan 2008.
                          // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}

inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

#endif  // defined(__x86_64__)

}  // namespace base::subtle
}  // namespace base

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // BASE_ATOMICOPS_INTERNALS_X86_GCC_H_