// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is an internal atomic implementation, use base/atomicops.h instead.

#ifndef BASE_ATOMICOPS_INTERNALS_X86_GCC_H_
#define BASE_ATOMICOPS_INTERNALS_X86_GCC_H_

// This struct is not part of the public API of this module; clients may not
// use it.
// Features of this x86. Values may not be correct before main() is run,
// but are set conservatively.
struct AtomicOps_x86CPUFeatureStruct {
  bool has_amd_lock_mb_bug;  // Processor has AMD memory-barrier bug; do lfence
                             // after acquire compare-and-swap.
  bool has_sse2;             // Processor has SSE2.
};
extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures;

#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory")

namespace base {
namespace subtle {

// 32-bit low-level operations on any platform.

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev;
  __asm__ __volatile__("lock; cmpxchgl %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}
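
// Usage sketch (illustrative only, not part of this header): a compare-and-
// swap loop that adds a value to a shared counter. The helper name below is
// hypothetical.
//
//   inline Atomic32 AddViaCAS(volatile Atomic32* counter, Atomic32 delta) {
//     Atomic32 old_value, new_value;
//     do {
//       old_value = *counter;
//       new_value = old_value + delta;
//       // The CAS only succeeds (returns old_value) if *counter was not
//       // modified by another thread since we read it.
//     } while (NoBarrier_CompareAndSwap(counter, old_value, new_value) !=
//              old_value);
//     return new_value;
//   }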

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  __asm__ __volatile__("xchgl %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  return temp + increment;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  Atomic32 temp = increment;
  __asm__ __volatile__("lock; xaddl %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now holds the old value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}
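
// Usage sketch (illustrative only): Barrier_AtomicIncrement is the variant to
// use when the increment must be ordered with surrounding memory operations,
// e.g. dropping a reference count and freeing the object when it hits zero.
// The type and helper below are hypothetical.
//
//   struct RefCounted {
//     Atomic32 ref_count;  // assumed to start at 1
//   };
//
//   inline void Release(RefCounted* obj) {
//     // Barrier_AtomicIncrement returns the new value; the barrier keeps the
//     // thread's earlier writes to *obj visible before another thread can
//     // observe the count reaching zero and delete the object.
//     if (Barrier_AtomicIncrement(&obj->ref_count, -1) == 0)
//       delete obj;
//   }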

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}
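
// Usage sketch (illustrative only): a minimal spinlock built from
// Acquire_CompareAndSwap above plus Release_Store (defined later in this
// file). The names below are hypothetical.
//
//   inline void SpinLockAcquire(volatile Atomic32* lock) {
//     // 0 = unlocked, 1 = locked. Acquire semantics keep the critical
//     // section's reads/writes from moving above the lock acquisition.
//     while (Acquire_CompareAndSwap(lock, 0, 1) != 0) {
//       // spin; a real implementation would pause or yield here
//     }
//   }
//
//   inline void SpinLockRelease(volatile Atomic32* lock) {
//     // Release semantics keep the critical section's writes from moving
//     // below the unlock.
//     Release_Store(lock, 0);
//   }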

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

#if defined(__x86_64__)

// 64-bit implementations of memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence" : : : "memory");
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

#else

inline void MemoryBarrier() {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    __asm__ __volatile__("mfence" : : : "memory");
  } else {  // mfence is faster but not present on PIII
    Atomic32 x = 0;
    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
  }
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
    *ptr = value;
    __asm__ __volatile__("mfence" : : : "memory");
  } else {
    NoBarrier_AtomicExchange(ptr, value);  // acts as a barrier on PIII
  }
}
#endif

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  ATOMICOPS_COMPILER_BARRIER();
  *ptr = value;  // An x86 store acts as a release barrier.
  // See comments in Atomic64 version of Release_Store(), below.
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
  return *ptr;
}

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;  // An x86 load acts as an acquire barrier.
  // See comments in Atomic64 version of Release_Store(), below.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
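
// Usage sketch (illustrative only): the classic publication pattern built
// from Release_Store and Acquire_Load. The globals below are hypothetical.
//
//   int g_payload = 0;             // written only before publishing
//   Atomic32 g_ready = 0;          // becomes 1 once g_payload is valid
//
//   void Producer() {
//     g_payload = 42;
//     Release_Store(&g_ready, 1);  // publish: the payload write cannot sink
//   }                              // below this store
//
//   int Consumer() {
//     if (Acquire_Load(&g_ready))  // acquire: the payload read cannot hoist
//       return g_payload;          // above this load
//     return -1;                   // not published yet
//   }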

#if defined(__x86_64__)

// 64-bit low-level operations on 64-bit platform.

inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
                                         Atomic64 old_value,
                                         Atomic64 new_value) {
  Atomic64 prev;
  __asm__ __volatile__("lock; cmpxchgq %1,%2"
                       : "=a" (prev)
                       : "q" (new_value), "m" (*ptr), "0" (old_value)
                       : "memory");
  return prev;
}

inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
                                         Atomic64 new_value) {
  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
                       : "=r" (new_value)
                       : "m" (*ptr), "0" (new_value)
                       : "memory");
  return new_value;  // Now it's the previous value.
}

inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
                                          Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  return temp + increment;
}

inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
                                        Atomic64 increment) {
  Atomic64 temp = increment;
  __asm__ __volatile__("lock; xaddq %0,%1"
                       : "+r" (temp), "+m" (*ptr)
                       : : "memory");
  // temp now contains the previous value of *ptr
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return temp + increment;
}

inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
}

inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                       Atomic64 old_value,
                                       Atomic64 new_value) {
  Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  /* XXX/cjones: no idea if this is necessary... */
  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
    __asm__ __volatile__("lfence" : : : "memory");
  }
  return x;
}
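
// Usage sketch (illustrative only): publish-once initialization of a 64-bit
// slot, where exactly one thread wins the race to install a value. The helper
// name below is hypothetical.
//
//   inline Atomic64 InstallOnce(volatile Atomic64* slot, Atomic64 candidate) {
//     // If *slot was still 0, the CAS installs |candidate| and returns 0;
//     // otherwise it returns whatever another thread installed first.
//     Atomic64 prev = Acquire_CompareAndSwap(slot, 0, candidate);
//     return prev == 0 ? candidate : prev;
//   }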

inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
  ATOMICOPS_COMPILER_BARRIER();

  *ptr = value;  // An x86 store acts as a release barrier
                 // for current AMD/Intel chips as of Jan 2008.
                 // See also Acquire_Load(), below.

  // When new chips come out, check:
  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
  //  System Programming Guide, Chapter 7: Multiple-processor management,
  //  Section 7.2, Memory Ordering.
  // Last seen at:
  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
  //
  // x86 stores/loads fail to act as barriers for a few instructions (clflush
  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
  // not generated by the compiler, and are rare.  Users of these instructions
  // need to know about cache behaviour in any case since all of these involve
  // either flushing cache lines or non-temporal cache hints.
}

inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
  return *ptr;
}

inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
  Atomic64 value = *ptr;  // An x86 load acts as an acquire barrier,
                          // for current AMD/Intel chips as of Jan 2008.
                          // See also Release_Store(), above.
  ATOMICOPS_COMPILER_BARRIER();
  return value;
}

inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
  MemoryBarrier();
  return *ptr;
}
#endif  // defined(__x86_64__)

}  // namespace base::subtle
}  // namespace base

#undef ATOMICOPS_COMPILER_BARRIER

#endif  // BASE_ATOMICOPS_INTERNALS_X86_GCC_H_