/***************************************************************************
 *                                                                         *
 *   Copyright (C) 2008-2013 Andreas Persson                               *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the Free Software           *
 *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,                *
 *   MA 02110-1301 USA                                                     *
 ***************************************************************************/
20 |
|
21 |
#ifndef LSATOMIC_H |
22 |
#define LSATOMIC_H |
23 |
|
24 |
/** @file
 *
 * Implementation of a small subset of the C++11 atomic operations.
 *
 * Note: When working with multithreading on modern CPUs, it's
 * important not only to make sure that concurrent access to shared
 * variables is made atomically, but also to be aware of the order the
 * stores get visible to the loads in other threads. For example, if x
 * and y are shared variables with initial values of 0, the following
 * program:
 *
 * @code
 * // thread 1:
 * x.store(1, memory_order_relaxed);
 * r1 = y.load(memory_order_relaxed);
 *
 * // thread 2:
 * y.store(1, memory_order_relaxed);
 * r2 = x.load(memory_order_relaxed);
 * @endcode
 *
 * would have a possible outcome of r1 == 0 and r2 == 0. The threads
 * might for example run on separate CPU cores with separate caches,
 * and the propagation of the store to the other core might be delayed
 * and done after the loads. In that case, both loads will read the
 * original value of 0 from the core's own cache.
 *
 * The C++11 style operations use the memory_order parameter to let
 * the programmer control the way shared memory stores get visible to
 * loads in other threads. In the example above, relaxed order was
 * used, which allows the CPU and compiler to reorder the memory
 * accesses very freely. If memory_order_seq_cst had been used
 * instead, the r1 == 0 and r2 == 0 outcome would have been
 * impossible, as sequential consistency means that the execution of
 * the program can be modeled by simply interleaving the instructions
 * of the threads.
 *
 * The default order is memory_order_seq_cst, as it is the easiest one
 * to understand. It is however also the slowest. The relaxed order is
 * the fastest, but it can't be used if the shared variable is used to
 * synchronize threads for any other shared data. The third order is
 * acquire/release, where an acquire-load is synchronizing with a
 * release-store to the same variable.
 *
 * See for example http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync for
 * more information about the memory order parameter.
 *
 * The supported operations of the implementation in this file are:
 *
 * - fences (acquire, release and seq_cst)
 *
 * - load and store of atomic<int> with relaxed, acquire/release or
 *   seq_cst memory ordering
 *
 * The supported architectures are x86, powerpc and ARMv7.
 */
80 |
|
81 |
|
82 |
// if C++11 and gcc 4.7 or later is used, then use the standard
// implementation
84 |
#if __cplusplus >= 201103L && \ |
85 |
(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) |
86 |
|
87 |
#include <atomic> |
88 |
|
89 |
namespace LinuxSampler {
    // Re-export the needed pieces of <atomic> into the LinuxSampler
    // namespace so the rest of the code base can use them without the
    // std:: prefix, exactly as with the fallback implementation below.
    using std::atomic;
    using std::atomic_thread_fence;

    // Memory ordering constants accepted by load(), store() and
    // atomic_thread_fence().
    using std::memory_order_acquire;
    using std::memory_order_relaxed;
    using std::memory_order_release;
    using std::memory_order_seq_cst;
}
97 |
|
98 |
#else |
99 |
|
100 |
|
101 |
namespace LinuxSampler { |
102 |
enum memory_order { |
103 |
memory_order_relaxed, memory_order_acquire, |
104 |
memory_order_release, memory_order_seq_cst |
105 |
}; |
106 |
|
107 |
    /**
     * Subset of C++11 std::atomic_thread_fence: establishes the requested
     * memory ordering between the memory accesses before and after the
     * call, without being tied to any particular atomic variable.
     *
     * All branches include at least a compiler barrier (the "memory"
     * clobber); stronger orderings additionally emit an architecture-
     * specific CPU barrier instruction.
     *
     * @param order - memory_order_relaxed, memory_order_acquire,
     *                memory_order_release or memory_order_seq_cst
     */
    inline void atomic_thread_fence(memory_order order) {
        switch (order) {
        case memory_order_relaxed:
            // relaxed: no ordering requested, so no barrier at all
            break;

        case memory_order_acquire:
        case memory_order_release:
            // acquire and release fences are implemented identically here
#ifdef _ARCH_PPC64
            // 64-bit PowerPC: lwsync is sufficient (and lighter than sync)
            asm volatile("lwsync" : : : "memory");
#elif defined(_ARCH_PPC)
            asm volatile("sync" : : : "memory");
#elif defined(__ARM_ARCH_7A__)
            asm volatile("dmb" : : : "memory");
#else
            // x86 and the default: compiler barrier only — the empty asm
            // with a "memory" clobber stops the compiler from reordering
            // memory accesses across the fence (x86 hardware ordering is
            // strong enough for acquire/release)
            asm volatile("" : : : "memory");
#endif
            break;

        case memory_order_seq_cst:
            // sequentially consistent fence: needs a full CPU barrier
#ifdef _ARCH_PPC
            asm volatile("sync" : : : "memory");
#elif defined(__i386__)
            // locked add of 0 to the top of the stack acts as a full
            // barrier without requiring SSE2's mfence instruction
            asm volatile("lock; addl $0,0(%%esp)" : : : "memory");
#elif defined(__x86_64__)
            asm volatile("mfence" : : : "memory");
#elif defined(__ARM_ARCH_7A__)
            asm volatile("dmb" : : : "memory");
#else
            // NOTE(review): unknown architectures get only a compiler
            // barrier here — this does not restrain the CPU itself
            asm volatile("" : : : "memory");
#endif
            break;
        }
    }
140 |
|
141 |
    // The primary template is declared but intentionally left undefined:
    // only the atomic<int> specialization below is implemented.
    template<typename T> class atomic;
    template<> class atomic<int> { // int is the only implemented type
    public:
        atomic() { }
        explicit atomic(int m) : f(m) { }

        /**
         * Atomically reads the stored value.
         *
         * @param order - memory_order_relaxed, memory_order_acquire or
         *                memory_order_seq_cst (memory_order_release is not
         *                valid for a load; it is treated like seq_cst here)
         * @returns the current value
         */
        int load(memory_order order = memory_order_seq_cst) const volatile {
            int m;
            switch (order) {
            case memory_order_relaxed:
                // plain read, no ordering constraints
                m = f;
                break;

            case memory_order_seq_cst:
            case memory_order_release: // (invalid)
#ifdef _ARCH_PPC
                // PPC: a full barrier before the load upgrades the
                // acquire-load below to sequential consistency
                atomic_thread_fence(memory_order_seq_cst);
#endif
                // fall-through

            case memory_order_acquire:
#ifdef _ARCH_PPC
                // PPC load-acquire: artificial dependency + isync
                // (the always-false compare/branch on the loaded value
                // followed by isync keeps later accesses from being
                // executed before the load; clobbers cr0)
                asm volatile(
                    "lwz%U1%X1 %0,%1\n\t"
                    "cmpw %0,%0\n\t"
                    "bne- 1f\n\t"
                    "1: isync"
                    : "=r" (m)
                    : "m" (f)
                    : "memory", "cr0");
#else
                // other architectures: plain load followed by an
                // acquire fence
                m = f;
                atomic_thread_fence(memory_order_acquire);
#endif
                break;
            }
            return m;
        }

        /**
         * Atomically stores a new value.
         *
         * @param m     - value to store
         * @param order - memory_order_relaxed, memory_order_release or
         *                memory_order_seq_cst (memory_order_acquire is not
         *                valid for a store; it is treated like seq_cst here)
         */
        void store(int m, memory_order order = memory_order_seq_cst) volatile {
            switch (order) {
            case memory_order_relaxed:
                // plain write, no ordering constraints
                f = m;
                break;

            case memory_order_release:
                // release: fence before the store, so earlier writes are
                // visible to a thread whose acquire-load sees the new value
                atomic_thread_fence(memory_order_release);
                f = m;
                break;

            case memory_order_seq_cst:
            case memory_order_acquire: // (invalid)
#ifdef _ARCH_PPC
                // PPC: full sync before the store is sufficient
                atomic_thread_fence(memory_order_seq_cst);
                f = m;
#else
                // elsewhere: release fence before the store plus a full
                // fence after it
                atomic_thread_fence(memory_order_release);
                f = m;
                atomic_thread_fence(memory_order_seq_cst);
#endif
                break;
            }
        }
    private:
        int f; // the wrapped value; accessed only via load()/store()
        atomic(const atomic&); // not allowed
        atomic& operator=(const atomic&); // not allowed
    };
209 |
} |
210 |
#endif |
211 |
#endif |