1 |
persson |
1790 |
/*************************************************************************** |
2 |
|
|
* * |
3 |
persson |
2452 |
* Copyright (C) 2008-2013 Andreas Persson * |
4 |
persson |
1790 |
* * |
5 |
|
|
* This program is free software; you can redistribute it and/or modify * |
6 |
|
|
* it under the terms of the GNU General Public License as published by * |
7 |
|
|
* the Free Software Foundation; either version 2 of the License, or * |
8 |
|
|
* (at your option) any later version. * |
9 |
|
|
* * |
10 |
|
|
* This program is distributed in the hope that it will be useful, * |
11 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of * |
12 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
13 |
|
|
* GNU General Public License for more details. * |
14 |
|
|
* * |
15 |
|
|
* You should have received a copy of the GNU General Public License * |
16 |
|
|
* along with this program; if not, write to the Free Software * |
17 |
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, * |
18 |
|
|
* MA 02110-1301 USA * |
19 |
|
|
***************************************************************************/ |
20 |
|
|
|
21 |
|
|
#ifndef LSATOMIC_H |
22 |
|
|
#define LSATOMIC_H |
23 |
|
|
|
24 |
persson |
2343 |
/** @file |
25 |
persson |
1790 |
* |
26 |
persson |
2343 |
* Implementation of a small subset of the C++11 atomic operations. |
27 |
persson |
1790 |
* |
28 |
persson |
2343 |
* Note: When working with multithreading on modern CPUs, it's |
29 |
|
|
* important not only to make sure that concurrent access to shared |
30 |
|
|
* variables is made atomically, but also to be aware of the order the |
31 |
|
|
* stores get visible to the loads in other threads. For example, if x |
32 |
|
|
* and y are shared variables with initial values of 0, the following |
33 |
|
|
* program: |
34 |
|
|
* |
35 |
|
|
* @code |
36 |
|
|
* // thread 1: |
37 |
|
|
* x.store(1, memory_order_relaxed); |
38 |
|
|
* r1 = y.load(memory_order_relaxed); |
39 |
|
|
* |
40 |
|
|
* // thread 2: |
41 |
|
|
* y.store(1, memory_order_relaxed); |
42 |
|
|
* r2 = x.load(memory_order_relaxed); |
43 |
|
|
* @endcode |
44 |
|
|
* |
45 |
|
|
* would have a possible outcome of r1 == 0 and r2 == 0. The threads |
46 |
|
|
* might for example run on separate CPU cores with separate caches, |
47 |
|
|
* and the propagation of the store to the other core might be delayed |
48 |
|
|
* and done after the loads. In that case, both loads will read the |
49 |
|
|
* original value of 0 from the core's own cache. |
50 |
|
|
* |
51 |
|
|
* The C++11 style operations use the memory_order parameter to let |
52 |
|
|
* the programmer control the way shared memory stores get visible to |
53 |
|
|
* loads in other threads. In the example above, relaxed order was |
54 |
|
|
* used, which allows the CPU and compiler to reorder the memory |
55 |
|
|
* accesses very freely. If memory_order_seq_cst had been used |
56 |
|
|
* instead, the r1 == 0 and r2 == 0 outcome would have been |
57 |
|
|
* impossible, as sequential consistency means that the execution of |
58 |
|
|
* the program can be modeled by simply interleaving the instructions |
59 |
|
|
* of the threads. |
60 |
|
|
* |
61 |
|
|
* The default order is memory_order_seq_cst, as it is the easiest one |
62 |
|
|
* to understand. It is however also the slowest. The relaxed order is |
63 |
|
|
* the fastest, but it can't be used if the shared variable is used to |
64 |
|
|
* synchronize threads for any other shared data. The third order is |
65 |
|
|
* acquire/release, where an acquire-load is synchronizing with a |
66 |
|
|
* release-store to the same variable. |
67 |
|
|
* |
68 |
|
|
* See for example http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync for |
69 |
|
|
* more information about the memory order parameter. |
70 |
|
|
* |
71 |
|
|
* The supported operations of the implementation in this file are: |
72 |
|
|
* |
73 |
persson |
1887 |
* - fences (acquire, release and seq_cst) |
74 |
persson |
1790 |
* |
75 |
|
|
* - load and store of atomic<int> with relaxed, acquire/release or |
76 |
|
|
* seq_cst memory ordering |
77 |
|
|
* |
78 |
persson |
2452 |
* The supported architectures are x86, powerpc and ARMv7. |
79 |
persson |
1790 |
*/ |
80 |
persson |
2343 |
|
81 |
|
|
|
82 |
|
|
// if C++11 and gcc 4.7 or later is used, then use the standard |
83 |
|
|
// implementation |
84 |
|
|
#if __cplusplus >= 201103L && \ |
85 |
|
|
(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) |
86 |
|
|
|
87 |
|
|
#include <atomic> |
88 |
|
|
|
89 |
persson |
1790 |
namespace LinuxSampler {
    // The standard library already provides everything this header
    // offers, so simply re-export the std names into the LinuxSampler
    // namespace. Callers can then write LinuxSampler::atomic<int>,
    // LinuxSampler::memory_order_acquire etc. regardless of which
    // implementation was selected by the preprocessor.

    // the atomic type and the stand-alone fence
    using std::atomic;
    using std::atomic_thread_fence;

    // the memory orderings supported by this header
    using std::memory_order_seq_cst;
    using std::memory_order_release;
    using std::memory_order_acquire;
    using std::memory_order_relaxed;
}
97 |
|
|
|
98 |
|
|
#else |
99 |
|
|
|
100 |
|
|
|
101 |
|
|
namespace LinuxSampler {
    /**
     * Memory ordering constraints accepted by atomic_thread_fence() and
     * by the load / store operations of atomic<int>. Only the subset of
     * the C++11 orderings that this header implements is declared.
     */
    enum memory_order {
        memory_order_relaxed, memory_order_acquire,
        memory_order_release, memory_order_seq_cst
    };

    /**
     * Stand-alone memory fence.
     *
     * - memory_order_relaxed: no operation.
     * - memory_order_acquire / memory_order_release: lwsync on 64 bit
     *   PowerPC, sync on 32 bit PowerPC, dmb on ARMv7-A and a pure
     *   compiler barrier on x86.
     * - memory_order_seq_cst: sync on PowerPC, a locked add on i386,
     *   mfence on x86_64 and dmb on ARMv7-A.
     *
     * On any other architecture a full barrier is issued with the
     * compiler builtin __sync_synchronize(), instead of only preventing
     * compiler reordering, so that the fence (and the atomic<int>
     * operations built on it) still order memory accesses in hardware.
     *
     * @param order - requested ordering constraint
     */
    inline void atomic_thread_fence(memory_order order) {
        switch (order) {
            case memory_order_relaxed:
                break;

            case memory_order_acquire:
            case memory_order_release:
#ifdef _ARCH_PPC64
                asm volatile("lwsync" : : : "memory");
#elif defined(_ARCH_PPC)
                asm volatile("sync" : : : "memory");
#elif defined(__ARM_ARCH_7A__)
                asm volatile("dmb" : : : "memory");
#elif defined(__i386__) || defined(__x86_64__)
                // x86 only needs the compiler to be kept from
                // reordering for acquire/release semantics
                asm volatile("" : : : "memory");
#else
                // unknown architecture: a compiler barrier alone is not
                // enough on weakly ordered CPUs, use a full barrier
                __sync_synchronize();
#endif
                break;

            case memory_order_seq_cst:
#ifdef _ARCH_PPC
                asm volatile("sync" : : : "memory");
#elif defined(__i386__)
                asm volatile("lock; addl $0,0(%%esp)" : : : "memory");
#elif defined(__x86_64__)
                asm volatile("mfence" : : : "memory");
#elif defined(__ARM_ARCH_7A__)
                asm volatile("dmb" : : : "memory");
#else
                // unknown architecture: fall back to the compiler's
                // full memory barrier
                __sync_synchronize();
#endif
                break;
        }
    }

    template<typename T> class atomic;

    /**
     * Minimal replacement for std::atomic<int>, offering only load()
     * and store(). Objects can not be copied or assigned.
     */
    template<> class atomic<int> { // int is the only implemented type
    public:
        /** Leaves the contained value uninitialized. */
        atomic() { }

        /** Initializes the contained value with @a m. */
        explicit atomic(int m) : f(m) { }

        /**
         * Reads the contained value.
         *
         * memory_order_release is not a valid ordering for a load and
         * is handled like memory_order_seq_cst.
         *
         * @param order - ordering constraint, seq_cst by default
         * @returns the value read
         */
        int load(memory_order order = memory_order_seq_cst) const volatile {
            int m;
            switch (order) {
                case memory_order_relaxed:
                    m = f;
                    break;

                case memory_order_seq_cst:
                case memory_order_release: // (invalid)
#ifdef _ARCH_PPC
                    // on PowerPC a seq_cst load is a full fence
                    // followed by a load-acquire
                    atomic_thread_fence(memory_order_seq_cst);
#endif
                    // fall-through

                case memory_order_acquire:
#ifdef _ARCH_PPC
                    // PPC load-acquire: artificial dependency + isync
                    asm volatile(
                        "lwz%U1%X1 %0,%1\n\t"
                        "cmpw %0,%0\n\t"
                        "bne- 1f\n\t"
                        "1: isync"
                        : "=r" (m)
                        : "m" (f)
                        : "memory", "cr0");
#else
                    // plain load followed by an acquire fence
                    m = f;
                    atomic_thread_fence(memory_order_acquire);
#endif
                    break;
            }
            return m;
        }

        /**
         * Writes @a m to the contained value.
         *
         * memory_order_acquire is not a valid ordering for a store and
         * is handled like memory_order_seq_cst.
         *
         * @param m - value to store
         * @param order - ordering constraint, seq_cst by default
         */
        void store(int m, memory_order order = memory_order_seq_cst) volatile {
            switch (order) {
                case memory_order_relaxed:
                    f = m;
                    break;

                case memory_order_release:
                    // release fence before the store
                    atomic_thread_fence(memory_order_release);
                    f = m;
                    break;

                case memory_order_seq_cst:
                case memory_order_acquire: // (invalid)
#ifdef _ARCH_PPC
                    // on PowerPC the full fence is placed before the
                    // store only (the seq_cst load issues its own)
                    atomic_thread_fence(memory_order_seq_cst);
                    f = m;
#else
                    // release fence before and full fence after the
                    // store
                    atomic_thread_fence(memory_order_release);
                    f = m;
                    atomic_thread_fence(memory_order_seq_cst);
#endif
                    break;
            }
        }
    private:
        int f; // the contained value
        atomic(const atomic&); // not allowed
        atomic& operator=(const atomic&); // not allowed
    };
}
210 |
|
|
#endif |
211 |
persson |
2343 |
#endif |