/[svn]/linuxsampler/trunk/src/common/lsatomic.h
ViewVC logotype

Contents of /linuxsampler/trunk/src/common/lsatomic.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2452 - (show annotations) (download) (as text)
Mon May 13 17:26:29 2013 UTC (7 years, 2 months ago) by persson
File MIME type: text/x-c++hdr
File size: 7446 byte(s)
* lsatomic.h fixes: seq_cst load and store were suboptimal for x86 and
  broken for ppc64. (Seq_cst loads and stores are actually not used in
  LS, so the bug wasn't noticeable.)
* lsatomic.h: added ARMv7 support

1 /***************************************************************************
2 * *
3 * Copyright (C) 2008-2013 Andreas Persson *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the Free Software *
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, *
18 * MA 02110-1301 USA *
19 ***************************************************************************/
20
21 #ifndef LSATOMIC_H
22 #define LSATOMIC_H
23
24 /** @file
25 *
26 * Implementation of a small subset of the C++11 atomic operations.
27 *
28 * Note: When working with multithreading on modern CPUs, it's
29 * important not only to make sure that concurrent access to shared
30 * variables is made atomically, but also to be aware of the order the
31 * stores get visible to the loads in other threads. For example, if x
32 * and y are shared variables with initial values of 0, the following
33 * program:
34 *
35 * @code
36 * // thread 1:
37 * x.store(1, memory_order_relaxed);
38 * r1 = y.load(memory_order_relaxed);
39 *
40 * // thread 2:
41 * y.store(1, memory_order_relaxed);
42 * r2 = x.load(memory_order_relaxed);
43 * @endcode
44 *
45 * would have a possible outcome of r1 == 0 and r2 == 0. The threads
46 * might for example run on separate CPU cores with separate caches,
47 * and the propagation of the store to the other core might be delayed
48 * and done after the loads. In that case, both loads will read the
49 * original value of 0 from the core's own cache.
50 *
51 * The C++11 style operations use the memory_order parameter to let
52 * the programmer control the way shared memory stores get visible to
53 * loads in other threads. In the example above, relaxed order was
54 * used, which allows the CPU and compiler to reorder the memory
55 * accesses very freely. If memory_order_seq_cst had been used
56 * instead, the r1 == 0 and r2 == 0 outcome would have been
57 * impossible, as sequential consistency means that the execution of
58 * the program can be modeled by simply interleaving the instructions
59 * of the threads.
60 *
61 * The default order is memory_order_seq_cst, as it is the easiest one
62 * to understand. It is however also the slowest. The relaxed order is
63 * the fastest, but it can't be used if the shared variable is used to
64 * synchronize threads for any other shared data. The third order is
65 * acquire/release, where an acquire-load is synchronizing with a
66 * release-store to the same variable.
67 *
68 * See for example http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync for
69 * more information about the memory order parameter.
70 *
71 * The supported operations of the implementation in this file are:
72 *
73 * - fences (acquire, release and seq_cst)
74 *
75 * - load and store of atomic<int> with relaxed, acquire/release or
76 * seq_cst memory ordering
77 *
78 * The supported architectures are x86, powerpc and ARMv7.
79 */
80
81
82 // if C++11 and gcc 4.7 or later is used, then use the standard
83 // implementation
84 #if __cplusplus >= 201103L && \
85 (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
86
87 #include <atomic>
88
// C++11 (or newer) with gcc >= 4.7: the compiler ships a working
// <atomic>, so just re-export the needed standard names into the
// LinuxSampler namespace instead of using the hand-rolled fallback.
namespace LinuxSampler {
    using std::memory_order_relaxed;
    using std::memory_order_acquire;
    using std::memory_order_release;
    using std::memory_order_seq_cst;
    using std::atomic_thread_fence;
    using std::atomic;
}
97
98 #else
99
100
101 namespace LinuxSampler {
// Subset of C++11 std::memory_order implemented by this fallback.
// Only the four orderings used by LinuxSampler are provided;
// consume and acq_rel are intentionally omitted.
enum memory_order {
    memory_order_relaxed, memory_order_acquire,
    memory_order_release, memory_order_seq_cst
};
106
/**
 * Stand-in for C++11 std::atomic_thread_fence.
 *
 * Emits the architecture-specific hardware barrier required for the
 * requested ordering. In every case the "memory" clobber also acts as
 * a compiler barrier, preventing the compiler itself from reordering
 * memory accesses across the fence.
 *
 * @param order - the ordering constraint the fence must enforce
 */
inline void atomic_thread_fence(memory_order order) {
    switch (order) {
    case memory_order_relaxed:
        // no ordering requested: neither a hardware nor a compiler
        // barrier is needed
        break;

    case memory_order_acquire:
    case memory_order_release:
#ifdef _ARCH_PPC64
        // lwsync orders all access pairs except store->load, which is
        // enough for both acquire and release fences
        asm volatile("lwsync" : : : "memory");
#elif defined(_ARCH_PPC)
        // NOTE(review): 32-bit PPC uses the heavier full "sync" here —
        // presumably to support cores lacking lwsync; confirm before
        // changing
        asm volatile("sync" : : : "memory");
#elif defined(__ARM_ARCH_7A__)
        // ARMv7 full data memory barrier
        asm volatile("dmb" : : : "memory");
#else
        // x86 and other targets: assumed strongly ordered, so plain
        // loads/stores already carry acquire/release semantics and a
        // pure compiler barrier suffices
        asm volatile("" : : : "memory");
#endif
        break;

    case memory_order_seq_cst:
#ifdef _ARCH_PPC
        // full barrier, including store->load ordering (both PPC32/64)
        asm volatile("sync" : : : "memory");
#elif defined(__i386__)
        // a locked read-modify-write acts as a full fence; used instead
        // of mfence — presumably because the i386 baseline predates
        // SSE2 (which introduced mfence) — TODO confirm
        asm volatile("lock; addl $0,0(%%esp)" : : : "memory");
#elif defined(__x86_64__)
        // full serializing memory fence
        asm volatile("mfence" : : : "memory");
#elif defined(__ARM_ARCH_7A__)
        asm volatile("dmb" : : : "memory");
#else
        // unknown architecture: compiler barrier only — this is NOT a
        // hardware fence, so sequential consistency is not actually
        // guaranteed on weakly ordered CPUs not listed above
        asm volatile("" : : : "memory");
#endif
        break;
    }
}
140
// Minimal stand-in for C++11 std::atomic. Only atomic<int> is
// implemented (declared generic, specialized below), and only load and
// store are supported — no exchange, fetch_add, compare_exchange, etc.
template<typename T> class atomic;
template<> class atomic<int> { // int is the only implemented type
public:
    // Default constructor leaves the value uninitialized, matching
    // std::atomic's trivial default construction.
    atomic() { }

    // Initializes the value non-atomically (safe before the object is
    // shared between threads).
    explicit atomic(int m) : f(m) { }

    /**
     * Atomically reads the value with the given memory ordering.
     * Note: passing memory_order_release to a load is invalid per the
     * C++11 model; this implementation maps it to seq_cst instead of
     * rejecting it.
     *
     * @param order - ordering constraint (default: seq_cst)
     * @returns the current value
     */
    int load(memory_order order = memory_order_seq_cst) const volatile {
        int m;
        switch (order) {
        case memory_order_relaxed:
            // plain read; the volatile qualifier on f prevents the
            // compiler from caching or eliding the access
            m = f;
            break;

        case memory_order_seq_cst:
        case memory_order_release: // (invalid)
#ifdef _ARCH_PPC
            // PPC seq_cst load: full sync before the load, then the
            // acquire sequence below
            atomic_thread_fence(memory_order_seq_cst);
#endif
            // fall-through

        case memory_order_acquire:
#ifdef _ARCH_PPC
            // PPC load-acquire idiom: load, then a compare of the
            // loaded register with itself plus a never-taken branch to
            // an isync. The bogus compare/branch creates a control
            // dependency on the loaded value, so the isync orders all
            // subsequent accesses after this load.
            asm volatile(
                "lwz%U1%X1 %0,%1\n\t"
                "cmpw %0,%0\n\t"
                "bne- 1f\n\t"
                "1: isync"
                : "=r" (m)
                : "m" (f)
                : "memory", "cr0");
#else
            // non-PPC: plain load followed by an acquire fence (a pure
            // compiler barrier on x86, dmb on ARMv7)
            m = f;
            atomic_thread_fence(memory_order_acquire);
#endif
            break;
        }
        return m;
    }

    /**
     * Atomically writes a new value with the given memory ordering.
     * Note: passing memory_order_acquire to a store is invalid per the
     * C++11 model; this implementation maps it to seq_cst instead of
     * rejecting it.
     *
     * @param m     - value to store
     * @param order - ordering constraint (default: seq_cst)
     */
    void store(int m, memory_order order = memory_order_seq_cst) volatile {
        switch (order) {
        case memory_order_relaxed:
            // plain write, no ordering
            f = m;
            break;

        case memory_order_release:
            // release: fence before the store so all prior accesses
            // become visible before the new value does
            f = m;
            break;

        case memory_order_seq_cst:
        case memory_order_acquire: // (invalid)
#ifdef _ARCH_PPC
            // PPC: a full sync before the store is sufficient for a
            // seq_cst store (seq_cst loads carry the trailing barrier)
            atomic_thread_fence(memory_order_seq_cst);
            f = m;
#else
            // other targets: release fence before the store, full
            // fence after it to order the store against later loads
            atomic_thread_fence(memory_order_release);
            f = m;
            atomic_thread_fence(memory_order_seq_cst);
#endif
            break;
        }
    }
private:
    int f;  // the stored value; accessed through volatile this-pointers
    atomic(const atomic&); // not allowed
    atomic& operator=(const atomic&); // not allowed
};
209 }
210 #endif
211 #endif

  ViewVC Help
Powered by ViewVC