/[svn]/linuxsampler/trunk/src/common/lsatomic.h
ViewVC logotype

Annotation of /linuxsampler/trunk/src/common/lsatomic.h

Parent Directory | Revision Log


Revision 2343 - (hide annotations) (download) (as text)
Sun Apr 29 16:14:45 2012 UTC (11 years, 11 months ago) by persson
File MIME type: text/x-c++hdr
File size: 7135 byte(s)
* fixed configure script error with old autoconf versions
* LV2: use the new lv2 package if present
* lsatomic.h: use gcc provided atomic functions if building with gcc
  4.7 and C++11
* added comments in lsatomic.h

1 persson 1790 /***************************************************************************
2     * *
3 persson 2343 * Copyright (C) 2008-2012 Andreas Persson *
4 persson 1790 * *
5     * This program is free software; you can redistribute it and/or modify *
6     * it under the terms of the GNU General Public License as published by *
7     * the Free Software Foundation; either version 2 of the License, or *
8     * (at your option) any later version. *
9     * *
10     * This program is distributed in the hope that it will be useful, *
11     * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13     * GNU General Public License for more details. *
14     * *
15     * You should have received a copy of the GNU General Public License *
16     * along with this program; if not, write to the Free Software *
17     * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, *
18     * MA 02110-1301 USA *
19     ***************************************************************************/
20    
21     #ifndef LSATOMIC_H
22     #define LSATOMIC_H
23    
24 persson 2343 /** @file
25 persson 1790 *
26 persson 2343 * Implementation of a small subset of the C++11 atomic operations.
27 persson 1790 *
28 persson 2343 * Note: When working with multithreading on modern CPUs, it's
29     * important not only to make sure that concurrent access to shared
30     * variables is made atomically, but also to be aware of the order in
31     * which stores become visible to loads in other threads. For example,
32     * if x and y are shared variables with initial values of 0, the
33     * following program:
34     *
35     * @code
36     * // thread 1:
37     * x.store(1, memory_order_relaxed);
38     * r1 = y.load(memory_order_relaxed);
39     *
40     * // thread 2:
41     * y.store(1, memory_order_relaxed);
42     * r2 = x.load(memory_order_relaxed);
43     * @endcode
44     *
45     * would have a possible outcome of r1 == 0 and r2 == 0. The threads
46     * might for example run on separate CPU cores with separate caches,
47     * and the propagation of the store to the other core might be delayed
48     * and done after the loads. In that case, both loads will read the
49     * original value of 0 from the core's own cache.
50     *
51     * The C++11 style operations use the memory_order parameter to let
52     * the programmer control how stores to shared memory become visible to
53     * loads in other threads. In the example above, relaxed order was
54     * used, which allows the CPU and compiler to reorder the memory
55     * accesses very freely. If memory_order_seq_cst had been used
56     * instead, the r1 == 0 and r2 == 0 outcome would have been
57     * impossible, as sequential consistency means that the execution of
58     * the program can be modeled by simply interleaving the instructions
59     * of the threads.
60     *
61     * The default order is memory_order_seq_cst, as it is the easiest one
62     * to understand. It is however also the slowest. The relaxed order is
63     * the fastest, but it can't be used if the shared variable is used to
64     * synchronize threads for any other shared data. The third order is
65     * acquire/release, where an acquire-load is synchronizing with a
66     * release-store to the same variable.
67     *
68     * See for example http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync for
69     * more information about the memory order parameter.
70     *
71     * The supported operations of the implementation in this file are:
72     *
73 persson 1887 * - fences (acquire, release and seq_cst)
74 persson 1790 *
75     * - load and store of atomic<int> with relaxed, acquire/release or
76     * seq_cst memory ordering
77     *
78     * The supported architectures are x86 and powerpc.
79     */
80 persson 2343
81    
82     // if C++11 and gcc 4.7 or later is used, then use the standard
83     // implementation
84     #if __cplusplus >= 201103L && \
85     (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
86    
87     #include <atomic>
88    
namespace LinuxSampler {
    // Re-export the standard library atomics under the LinuxSampler
    // namespace, so client code is written the same way regardless of
    // which implementation this header selects.

    // The memory_order type itself. The hand-written implementation in
    // the other preprocessor branch declares LinuxSampler::memory_order,
    // so it has to be visible here as well for code that names the type
    // (for example to pass an ordering through a function parameter).
    using std::memory_order;

    // The memory orderings supported by both implementations.
    using std::memory_order_relaxed;
    using std::memory_order_acquire;
    using std::memory_order_release;
    using std::memory_order_seq_cst;

    // Fence function and the atomic class template.
    using std::atomic_thread_fence;
    using std::atomic;
}
97    
98     #else
99    
100    
namespace LinuxSampler {

    /**
     * Memory ordering constraints accepted by atomic_thread_fence() and
     * by atomic<int>::load() / atomic<int>::store().
     *
     * Only these four orderings are implemented.
     */
    enum memory_order {
        memory_order_relaxed, memory_order_acquire,
        memory_order_release, memory_order_seq_cst
    };

    /**
     * Issues a memory fence of the requested strength.
     *
     * - memory_order_relaxed: no fence at all.
     * - memory_order_acquire / memory_order_release: "lwsync" on 64 bit
     *   PowerPC, "sync" on 32 bit PowerPC, a compiler barrier on x86.
     * - memory_order_seq_cst: "sync" on PowerPC, a locked add to the
     *   stack on i386, "mfence" on x86_64.
     *
     * On any other architecture every non-relaxed fence is a full
     * barrier (__sync_synchronize()). A bare compiler barrier would
     * compile there as well, but would not order anything in hardware.
     *
     * @param order  strength of the fence
     */
    inline void atomic_thread_fence(memory_order order) {
        switch (order) {
        case memory_order_relaxed:
            break;

        case memory_order_acquire:
        case memory_order_release:
#if defined(_ARCH_PPC64)
            asm volatile("lwsync" : : : "memory");
#elif defined(_ARCH_PPC)
            asm volatile("sync" : : : "memory");
#elif defined(__i386__) || defined(__x86_64__)
            // x86: only the compiler has to be kept from reordering
            asm volatile("" : : : "memory");
#else
            // unknown architecture: full barrier is always sufficient
            __sync_synchronize();
#endif
            break;

        case memory_order_seq_cst:
        default: // unknown value: use the strongest fence
#if defined(_ARCH_PPC)
            asm volatile("sync" : : : "memory");
#elif defined(__i386__)
            asm volatile("lock; addl $0,0(%%esp)" : : : "memory");
#elif defined(__x86_64__)
            asm volatile("mfence" : : : "memory");
#else
            __sync_synchronize();
#endif
            break;
        }
    }

    template<typename T> class atomic;

    /**
     * Atomic integer offering load and store with relaxed,
     * acquire/release or sequentially consistent ordering.
     *
     * int is the only implemented type. Objects can't be copied or
     * assigned.
     */
    template<> class atomic<int> {
    public:
        /** Leaves the value uninitialized. */
        atomic() { }

        /** Initializes the value to @a m. */
        explicit atomic(int m) : f(m) { }

        /**
         * Reads the value.
         *
         * @param order  memory_order_relaxed, memory_order_acquire or
         *               memory_order_seq_cst. Anything else (for
         *               example memory_order_release, which is not a
         *               valid order for a load) is handled as
         *               memory_order_seq_cst.
         * @returns the current value
         */
        int load(memory_order order = memory_order_seq_cst) const volatile {
            if (order == memory_order_relaxed) {
                return f;
            }

            if (order != memory_order_acquire) {
                // seq_cst: full fence first, then continue with the
                // load-acquire below
                atomic_thread_fence(memory_order_seq_cst);
            }

            int m;
#ifdef _ARCH_PPC
            // PPC load-acquire: artificial dependency + isync
            asm volatile(
                "lwz%U1%X1 %0,%1\n\t"
                "cmpw %0,%0\n\t"
                "bne- 1f\n\t"
                "1: isync"
                : "=r" (m)
                : "m" (f)
                : "memory", "cr0");
#else
            m = f;
            // compiler barrier on x86, full barrier on architectures
            // without a dedicated implementation
            atomic_thread_fence(memory_order_acquire);
#endif
            return m;
        }

        /**
         * Writes the value.
         *
         * @param m      value to store
         * @param order  memory_order_relaxed, memory_order_release or
         *               memory_order_seq_cst. Anything else (for
         *               example memory_order_acquire, which is not a
         *               valid order for a store) is handled as
         *               memory_order_seq_cst, so the store is never
         *               skipped.
         */
        void store(int m, memory_order order = memory_order_seq_cst) volatile {
            if (order == memory_order_relaxed) {
                f = m;
                return;
            }

            // release and seq_cst both need the release fence before
            // the store
            atomic_thread_fence(memory_order_release);
            f = m;

            if (order != memory_order_release) {
                // seq_cst: additionally a full fence after the store
                atomic_thread_fence(memory_order_seq_cst);
            }
        }

    private:
        int f;
        atomic(const atomic&); // not allowed
        atomic& operator=(const atomic&); // not allowed
    };
}
199     #endif
200 persson 2343 #endif

  ViewVC Help
Powered by ViewVC