/***************************************************************************
 *                                                                         *
 *   Copyright (C) 2008-2013 Andreas Persson                               *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the Free Software           *
 *   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,                *
 *   MA 02110-1301 USA                                                     *
 ***************************************************************************/
20 |
|
21 |
#ifndef LSATOMIC_H |
22 |
#define LSATOMIC_H |
23 |
|
24 |
/** @file
 *
 * Implementation of a small subset of the C++11 atomic operations.
 *
 * Note: When working with multithreading on modern CPUs, it's
 * important not only to make sure that concurrent access to shared
 * variables is made atomically, but also to be aware of the order the
 * stores get visible to the loads in other threads. For example, if x
 * and y are shared variables with initial values of 0, the following
 * program:
 *
 * @code
 * // thread 1:
 * x.store(1, memory_order_relaxed);
 * r1 = y.load(memory_order_relaxed);
 *
 * // thread 2:
 * y.store(1, memory_order_relaxed);
 * r2 = x.load(memory_order_relaxed);
 * @endcode
 *
 * would have a possible outcome of r1 == 0 and r2 == 0. The threads
 * might for example run on separate CPU cores with separate caches,
 * and the propagation of the store to the other core might be delayed
 * and done after the loads. In that case, both loads will read the
 * original value of 0 from the core's own cache.
 *
 * The C++11 style operations use the memory_order parameter to let
 * the programmer control the way shared memory stores get visible to
 * loads in other threads. In the example above, relaxed order was
 * used, which allows the CPU and compiler to reorder the memory
 * accesses very freely. If memory_order_seq_cst had been used
 * instead, the r1 == 0 and r2 == 0 outcome would have been
 * impossible, as sequential consistency means that the execution of
 * the program can be modeled by simply interleaving the instructions
 * of the threads.
 *
 * The default order is memory_order_seq_cst, as it is the easiest one
 * to understand. It is however also the slowest. The relaxed order is
 * the fastest, but it can't be used if the shared variable is used to
 * synchronize threads for any other shared data. The third order is
 * acquire/release, where an acquire-load is synchronizing with a
 * release-store to the same variable.
 *
 * See for example http://gcc.gnu.org/wiki/Atomic/GCCMM/AtomicSync for
 * more information about the memory order parameter.
 *
 * The supported operations of the implementation in this file are:
 *
 * - fences (acquire, release and seq_cst)
 *
 * - load and store of atomic<int> with relaxed, acquire/release or
 *   seq_cst memory ordering
 *
 * The supported architectures are x86, powerpc and ARMv7.
 */
80 |
|
81 |
|
82 |
// if C++11 and gcc 4.7 or later is used, then use the standard
// implementation
84 |
#if __cplusplus >= 201103L && \ |
85 |
(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) |
86 |
|
87 |
#include <atomic> |
88 |
|
89 |
namespace LinuxSampler {
    // Re-export the needed pieces of <atomic> into the LinuxSampler
    // namespace so the rest of the code base can use them without the
    // std:: prefix, exactly as with the fallback implementation below.
    using std::atomic;
    using std::atomic_thread_fence;

    // Memory ordering constants accepted by load(), store() and
    // atomic_thread_fence().
    using std::memory_order_acquire;
    using std::memory_order_relaxed;
    using std::memory_order_release;
    using std::memory_order_seq_cst;
}
97 |
|
98 |
#else |
99 |
|
100 |
|
101 |
namespace LinuxSampler { |
102 |
enum memory_order { |
103 |
memory_order_relaxed, memory_order_acquire, |
104 |
memory_order_release, memory_order_seq_cst |
105 |
}; |
106 |
|
107 |
    /**
     * Subset of C++11 std::atomic_thread_fence: establishes the requested
     * memory ordering between the memory accesses before and after the
     * call, without being tied to any particular atomic variable.
     *
     * All branches include at least a compiler barrier (the "memory"
     * clobber); stronger orderings additionally emit an architecture-
     * specific CPU barrier instruction.
     *
     * @param order - memory_order_relaxed, memory_order_acquire,
     *                memory_order_release or memory_order_seq_cst
     */
    inline void atomic_thread_fence(memory_order order) {
        switch (order) {
        case memory_order_relaxed:
            // relaxed: no ordering requested, so no barrier at all
            break;

        case memory_order_acquire:
        case memory_order_release:
            // acquire and release fences are implemented identically here
#ifdef _ARCH_PPC64
            // 64-bit PowerPC: lwsync is sufficient (and lighter than sync)
            asm volatile("lwsync" : : : "memory");
#elif defined(_ARCH_PPC)
            asm volatile("sync" : : : "memory");
#elif defined(__ARM_ARCH_7A__)
            asm volatile("dmb" : : : "memory");
#else
            // x86 and the default: compiler barrier only — the empty asm
            // with a "memory" clobber stops the compiler from reordering
            // memory accesses across the fence (x86 hardware ordering is
            // strong enough for acquire/release)
            asm volatile("" : : : "memory");
#endif
            break;

        case memory_order_seq_cst:
            // sequentially consistent fence: needs a full CPU barrier
#ifdef _ARCH_PPC
            asm volatile("sync" : : : "memory");
#elif defined(__i386__)
            // locked add of 0 to the top of the stack acts as a full
            // barrier without requiring SSE2's mfence instruction
            asm volatile("lock; addl $0,0(%%esp)" : : : "memory");
#elif defined(__x86_64__)
            asm volatile("mfence" : : : "memory");
#elif defined(__ARM_ARCH_7A__)
            asm volatile("dmb" : : : "memory");
#else
            // NOTE(review): unknown architectures get only a compiler
            // barrier here — this does not restrain the CPU itself
            asm volatile("" : : : "memory");
#endif
            break;
        }
    }
140 |
|
141 |
    // The primary template is declared but intentionally left undefined:
    // only the atomic<int> specialization below is implemented.
    template<typename T> class atomic;
    template<> class atomic<int> { // int is the only implemented type
    public:
        atomic() { }
        explicit atomic(int m) : f(m) { }

        /**
         * Atomically reads the stored value.
         *
         * @param order - memory_order_relaxed, memory_order_acquire or
         *                memory_order_seq_cst (memory_order_release is not
         *                valid for a load; it is treated like seq_cst here)
         * @returns the current value
         */
        int load(memory_order order = memory_order_seq_cst) const volatile {
            int m;
            switch (order) {
            case memory_order_relaxed:
                // plain read, no ordering constraints
                m = f;
                break;

            case memory_order_seq_cst:
            case memory_order_release: // (invalid)
#ifdef _ARCH_PPC
                // PPC: a full barrier before the load upgrades the
                // acquire-load below to sequential consistency
                atomic_thread_fence(memory_order_seq_cst);
#endif
                // fall-through

            case memory_order_acquire:
#ifdef _ARCH_PPC
                // PPC load-acquire: artificial dependency + isync
                // (the always-false compare/branch on the loaded value
                // followed by isync keeps later accesses from being
                // executed before the load; clobbers cr0)
                asm volatile(
                    "lwz%U1%X1 %0,%1\n\t"
                    "cmpw %0,%0\n\t"
                    "bne- 1f\n\t"
                    "1: isync"
                    : "=r" (m)
                    : "m" (f)
                    : "memory", "cr0");
#else
                // other architectures: plain load followed by an
                // acquire fence
                m = f;
                atomic_thread_fence(memory_order_acquire);
#endif
                break;
            }
            return m;
        }

        /**
         * Atomically stores a new value.
         *
         * @param m     - value to store
         * @param order - memory_order_relaxed, memory_order_release or
         *                memory_order_seq_cst (memory_order_acquire is not
         *                valid for a store; it is treated like seq_cst here)
         */
        void store(int m, memory_order order = memory_order_seq_cst) volatile {
            switch (order) {
            case memory_order_relaxed:
                // plain write, no ordering constraints
                f = m;
                break;

            case memory_order_release:
                // release: fence before the store, so earlier writes are
                // visible to a thread whose acquire-load sees the new value
                atomic_thread_fence(memory_order_release);
                f = m;
                break;

            case memory_order_seq_cst:
            case memory_order_acquire: // (invalid)
#ifdef _ARCH_PPC
                // PPC: full sync before the store is sufficient
                atomic_thread_fence(memory_order_seq_cst);
                f = m;
#else
                // elsewhere: release fence before the store plus a full
                // fence after it
                atomic_thread_fence(memory_order_release);
                f = m;
                atomic_thread_fence(memory_order_seq_cst);
#endif
                break;
            }
        }
    private:
        int f; // the wrapped value; accessed only via load()/store()
        atomic(const atomic&); // not allowed
        atomic& operator=(const atomic&); // not allowed
    };
209 |
} |
210 |
#endif |
211 |
#endif |