// src/common/RTMath.h

/***************************************************************************
 *                                                                         *
 *   LinuxSampler - modular, streaming capable sampler                     *
 *                                                                         *
 *   Copyright (C) 2003, 2004 by Benno Senoner and Christian Schoenebeck   *
 *   Copyright (C) 2005 - 2016 Christian Schoenebeck                       *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the Free Software           *
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston,                 *
 *   MA  02111-1307  USA                                                   *
 ***************************************************************************/

#ifndef __RT_MATH_H__
#define __RT_MATH_H__

#include <math.h>
#include <stdint.h>
#include "global_private.h"

/// Twelve-hundredth root of two, i.e. the frequency ratio of a single cent.
/// Needed for calculating the frequency ratio used to pitch a sample.
#define TWELVEHUNDREDTH_ROOT_OF_TWO     1.000577789506555

/// Selects which low level implementation a math template is compiled for.
enum implementation_t {
    CPP             = 0, ///< plain C/C++ expressions
    ASM_X86_MMX_SSE = 1  ///< x86 inline assembly using scalar SSE instructions
};

/** @brief Real Time Math Base Class
 *
 * Math and timing helpers for real time operation. This base class contains
 * all non-template methods, i.e. everything that does not depend on the
 * implementation selected for the derived __RTMath template.
 */
class RTMathBase {
    public:
        /**
         * High resolution time stamp.
         */
        typedef uint32_t time_stamp_t;

        /**
         * Amount of microseconds, as returned by unsafeMicroSeconds().
         */
        typedef uint64_t usecs_t;

        /**
         * We read the processor's cycle count register as a reference
         * for the real time. These are of course only abstract values
         * with arbitrary time entity, but that's not a problem as long
         * as we calculate relatively.
         *
         * @see unsafeMicroSeconds()
         */
        static time_stamp_t CreateTimeStamp();

        /**
         * Calculates the frequency ratio for a pitch value given in cents
         * (assuming equal tempered scale of course, divided into 12
         * semitones per octave and 100 cents per semitone).
         *
         * The result is linearly interpolated between the two lookup table
         * entries at index int(Cents) and int(Cents) + 1. The table access
         * is @b not bounds checked.
         *
         * Note: CONFIG_MAX_PITCH (defined in config.h) has to be defined to an
         * appropriate value, otherwise the behavior of this function is
         * undefined, but most probably if CONFIG_MAX_PITCH is too small, the
         * application will crash due to segmentation fault here.
         *
         * @param Cents - pitch value in cents (+1200 cents means +1 octave)
         * @returns  frequency ratio (e.g. +2.0 for +1 octave)
         *
         * @see CentsToFreqRatioUnlimited() for values outside the table range
         */
        inline static double CentsToFreqRatio(double Cents) {
            int   index_int   = (int) (Cents);      // integer index (truncated towards zero)
            float index_fract = Cents - index_int;  // fractional part of index
            return pCentsToFreqTable[index_int]
                 + index_fract * (pCentsToFreqTable[index_int+1] - pCentsToFreqTable[index_int]);
        }

        /**
         * Slower version of CentsToFreqRatio(), for big values.
         *
         * The pitch is split into a whole number of octaves and a remainder
         * of less than one octave. Only the remainder is passed to
         * CentsToFreqRatio(), the octaves are applied afterwards as a power
         * of two.
         *
         * @param Cents - pitch value in cents (+1200 cents means +1 octave)
         * @returns  frequency ratio (e.g. +2.0 for +1 octave)
         */
        static double CentsToFreqRatioUnlimited(double Cents) {
            const int octaves = int(Cents / 1200);
            // Multiply in double precision, so the product cannot overflow
            // the int range for very large octave counts.
            const double x = CentsToFreqRatio(Cents - octaves * 1200.0);
            // ldexp() computes x * 2^octaves for positive and negative
            // octave counts alike. An integer shift (1 << octaves) would be
            // undefined as soon as the shift count reaches the bit width of
            // int.
            return ldexp(x, octaves);
        }

        /**
         * Inverse function to CentsToFreqRatio(). This function is a bit
         * slow, so it should not be called too frequently.
         *
         * @param FreqRatio - frequency ratio (e.g. 2.0 for +1 octave)
         * @returns pitch value in cents, calculated as logarithm of
         *          @a FreqRatio to the base TWELVEHUNDREDTH_ROOT_OF_TWO
         */
        static double FreqRatioToCents(double FreqRatio) {
            return log(FreqRatio) / log(TWELVEHUNDREDTH_ROOT_OF_TWO);
        }

        /**
         * Calculates the linear ratio value representation (linear scale)
         * of the @a decibel value provided (exponential scale).
         *
         * The context of audio acoustic sound pressure levels is assumed, and
         * hence the field version of the dB unit is used here (which uses a
         * linear factor of 20). This function is a bit slow, so it should
         * not be called too frequently.
         *
         * @param decibel - sound pressure level in dB
         * @returns linear ratio of the supplied dB value
         */
        static float DecibelToLinRatio(float decibel) {
            return powf(10.f, decibel / 20.f);
        }

        /**
         * Calculates the relatively summed average of a set of values.
         *
         * The new average is calculated as
         * current + (sample - current) / n, so @a n is used as divisor and
         * must not be zero.
         *
         * @param current - the current average value of all previously summed values
         * @param sample - new value to be applied as summed average to the existing values
         * @param n - amount of sample values applied so far
         * @returns new average value of all summed values (including the new @a sample)
         */
        template<typename T_int>
        inline static float RelativeSummedAvg(float current, float sample, T_int n) {
            return current + (sample - current) / float(n);
        }

        /**
         * Clock source to use for getting the current time.
         */
        enum clock_source_t {
            real_clock,    ///< Use this to measure time that passed in reality (no matter if process got suspended).
            process_clock, ///< Use this to measure only the CPU execution time of the current process (if the process got suspended, the clock is paused as well).
            thread_clock   ///< Use this to measure only the CPU execution time of the current thread (if the process got suspended or another thread is executed, the clock is paused as well).
        };

        /**
         * Returns a time stamp of the current time in microseconds (in
         * probably real-time @b unsafe way). There is no guarantee about
         * what the returned amount of microseconds relates to (i.e.
         * microseconds since epoch, microseconds since system uptime, ...).
         * So you should only use it to calculate time differences between
         * values taken with this method.
         *
         * @b CAUTION: This method may @b NOT be real-time safe! On some
         * systems it could be RT safe, but there is no guarantee whatsoever!
         * So this method should only be used for debugging, benchmarking and
         * other developing purposes!
         *
         * For creating time stamps in real-time context, use
         * CreateTimeStamp() instead.
         *
         * @param source - the actual clock to use for getting the current
         *                 time, note that the various clock sources may not
         *                 be implemented on all systems
         * @returns time stamp in microseconds
         *
         * @see CreateTimeStamp()
         */
        static usecs_t unsafeMicroSeconds(clock_source_t source);

    private:
        /// Lookup table read by CentsToFreqRatio(), indexed by whole cents.
        /// Defined outside of this header.
        static float* pCentsToFreqTable;

        /// Presumably creates the table pCentsToFreqTable points to;
        /// implemented outside of this header (TODO confirm).
        static float* InitCentsToFreqTable();
};

/** @brief Real Time Math
 *
 * This is a template which provides customized methods for the desired low
 * level implementation. The ASM_X86_MMX_SSE implementation of each method
 * for example doesn't use 387 FPU instruction. This is needed for MMX
 * algorithms which do not allow mixed MMX and 387 instructions.
 *
 * The assembly variants are only compiled if both CONFIG_ASM and ARCH_X86
 * are set; otherwise every method falls back to the plain C++ expression.
 * Every inline assembly statement lists the SSE registers it overwrites as
 * clobbers, so the compiler does not keep live values in them.
 */
template<implementation_t IMPL = CPP>
class __RTMath : public RTMathBase {
    public:
        /**
         * Converts a float to int by truncation.
         *
         * @param a - value to convert
         * @returns @a a truncated to an integer
         */
        inline static int Int(const float a) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    int ret;
                    asm (
                        "cvttss2si %1, %0  # convert to int\n\t"
                        : "=r" (ret)
                        : "m" (a)
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    return (int) a;
                }
            }
        }

        /**
         * Converts any other type (doubles and everything else except
         * floats) to int by a plain cast.
         */
        template<class T_a> inline static int Int(const T_a a) {
            return (int) a;
        }

        /**
         * Converts an int to float.
         *
         * @param a - value to convert
         * @returns @a a as float
         */
        inline static float Float(const int a) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    float ret;
                    asm (
                        "cvtsi2ss %1, %%xmm0  # convert to float\n\t"
                        "movss    %%xmm0,%0   # output\n\t"
                        : "=m" (ret)
                        : "r" (a)
                        : "%xmm0" // scratch register used by the statement
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    return (float) a;
                }
            }
        }

#if 0
        //for everything except ints
        template<class T_a> inline static float Float(T_a a) {
            return (float) a;
        }
#endif

        /**
         * Adds two floats.
         *
         * @returns a + b
         */
        inline static float Sum(const float& a, const float& b) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    float ret;
                    asm (
                        "movss    %1, %%xmm0  # load a\n\t"
                        "addss    %2, %%xmm0  # a + b\n\t"
                        "movss    %%xmm0, %0  # output\n\t"
                        : "=m" (ret)
                        : "m" (a), "m" (b)
                        : "%xmm0"
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    return (a + b);
                }
            }
        }

        /**
         * Adds two values of arbitrary types. The result is converted to
         * the type of the first argument.
         */
        template<class T_a, class T_b> inline static T_a Sum(const T_a a, const T_b b) {
            return (a + b);
        }

        /**
         * Subtracts two floats.
         *
         * @returns a - b
         */
        inline static float Sub(const float& a, const float& b) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    float ret;
                    asm (
                        "movss    %1, %%xmm0  # load a\n\t"
                        "subss    %2, %%xmm0  # a - b\n\t"
                        "movss    %%xmm0, %0  # output\n\t"
                        : "=m" (ret)
                        : "m" (a), "m" (b)
                        : "%xmm0"
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    return (a - b);
                }
            }
        }

        /**
         * Subtracts two values of arbitrary types. The result is converted
         * to the type of the first argument.
         */
        template<class T_a, class T_b> inline static T_a Sub(const T_a a, const T_b b) {
            return (a - b);
        }

        /**
         * Multiplies two floats.
         *
         * @returns a * b
         */
        inline static float Mul(const float a, const float b) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    float ret;
                    asm (
                        "movss    %1, %%xmm0  # load a\n\t"
                        "mulss    %2, %%xmm0  # a * b\n\t"
                        "movss    %%xmm0, %0  # output\n\t"
                        : "=m" (ret)
                        : "m" (a), "m" (b)
                        : "%xmm0"
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    return (a * b);
                }
            }
        }

        /**
         * Multiplies two values of arbitrary types. The result is converted
         * to the type of the first argument.
         */
        template<class T_a, class T_b> inline static T_a Mul(const T_a a, const T_b b) {
            return (a * b);
        }

        /**
         * Divides two floats.
         *
         * @returns a / b
         */
        inline static float Div(const float a, const float b) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    float ret;
                    asm (
                        "movss    %1, %%xmm0  # load a\n\t"
                        "divss    %2, %%xmm0  # a / b\n\t"
                        "movss    %%xmm0, %0  # output\n\t"
                        : "=m" (ret)
                        : "m" (a), "m" (b)
                        : "%xmm0"
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    return (a / b);
                }
            }
        }

        /**
         * Divides two values of arbitrary types. The result is converted to
         * the type of the first argument.
         */
        template<class T_a, class T_b> inline static T_a Div(const T_a a, const T_b b) {
            return (a / b);
        }

        /**
         * Returns the smaller one of two floats.
         */
        inline static float Min(const float a, const float b) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    float ret;
                    asm (
                        "movss    %1, %%xmm0  # load a\n\t"
                        "minss    %2, %%xmm0  # Minimum(a, b)\n\t"
                        "movss    %%xmm0, %0  # output\n\t"
                        : "=m" (ret)
                        : "m" (a), "m" (b)
                        : "%xmm0"
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    // same comparison std::min() performs, but without
                    // requiring <algorithm> to be included by this header
                    return (b < a) ? b : a;
                }
            }
        }

        /**
         * Returns the smaller one of two values of arbitrary types. The
         * result is converted to the type of the first argument.
         */
        template<class T_a, class T_b> inline static T_a Min(const T_a a, const T_b b) {
            return (b < a) ? b : a;
        }

        /**
         * Returns the bigger one of two floats.
         */
        inline static float Max(const float a, const float b) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    float ret;
                    asm (
                        "movss    %1, %%xmm0  # load a\n\t"
                        "maxss    %2, %%xmm0  # Maximum(a, b)\n\t"
                        "movss    %%xmm0, %0  # output\n\t"
                        : "=m" (ret)
                        : "m" (a), "m" (b)
                        : "%xmm0"
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    // same comparison std::max() performs, but without
                    // requiring <algorithm> to be included by this header
                    return (a < b) ? b : a;
                }
            }
        }

        /**
         * Returns the bigger one of two values of arbitrary types. The
         * result is converted to the type of the first argument.
         */
        template<class T_a, class T_b> inline static T_a Max(const T_a a, const T_b b) {
            return (b > a) ? b : a;
        }

        /**
         * Calculates the floating point remainder of a / b.
         *
         * The assembly variant computes a - b * int(a / b), where the
         * quotient is truncated to a 32 bit integer register. So it only
         * yields a meaningful result as long as a / b fits into that range.
         * The C++ variant simply calls fmodf().
         */
        inline static float Fmodf(const float &a, const float &b) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    float ret;
                    asm (
                        "movss    %1, %%xmm0  # load a\n\t"
                        "movss    %2, %%xmm1  # load b\n\t"
                        "movss    %%xmm0,%%xmm2\n\t"
                        "divss    %%xmm1, %%xmm2  # xmm2 = a / b\n\t"
                        "cvttss2si %%xmm2, %%ecx  #convert to int\n\t"
                        "cvtsi2ss %%ecx, %%xmm2  #convert back to float\n\t"
                        "mulss    %%xmm1, %%xmm2  # xmm2 = b * int(a/b)\n\t"
                        "subss    %%xmm2, %%xmm0  #sub a\n\t"
                        "movss    %%xmm0, %0  # output\n\t"
                        : "=m" (ret)
                        : "m" (a), "m" (b)
                        : "%ecx", "%xmm0", "%xmm1", "%xmm2"
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    return fmodf(a, b);
                }
            }
        }
};

/// Convenience alias for the default implementation of the math template,
/// which is the plain C++ one (CPP).
typedef __RTMath<CPP> RTMath;

#endif // __RT_MATH_H__
1	/***************************************************************************
2	* *
3	* LinuxSampler - modular, streaming capable sampler *
4	* *
5	* Copyright (C) 2003, 2004 by Benno Senoner and Christian Schoenebeck *
6	* Copyright (C) 2005 - 2016 Christian Schoenebeck *
7	* *
8	* This program is free software; you can redistribute it and/or modify *
9	* it under the terms of the GNU General Public License as published by *
10	* the Free Software Foundation; either version 2 of the License, or *
11	* (at your option) any later version. *
12	* *
13	* This program is distributed in the hope that it will be useful, *
14	* but WITHOUT ANY WARRANTY; without even the implied warranty of *
15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16	* GNU General Public License for more details. *
17	* *
18	* You should have received a copy of the GNU General Public License *
19	* along with this program; if not, write to the Free Software *
20	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, *
21	* MA 02111-1307 USA *
22	***************************************************************************/
23
24	#ifndef __RT_MATH_H__
25	#define __RT_MATH_H__
26
27	#include <math.h>
28	#include <stdint.h>
29	#include "global_private.h"
30
31	/// Needed for calculating frequency ratio used to pitch a sample
32	#define TWELVEHUNDREDTH_ROOT_OF_TWO 1.000577789506555
33
34	enum implementation_t {
35	CPP,
36	ASM_X86_MMX_SSE
37	};
38
39	/** @brief Real Time Math Base Class
40	*
41	* Math functions for real time operation. This base class contains all
42	* non-template methods.
43	*/
44	class RTMathBase {
45	public:
46	/**
47	* High resolution time stamp.
48	*/
49	typedef uint32_t time_stamp_t;
50
51	typedef uint64_t usecs_t;
52
53	/**
54	* We read the processor's cycle count register as a reference
55	* for the real time. These are of course only abstract values
56	* with arbitrary time entity, but that's not a problem as long
57	* as we calculate relatively.
58	*
59	* @see unsafeMicroSeconds()
60	*/
61	static time_stamp_t CreateTimeStamp();
62
63	/**
64	* Calculates the frequency ratio for a pitch value given in cents
65	* (assuming equal tempered scale of course, divided into 12
66	* semitones per octave and 100 cents per semitone).
67	*
68	* Note: CONFIG_MAX_PITCH (defined in config.h) has to be defined to an
69	* appropriate value, otherwise the behavior of this function is
70	* undefined, but most probably if CONFIG_MAX_PITCH is too small, the
71	* application will crash due to segmentation fault here.
72	*
73	* @param cents - pitch value in cents (+1200 cents means +1 octave)
74	* @returns frequency ratio (e.g. +2.0 for +1 octave)
75	*/
76	inline static double CentsToFreqRatio(double Cents) {
77	int index_int = (int) (Cents); // integer index
78	float index_fract = Cents - index_int; // fractional part of index
79	return pCentsToFreqTable[index_int] + index_fract * (pCentsToFreqTable[index_int+1] - pCentsToFreqTable[index_int]);
80	}
81
82	/**
83	* Slower version of CentsToFreqRatio, for big values.
84	*
85	* @param cents - pitch value in cents (+1200 cents means +1 octave)
86	* @returns frequency ratio (e.g. +2.0 for +1 octave)
87	*/
88	static double CentsToFreqRatioUnlimited(double Cents) {
89	int octaves = int(Cents / 1200);
90	double x = CentsToFreqRatio(Cents - octaves * 1200);
91	return octaves < 0 ? x / (1 << -octaves) : x * (1 << octaves);
92	}
93
94	/**
95	* Inverse function to CentsToFreqRatio(). This function is a bit
96	* slow, so it should not be called too frequently.
97	*/
98	static double FreqRatioToCents(double FreqRatio) {
99	return log(FreqRatio) / log(TWELVEHUNDREDTH_ROOT_OF_TWO);
100	}
101
102	/**
103	* Calculates the line ratio value representation (linear scale)
104	* of the @a decibel value provided (exponential scale).
105	*
106	* The context of audio acoustic sound pressure levels is assumed, and
107	* hence the field version of the dB unit is used here (which uses a
108	* linear factor of 20). This function is a bit slow, so it should
109	* not be called too frequently.
110	*
111	* @param decibel - sound pressure level in dB
112	* @returns linear ratio of the supplied dB value
113	*/
114	static float DecibelToLinRatio(float decibel) {
115	return powf(10.f, decibel / 20.f);
116	}
117
118	/**
119	* Calculates the relatively summed average of a set of values.
120	*
121	* @param current - the current avaerage value of all previously summed values
122	* @param sample - new value to be applied as summed average to the existing values
123	* @param n - amount of sample values applied so far
124	* @returns new average value of all summed values (including the new @a sample)
125	*/
126	template<typename T_int>
127	inline static float RelativeSummedAvg(float current, float sample, T_int n) {
128	return current + (sample - current) / float(n);
129	}
130
131	/**
132	* Clock source to use for getting the current time.
133	*/
134	enum clock_source_t {
135	real_clock, ///< Use this to measure time that passed in reality (no matter if process got suspended).
136	process_clock, ///< Use this to measure only the CPU execution time of the current process (if the process got suspended, the clock is paused as well).
137	thread_clock, ///< Use this to measure only the CPU execution time of the current thread (if the process got suspended or another thread is executed, the clock is paused as well).
138	};
139
140	/**
141	* Returns a time stamp of the current time in microseconds (in
142	* probably real-time @b unsafe way). There is no guarantee about
143	* what the returned amount of microseconds relates to (i.e.
144	* microseconds since epoch, microseconds since system uptime, ...).
145	* So you should only use it to calculate time differences between
146	* values taken with this method.
147	*
148	* @b CAUTION: This method may not @b NOT be real-time safe! On some
149	* systems it could be RT safe, but there is no guarantee whatsoever!
150	* So this method should only be used for debugging, benchmarking and
151	* other developing purposes !
152	*
153	* For creating time stamps in real-time context, use
154	* CreateTimeStamp() instead.
155	*
156	* @param source - the actual clock to use for getting the current
157	* time, note that the various clock sources may not
158	* be implemented on all systems
159	* @returns time stamp in microseconds
160	*
161	* @see CreateTimeStamp()
162	*/
163	static usecs_t unsafeMicroSeconds(clock_source_t source);
164
165	private:
166	static float* pCentsToFreqTable;
167
168	static float* InitCentsToFreqTable();
169	};
170
171	/** @brief Real Time Math
172	*
173	* This is a template which provides customized methods for the desired low
174	* level implementation. The ASM_X86_MMX_SSE implementation of each method
175	* for example doesn't use 387 FPU instruction. This is needed for MMX
176	* algorithms which do not allow mixed MMX and 387 instructions.
177	*/
178	template<implementation_t IMPL = CPP>
179	class __RTMath : public RTMathBase {
180	public:
181	// conversion using truncate
182	inline static int Int(const float a) {
183	switch (IMPL) {
184	#if CONFIG_ASM && ARCH_X86
185	case ASM_X86_MMX_SSE: {
186	int ret;
187	asm (
188	"cvttss2si %1, %0 # convert to int\n\t"
189	: "=r" (ret)
190	: "m" (a)
191	);
192	return ret;
193	}
194	#endif // CONFIG_ASM && ARCH_X86
195	default: {
196	return (int) a;
197	}
198	}
199	}
200
201	//for doubles and everything else except floats
202	template<class T_a> inline static int Int(const T_a a) {
203	return (int) a;
204	}
205
206	inline static float Float(const int a) {
207	switch (IMPL) {
208	#if CONFIG_ASM && ARCH_X86
209	case ASM_X86_MMX_SSE: {
210	float ret;
211	asm (
212	"cvtsi2ss %1, %%xmm0 # convert to float\n\t"
213	"movss %%xmm0,%0 # output\n\t"
214	: "=m" (ret)
215	: "r" (a)
216	);
217	return ret;
218	}
219	#endif // CONFIG_ASM && ARCH_X86
220	default: {
221	return (float) a;
222	}
223	}
224	}
225
226	#if 0
227	//for everything except ints
228	template<class T_a> inline static float Float(T_a a) {
229	return (float) a;
230	}
231	#endif
232
233	inline static float Sum(const float& a, const float& b) {
234	switch (IMPL) {
235	#if CONFIG_ASM && ARCH_X86
236	case ASM_X86_MMX_SSE: {
237	float ret;
238	asm (
239	"movss %1, %%xmm0 # load a\n\t"
240	"addss %2, %%xmm0 # a + b\n\t"
241	"movss %%xmm0, %0 # output\n\t"
242	: "=m" (ret)
243	: "m" (a), "m" (b)
244	);
245	return ret;
246	}
247	#endif // CONFIG_ASM && ARCH_X86
248	default: {
249	return (a + b);
250	}
251	}
252	}
253
254	template<class T_a, class T_b> inline static T_a Sum(const T_a a, const T_b b) {
255	return (a + b);
256	}
257
258	inline static float Sub(const float& a, const float& b) {
259	switch (IMPL) {
260	#if CONFIG_ASM && ARCH_X86
261	case ASM_X86_MMX_SSE: {
262	float ret;
263	asm (
264	"movss %1, %%xmm0 # load a\n\t"
265	"subss %2, %%xmm0 # a - b\n\t"
266	"movss %%xmm0, %0 # output\n\t"
267	: "=m" (ret)
268	: "m" (a), "m" (b)
269	);
270	return ret;
271	}
272	#endif // CONFIG_ASM && ARCH_X86
273	default: {
274	return (a - b);
275	}
276	}
277	}
278
279	template<class T_a, class T_b> inline static T_a Sub(const T_a a, const T_b b) {
280	return (a - b);
281	}
282
283	inline static float Mul(const float a, const float b) {
284	switch (IMPL) {
285	#if CONFIG_ASM && ARCH_X86
286	case ASM_X86_MMX_SSE: {
287	float ret;
288	asm (
289	"movss %1, %%xmm0 # load a\n\t"
290	"mulss %2, %%xmm0 # a * b\n\t"
291	"movss %%xmm0, %0 # output\n\t"
292	: "=m" (ret)
293	: "m" (a), "m" (b)
294	);
295	return ret;
296	}
297	#endif // CONFIG_ASM && ARCH_X86
298	default: {
299	return (a * b);
300	}
301	}
302	}
303
304	template<class T_a, class T_b> inline static T_a Mul(const T_a a, const T_b b) {
305	return (a * b);
306	}
307
308	inline static float Div(const float a, const float b) {
309	switch (IMPL) {
310	#if CONFIG_ASM && ARCH_X86
311	case ASM_X86_MMX_SSE: {
312	float ret;
313	asm (
314	"movss %1, %%xmm0 # load a\n\t"
315	"divss %2, %%xmm0 # a / b\n\t"
316	"movss %%xmm0, %0 # output\n\t"
317	: "=m" (ret)
318	: "m" (a), "m" (b)
319	);
320	return ret;
321	}
322	#endif // CONFIG_ASM && ARCH_X86
323	default: {
324	return (a / b);
325	}
326	}
327	}
328
329	template<class T_a, class T_b> inline static T_a Div(const T_a a, const T_b b) {
330	return (a / b);
331	}
332
333	inline static float Min(const float a, const float b) {
334	switch (IMPL) {
335	#if CONFIG_ASM && ARCH_X86
336	case ASM_X86_MMX_SSE: {
337	float ret;
338	asm (
339	"movss %1, %%xmm0 # load a\n\t"
340	"minss %2, %%xmm0 # Minimum(a, b)\n\t"
341	"movss %%xmm0, %0 # output\n\t"
342	: "=m" (ret)
343	: "m" (a), "m" (b)
344	);
345	return ret;
346	}
347	#endif // CONFIG_ASM && ARCH_X86
348	default: {
349	return std::min(a, b);
350	}
351	}
352	}
353
354	template<class T_a, class T_b> inline static T_a Min(const T_a a, const T_b b) {
355	return (b < a) ? b : a;
356	}
357
358	inline static float Max(const float a, const float b) {
359	switch (IMPL) {
360	#if CONFIG_ASM && ARCH_X86
361	case ASM_X86_MMX_SSE: {
362	float ret;
363	asm (
364	"movss %1, %%xmm0 # load a\n\t"
365	"maxss %2, %%xmm0 # Maximum(a, b)\n\t"
366	"movss %%xmm0, %0 # output\n\t"
367	: "=m" (ret)
368	: "m" (a), "m" (b)
369	);
370	return ret;
371	}
372	#endif // CONFIG_ASM && ARCH_X86
373	default: {
374	return std::max(a, b);
375	}
376	}
377	}
378
379	template<class T_a, class T_b> inline static T_a Max(const T_a a, const T_b b) {
380	return (b > a) ? b : a;
381	}
382
383	inline static float Fmodf(const float &a, const float &b) {
384	switch (IMPL) {
385	#if CONFIG_ASM && ARCH_X86
386	case ASM_X86_MMX_SSE: {
387	float ret;
388	asm (
389	"movss %1, %%xmm0 # load a\n\t"
390	"movss %2, %%xmm1 # load b\n\t"
391	"movss %%xmm0,%%xmm2\n\t"
392	"divss %%xmm1, %%xmm2 # xmm2 = a / b\n\t"
393	"cvttss2si %%xmm2, %%ecx #convert to int\n\t"
394	"cvtsi2ss %%ecx, %%xmm2 #convert back to float\n\t"
395	"mulss %%xmm1, %%xmm2 # xmm2 = b * int(a/b)\n\t"
396	"subss %%xmm2, %%xmm0 #sub a\n\t"
397	"movss %%xmm0, %0 # output\n\t"
398	: "=m" (ret)
399	: "m" (a), "m" (b)
400	: "%ecx"
401	);
402	return ret;
403	}
404	#endif // CONFIG_ASM && ARCH_X86
405	default: {
406	return fmodf(a, b);
407	}
408	}
409	}
410	};
411
412	/// convenience typedef for using the default implementation (which is CPP)
413	typedef __RTMath<> RTMath;
414
415	#endif // __RT_MATH_H__