src/common/RTMath.h

/***************************************************************************
 *                                                                         *
 *   LinuxSampler - modular, streaming capable sampler                     *
 *                                                                         *
 *   Copyright (C) 2003, 2004 by Benno Senoner and Christian Schoenebeck   *
 *   Copyright (C) 2005 - 2017 Christian Schoenebeck                       *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the Free Software           *
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston,                 *
 *   MA  02111-1307  USA                                                   *
 ***************************************************************************/

#ifndef __RT_MATH_H__
#define __RT_MATH_H__

#include <math.h>
#include <stdint.h>
#include "global_private.h"

/// Needed for calculating frequency ratio used to pitch a sample
#define TWELVEHUNDREDTH_ROOT_OF_TWO     1.000577789506555

/**
 * Selects which low level implementation the __RTMath template methods use.
 */
enum implementation_t {
    CPP             = 0, ///< Plain C++ expressions (the template's default).
    ASM_X86_MMX_SSE = 1  ///< x86 inline assembly using SSE scalar instructions (only compiled in if CONFIG_ASM && ARCH_X86).
};

/** @brief Real Time Math Base Class
 *
 * Math functions for real time operation. This base class contains all
 * non-template methods.
 */
class RTMathBase {
    public:
        /**
         * High resolution time stamp.
         */
        typedef uint32_t time_stamp_t;

        /**
         * Time value in microseconds, as returned by unsafeMicroSeconds().
         */
        typedef uint64_t usecs_t;

        /**
         * We read the processor's cycle count register as a reference
         * for the real time. These are of course only abstract values
         * with arbitrary time entity, but that's not a problem as long
         * as we calculate relatively.
         *
         * @see unsafeMicroSeconds()
         */
        static time_stamp_t CreateTimeStamp();

        /**
         * Calculates the frequency ratio for a pitch value given in cents
         * (assuming equal tempered scale of course, divided into 12
         * semitones per octave and 100 cents per semitone).
         *
         * The value is looked up in a precalculated table and linearly
         * interpolated between two neighbouring table entries. No bounds
         * checking is performed on the table index.
         *
         * Note: CONFIG_MAX_PITCH (defined in config.h) has to be defined to an
         * appropriate value, otherwise the behavior of this function is
         * undefined, but most probably if CONFIG_MAX_PITCH is too small, the
         * application will crash due to segmentation fault here.
         *
         * @param Cents - pitch value in cents (+1200 cents means +1 octave)
         * @returns  frequency ratio (e.g. +2.0 for +1 octave)
         */
        inline static double CentsToFreqRatio(double Cents) {
            // The cast truncates towards zero, so for negative non-integer
            // values the fractional part below is negative and the result
            // is extrapolated from the entry at index_int.
            int   index_int   = (int) (Cents);      // integer index
            float index_fract = Cents - index_int;  // fractional part of index
            return pCentsToFreqTable[index_int] + index_fract * (pCentsToFreqTable[index_int+1] - pCentsToFreqTable[index_int]);
        }

        /**
         * Slower version of CentsToFreqRatio, for big values.
         *
         * The pitch is split into a whole number of octaves and a remainder
         * of less than one octave. Only the remainder is resolved by table
         * lookup, the octaves are applied as a power of two afterwards.
         *
         * @param Cents - pitch value in cents (+1200 cents means +1 octave)
         * @returns  frequency ratio (e.g. +2.0 for +1 octave)
         */
        static double CentsToFreqRatioUnlimited(double Cents) {
            // whole octaves, truncated towards zero
            int octaves = int(Cents / 1200);
            // multiply in double precision so the product cannot overflow int
            double x = CentsToFreqRatio(Cents - octaves * 1200.0);
            // Scale by 2^octaves. ldexp() gives the same result as the
            // former integer shift for small octave counts, but stays well
            // defined when |octaves| reaches the bit width of int.
            return ldexp(x, octaves);
        }

        /**
         * Inverse function to CentsToFreqRatio(). This function is a bit
         * slow, so it should not be called too frequently.
         *
         * @param FreqRatio - frequency ratio (e.g. 2.0 for +1 octave)
         * @returns pitch value in cents
         */
        static double FreqRatioToCents(double FreqRatio) {
            return log(FreqRatio) / log(TWELVEHUNDREDTH_ROOT_OF_TWO);
        }

        /**
         * Calculates the linear ratio value representation (linear scale)
         * of the @a decibel value provided (exponential scale).
         *
         * The context of audio acoustic sound pressure levels is assumed, and
         * hence the field version of the dB unit is used here (which uses a
         * linear factor of 20). This function is a bit slow, so it should
         * not be called too frequently.
         *
         * @param decibel - sound pressure level in dB
         * @returns linear ratio of the supplied dB value
         * @see LinRatioToDecibel() as inverse function
         */
        static float DecibelToLinRatio(float decibel) {
            return powf(10.f, decibel / 20.f);
        }

        /**
         * Calculates the decibel value (exponential scale) of the @a linear
         * ratio value representation (linear scale) provided.
         *
         * The context of audio acoustic sound pressure levels is assumed, and
         * hence the field version of the dB unit is used here (which uses a
         * linear factor of 20). This function is a bit slow, so it should
         * not be called too frequently.
         *
         * @param linear - sound pressure level as linear ratio value (linear scale)
         * @returns dB value representation
         * @see DecibelToLinRatio() as inverse function
         */
        static float LinRatioToDecibel(float linear) {
            return 20.f * log10f(linear);
        }

        /**
         * Calculates the relatively summed average of a set of values.
         *
         * The previous average is moved towards @a sample by 1/n of their
         * difference, so for @a n == 1 the result equals @a sample. @a n is
         * used as divisor and therefore must not be zero.
         *
         * @param current - the current average value of all previously summed values
         * @param sample - new value to be applied as summed average to the existing values
         * @param n - amount of sample values applied so far (used as divisor)
         * @returns new average value of all summed values (including the new @a sample)
         */
        template<typename T_int>
        inline static float RelativeSummedAvg(float current, float sample, T_int n) {
            return current + (sample - current) / float(n);
        }

        /**
         * Clock source to use for getting the current time.
         */
        enum clock_source_t {
            real_clock,    ///< Use this to measure time that passed in reality (no matter if process got suspended).
            process_clock, ///< Use this to measure only the CPU execution time of the current process (if the process got suspended, the clock is paused as well).
            thread_clock   ///< Use this to measure only the CPU execution time of the current thread (if the process got suspended or another thread is executed, the clock is paused as well).
        };

        /**
         * Returns a time stamp of the current time in microseconds (in
         * probably real-time @b unsafe way). There is no guarantee about
         * what the returned amount of microseconds relates to (i.e.
         * microseconds since epoch, microseconds since system uptime, ...).
         * So you should only use it to calculate time differences between
         * values taken with this method.
         *
         * @b CAUTION: This method may @b NOT be real-time safe! On some
         * systems it could be RT safe, but there is no guarantee whatsoever!
         * So this method should only be used for debugging, benchmarking and
         * other developing purposes !
         *
         * For creating time stamps in real-time context, use
         * CreateTimeStamp() instead.
         *
         * @param source - the actual clock to use for getting the current
         *                 time, note that the various clock sources may not
         *                 be implemented on all systems
         * @returns time stamp in microseconds
         *
         * @see CreateTimeStamp()
         */
        static usecs_t unsafeMicroSeconds(clock_source_t source);

    private:
        /// Lookup table read by CentsToFreqRatio(), indexed by cents.
        static float* pCentsToFreqTable;

        static float* InitCentsToFreqTable();
};

/** @brief Real Time Math
 *
 * This is a template which provides customized methods for the desired low
 * level implementation. The ASM_X86_MMX_SSE implementation of each method
 * for example doesn't use 387 FPU instruction. This is needed for MMX
 * algorithms which do not allow mixed MMX and 387 instructions.
 *
 * The assembly variants are only compiled in if CONFIG_ASM && ARCH_X86 is
 * set; otherwise every method falls through to its plain C++ variant, no
 * matter which @a IMPL was requested. Every inline assembly statement
 * lists the SSE registers it overwrites as clobbers, so the compiler does
 * not keep live values in them across the statement.
 */
template<implementation_t IMPL = CPP>
class __RTMath : public RTMathBase {
    public:
        /**
         * Converts @a a to int by truncation.
         */
        inline static int Int(const float a) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    int ret;
                    asm (
                        "cvttss2si %1, %0  # convert to int\n\t"
                        : "=r" (ret)
                        : "m" (a)
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    return (int) a;
                }
            }
        }

        /**
         * Converts @a a to int with a plain cast. Used for doubles and
         * everything else except floats.
         */
        template<class T_a> inline static int Int(const T_a a) {
            return (int) a;
        }

        /**
         * Converts the integer @a a to float.
         */
        inline static float Float(const int a) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    float ret;
                    asm (
                        "cvtsi2ss %1, %%xmm0  # convert to float\n\t"
                        "movss    %%xmm0,%0   # output\n\t"
                        : "=m" (ret)
                        : "r" (a)
                        : "%xmm0"
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    return (float) a;
                }
            }
        }

#if 0
        //for everything except ints
        template<class T_a> inline static float Float(T_a a) {
            return (float) a;
        }
#endif

        /**
         * Returns @a a + @a b.
         */
        inline static float Sum(const float& a, const float& b) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    float ret;
                    asm (
                        "movss    %1, %%xmm0  # load a\n\t"
                        "addss    %2, %%xmm0  # a + b\n\t"
                        "movss    %%xmm0, %0  # output\n\t"
                        : "=m" (ret)
                        : "m" (a), "m" (b)
                        : "%xmm0"
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    return (a + b);
                }
            }
        }

        /**
         * Returns @a a + @a b, converted to the type of @a a (used for all
         * argument types other than two floats).
         */
        template<class T_a, class T_b> inline static T_a Sum(const T_a a, const T_b b) {
            return (a + b);
        }

        /**
         * Returns @a a - @a b.
         */
        inline static float Sub(const float& a, const float& b) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    float ret;
                    asm (
                        "movss    %1, %%xmm0  # load a\n\t"
                        "subss    %2, %%xmm0  # a - b\n\t"
                        "movss    %%xmm0, %0  # output\n\t"
                        : "=m" (ret)
                        : "m" (a), "m" (b)
                        : "%xmm0"
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    return (a - b);
                }
            }
        }

        /**
         * Returns @a a - @a b, converted to the type of @a a (used for all
         * argument types other than two floats).
         */
        template<class T_a, class T_b> inline static T_a Sub(const T_a a, const T_b b) {
            return (a - b);
        }

        /**
         * Returns @a a * @a b.
         */
        inline static float Mul(const float a, const float b) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    float ret;
                    asm (
                        "movss    %1, %%xmm0  # load a\n\t"
                        "mulss    %2, %%xmm0  # a * b\n\t"
                        "movss    %%xmm0, %0  # output\n\t"
                        : "=m" (ret)
                        : "m" (a), "m" (b)
                        : "%xmm0"
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    return (a * b);
                }
            }
        }

        /**
         * Returns @a a * @a b, converted to the type of @a a (used for all
         * argument types other than two floats).
         */
        template<class T_a, class T_b> inline static T_a Mul(const T_a a, const T_b b) {
            return (a * b);
        }

        /**
         * Returns @a a / @a b.
         */
        inline static float Div(const float a, const float b) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    float ret;
                    asm (
                        "movss    %1, %%xmm0  # load a\n\t"
                        "divss    %2, %%xmm0  # a / b\n\t"
                        "movss    %%xmm0, %0  # output\n\t"
                        : "=m" (ret)
                        : "m" (a), "m" (b)
                        : "%xmm0"
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    return (a / b);
                }
            }
        }

        /**
         * Returns @a a / @a b, converted to the type of @a a (used for all
         * argument types other than two floats).
         */
        template<class T_a, class T_b> inline static T_a Div(const T_a a, const T_b b) {
            return (a / b);
        }

        /**
         * Returns the smaller one of @a a and @a b.
         */
        inline static float Min(const float a, const float b) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    float ret;
                    asm (
                        "movss    %1, %%xmm0  # load a\n\t"
                        "minss    %2, %%xmm0  # Minimum(a, b)\n\t"
                        "movss    %%xmm0, %0  # output\n\t"
                        : "=m" (ret)
                        : "m" (a), "m" (b)
                        : "%xmm0"
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    // same selection as std::min(a, b), but without relying
                    // on <algorithm>, which this header does not include
                    return (b < a) ? b : a;
                }
            }
        }

        /**
         * Returns the smaller one of @a a and @a b, converted to the type of
         * @a a (used for all argument types other than two floats).
         */
        template<class T_a, class T_b> inline static T_a Min(const T_a a, const T_b b) {
            return (b < a) ? b : a;
        }

        /**
         * Returns the bigger one of @a a and @a b.
         */
        inline static float Max(const float a, const float b) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    float ret;
                    asm (
                        "movss    %1, %%xmm0  # load a\n\t"
                        "maxss    %2, %%xmm0  # Maximum(a, b)\n\t"
                        "movss    %%xmm0, %0  # output\n\t"
                        : "=m" (ret)
                        : "m" (a), "m" (b)
                        : "%xmm0"
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    // same selection as std::max(a, b), but without relying
                    // on <algorithm>, which this header does not include
                    return (a < b) ? b : a;
                }
            }
        }

        /**
         * Returns the bigger one of @a a and @a b, converted to the type of
         * @a a (used for all argument types other than two floats).
         */
        template<class T_a, class T_b> inline static T_a Max(const T_a a, const T_b b) {
            return (b > a) ? b : a;
        }

        /**
         * Returns the floating point remainder of @a a / @a b.
         *
         * The assembly variant calculates a - b * int(a / b), where the
         * quotient is truncated by converting it to a 32 bit integer
         * register and back. The C++ variant calls fmodf().
         */
        inline static float Fmodf(const float &a, const float &b) {
            switch (IMPL) {
                #if CONFIG_ASM && ARCH_X86
                case ASM_X86_MMX_SSE: {
                    float ret;
                    asm (
                        "movss    %1, %%xmm0  # load a\n\t"
                        "movss    %2, %%xmm1  # load b\n\t"
                        "movss    %%xmm0,%%xmm2\n\t"
                        "divss    %%xmm1, %%xmm2  # xmm2 = a / b\n\t"
                        "cvttss2si %%xmm2, %%ecx  #convert to int\n\t"
                        "cvtsi2ss %%ecx, %%xmm2  #convert back to float\n\t"
                        "mulss    %%xmm1, %%xmm2  # xmm2 = b * int(a/b)\n\t"
                        "subss    %%xmm2, %%xmm0  #sub a\n\t"
                        "movss    %%xmm0, %0  # output\n\t"
                        : "=m" (ret)
                        : "m" (a), "m" (b)
                        : "%ecx", "%xmm0", "%xmm1", "%xmm2"
                    );
                    return ret;
                }
                #endif // CONFIG_ASM && ARCH_X86
                default: {
                    return fmodf(a, b);
                }
            }
        }
};

/// Convenience typedef for using __RTMath with its default template
/// argument, i.e. the default implementation (which is CPP).
typedef __RTMath<> RTMath;

#endif // __RT_MATH_H__
1	schoenebeck	53	/***************************************************************************
2			* *
3			* LinuxSampler - modular, streaming capable sampler *
4			* *
5	schoenebeck	56	* Copyright (C) 2003, 2004 by Benno Senoner and Christian Schoenebeck *
6	schoenebeck	3193	* Copyright (C) 2005 - 2017 Christian Schoenebeck *
7	schoenebeck	53	* *
8			* This program is free software; you can redistribute it and/or modify *
9			* it under the terms of the GNU General Public License as published by *
10			* the Free Software Foundation; either version 2 of the License, or *
11			* (at your option) any later version. *
12			* *
13			* This program is distributed in the hope that it will be useful, *
14			* but WITHOUT ANY WARRANTY; without even the implied warranty of *
15			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16			* GNU General Public License for more details. *
17			* *
18			* You should have received a copy of the GNU General Public License *
19			* along with this program; if not, write to the Free Software *
20			* Foundation, Inc., 59 Temple Place, Suite 330, Boston, *
21			* MA 02111-1307 USA *
22			***************************************************************************/
23
24			#ifndef __RT_MATH_H__
25			#define __RT_MATH_H__
26
27			#include <math.h>
28	schoenebeck	328	#include <stdint.h>
29	schoenebeck	1424	#include "global_private.h"
30	schoenebeck	53
31			/// Needed for calculating frequency ratio used to pitch a sample
32			#define TWELVEHUNDREDTH_ROOT_OF_TWO 1.000577789506555
33
34	schoenebeck	319	enum implementation_t {
35	schoenebeck	361	CPP,
36			ASM_X86_MMX_SSE
37	schoenebeck	319	};
38
39	schoenebeck	563	/** @brief Real Time Math Base Class
40			*
41			* Math functions for real time operation. This base class contains all
42			* non-template methods.
43			*/
44	schoenebeck	319	class RTMathBase {
45	schoenebeck	53	public:
46			/**
47	schoenebeck	2942	* High resolution time stamp.
48	schoenebeck	328	*/
49			typedef uint32_t time_stamp_t;
50
51	schoenebeck	2942	typedef uint64_t usecs_t;
52
53	schoenebeck	328	/**
54			* We read the processor's cycle count register as a reference
55			* for the real time. These are of course only abstract values
56			* with arbitrary time entity, but that's not a problem as long
57			* as we calculate relatively.
58	schoenebeck	2942	*
59			* @see unsafeMicroSeconds()
60	schoenebeck	328	*/
61			static time_stamp_t CreateTimeStamp();
62
63			/**
64	schoenebeck	53	* Calculates the frequency ratio for a pitch value given in cents
65			* (assuming equal tempered scale of course, divided into 12
66			* semitones per octave and 100 cents per semitone).
67			*
68	schoenebeck	554	* Note: CONFIG_MAX_PITCH (defined in config.h) has to be defined to an
69	schoenebeck	53	* appropriate value, otherwise the behavior of this function is
70	schoenebeck	554	* undefined, but most probably if CONFIG_MAX_PITCH is too small, the
71	schoenebeck	53	* application will crash due to segmentation fault here.
72			*
73			* @param cents - pitch value in cents (+1200 cents means +1 octave)
74			* @returns frequency ratio (e.g. +2.0 for +1 octave)
75			*/
76			inline static double CentsToFreqRatio(double Cents) {
77	schoenebeck	319	int index_int = (int) (Cents); // integer index
78	schoenebeck	53	float index_fract = Cents - index_int; // fractional part of index
79			return pCentsToFreqTable[index_int] + index_fract * (pCentsToFreqTable[index_int+1] - pCentsToFreqTable[index_int]);
80			}
81
82	schoenebeck	829	/**
83	persson	1862	* Slower version of CentsToFreqRatio, for big values.
84			*
85			* @param cents - pitch value in cents (+1200 cents means +1 octave)
86			* @returns frequency ratio (e.g. +2.0 for +1 octave)
87			*/
88			static double CentsToFreqRatioUnlimited(double Cents) {
89			int octaves = int(Cents / 1200);
90			double x = CentsToFreqRatio(Cents - octaves * 1200);
91			return octaves < 0 ? x / (1 << -octaves) : x * (1 << octaves);
92			}
93
94			/**
95	schoenebeck	829	* Inverse function to CentsToFreqRatio(). This function is a bit
96			* slow, so it should not be called too frequently.
97			*/
98			static double FreqRatioToCents(double FreqRatio) {
99			return log(FreqRatio) / log(TWELVEHUNDREDTH_ROOT_OF_TWO);
100			}
101
102	schoenebeck	2931	/**
103	schoenebeck	3193	* Calculates the linear ratio value representation (linear scale)
104	schoenebeck	2931	* of the @a decibel value provided (exponential scale).
105			*
106			* The context of audio acoustic sound pressure levels is assumed, and
107			* hence the field version of the dB unit is used here (which uses a
108			* linear factor of 20). This function is a bit slow, so it should
109			* not be called too frequently.
110			*
111			* @param decibel - sound pressure level in dB
112			* @returns linear ratio of the supplied dB value
113	schoenebeck	3193	* @see LinRatioToDecibel() as inverse function
114	schoenebeck	2931	*/
115			static float DecibelToLinRatio(float decibel) {
116			return powf(10.f, decibel / 20.f);
117			}
118
119			/**
120	schoenebeck	3193	* Calculates the decibel value (exponential scale) of the @a linear
121			* ratio value representation (linear scale) provided.
122			*
123			* The context of audio acoustic sound pressure levels is assumed, and
124			* hence the field version of the dB unit is used here (which uses a
125			* linear factor of 20). This function is a bit slow, so it should
126			* not be called too frequently.
127			*
128			* @param linear - sound pressure level as linear ratio value (linear scale)
129			* @returns dB value representation
130			* @see DecibelToLinRatio() as inverse function
131			*/
132			static float LinRatioToDecibel(float linear) {
133			return 20.f * log10f(linear);
134			}
135
136			/**
137	schoenebeck	2931	* Calculates the relatively summed average of a set of values.
138			*
139			* @param current - the current avaerage value of all previously summed values
140			* @param sample - new value to be applied as summed average to the existing values
141			* @param n - amount of sample values applied so far
142			* @returns new average value of all summed values (including the new @a sample)
143			*/
144	schoenebeck	3054	template<typename T_int>
145			inline static float RelativeSummedAvg(float current, float sample, T_int n) {
146	schoenebeck	2931	return current + (sample - current) / float(n);
147			}
148
149	schoenebeck	2942	/**
150			* Clock source to use for getting the current time.
151			*/
152			enum clock_source_t {
153			real_clock, ///< Use this to measure time that passed in reality (no matter if process got suspended).
154			process_clock, ///< Use this to measure only the CPU execution time of the current process (if the process got suspended, the clock is paused as well).
155			thread_clock, ///< Use this to measure only the CPU execution time of the current thread (if the process got suspended or another thread is executed, the clock is paused as well).
156			};
157
158			/**
159			* Returns a time stamp of the current time in microseconds (in
160			* probably real-time @b unsafe way). There is no guarantee about
161			* what the returned amount of microseconds relates to (i.e.
162			* microseconds since epoch, microseconds since system uptime, ...).
163			* So you should only use it to calculate time differences between
164			* values taken with this method.
165			*
166			* @b CAUTION: This method may not @b NOT be real-time safe! On some
167			* systems it could be RT safe, but there is no guarantee whatsoever!
168			* So this method should only be used for debugging, benchmarking and
169			* other developing purposes !
170			*
171			* For creating time stamps in real-time context, use
172			* CreateTimeStamp() instead.
173			*
174			* @param source - the actual clock to use for getting the current
175			* time, note that the various clock sources may not
176			* be implemented on all systems
177			* @returns time stamp in microseconds
178			*
179			* @see CreateTimeStamp()
180			*/
181			static usecs_t unsafeMicroSeconds(clock_source_t source);
182
183	schoenebeck	319	private:
184			static float* pCentsToFreqTable;
185
186			static float* InitCentsToFreqTable();
187			};
188
189	schoenebeck	563	/** @brief Real Time Math
190	schoenebeck	319	*
191	schoenebeck	563	* This is a template which provides customized methods for the desired low
192			* level implementation. The ASM_X86_MMX_SSE implementation of each method
193			* for example doesn't use 387 FPU instruction. This is needed for MMX
194			* algorithms which do not allow mixed MMX and 387 instructions.
195	schoenebeck	319	*/
196			template<implementation_t IMPL = CPP>
197			class __RTMath : public RTMathBase {
198			public:
199			// conversion using truncate
200			inline static int Int(const float a) {
201			switch (IMPL) {
202	schoenebeck	617	#if CONFIG_ASM && ARCH_X86
203	schoenebeck	319	case ASM_X86_MMX_SSE: {
204			int ret;
205			asm (
206			"cvttss2si %1, %0 # convert to int\n\t"
207			: "=r" (ret)
208			: "m" (a)
209			);
210			return ret;
211			}
212	schoenebeck	617	#endif // CONFIG_ASM && ARCH_X86
213	persson	685	default: {
214			return (int) a;
215			}
216	schoenebeck	319	}
217			}
218
219			//for doubles and everything else except floats
220			template<class T_a> inline static int Int(const T_a a) {
221			return (int) a;
222			}
223
224			inline static float Float(const int a) {
225			switch (IMPL) {
226	schoenebeck	617	#if CONFIG_ASM && ARCH_X86
227	schoenebeck	319	case ASM_X86_MMX_SSE: {
228			float ret;
229			asm (
230			"cvtsi2ss %1, %%xmm0 # convert to float\n\t"
231			"movss %%xmm0,%0 # output\n\t"
232			: "=m" (ret)
233			: "r" (a)
234			);
235			return ret;
236			}
237	schoenebeck	617	#endif // CONFIG_ASM && ARCH_X86
238	persson	685	default: {
239			return (float) a;
240			}
241	schoenebeck	319	}
242			}
243
244			#if 0
245			//for everything except ints
246			template<class T_a> inline static float Float(T_a a) {
247			return (float) a;
248			}
249			#endif
250
251			inline static float Sum(const float& a, const float& b) {
252			switch (IMPL) {
253	schoenebeck	617	#if CONFIG_ASM && ARCH_X86
254	schoenebeck	319	case ASM_X86_MMX_SSE: {
255			float ret;
256			asm (
257			"movss %1, %%xmm0 # load a\n\t"
258			"addss %2, %%xmm0 # a + b\n\t"
259			"movss %%xmm0, %0 # output\n\t"
260			: "=m" (ret)
261			: "m" (a), "m" (b)
262			);
263			return ret;
264			}
265	schoenebeck	617	#endif // CONFIG_ASM && ARCH_X86
266	persson	685	default: {
267			return (a + b);
268			}
269	schoenebeck	319	}
270			}
271
272			template<class T_a, class T_b> inline static T_a Sum(const T_a a, const T_b b) {
273			return (a + b);
274			}
275
276			inline static float Sub(const float& a, const float& b) {
277			switch (IMPL) {
278	schoenebeck	617	#if CONFIG_ASM && ARCH_X86
279	schoenebeck	319	case ASM_X86_MMX_SSE: {
280			float ret;
281			asm (
282			"movss %1, %%xmm0 # load a\n\t"
283			"subss %2, %%xmm0 # a - b\n\t"
284			"movss %%xmm0, %0 # output\n\t"
285			: "=m" (ret)
286			: "m" (a), "m" (b)
287			);
288			return ret;
289			}
290	schoenebeck	617	#endif // CONFIG_ASM && ARCH_X86
291	persson	685	default: {
292			return (a - b);
293			}
294	schoenebeck	319	}
295			}
296
297			template<class T_a, class T_b> inline static T_a Sub(const T_a a, const T_b b) {
298			return (a - b);
299			}
300
301			inline static float Mul(const float a, const float b) {
302			switch (IMPL) {
303	schoenebeck	617	#if CONFIG_ASM && ARCH_X86
304	schoenebeck	319	case ASM_X86_MMX_SSE: {
305			float ret;
306			asm (
307			"movss %1, %%xmm0 # load a\n\t"
308			"mulss %2, %%xmm0 # a * b\n\t"
309			"movss %%xmm0, %0 # output\n\t"
310			: "=m" (ret)
311			: "m" (a), "m" (b)
312			);
313			return ret;
314			}
315	schoenebeck	617	#endif // CONFIG_ASM && ARCH_X86
316	persson	685	default: {
317			return (a * b);
318			}
319	schoenebeck	319	}
320			}
321
322			template<class T_a, class T_b> inline static T_a Mul(const T_a a, const T_b b) {
323			return (a * b);
324			}
325
326			inline static float Div(const float a, const float b) {
327			switch (IMPL) {
328	schoenebeck	617	#if CONFIG_ASM && ARCH_X86
329	schoenebeck	319	case ASM_X86_MMX_SSE: {
330			float ret;
331			asm (
332			"movss %1, %%xmm0 # load a\n\t"
333			"divss %2, %%xmm0 # a / b\n\t"
334			"movss %%xmm0, %0 # output\n\t"
335			: "=m" (ret)
336			: "m" (a), "m" (b)
337			);
338			return ret;
339			}
340	schoenebeck	617	#endif // CONFIG_ASM && ARCH_X86
341	persson	685	default: {
342			return (a / b);
343			}
344	schoenebeck	319	}
345			}
346
347			template<class T_a, class T_b> inline static T_a Div(const T_a a, const T_b b) {
348			return (a / b);
349			}
350
351			inline static float Min(const float a, const float b) {
352			switch (IMPL) {
353	schoenebeck	617	#if CONFIG_ASM && ARCH_X86
354	schoenebeck	319	case ASM_X86_MMX_SSE: {
355			float ret;
356			asm (
357			"movss %1, %%xmm0 # load a\n\t"
358			"minss %2, %%xmm0 # Minimum(a, b)\n\t"
359			"movss %%xmm0, %0 # output\n\t"
360			: "=m" (ret)
361			: "m" (a), "m" (b)
362			);
363			return ret;
364			}
365	schoenebeck	617	#endif // CONFIG_ASM && ARCH_X86
366	persson	685	default: {
367	wylder	818	return std::min(a, b);
368	persson	685	}
369	schoenebeck	319	}
370			}
371
372			template<class T_a, class T_b> inline static T_a Min(const T_a a, const T_b b) {
373	schoenebeck	53	return (b < a) ? b : a;
374			}
375
376	schoenebeck	319	inline static float Max(const float a, const float b) {
377			switch (IMPL) {
378	schoenebeck	617	#if CONFIG_ASM && ARCH_X86
379	schoenebeck	319	case ASM_X86_MMX_SSE: {
380			float ret;
381			asm (
382			"movss %1, %%xmm0 # load a\n\t"
383			"maxss %2, %%xmm0 # Maximum(a, b)\n\t"
384			"movss %%xmm0, %0 # output\n\t"
385			: "=m" (ret)
386			: "m" (a), "m" (b)
387			);
388			return ret;
389			}
390	schoenebeck	617	#endif // CONFIG_ASM && ARCH_X86
391	persson	685	default: {
392	wylder	818	return std::max(a, b);
393	persson	685	}
394	schoenebeck	319	}
395			}
396
397			template<class T_a, class T_b> inline static T_a Max(const T_a a, const T_b b) {
398	schoenebeck	53	return (b > a) ? b : a;
399			}
400
401	schoenebeck	319	inline static float Fmodf(const float &a, const float &b) {
402			switch (IMPL) {
403	schoenebeck	617	#if CONFIG_ASM && ARCH_X86
404	schoenebeck	319	case ASM_X86_MMX_SSE: {
405			float ret;
406			asm (
407			"movss %1, %%xmm0 # load a\n\t"
408			"movss %2, %%xmm1 # load b\n\t"
409			"movss %%xmm0,%%xmm2\n\t"
410			"divss %%xmm1, %%xmm2 # xmm2 = a / b\n\t"
411			"cvttss2si %%xmm2, %%ecx #convert to int\n\t"
412			"cvtsi2ss %%ecx, %%xmm2 #convert back to float\n\t"
413			"mulss %%xmm1, %%xmm2 # xmm2 = b * int(a/b)\n\t"
414			"subss %%xmm2, %%xmm0 #sub a\n\t"
415			"movss %%xmm0, %0 # output\n\t"
416			: "=m" (ret)
417			: "m" (a), "m" (b)
418			: "%ecx"
419			);
420			return ret;
421			}
422	schoenebeck	617	#endif // CONFIG_ASM && ARCH_X86
423	persson	685	default: {
424			return fmodf(a, b);
425			}
426	schoenebeck	319	}
427			}
428	schoenebeck	53	};
429
430	schoenebeck	319	/// convenience typedef for using the default implementation (which is CPP)
431			typedef __RTMath<> RTMath;
432
433	schoenebeck	53	#endif // __RT_MATH_H__