engines/gig/Synthesizer.h

/***************************************************************************
 *                                                                         *
 *   LinuxSampler - modular, streaming capable sampler                     *
 *                                                                         *
 *   Copyright (C) 2003, 2004 by Benno Senoner and Christian Schoenebeck   *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the Free Software           *
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston,                 *
 *   MA  02111-1307  USA                                                   *
 ***************************************************************************/

#ifndef __LS_GIG_SYNTHESIZER_H__
#define __LS_GIG_SYNTHESIZER_H__

#include "../../common/RTMath.h"
#include "../common/Resampler.h"
#include "../common/BiquadFilter.h"
#include "Filter.h"
#include "Voice.h"

/*
 * Setters for the individual flags of the synthesis mode bit field.
 *
 *   iMode - modifiable integer lvalue holding the mode bits
 *   bVal  - if true the flag is set, otherwise it is cleared
 *
 * Each macro expands to exactly one statement (do { } while (0) idiom) and
 * parenthesizes iMode, so it can be used with arbitrary lvalue expressions
 * and as the unbraced body of an if / else / loop.
 *
 * NOTE: the clear masks are only 8 bit wide, so clearing a flag also zeroes
 * every bit above bit 7 of iMode. This is kept unchanged, since only
 * bits 0..5 are used by the macros in this file.
 */
#define SYNTHESIS_MODE_SET_CONSTPITCH(iMode,bVal)       do { if (bVal) (iMode) |= 0x01; else (iMode) &= 0xfe; } while (0)   /* (un)set mode bit 0 */
#define SYNTHESIS_MODE_SET_LOOP(iMode,bVal)             do { if (bVal) (iMode) |= 0x02; else (iMode) &= 0xfd; } while (0)   /* (un)set mode bit 1 */
#define SYNTHESIS_MODE_SET_INTERPOLATE(iMode,bVal)      do { if (bVal) (iMode) |= 0x04; else (iMode) &= 0xfb; } while (0)   /* (un)set mode bit 2 */
#define SYNTHESIS_MODE_SET_FILTER(iMode,bVal)           do { if (bVal) (iMode) |= 0x08; else (iMode) &= 0xf7; } while (0)   /* (un)set mode bit 3 */
#define SYNTHESIS_MODE_SET_CHANNELS(iMode,bVal)         do { if (bVal) (iMode) |= 0x10; else (iMode) &= 0xef; } while (0)   /* (un)set mode bit 4 */
#define SYNTHESIS_MODE_SET_IMPLEMENTATION(iMode,bVal)   do { if (bVal) (iMode) |= 0x20; else (iMode) &= 0xdf; } while (0)   /* (un)set mode bit 5 */

/*
 * Getters for the individual flags of the synthesis mode bit field. Each
 * one yields the masked bit (zero if the flag is clear, the bit's mask value
 * if it is set). Argument and result are fully parenthesized so that the
 * macros compose correctly with surrounding operators, e.g.
 * !SYNTHESIS_MODE_GET_LOOP(m) or SYNTHESIS_MODE_GET_FILTER(m) == 0x08.
 */
#define SYNTHESIS_MODE_GET_CONSTPITCH(iMode)            ((iMode) & 0x01)
#define SYNTHESIS_MODE_GET_LOOP(iMode)                  ((iMode) & 0x02)
#define SYNTHESIS_MODE_GET_INTERPOLATE(iMode)           ((iMode) & 0x04)
#define SYNTHESIS_MODE_GET_FILTER(iMode)                ((iMode) & 0x08)
#define SYNTHESIS_MODE_GET_CHANNELS(iMode)              ((iMode) & 0x10)
#define SYNTHESIS_MODE_GET_IMPLEMENTATION(iMode)        ((iMode) & 0x20)

// VOICE names the voice type used in the declarations below. It is normally
// gig::Voice, but it is kept as a macro so that this header can also be
// included by the synthesis benchmark, which uses fake data structures:
// define VOICE before including this header to substitute another type.
#ifndef VOICE
# define VOICE Voice
#endif // VOICE

namespace LinuxSampler { namespace gig {

    // Function type taking a voice, an unsigned count, a pointer to sample
    // data and an int (by value).
    // NOTE(review): presumably the type of the functions handed out by
    // GetSynthesisFunction() - confirm against the implementation file.
    typedef void SynthesizeFragment_Fn(VOICE&, uint, sample_t*, int);

    // Returns an untyped pointer for the given synthesis mode value.
    // NOTE(review): SynthesisMode is presumably the bit field built with the
    // SYNTHESIS_MODE_SET_* macros, and the result presumably has to be cast
    // to SynthesizeFragment_Fn* by the caller - defined elsewhere, verify.
    void* GetSynthesisFunction(const int SynthesisMode);

    // Channel layout selector; used as the CHANNELS template argument of
    // the Synthesizer class.
    enum channels_t {
        MONO,
        STEREO
    };

    template<implementation_t IMPLEMENTATION, channels_t CHANNELS, bool USEFILTER, bool INTERPOLATE, bool DOLOOP, bool CONSTPITCH>
    class Synthesizer : public __RTMath<IMPLEMENTATION>, public LinuxSampler::Resampler<INTERPOLATE> {
        public:
            /**
             * Entry point for rendering a fragment of the given voice.
             *
             * Forwards the loop parameters of the voice's sample, the voice's
             * remaining loop cycles and its pitch values to the worker
             * overload below. What differs per implementation is the
             * position variable handed over:
             *  - ASM_X86_MMX_SSE: a temporary single precision copy of
             *    Voice.Pos, which is written back to Voice.Pos afterwards
             *  - otherwise: Voice.Pos itself
             *
             * @param Voice   - voice to be rendered
             * @param Samples - passed through to the worker overload, which
             *                  renders while i < Samples
             * @param pSrc    - source sample data
             * @param i       - current sample index, advanced while rendering
             */
            template<typename VOICE_T>
            inline static void SynthesizeFragment(VOICE_T& Voice, uint Samples, sample_t* pSrc, int& i) {
                if (IMPLEMENTATION != ASM_X86_MMX_SSE) {
                    // the worker operates directly on the voice's position
                    SynthesizeFragment(Voice, Samples, pSrc, i,
                                       Voice.pSample->LoopPlayCount,
                                       Voice.pSample->LoopStart,
                                       Voice.pSample->LoopEnd,
                                       Voice.pSample->LoopSize,
                                       Voice.LoopCyclesLeft,
                                       (void *)&Voice.Pos,
                                       Voice.PitchBase,
                                       Voice.PitchBend);
                    return;
                }

                // assembly implementation: work on a float copy of the position
                float fPos = (float) Voice.Pos;
                SynthesizeFragment(Voice, Samples, pSrc, i,
                                   Voice.pSample->LoopPlayCount,
                                   Voice.pSample->LoopStart,
                                   Voice.pSample->LoopEnd,
                                   Voice.pSample->LoopSize,
                                   Voice.LoopCyclesLeft,
                                   (void *)&fPos,
                                   Voice.PitchBase,
                                   Voice.PitchBend);
                if (INTERPOLATE) EMMS; // macro defined elsewhere, only used by interpolating variants
                Voice.Pos = (double) fPos; // hand the advanced position back to the voice
            }

        //protected:

            /**
             * Worker which renders samples until i reaches Samples, wrapping
             * the playback position at the loop end if DOLOOP is enabled.
             *
             * Three cases are distinguished:
             *  - DOLOOP disabled: plain rendering without any wrapping
             *  - LoopPlayCount == 0: the loop is repeated endlessly
             *  - LoopPlayCount != 0: the loop is repeated until
             *    LoopCyclesLeft reaches zero (it is decremented every time
             *    the position was wrapped), the rest is rendered unlooped
             *
             * With CONSTPITCH the number of samples up to the loop end is
             * calculated once from PitchBase * PitchBend, so the wrap check
             * is only performed after that many samples instead of after
             * every single Synthesize() call.
             *
             * @param Pos - untyped pointer to the playback position, passed
             *              on to Synthesize(), DiffToLoopEnd() and WrapLoop()
             */
            template<typename VOICE_T>
            inline static void SynthesizeFragment(VOICE_T& Voice, uint Samples, sample_t* pSrc, int& i, uint& LoopPlayCount, uint LoopStart, uint LoopEnd, uint LoopSize, uint& LoopCyclesLeft, void* Pos, float& PitchBase, float& PitchBend) {
                const float fLoopEnd   = Float(LoopEnd);
                const float fPitch     = Mul(PitchBase, PitchBend);
                const float fLoopStart = Float(LoopStart);
                const float fLoopSize  = Float(LoopSize);

                if (!DOLOOP) {
                    // no looping at all
                    while (i < Samples) {
                        Synthesize(Voice, Pos, pSrc, i);
                    }
                    return;
                }

                if (!LoopPlayCount) {
                    // endless loop
                    while (i < Samples) {
                        if (CONSTPITCH) {
                            //TODO: instead of +1 we could also round up
                            const int iStop = Min(Samples, i + DiffToLoopEnd(fLoopEnd, Pos, fPitch) + 1);
                            while (i < iStop) {
                                Synthesize(Voice, Pos, pSrc, i);
                            }
                        } else {
                            Synthesize(Voice, Pos, pSrc, i);
                        }
                        WrapLoop(fLoopStart, fLoopSize, fLoopEnd, Pos);
                    }
                    return;
                }

                // loop with limited amount of cycles
                while (i < Samples && LoopCyclesLeft) {
                    if (CONSTPITCH) {
                        //TODO: instead of +1 we could also round up
                        const int iStop = Min(Samples, i + DiffToLoopEnd(fLoopEnd, Pos, fPitch) + 1);
                        while (i < iStop) {
                            Synthesize(Voice, Pos, pSrc, i);
                        }
                    } else {
                        Synthesize(Voice, Pos, pSrc, i);
                    }
                    const int bWrapped = WrapLoop(fLoopStart, fLoopSize, fLoopEnd, Pos);
                    if (bWrapped) --LoopCyclesLeft;
                }

                // all loop cycles consumed: render the remainder without loop
                while (i < Samples) {
                    Synthesize(Voice, Pos, pSrc, i);
                }
            }

            template<typename VOICE_T>
            inline static void Synthesize(VOICE_T& Voice, void* Pos, sample_t* pSrc, int& i) {
                Synthesize(pSrc, Pos,
                           Voice.pEngine->pSynthesisParameters[Event::destination_vco][i],
                           Voice.pEngine->pOutputLeft,
                           Voice.pEngine->pOutputRight,
                           i,
                           Voice.pEngine->pSynthesisParameters[Event::destination_vca],
                           &Voice.PanLeft,
                           &Voice.PanRight,
                           Voice.FilterLeft,
                           Voice.FilterRight,
                           Voice.pEngine->pBasicFilterParameters[i],
                           Voice.pEngine->pMainFilterParameters[i]);
            }

            /**
             * Calculates how many steps of size Pitch fit between the
             * current playback position and the loop end, i.e.
             * (LoopEnd - position) / Pitch converted to int.
             *
             * @param LoopEnd - loop end position
             * @param Pos     - pointer to the current playback position; read
             *                  as double by the C++ implementation and as
             *                  float by the assembly implementation
             * @param Pitch   - playback position increment per step
             * @returns amount of steps as integer
             *
             * NOTE(review): the C++ path truncates towards zero, whereas
             * cvtss2si rounds according to the current MXCSR rounding mode
             * (round to nearest by default), so the two implementations may
             * differ by one.
             */
            inline static int DiffToLoopEnd(const float& LoopEnd, const void* Pos, const float& Pitch) {
                switch (IMPLEMENTATION) {
                    // pure C++ implementation (thus platform independent)
                    case CPP: {
                        return int((LoopEnd - *((const double *)Pos)) / Pitch);
                    }
                    case ASM_X86_MMX_SSE: {
                        int result;
                        __asm__ __volatile__ (
                            "movss    (%1), %%xmm0  #read loopend\n\t"
                            "subss    (%2), %%xmm0  #sub  pos\n\t"
                            "divss    (%3), %%xmm0  #div  by pitch\n\t"
                            "cvtss2si %%xmm0, %0    #convert to int\n\t"
                            : "=r" (result)   /* %0 */
                            : "r" (&LoopEnd), /* %1 */
                              "r" (Pos),      /* %2 */
                              "r" (&Pitch)    /* %3 */
                            : "xmm0",  /* used as scratch register */
                              "memory" /* the operands are only pointers, the values are read from memory */
                        );
                        return result;
                    }
                }
                // only reached if IMPLEMENTATION is none of the cases handled
                // above; avoids running off the end of a non-void function
                return 0;
            }

            /**
             * Wraps the playback position back into the loop if it reached
             * or passed the loop end:
             *
             *   position = fmod(position - LoopEnd, LoopSize) + LoopStart
             *
             * @param LoopStart - loop start position
             * @param LoopSize  - length of the loop
             * @param LoopEnd   - loop end position
             * @param vPos      - pointer to the playback position, which is
             *                    updated in place; accessed as double by the
             *                    C++ implementation and as float by the
             *                    assembly implementation
             * @returns 1 if the position was wrapped, 0 if it was still
             *          before the loop end (position left untouched)
             */
            inline static int WrapLoop(const float& LoopStart, const float& LoopSize, const float& LoopEnd, void* vPos) {
                switch (IMPLEMENTATION) {
                    // pure C++ implementation (thus platform independent)
                    case CPP: {
                        double * Pos = (double *)vPos;
                        if (*Pos < LoopEnd) return 0;
                        *Pos = fmod(*Pos - LoopEnd, LoopSize) + LoopStart;
                        return 1;
                    }
                    case ASM_X86_MMX_SSE: {
                        int result;
                        __asm__ __volatile__ (
                            "movss  (%2), %%xmm0          # load LoopEnd\n\t"
                            "movss  (%1), %%xmm1          # load Pos\n\t"
                            "comiss %%xmm0, %%xmm1      # LoopEnd <> Pos\n\t"
                            "movl    $0,%%eax            # result = 0\n\t"
                            "jb     1f                  # jump if no work needs to be done\n\t"
                            "movss    (%3), %%xmm2        # load LoopSize\n\t"
                            "subss    %%xmm0, %%xmm1    # Pos - LoopEnd\n\t"
                            //now the fmodf
                            "movss    %%xmm1, %%xmm3    # xmm3 = (Pos - LoopEnd)\n\t"
                            "divss    %%xmm2, %%xmm1    # (Pos - LoopEnd) / LoopSize\n\t"
                            "cvttss2si %%xmm1, %%eax    # convert to int\n\t"
                            "cvtsi2ss  %%eax, %%xmm1    # convert back to float\n\t"
                            "movss    (%4), %%xmm0      # load LoopStart\n\t"
                            "mulss    %%xmm2, %%xmm1    # LoopSize * int((Pos-LoopEnd)/LoopSize)\n\t"
                            "subss    %%xmm1, %%xmm3    # xmm3 = fmodf(Pos - LoopEnd, LoopSize)\n\t"
                            //done with fmodf
                            "addss    %%xmm0, %%xmm3      # add LoopStart\n\t"
                            "movss    %%xmm3, (%1)        # update Pos\n\t"
                            "movl    $1,%%eax            # result = 1\n\t"
                            ".balign 16 \n\t"
                            "1:\n\t"
                            /* early clobber ('&'): eax is written before the
                               input operands are used for the last time, so
                               none of them may share that register */
                            : "=&a" (result)    /* %0 */
                            : "r"  (vPos),      /* %1 */
                              "r"  (&LoopEnd),  /* %2 */
                              "r"  (&LoopSize), /* %3 */
                              "r"  (&LoopStart) /* %4 */
                            : "xmm0", "xmm1", "xmm2", "xmm3", /* scratch registers */
                              "memory", /* values are read and Pos is written through pointers */
                              "cc"      /* comiss modifies the flags */
                        );
                        return result;
                    }
                }
                // only reached if IMPLEMENTATION is none of the cases handled
                // above; avoids running off the end of a non-void function
                return 0;
            }

            /**
             * Core synthesis step: fetches the next sample point(s) from the
             * source sample data, optionally filters them (USEFILTER),
             * applies volume and panning and mixes the result into the
             * output buffers.
             *
             * The C++ implementation renders one sample point per call and
             * increments i by one, the MMX/SSE implementation renders four
             * sample points per call and increments i by four.
             *
             * @param pSrc    - source sample data
             * @param Pos     - pointer to the playback position; handed on as
             *                  double* (C++) or float* (MMX/SSE) to the
             *                  GetNext...() routines
             * @param Pitch   - pitch value handed on to the GetNext...() routines
             * @param pOutL   - left output buffer, mixed into (not overwritten)
             * @param pOutR   - right output buffer, mixed into (not overwritten)
             * @param i       - current sample index, advanced by this method
             * @param Volume  - volume factors, indexed like the output buffers
             * @param PanL    - pointer to the pan factor of the left channel
             * @param PanR    - pointer to the pan factor of the right channel
             * @param FilterL - filter used for mono and for the left channel
             * @param FilterR - filter used for the right channel (stereo only)
             * @param bqBase  - filter parameters, handed on to the filters
             * @param bqMain  - filter parameters, handed on to the filters
             *
             * NOTE(review): the MMX/SSE path hands intermediate results from
             * one asm statement to the next in fixed xmm registers (see the
             * comments at the individual statements). This relies on the
             * compiler not using those registers in between, so neither the
             * order of the statements nor the registers must be changed
             * without checking all of them.
             */
            inline static void Synthesize(sample_t* pSrc, void* Pos, float& Pitch, float* pOutL, float* pOutR, int& i, float* Volume, float* PanL, float* PanR, Filter& FilterL, Filter& FilterR, biquad_param_t& bqBase, biquad_param_t& bqMain) {
                switch (IMPLEMENTATION) {
                    // pure C++ implementation (thus platform independent)
                    case CPP: {
                        switch (CHANNELS) {
                            case MONO: {
                                // the same (optionally filtered) sample point feeds both output channels
                                float samplePoint = GetNextSampleMonoCPP(pSrc, (double *)Pos, Pitch);
                                if (USEFILTER) samplePoint = FilterL.Apply(&bqBase, &bqMain, samplePoint);
                                pOutL[i] += samplePoint * Volume[i] * *PanL;
                                pOutR[i] += samplePoint * Volume[i] * *PanR;
                                i++;
                                break;
                            }
                            case STEREO: {
                                // each channel has its own filter object, both use the same parameters
                                stereo_sample_t samplePoint = GetNextSampleStereoCPP(pSrc, (double *)Pos, Pitch);
                                if (USEFILTER) {
                                    samplePoint.left  = FilterL.Apply(&bqBase, &bqMain, samplePoint.left);
                                    samplePoint.right = FilterR.Apply(&bqBase, &bqMain, samplePoint.right);
                                }
                                pOutL[i] += samplePoint.left  * Volume[i] * *PanL;
                                pOutR[i] += samplePoint.right * Volume[i] * *PanR;
                                i++;
                                break;
                            }
                        }
                        break;
                    }
                    // Assembly optimization using the MMX & SSE(1) instruction set (thus only for x86)
                    case ASM_X86_MMX_SSE: {
                        // ii = i rounded down to a multiple of 4: index of the
                        // block of 4 samples which is processed by this call
                        const int ii = i & 0xfffffffc;
                        i += 4;
                        switch (CHANNELS) {
                            case MONO: {
                                GetNext4SamplesMonoMMXSSE(pSrc, (float *)Pos, Pitch); // outputs samples in xmm2
                                if (USEFILTER) {
                                    /* prepare filter input */
                                    __asm__ __volatile__ (
                                        "movaps %xmm2,%xmm0"
                                    );
                                    FilterL.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output
                                    __asm__ __volatile__ (
                                        "movaps %xmm7,%xmm2       # mono filter result -> xmm2"
                                    );
                                }
                                /* apply panorama and volume factors */
                                // NOTE(review): xmm4 is overwritten here as well, but
                                // is not part of the clobber list
                                __asm__ __volatile__ (
                                    "movss    (%1),%%xmm0             # load pan left\n\t"
                                    "movss    (%2),%%xmm1             # load pan right\n\t"
                                    "movaps   (%0),%%xmm4             # load vca\n\t"
                                    "shufps   $0x00,%%xmm0,%%xmm0     # copy pan left to the other 3 cells\n\t"
                                    "shufps   $0x00,%%xmm1,%%xmm1     # copy pan right to the other 3 cells\n\t"
                                    "mulps    %%xmm2,%%xmm0           # left  = sample * pan_left\n\t"
                                    "mulps    %%xmm2,%%xmm1           # right = sample * pan_right\n\t"
                                    "mulps    %%xmm4,%%xmm0           # left  = vca * (sample * pan_left)\n\t"
                                    "mulps    %%xmm4,%%xmm1           # right = vca * (sample * pan_right)\n\t"
                                    : /* no output */
                                    : "r" (&Volume[ii]), /* %0 */
                                      "r" (PanL),   /* %1 */
                                      "r" (PanR)    /* %2 */
                                    : "xmm0", /* holds final left  sample (for the 4 samples) at the end */
                                      "xmm1"  /* holds final right sample (for the 4 samples) at the end */
                                );
                                break;
                            }
                            case STEREO: {
                                GetNext4SamplesStereoMMXSSE(pSrc, (float *)Pos, Pitch); // outputs samples in xmm2 (left channel) and xmm3 (right channel)
                                if (USEFILTER) {
                                    // xmm1 is used to park one channel while the
                                    // other one runs through its filter
                                    __asm__ __volatile__ (
                                        "movaps %xmm2,%xmm0     # prepare left channel for filter\n\t"
                                        "movaps %xmm3,%xmm1     # save right channel not to get overwritten by filter algorithms\n\t"
                                    );
                                    FilterL.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output
                                    __asm__ __volatile__ (
                                        "movaps %xmm1,%xmm0     # prepare right channel for filter\n\t"
                                        "movaps %xmm7,%xmm1     # save filter output for left channel\n\t"
                                    );
                                    FilterR.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output
                                    __asm__ __volatile__ (
                                        "movaps %xmm1,%xmm2     # result left channel -> xmm2\n\t"
                                        "movaps %xmm7,%xmm3     # result right channel -> xmm3\n\t"
                                    );
                                }
                                /* apply panorama and volume factors */
                                // NOTE(review): unlike the mono variant above, this
                                // statement has no clobber list at all although it
                                // overwrites xmm0, xmm1 and xmm4
                                __asm__ __volatile__ (
                                    "movss    (%1),%%xmm0             # load pan left\n\t"
                                    "movss    (%2),%%xmm1             # load pan right\n\t"
                                    "movaps   (%0),%%xmm4             # load vca\n\t"
                                    "shufps   $0x00,%%xmm0,%%xmm0     # copy pan left to the other 3 cells\n\t"
                                    "shufps   $0x00,%%xmm1,%%xmm1     # copy pan right to the other 3 cells\n\t"
                                    "mulps    %%xmm2,%%xmm0           # left  = sample_left  * pan_left\n\t"
                                    "mulps    %%xmm3,%%xmm1           # right = sample_right * pan_right\n\t"
                                    "mulps    %%xmm4,%%xmm0           # left  = vca * (sample_left  * pan_left)\n\t"
                                    "mulps    %%xmm4,%%xmm1           # right = vca * (sample_right * pan_right)\n\t"
                                    : /* no output */
                                    : "r" (&Volume[ii]), /* %0 */
                                      "r" (PanL),   /* %1 */
                                      "r" (PanR)    /* %2 */
                                );
                                break;
                            }
                        }
                        /* mix the 4 samples to the output channels */
                        // expects the final left / right samples in xmm0 / xmm1, as
                        // left behind by the pan & volume statements above
                        __asm__ __volatile__ (
                            "addps  (%0),%%xmm0       # mix calculated sample(s) to output left\n\t"
                            "movaps %%xmm0,(%0)       # output to left channel\n\t"
                            "addps  (%1),%%xmm1       # mix calculated sample(s) to output right\n\t"
                            "movaps %%xmm1,(%1)       # output to right channel\n\t"
                            : /* no output */
                            : "r" (&pOutL[ii]), /* %0 - must be 16 byte aligned ! */
                              "r" (&pOutR[ii])  /* %1 - must be 16 byte aligned ! */
                        );
                    }
                }
            }
    };

}} // namespace LinuxSampler::gig

#endif // __LS_GIG_SYNTHESIZER_H__
1	schoenebeck	320	/***************************************************************************
2			* *
3			* LinuxSampler - modular, streaming capable sampler *
4			* *
5			* Copyright (C) 2003, 2004 by Benno Senoner and Christian Schoenebeck *
6			* *
7			* This program is free software; you can redistribute it and/or modify *
8			* it under the terms of the GNU General Public License as published by *
9			* the Free Software Foundation; either version 2 of the License, or *
10			* (at your option) any later version. *
11			* *
12			* This program is distributed in the hope that it will be useful, *
13			* but WITHOUT ANY WARRANTY; without even the implied warranty of *
14			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15			* GNU General Public License for more details. *
16			* *
17			* You should have received a copy of the GNU General Public License *
18			* along with this program; if not, write to the Free Software *
19			* Foundation, Inc., 59 Temple Place, Suite 330, Boston, *
20			* MA 02111-1307 USA *
21			***************************************************************************/
22
23			#ifndef __LS_GIG_SYNTHESIZER_H__
24			#define __LS_GIG_SYNTHESIZER_H__
25
26			#include "../../common/RTMath.h"
27			#include "../common/Resampler.h"
28			#include "../common/BiquadFilter.h"
29			#include "Filter.h"
30			#include "Voice.h"
31
32			#define SYNTHESIS_MODE_SET_CONSTPITCH(iMode,bVal) if (bVal) iMode \|= 0x01; else iMode &= 0xfe /* (un)set mode bit 0 */
33			#define SYNTHESIS_MODE_SET_LOOP(iMode,bVal) if (bVal) iMode \|= 0x02; else iMode &= 0xfd /* (un)set mode bit 1 */
34			#define SYNTHESIS_MODE_SET_INTERPOLATE(iMode,bVal) if (bVal) iMode \|= 0x04; else iMode &= 0xfb /* (un)set mode bit 2 */
35			#define SYNTHESIS_MODE_SET_FILTER(iMode,bVal) if (bVal) iMode \|= 0x08; else iMode &= 0xf7 /* (un)set mode bit 3 */
36			#define SYNTHESIS_MODE_SET_CHANNELS(iMode,bVal) if (bVal) iMode \|= 0x10; else iMode &= 0xef /* (un)set mode bit 4 */
37			#define SYNTHESIS_MODE_SET_IMPLEMENTATION(iMode,bVal) if (bVal) iMode \|= 0x20; else iMode &= 0xdf /* (un)set mode bit 5 */
38
39			#define SYNTHESIS_MODE_GET_CONSTPITCH(iMode) iMode & 0x01
40			#define SYNTHESIS_MODE_GET_LOOP(iMode) iMode & 0x02
41			#define SYNTHESIS_MODE_GET_INTERPOLATE(iMode) iMode & 0x04
42			#define SYNTHESIS_MODE_GET_FILTER(iMode) iMode & 0x08
43			#define SYNTHESIS_MODE_GET_CHANNELS(iMode) iMode & 0x10
44			#define SYNTHESIS_MODE_GET_IMPLEMENTATION(iMode) iMode & 0x20
45
46			// that's usually gig::Voice of course, but we make it a macro so we can
47			// include this code for our synthesis benchmark which uses fake data
48			// structures
49			#ifndef VOICE
50			# define VOICE Voice
51			#endif // VOICE
52
53			namespace LinuxSampler { namespace gig {
54
55			typedef void SynthesizeFragment_Fn(VOICE&, uint, sample_t*, int);
56
57			void* GetSynthesisFunction(const int SynthesisMode);
58
59			enum channels_t {
60			MONO,
61			STEREO
62			};
63
64			template<implementation_t IMPLEMENTATION, channels_t CHANNELS, bool USEFILTER, bool INTERPOLATE, bool DOLOOP, bool CONSTPITCH>
65			class Synthesizer : public __RTMath<IMPLEMENTATION>, public LinuxSampler::Resampler<INTERPOLATE> {
66			public:
67			template<typename VOICE_T>
68			inline static void SynthesizeFragment(VOICE_T& Voice, uint Samples, sample_t* pSrc, int& i) {
69			if (IMPLEMENTATION == ASM_X86_MMX_SSE) {
70			float fPos = (float) Voice.Pos;
71			SynthesizeFragment(Voice, Samples, pSrc, i, Voice.pSample->LoopPlayCount,
72			Voice.pSample->LoopStart,
73			Voice.pSample->LoopEnd,
74			Voice.pSample->LoopSize,
75			Voice.LoopCyclesLeft,
76			(void *)&fPos,
77			Voice.PitchBase,
78			Voice.PitchBend);
79			if (INTERPOLATE) EMMS;
80			Voice.Pos = (double) fPos;
81			} else {
82			SynthesizeFragment(Voice, Samples, pSrc, i, Voice.pSample->LoopPlayCount,
83			Voice.pSample->LoopStart,
84			Voice.pSample->LoopEnd,
85			Voice.pSample->LoopSize,
86			Voice.LoopCyclesLeft,
87			(void *)&Voice.Pos,
88			Voice.PitchBase,
89			Voice.PitchBend);
90			}
91			}
92
93			//protected:
94
95			template<typename VOICE_T>
96			inline static void SynthesizeFragment(VOICE_T& Voice, uint Samples, sample_t* pSrc, int& i, uint& LoopPlayCount, uint LoopStart, uint LoopEnd, uint LoopSize, uint& LoopCyclesLeft, void* Pos, float& PitchBase, float& PitchBend) {
97			const float loopEnd = Float(LoopEnd);
98			const float PBbyPB = Mul(PitchBase, PitchBend);
99			const float f_LoopStart = Float(LoopStart);
100			const float f_LoopSize = Float(LoopSize);
101			if (DOLOOP) {
102			if (LoopPlayCount) {
103			// render loop (loop count limited)
104			while (i < Samples && LoopCyclesLeft) {
105			if (CONSTPITCH) {
106			const int processEnd = Min(Samples, i + DiffToLoopEnd(loopEnd,Pos, PBbyPB) + 1); //TODO: instead of +1 we could also round up
107			while (i < processEnd) Synthesize(Voice, Pos, pSrc, i);
108			}
109			else Synthesize(Voice, Pos, pSrc, i);
110			if (WrapLoop(f_LoopStart, f_LoopSize, loopEnd, Pos)) LoopCyclesLeft--;
111			}
112			// render on without loop
113			while (i < Samples) Synthesize(Voice, Pos, pSrc, i);
114			}
115			else { // render loop (endless loop)
116			while (i < Samples) {
117			if (CONSTPITCH) {
118			const int processEnd = Min(Samples, i + DiffToLoopEnd(loopEnd, Pos, PBbyPB) + 1); //TODO: instead of +1 we could also round up
119			while (i < processEnd) Synthesize(Voice, Pos, pSrc, i);
120			}
121			else Synthesize(Voice, Pos, pSrc, i);
122			WrapLoop(f_LoopStart, f_LoopSize, loopEnd, Pos);
123			}
124			}
125			}
126			else { // no looping
127			while (i < Samples) { Synthesize(Voice, Pos, pSrc, i);}
128			}
129			}
130
131			template<typename VOICE_T>
132			inline static void Synthesize(VOICE_T& Voice, void* Pos, sample_t* pSrc, int& i) {
133			Synthesize(pSrc, Pos,
134			Voice.pEngine->pSynthesisParameters[Event::destination_vco][i],
135			Voice.pEngine->pOutputLeft,
136			Voice.pEngine->pOutputRight,
137			i,
138			Voice.pEngine->pSynthesisParameters[Event::destination_vca],
139			&Voice.PanLeft,
140			&Voice.PanRight,
141			Voice.FilterLeft,
142			Voice.FilterRight,
143			Voice.pEngine->pBasicFilterParameters[i],
144			Voice.pEngine->pMainFilterParameters[i]);
145			}
146
147			inline static int DiffToLoopEnd(const float& LoopEnd, const void* Pos, const float& Pitch) {
148			switch (IMPLEMENTATION) {
149			// pure C++ implementation (thus platform independent)
150			case CPP: {
151			return int((LoopEnd - ((double )Pos)) / Pitch);
152			}
153			case ASM_X86_MMX_SSE: {
154			int result;
155			__asm__ __volatile__ (
156			"movss (%1), %%xmm0 #read loopend\n\t"
157			"subss (%2), %%xmm0 #sub pos\n\t"
158			"divss (%3), %%xmm0 #div by pitch\n\t"
159			"cvtss2si %%xmm0, %0 #convert to int\n\t"
160			: "=r" (result) /* %0 */
161			: "r" (&LoopEnd), /* %1 */
162			"r" (Pos), /* %2 */
163			"r" (&Pitch) /* %3 */
164			);
165			return result;
166			}
167			}
168			}
169
170			inline static int WrapLoop(const float& LoopStart, const float& LoopSize, const float& LoopEnd, void* vPos) {
171			switch (IMPLEMENTATION) {
172			// pure C++ implementation (thus platform independent)
173			case CPP: {
174			double * Pos = (double *)vPos;
175			if (*Pos < LoopEnd) return 0;
176			Pos = fmod(Pos - LoopEnd, LoopSize) + LoopStart;
177			return 1;
178			}
179			case ASM_X86_MMX_SSE: {
180			int result;
181			__asm__ __volatile__ (
182			"movss (%2), %%xmm0 # load LoopEnd\n\t"
183			"movss (%1), %%xmm1 # load Pos\n\t"
184			"comiss %%xmm0, %%xmm1 # LoopEnd <> Pos\n\t"
185			"movl $0,%%eax # result = 0\n\t"
186			"jb 1f # jump if no work needs to be done\n\t"
187			"movss (%3), %%xmm2 # load LoopSize\n\t"
188			"subss %%xmm0, %%xmm1 # Pos - LoopEnd\n\t"
189			//now the fmodf
190			"movss %%xmm1, %%xmm3 # xmm3 = (Pos - LoopEnd)\n\t"
191			"divss %%xmm2, %%xmm1 # (Pos - LoopEnd) / LoopSize\n\t"
192			"cvttss2si %%xmm1, %%eax # convert to int\n\t"
193			"cvtsi2ss %%eax, %%xmm1 # convert back to float\n\t"
194			"movss (%4), %%xmm0 # load LoopStart\n\t"
195			"mulss %%xmm2, %%xmm1 # LoopSize * int((Pos-LoopEnd)/LoopSize)\n\t"
196			"subss %%xmm1, %%xmm3 # xmm2 = fmodf(Pos - LoopEnd, LoopSize)\n\t"
197			//done with fmodf
198			"addss %%xmm0, %%xmm3 # add LoopStart\n\t"
199			"movss %%xmm3, (%1) # update Pos\n\t"
200			"movl $1,%%eax # result = 1\n\t"
201			".balign 16 \n\t"
202			"1:\n\t"
203			: "=a" (result) /* %0 */
204			: "r" (vPos), /* %1 */
205			"r" (&LoopEnd), /* %2 */
206			"r" (&LoopSize), /* %3 */
207			"r" (&LoopStart) /* %4 */
208			);
209			return result;
210			}
211			}
212			}
213
214			inline static void Synthesize(sample_t* pSrc, void* Pos, float& Pitch, float* pOutL, float* pOutR, int& i, float* Volume, float* PanL, float* PanR, Filter& FilterL, Filter& FilterR, biquad_param_t& bqBase, biquad_param_t& bqMain) {
215			switch (IMPLEMENTATION) {
216			// pure C++ implementation (thus platform independent)
217			case CPP: {
218			switch (CHANNELS) {
219			case MONO: {
220			float samplePoint = GetNextSampleMonoCPP(pSrc, (double *)Pos, Pitch);
221			if (USEFILTER) samplePoint = FilterL.Apply(&bqBase, &bqMain, samplePoint);
222			pOutL[i] += samplePoint * Volume[i] * *PanL;
223			pOutR[i] += samplePoint * Volume[i] * *PanR;
224			i++;
225			break;
226			}
227			case STEREO: {
228			stereo_sample_t samplePoint = GetNextSampleStereoCPP(pSrc, (double *)Pos, Pitch);
229			if (USEFILTER) {
230			samplePoint.left = FilterL.Apply(&bqBase, &bqMain, samplePoint.left);
231			samplePoint.right = FilterR.Apply(&bqBase, &bqMain, samplePoint.right);
232			}
233			pOutL[i] += samplePoint.left * Volume[i] * *PanL;
234			pOutR[i] += samplePoint.right * Volume[i] * *PanR;
235			i++;
236			break;
237			}
238			}
239			break;
240			}
241			// Assembly optimization using the MMX & SSE(1) instruction set (thus only for x86)
242			case ASM_X86_MMX_SSE: {
243			const int ii = i & 0xfffffffc;
244			i += 4;
245			switch (CHANNELS) {
246			case MONO: {
247			GetNext4SamplesMonoMMXSSE(pSrc, (float *)Pos, Pitch); // outputs samples in xmm2
248			if (USEFILTER) {
249			/* prepare filter input */
250			__asm__ __volatile__ (
251			"movaps %xmm2,%xmm0"
252			);
253			FilterL.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output
254			__asm__ __volatile__ (
255			"movaps %xmm7,%xmm2 # mono filter result -> xmm2"
256			);
257			}
258			/* apply panorama and volume factors */
259			__asm__ __volatile__ (
260			"movss (%1),%%xmm0 # load pan left\n\t"
261			"movss (%2),%%xmm1 # load pan right\n\t"
262			"movaps (%0),%%xmm4 # load vca\n\t"
263			"shufps $0x00,%%xmm0,%%xmm0 # copy pan left to the other 3 cells\n\t"
264			"shufps $0x00,%%xmm1,%%xmm1 # copy pan right to the other 3 cells\n\t"
265			"mulps %%xmm2,%%xmm0 # left = sample * pan_left\n\t"
266			"mulps %%xmm2,%%xmm1 # right = sample * pan_right\n\t"
267			"mulps %%xmm4,%%xmm0 # left = vca * (sample * pan_left)\n\t"
268			"mulps %%xmm4,%%xmm1 # right = vca * (sample * pan_right)\n\t"
269			: /* no output */
270			: "r" (&Volume[ii]), /* %0 */
271			"r" (PanL), /* %1 */
272			"r" (PanR) /* %2 */
273			: "xmm0", /* holds final left sample (for the 4 samples) at the end */
274			"xmm1" /* holds final right sample (for the 4 samples) at the end */
275			);
276			break;
277			}
278			case STEREO: {
279			GetNext4SamplesStereoMMXSSE(pSrc, (float *)Pos, Pitch); // outputs samples in xmm2 (left channel) and xmm3 (right channel)
280			if (USEFILTER) {
281			__asm__ __volatile__ (
282			"movaps %xmm2,%xmm0 # prepare left channel for filter\n\t"
283			"movaps %xmm3,%xmm1 # save right channel not to get overwritten by filter algorithms\n\t"
284			);
285			FilterL.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output
286			__asm__ __volatile__ (
287			"movaps %xmm1,%xmm0 # prepare right channel for filter\n\t"
288			"movaps %xmm7,%xmm1 # save filter output for left channel\n\t"
289			);
290			FilterR.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output
291			__asm__ __volatile__ (
292			"movaps %xmm1,%xmm2 # result left channel -> xmm2\n\t"
293			"movaps %xmm7,%xmm3 # result right channel -> xmm3\n\t"
294			);
295			}
296			/* apply panorama and volume factors */
297			__asm__ __volatile__ (
298			"movss (%1),%%xmm0 # load pan left\n\t"
299			"movss (%2),%%xmm1 # load pan right\n\t"
300			"movaps (%0),%%xmm4 # load vca\n\t"
301			"shufps $0x00,%%xmm0,%%xmm0 # copy pan left to the other 3 cells\n\t"
302			"shufps $0x00,%%xmm1,%%xmm1 # copy pan right to the other 3 cells\n\t"
303			"mulps %%xmm2,%%xmm0 # left = sample_left * pan_left\n\t"
304			"mulps %%xmm3,%%xmm1 # right = sample_right * pan_right\n\t"
305			"mulps %%xmm4,%%xmm0 # left = vca * (sample_left * pan_left)\n\t"
306			"mulps %%xmm4,%%xmm1 # right = vca * (sample_right * pan_right)\n\t"
307			: /* no output */
308			: "r" (&Volume[ii]), /* %0 */
309			"r" (PanL), /* %1 */
310			"r" (PanR) /* %2 */
311			);
312			break;
313			}
314			}
315			/* mix the 4 samples to the output channels */
316			__asm__ __volatile__ (
317			"addps (%0),%%xmm0 # mix calculated sample(s) to output left\n\t"
318			"movaps %%xmm0,(%0) # output to left channel\n\t"
319			"addps (%1),%%xmm1 # mix calculated sample(s) to output right\n\t"
320			"movaps %%xmm1,(%1) # output to right channel\n\t"
321			: /* no output */
322			: "r" (&pOutL[ii]), /* %0 - must be 16 byte aligned ! */
323			"r" (&pOutR[ii]) /* %1 - must be 16 byte aligned ! */
324			);
325			}
326			}
327			}
328			};
329
330			}} // namespace LinuxSampler::gig
331
332			#endif // __LS_GIG_SYNTHESIZER_H__