/[svn]/linuxsampler/trunk/src/engines/gig/Synthesizer.h
ViewVC logotype

Annotation of /linuxsampler/trunk/src/engines/gig/Synthesizer.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 617 - (hide annotations) (download) (as text)
Wed Jun 8 21:00:06 2005 UTC (18 years, 10 months ago) by schoenebeck
File MIME type: text/x-c++hdr
File size: 23568 byte(s)
* hand-crafted assembly optimization code can be disabled with
  './configure --disable-asm' (definitely not recommended)

1 schoenebeck 320 /***************************************************************************
2     * *
3     * LinuxSampler - modular, streaming capable sampler *
4     * *
5     * Copyright (C) 2003, 2004 by Benno Senoner and Christian Schoenebeck *
6 schoenebeck 411 * Copyright (C) 2005 Christian Schoenebeck *
7 schoenebeck 320 * *
8     * This program is free software; you can redistribute it and/or modify *
9     * it under the terms of the GNU General Public License as published by *
10     * the Free Software Foundation; either version 2 of the License, or *
11     * (at your option) any later version. *
12     * *
13     * This program is distributed in the hope that it will be useful, *
14     * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16     * GNU General Public License for more details. *
17     * *
18     * You should have received a copy of the GNU General Public License *
19     * along with this program; if not, write to the Free Software *
20     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, *
21     * MA 02111-1307 USA *
22     ***************************************************************************/
23    
24     #ifndef __LS_GIG_SYNTHESIZER_H__
25     #define __LS_GIG_SYNTHESIZER_H__
26    
27 schoenebeck 328 #include "../../common/global.h"
28 schoenebeck 320 #include "../../common/RTMath.h"
29     #include "../common/Resampler.h"
30     #include "../common/BiquadFilter.h"
31     #include "Filter.h"
32     #include "Voice.h"
33    
// Bit flags packed into the integer "synthesis mode" word.
//
//   bit 0 (0x01)  CONSTPITCH
//   bit 1 (0x02)  LOOP
//   bit 2 (0x04)  INTERPOLATE
//   bit 3 (0x08)  FILTER
//   bit 4 (0x10)  CHANNELS
//   bit 5 (0x20)  IMPLEMENTATION
//   bit 6 (0x40)  PROFILING
//
// The SET macros are statements: they set the bit if bVal is true and clear
// it otherwise. They are wrapped in do { } while (0) so that they behave like
// a single statement in every context (e.g. as the unbraced body of an
// if/else), and their arguments are parenthesized so that arbitrary
// expressions can be passed safely.
#define SYNTHESIS_MODE_SET_CONSTPITCH(iMode,bVal)     do { if (bVal) (iMode) |= 0x01; else (iMode) &= ~0x01; } while (0)  /* (un)set mode bit 0 */
#define SYNTHESIS_MODE_SET_LOOP(iMode,bVal)           do { if (bVal) (iMode) |= 0x02; else (iMode) &= ~0x02; } while (0)  /* (un)set mode bit 1 */
#define SYNTHESIS_MODE_SET_INTERPOLATE(iMode,bVal)    do { if (bVal) (iMode) |= 0x04; else (iMode) &= ~0x04; } while (0)  /* (un)set mode bit 2 */
#define SYNTHESIS_MODE_SET_FILTER(iMode,bVal)         do { if (bVal) (iMode) |= 0x08; else (iMode) &= ~0x08; } while (0)  /* (un)set mode bit 3 */
#define SYNTHESIS_MODE_SET_CHANNELS(iMode,bVal)       do { if (bVal) (iMode) |= 0x10; else (iMode) &= ~0x10; } while (0)  /* (un)set mode bit 4 */
#define SYNTHESIS_MODE_SET_IMPLEMENTATION(iMode,bVal) do { if (bVal) (iMode) |= 0x20; else (iMode) &= ~0x20; } while (0)  /* (un)set mode bit 5 */
#define SYNTHESIS_MODE_SET_PROFILING(iMode,bVal)      do { if (bVal) (iMode) |= 0x40; else (iMode) &= ~0x40; } while (0)  /* (un)set mode bit 6 */

// The GET macros are expressions yielding the masked bit (zero if the bit is
// clear, the bit's mask value if it is set). The whole expansion is
// parenthesized: '&' binds weaker than '==' / '!=' / '!', so without the
// parentheses something like SYNTHESIS_MODE_GET_LOOP(m) == 0 would be parsed
// as m & (0x02 == 0).
#define SYNTHESIS_MODE_GET_CONSTPITCH(iMode)          ((iMode) & 0x01)
#define SYNTHESIS_MODE_GET_LOOP(iMode)                ((iMode) & 0x02)
#define SYNTHESIS_MODE_GET_INTERPOLATE(iMode)         ((iMode) & 0x04)
#define SYNTHESIS_MODE_GET_FILTER(iMode)              ((iMode) & 0x08)
#define SYNTHESIS_MODE_GET_CHANNELS(iMode)            ((iMode) & 0x10)
#define SYNTHESIS_MODE_GET_IMPLEMENTATION(iMode)      ((iMode) & 0x20)
48    
// Type of the voice object handed to the synthesis functions. Normally this
// is gig::Voice, but it is kept as an overridable macro so that this code can
// also be included by the synthesis benchmark, which substitutes fake data
// structures by defining VOICE before including this header.
#if !defined(VOICE)
# define VOICE Voice
#endif // !defined(VOICE)
55    
56     namespace LinuxSampler { namespace gig {
57    
58 senkov 325 typedef void SynthesizeFragment_Fn(VOICE&, uint, sample_t*, uint);
59 schoenebeck 320
60     void* GetSynthesisFunction(const int SynthesisMode);
61 senkov 325 void RunSynthesisFunction(const int SynthesisMode, VOICE& voice, uint Samples, sample_t* pSrc, uint Skip);
62 schoenebeck 320
/// Channel configuration selector; used as the CHANNELS template argument
/// of the Synthesizer class template.
enum channels_t {
    MONO   = 0,
    STEREO = 1
};
67    
68 schoenebeck 563 /** @brief Main Synthesis algorithms for the gig::Engine
69     *
70     * Implementation of the main synthesis algorithms of the Gigasampler
71     * format capable sampler engine. This means resampling / interpolation
72     * for pitching the audio signal, looping, filter and amplification.
73     */
74 schoenebeck 320 template<implementation_t IMPLEMENTATION, channels_t CHANNELS, bool USEFILTER, bool INTERPOLATE, bool DOLOOP, bool CONSTPITCH>
75     class Synthesizer : public __RTMath<IMPLEMENTATION>, public LinuxSampler::Resampler<INTERPOLATE> {
76 persson 497
77     // declarations of derived functions (see "Name lookup,
78     // templates, and accessing members of base classes" in
79     // the gcc manual for an explanation of why this is
80     // needed).
81     using __RTMath<IMPLEMENTATION>::Mul;
82     using __RTMath<IMPLEMENTATION>::Float;
83     using LinuxSampler::Resampler<INTERPOLATE>::GetNextSampleMonoCPP;
84     using LinuxSampler::Resampler<INTERPOLATE>::GetNextSampleStereoCPP;
85 schoenebeck 617 #if CONFIG_ASM && ARCH_X86
86 persson 497 using LinuxSampler::Resampler<INTERPOLATE>::GetNext4SamplesMonoMMXSSE;
87     using LinuxSampler::Resampler<INTERPOLATE>::GetNext4SamplesStereoMMXSSE;
88     #endif
89    
90 schoenebeck 320 public:
91 schoenebeck 563 /**
92     * Render audio for the current fragment for the given voice.
93     * This is the toplevel method of this class.
94     */
95 schoenebeck 320 template<typename VOICE_T>
96 senkov 325 inline static void SynthesizeFragment(VOICE_T& Voice, uint Samples, sample_t* pSrc, uint i) {
97 schoenebeck 424 const float panLeft = Mul(Voice.PanLeft, Voice.pEngineChannel->GlobalPanLeft);
98     const float panRight = Mul(Voice.PanRight, Voice.pEngineChannel->GlobalPanRight);
99 schoenebeck 320 if (IMPLEMENTATION == ASM_X86_MMX_SSE) {
100     float fPos = (float) Voice.Pos;
101     SynthesizeFragment(Voice, Samples, pSrc, i, Voice.pSample->LoopPlayCount,
102     Voice.pSample->LoopStart,
103     Voice.pSample->LoopEnd,
104     Voice.pSample->LoopSize,
105     Voice.LoopCyclesLeft,
106     (void *)&fPos,
107     Voice.PitchBase,
108 schoenebeck 424 Voice.PitchBend,
109     &panLeft, &panRight);
110 schoenebeck 617 #if CONFIG_ASM && ARCH_X86
111 schoenebeck 320 if (INTERPOLATE) EMMS;
112 schoenebeck 361 #endif
113 schoenebeck 320 Voice.Pos = (double) fPos;
114     } else {
115     SynthesizeFragment(Voice, Samples, pSrc, i, Voice.pSample->LoopPlayCount,
116     Voice.pSample->LoopStart,
117     Voice.pSample->LoopEnd,
118     Voice.pSample->LoopSize,
119     Voice.LoopCyclesLeft,
120     (void *)&Voice.Pos,
121     Voice.PitchBase,
122 schoenebeck 424 Voice.PitchBend,
123     &panLeft, &panRight);
124 schoenebeck 320 }
125     }
126    
127     //protected:
128    
129 schoenebeck 563 /**
130     * Render audio for the current fragment for the given voice.
131     * Will be called by the toplevel SynthesizeFragment() method.
132     */
133 schoenebeck 320 template<typename VOICE_T>
134 schoenebeck 424 inline static void SynthesizeFragment(VOICE_T& Voice, uint Samples, sample_t* pSrc, uint& i, uint& LoopPlayCount, uint LoopStart, uint LoopEnd, uint LoopSize, uint& LoopCyclesLeft, void* Pos, float& PitchBase, float& PitchBend, const float* PanLeft, const float* PanRight) {
135 schoenebeck 320 const float loopEnd = Float(LoopEnd);
136     const float PBbyPB = Mul(PitchBase, PitchBend);
137     const float f_LoopStart = Float(LoopStart);
138     const float f_LoopSize = Float(LoopSize);
139     if (DOLOOP) {
140     if (LoopPlayCount) {
141     // render loop (loop count limited)
142     while (i < Samples && LoopCyclesLeft) {
143     if (CONSTPITCH) {
144 senkov 325 const uint processEnd = Min(Samples, i + DiffToLoopEnd(loopEnd,Pos, PBbyPB) + 1); //TODO: instead of +1 we could also round up
145 schoenebeck 424 while (i < processEnd) Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight);
146 schoenebeck 320 }
147 schoenebeck 424 else Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight);
148 schoenebeck 320 if (WrapLoop(f_LoopStart, f_LoopSize, loopEnd, Pos)) LoopCyclesLeft--;
149     }
150     // render on without loop
151 schoenebeck 424 while (i < Samples) Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight);
152 schoenebeck 320 }
153     else { // render loop (endless loop)
154     while (i < Samples) {
155     if (CONSTPITCH) {
156 senkov 325 const uint processEnd = Min(Samples, i + DiffToLoopEnd(loopEnd, Pos, PBbyPB) + 1); //TODO: instead of +1 we could also round up
157 schoenebeck 424 while (i < processEnd) Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight);
158 schoenebeck 320 }
159 schoenebeck 424 else Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight);
160 schoenebeck 320 WrapLoop(f_LoopStart, f_LoopSize, loopEnd, Pos);
161     }
162     }
163     }
164     else { // no looping
165 schoenebeck 424 while (i < Samples) { Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight);}
166 schoenebeck 320 }
167     }
168    
169 schoenebeck 563 /**
170     * Atomicly render a piece for the voice. For the C++
171     * implementation this means rendering exactly one sample
172     * point, whereas for the MMX/SSE implementation this means
173     * rendering 4 sample points.
174     */
175 schoenebeck 320 template<typename VOICE_T>
176 schoenebeck 424 inline static void Synthesize(VOICE_T& Voice, void* Pos, sample_t* pSrc, uint& i, const float* PanLeft, const float* PanRight) {
177 schoenebeck 320 Synthesize(pSrc, Pos,
178     Voice.pEngine->pSynthesisParameters[Event::destination_vco][i],
179 schoenebeck 411 Voice.pEngineChannel->pOutputLeft,
180     Voice.pEngineChannel->pOutputRight,
181 schoenebeck 320 i,
182     Voice.pEngine->pSynthesisParameters[Event::destination_vca],
183 schoenebeck 424 PanLeft,
184     PanRight,
185 schoenebeck 320 Voice.FilterLeft,
186     Voice.FilterRight,
187     Voice.pEngine->pBasicFilterParameters[i],
188     Voice.pEngine->pMainFilterParameters[i]);
189     }
190    
191 schoenebeck 563 /**
192     * Returns the difference to the sample's loop end.
193     */
194 schoenebeck 320 inline static int DiffToLoopEnd(const float& LoopEnd, const void* Pos, const float& Pitch) {
195     switch (IMPLEMENTATION) {
196     // pure C++ implementation (thus platform independent)
197     case CPP: {
198 senkov 325 return uint((LoopEnd - *((double *)Pos)) / Pitch);
199 schoenebeck 320 }
200 schoenebeck 617 #if CONFIG_ASM && ARCH_X86
201 schoenebeck 320 case ASM_X86_MMX_SSE: {
202     int result;
203     __asm__ __volatile__ (
204     "movss (%1), %%xmm0 #read loopend\n\t"
205     "subss (%2), %%xmm0 #sub pos\n\t"
206     "divss (%3), %%xmm0 #div by pitch\n\t"
207     "cvtss2si %%xmm0, %0 #convert to int\n\t"
208     : "=r" (result) /* %0 */
209     : "r" (&LoopEnd), /* %1 */
210     "r" (Pos), /* %2 */
211     "r" (&Pitch) /* %3 */
212     );
213     return result;
214     }
215 schoenebeck 617 #endif // CONFIG_ASM && ARCH_X86
216 schoenebeck 320 }
217     }
218    
219 schoenebeck 563 /**
220     * This method handles looping of the RAM playback part of the
221     * sample, thus repositioning the playback position once the
222     * loop limit was reached. Note: looping of the disk streaming
223     * part is handled by libgig (ReadAndLoop() method which will
224     * be called by the DiskThread).
225     */
226 schoenebeck 320 inline static int WrapLoop(const float& LoopStart, const float& LoopSize, const float& LoopEnd, void* vPos) {
227     switch (IMPLEMENTATION) {
228     // pure C++ implementation (thus platform independent)
229     case CPP: {
230     double * Pos = (double *)vPos;
231     if (*Pos < LoopEnd) return 0;
232     *Pos = fmod(*Pos - LoopEnd, LoopSize) + LoopStart;
233     return 1;
234     }
235 schoenebeck 617 #if CONFIG_ASM && ARCH_X86
236 schoenebeck 320 case ASM_X86_MMX_SSE: {
237 senkov 336 int result = 0;
238 schoenebeck 320 __asm__ __volatile__ (
239     "movss (%2), %%xmm0 # load LoopEnd\n\t"
240     "movss (%1), %%xmm1 # load Pos\n\t"
241     "comiss %%xmm0, %%xmm1 # LoopEnd <> Pos\n\t"
242     "jb 1f # jump if no work needs to be done\n\t"
243     "movss (%3), %%xmm2 # load LoopSize\n\t"
244     "subss %%xmm0, %%xmm1 # Pos - LoopEnd\n\t"
245     //now the fmodf
246     "movss %%xmm1, %%xmm3 # xmm3 = (Pos - LoopEnd)\n\t"
247     "divss %%xmm2, %%xmm1 # (Pos - LoopEnd) / LoopSize\n\t"
248 schoenebeck 498 "cvttss2si %%xmm1, %2 # convert to int\n\t"
249     "cvtsi2ss %2, %%xmm1 # convert back to float\n\t"
250 schoenebeck 320 "movss (%4), %%xmm0 # load LoopStart\n\t"
251     "mulss %%xmm2, %%xmm1 # LoopSize * int((Pos-LoopEnd)/LoopSize)\n\t"
252     "subss %%xmm1, %%xmm3 # xmm2 = fmodf(Pos - LoopEnd, LoopSize)\n\t"
253     //done with fmodf
254     "addss %%xmm0, %%xmm3 # add LoopStart\n\t"
255     "movss %%xmm3, (%1) # update Pos\n\t"
256 senkov 336 "movl $1, (%0) # result = 1\n\t"
257 schoenebeck 320 ".balign 16 \n\t"
258     "1:\n\t"
259 senkov 336 :: "r" (&result), /* %0 */
260     "r" (vPos), /* %1 */
261 schoenebeck 320 "r" (&LoopEnd), /* %2 */
262     "r" (&LoopSize), /* %3 */
263     "r" (&LoopStart) /* %4 */
264     );
265     return result;
266     }
267 schoenebeck 617 #endif // CONFIG_ASM && ARCH_X86
268 schoenebeck 320 }
269     }
270    
271 schoenebeck 563 /**
272     * Atomicly render a piece for the voice. For the C++
273     * implementation this means rendering exactly one sample
274     * point, whereas for the MMX/SSE implementation this means
275     * rendering 4 sample points.
276     */
277 schoenebeck 424 inline static void Synthesize(sample_t* pSrc, void* Pos, float& Pitch, float* pOutL, float* pOutR, uint& i, float* Volume, const float* PanL, const float* PanR, Filter& FilterL, Filter& FilterR, biquad_param_t& bqBase, biquad_param_t& bqMain) {
278 schoenebeck 320 switch (IMPLEMENTATION) {
279     // pure C++ implementation (thus platform independent)
280     case CPP: {
281     switch (CHANNELS) {
282     case MONO: {
283     float samplePoint = GetNextSampleMonoCPP(pSrc, (double *)Pos, Pitch);
284     if (USEFILTER) samplePoint = FilterL.Apply(&bqBase, &bqMain, samplePoint);
285     pOutL[i] += samplePoint * Volume[i] * *PanL;
286     pOutR[i] += samplePoint * Volume[i] * *PanR;
287     i++;
288     break;
289     }
290     case STEREO: {
291     stereo_sample_t samplePoint = GetNextSampleStereoCPP(pSrc, (double *)Pos, Pitch);
292     if (USEFILTER) {
293     samplePoint.left = FilterL.Apply(&bqBase, &bqMain, samplePoint.left);
294     samplePoint.right = FilterR.Apply(&bqBase, &bqMain, samplePoint.right);
295     }
296     pOutL[i] += samplePoint.left * Volume[i] * *PanL;
297     pOutR[i] += samplePoint.right * Volume[i] * *PanR;
298     i++;
299     break;
300     }
301     }
302     break;
303     }
304 schoenebeck 617 #if CONFIG_ASM && ARCH_X86
305 schoenebeck 320 // Assembly optimization using the MMX & SSE(1) instruction set (thus only for x86)
306     case ASM_X86_MMX_SSE: {
307     const int ii = i & 0xfffffffc;
308     i += 4;
309     switch (CHANNELS) {
310     case MONO: {
311     GetNext4SamplesMonoMMXSSE(pSrc, (float *)Pos, Pitch); // outputs samples in xmm2
312     if (USEFILTER) {
313     /* prepare filter input */
314     __asm__ __volatile__ (
315     "movaps %xmm2,%xmm0"
316     );
317     FilterL.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output
318     __asm__ __volatile__ (
319     "movaps %xmm7,%xmm2 # mono filter result -> xmm2"
320     );
321     }
322     /* apply panorama and volume factors */
323     __asm__ __volatile__ (
324     "movss (%1),%%xmm0 # load pan left\n\t"
325     "movss (%2),%%xmm1 # load pan right\n\t"
326     "movaps (%0),%%xmm4 # load vca\n\t"
327     "shufps $0x00,%%xmm0,%%xmm0 # copy pan left to the other 3 cells\n\t"
328     "shufps $0x00,%%xmm1,%%xmm1 # copy pan right to the other 3 cells\n\t"
329     "mulps %%xmm2,%%xmm0 # left = sample * pan_left\n\t"
330     "mulps %%xmm2,%%xmm1 # right = sample * pan_right\n\t"
331     "mulps %%xmm4,%%xmm0 # left = vca * (sample * pan_left)\n\t"
332     "mulps %%xmm4,%%xmm1 # right = vca * (sample * pan_right)\n\t"
333     : /* no output */
334     : "r" (&Volume[ii]), /* %0 */
335     "r" (PanL), /* %1 */
336     "r" (PanR) /* %2 */
337     : "xmm0", /* holds final left sample (for the 4 samples) at the end */
338     "xmm1" /* holds final right sample (for the 4 samples) at the end */
339     );
340     break;
341     }
342     case STEREO: {
343     GetNext4SamplesStereoMMXSSE(pSrc, (float *)Pos, Pitch); // outputs samples in xmm2 (left channel) and xmm3 (right channel)
344     if (USEFILTER) {
345     __asm__ __volatile__ (
346     "movaps %xmm2,%xmm0 # prepare left channel for filter\n\t"
347     "movaps %xmm3,%xmm1 # save right channel not to get overwritten by filter algorithms\n\t"
348     );
349     FilterL.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output
350     __asm__ __volatile__ (
351     "movaps %xmm1,%xmm0 # prepare right channel for filter\n\t"
352     "movaps %xmm7,%xmm1 # save filter output for left channel\n\t"
353     );
354     FilterR.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output
355     __asm__ __volatile__ (
356     "movaps %xmm1,%xmm2 # result left channel -> xmm2\n\t"
357     "movaps %xmm7,%xmm3 # result right channel -> xmm3\n\t"
358     );
359     }
360     /* apply panorama and volume factors */
361     __asm__ __volatile__ (
362     "movss (%1),%%xmm0 # load pan left\n\t"
363     "movss (%2),%%xmm1 # load pan right\n\t"
364     "movaps (%0),%%xmm4 # load vca\n\t"
365     "shufps $0x00,%%xmm0,%%xmm0 # copy pan left to the other 3 cells\n\t"
366     "shufps $0x00,%%xmm1,%%xmm1 # copy pan right to the other 3 cells\n\t"
367     "mulps %%xmm2,%%xmm0 # left = sample_left * pan_left\n\t"
368     "mulps %%xmm3,%%xmm1 # right = sample_right * pan_right\n\t"
369     "mulps %%xmm4,%%xmm0 # left = vca * (sample_left * pan_left)\n\t"
370     "mulps %%xmm4,%%xmm1 # right = vca * (sample_right * pan_right)\n\t"
371     : /* no output */
372     : "r" (&Volume[ii]), /* %0 */
373     "r" (PanL), /* %1 */
374     "r" (PanR) /* %2 */
375     );
376     break;
377     }
378     }
379     /* mix the 4 samples to the output channels */
380     __asm__ __volatile__ (
381     "addps (%0),%%xmm0 # mix calculated sample(s) to output left\n\t"
382     "movaps %%xmm0,(%0) # output to left channel\n\t"
383     "addps (%1),%%xmm1 # mix calculated sample(s) to output right\n\t"
384     "movaps %%xmm1,(%1) # output to right channel\n\t"
385     : /* no output */
386     : "r" (&pOutL[ii]), /* %0 - must be 16 byte aligned ! */
387     "r" (&pOutR[ii]) /* %1 - must be 16 byte aligned ! */
388     );
389     }
390 schoenebeck 617 #endif // CONFIG_ASM && ARCH_X86
391 schoenebeck 320 }
392     }
393     };
394    
395     }} // namespace LinuxSampler::gig
396    
397     #endif // __LS_GIG_SYNTHESIZER_H__

  ViewVC Help
Powered by ViewVC