/[svn]/linuxsampler/trunk/src/engines/gig/Synthesizer.h
ViewVC logotype

Contents of /linuxsampler/trunk/src/engines/gig/Synthesizer.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 738 - (show annotations) (download) (as text)
Tue Aug 16 17:14:25 2005 UTC (18 years, 8 months ago) by schoenebeck
File MIME type: text/x-c++hdr
File size: 23140 byte(s)
* extensive synthesis optimization: reimplementation of EGs and LFO(s),
  removed synthesis parameter prerendering and the synthesis parameter
  matrix in general, splitting each audio fragment into subfragments now
  where each subfragment uses constant synthesis parameters
  (everything's still very buggy ATM)

1 /***************************************************************************
2 * *
3 * LinuxSampler - modular, streaming capable sampler *
4 * *
5 * Copyright (C) 2003, 2004 by Benno Senoner and Christian Schoenebeck *
6 * Copyright (C) 2005 Christian Schoenebeck *
7 * *
8 * This program is free software; you can redistribute it and/or modify *
9 * it under the terms of the GNU General Public License as published by *
10 * the Free Software Foundation; either version 2 of the License, or *
11 * (at your option) any later version. *
12 * *
13 * This program is distributed in the hope that it will be useful, *
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16 * GNU General Public License for more details. *
17 * *
18 * You should have received a copy of the GNU General Public License *
19 * along with this program; if not, write to the Free Software *
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, *
21 * MA 02111-1307 USA *
22 ***************************************************************************/
23
24 #ifndef __LS_GIG_SYNTHESIZER_H__
25 #define __LS_GIG_SYNTHESIZER_H__
26
27 #include "../../common/global.h"
28 #include "../../common/RTMath.h"
29 #include "../common/Resampler.h"
30 #include "../common/BiquadFilter.h"
31 #include "Filter.h"
32 #include "Voice.h"
33
34
/*
 * Setters for the individual flag bits of a synthesis mode bit field.
 *
 * iMode must be a modifiable integer lvalue expression, bVal any expression
 * convertible to bool. If bVal is true the respective bit is set, otherwise
 * it is cleared.
 *
 * Each macro expands to exactly one statement (do { ... } while (0)), so it
 * can safely be used as the unbraced body of an if / else / loop, and both
 * arguments are parenthesized so compound expressions (e.g. a conditional
 * expression selecting the mode variable) are evaluated as a whole.
 */
#define SYNTHESIS_MODE_SET_INTERPOLATE(iMode,bVal)    do { if (bVal) (iMode) |= 0x01; else (iMode) &= ~0x01; } while (0) /* (un)set mode bit 0 */
#define SYNTHESIS_MODE_SET_FILTER(iMode,bVal)         do { if (bVal) (iMode) |= 0x02; else (iMode) &= ~0x02; } while (0) /* (un)set mode bit 1 */
#define SYNTHESIS_MODE_SET_LOOP(iMode,bVal)           do { if (bVal) (iMode) |= 0x04; else (iMode) &= ~0x04; } while (0) /* (un)set mode bit 2 */
#define SYNTHESIS_MODE_SET_CHANNELS(iMode,bVal)       do { if (bVal) (iMode) |= 0x08; else (iMode) &= ~0x08; } while (0) /* (un)set mode bit 3 */
#define SYNTHESIS_MODE_SET_IMPLEMENTATION(iMode,bVal) do { if (bVal) (iMode) |= 0x10; else (iMode) &= ~0x10; } while (0) /* (un)set mode bit 4 */
#define SYNTHESIS_MODE_SET_PROFILING(iMode,bVal)      do { if (bVal) (iMode) |= 0x20; else (iMode) &= ~0x20; } while (0) /* (un)set mode bit 5 */
41
/*
 * Getters for the individual flag bits of a synthesis mode bit field.
 *
 * Each macro yields the masked bit itself (i.e. zero or the bit's mask
 * value, NOT necessarily 0 / 1). The expansion is fully parenthesized so
 * the result can be combined with other operators (==, !, &&, ...) without
 * being torn apart by operator precedence.
 */
#define SYNTHESIS_MODE_GET_INTERPOLATE(iMode)         ((iMode) & 0x01)
#define SYNTHESIS_MODE_GET_FILTER(iMode)              ((iMode) & 0x02)
#define SYNTHESIS_MODE_GET_LOOP(iMode)                ((iMode) & 0x04)
#define SYNTHESIS_MODE_GET_CHANNELS(iMode)            ((iMode) & 0x08)
#define SYNTHESIS_MODE_GET_IMPLEMENTATION(iMode)      ((iMode) & 0x10)
47
// VOICE names the voice type the synthesis functions operate on. It
// defaults to Voice (i.e. gig::Voice), but is kept as a macro so that this
// code can also be included by the synthesis benchmark, which predefines
// VOICE to its own fake data structure.
#if !defined(VOICE)
# define VOICE Voice
#endif // !defined(VOICE)
54
55 namespace LinuxSampler { namespace gig {
56
57 typedef void SynthesizeFragment_Fn(VOICE&, uint, sample_t*, uint);
58
59 void* GetSynthesisFunction(const int SynthesisMode);
60 void RunSynthesisFunction(const int SynthesisMode, VOICE& voice, uint Samples, sample_t* pSrc, uint Skip);
61
/// Channel layout of the source sample data; used as template argument
/// of the Synthesizer class.
enum channels_t {
    MONO   = 0,
    STEREO = 1
};
66
67 /** @brief Main Synthesis algorithms for the gig::Engine
68 *
69 * Implementation of the main synthesis algorithms of the Gigasampler
70 * format capable sampler engine. This means resampling / interpolation
71 * for pitching the audio signal, looping, filter and amplification.
72 */
73 template<implementation_t IMPLEMENTATION, channels_t CHANNELS, bool DOLOOP, bool USEFILTER, bool INTERPOLATE>
74 class Synthesizer : public __RTMath<IMPLEMENTATION>, public LinuxSampler::Resampler<INTERPOLATE> {
75
76 // declarations of derived functions (see "Name lookup,
77 // templates, and accessing members of base classes" in
78 // the gcc manual for an explanation of why this is
79 // needed).
80 using __RTMath<IMPLEMENTATION>::Mul;
81 using __RTMath<IMPLEMENTATION>::Float;
82 using LinuxSampler::Resampler<INTERPOLATE>::GetNextSampleMonoCPP;
83 using LinuxSampler::Resampler<INTERPOLATE>::GetNextSampleStereoCPP;
84 #if CONFIG_ASM && ARCH_X86
85 using LinuxSampler::Resampler<INTERPOLATE>::GetNext4SamplesMonoMMXSSE;
86 using LinuxSampler::Resampler<INTERPOLATE>::GetNext4SamplesStereoMMXSSE;
87 #endif
88
89 public:
90 /**
91 * Render audio for the current fragment for the given voice.
92 * This is the toplevel method of this class.
93 */
94 template<typename VOICE_T>
95 inline static void SynthesizeSubFragment(VOICE_T& Voice, uint Samples, sample_t* pSrc, uint i) {
96 const float panLeft = Mul(Voice.fFinalVolume, Mul(Voice.PanLeft, Voice.pEngineChannel->GlobalPanLeft));
97 const float panRight = Mul(Voice.fFinalVolume, Mul(Voice.PanRight, Voice.pEngineChannel->GlobalPanRight));
98 if (IMPLEMENTATION == ASM_X86_MMX_SSE) {
99 float fPos = (float) Voice.Pos;
100 SynthesizeSubFragment(Voice, Samples, pSrc, i, Voice.pSample->LoopPlayCount,
101 Voice.pSample->LoopStart,
102 Voice.pSample->LoopEnd,
103 Voice.pSample->LoopSize,
104 Voice.LoopCyclesLeft,
105 (void *)&fPos,
106 &Voice.fFinalPitch,
107 &panLeft, &panRight);
108 #if CONFIG_ASM && ARCH_X86
109 if (INTERPOLATE) EMMS;
110 #endif
111 Voice.Pos = (double) fPos;
112 } else {
113 SynthesizeSubFragment(Voice, Samples, pSrc, i, Voice.pSample->LoopPlayCount,
114 Voice.pSample->LoopStart,
115 Voice.pSample->LoopEnd,
116 Voice.pSample->LoopSize,
117 Voice.LoopCyclesLeft,
118 (void *)&Voice.Pos,
119 &Voice.fFinalPitch,
120 &panLeft, &panRight);
121 }
122 }
123
124 //protected:
125
126 /**
127 * Render audio for the current fragment for the given voice.
128 * Will be called by the toplevel SynthesizeFragment() method.
129 */
130 template<typename VOICE_T>
131 inline static void SynthesizeSubFragment(VOICE_T& Voice, uint Samples, sample_t* pSrc, uint& i, uint& LoopPlayCount, uint LoopStart, uint LoopEnd, uint LoopSize, uint& LoopCyclesLeft, void* Pos, const float* Pitch, const float* PanLeft, const float* PanRight) {
132 const float loopEnd = Float(LoopEnd);
133 const float f_LoopStart = Float(LoopStart);
134 const float f_LoopSize = Float(LoopSize);
135 if (DOLOOP) {
136 if (LoopPlayCount) {
137 // render loop (loop count limited)
138 while (i < Samples && LoopCyclesLeft) {
139 const uint processEnd = Min(Samples, i + DiffToLoopEnd(loopEnd,Pos, *Pitch) + 1); //TODO: instead of +1 we could also round up
140 while (i < processEnd) Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight);
141 LoopCyclesLeft -= WrapLoop(f_LoopStart, f_LoopSize, loopEnd, Pos);
142 }
143 // render on without loop
144 while (i < Samples) Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight);
145 }
146 else { // render loop (endless loop)
147 while (i < Samples) {
148 const uint processEnd = Min(Samples, i + DiffToLoopEnd(loopEnd, Pos, *Pitch) + 1); //TODO: instead of +1 we could also round up
149 while (i < processEnd) Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight);
150 WrapLoop(f_LoopStart, f_LoopSize, loopEnd, Pos);
151 }
152 }
153 }
154 else { // no looping
155 while (i < Samples) { Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight); }
156 }
157 }
158
159 /**
160 * Atomicly render a piece for the voice. For the C++
161 * implementation this means rendering exactly one sample
162 * point, whereas for the MMX/SSE implementation this means
163 * rendering 4 sample points.
164 */
165 template<typename VOICE_T>
166 inline static void Synthesize(VOICE_T& Voice, void* Pos, sample_t* pSrc, uint& i, const float* PanLeft, const float* PanRight) {
167 Synthesize(pSrc, Pos,
168 Voice.fFinalPitch,
169 Voice.pEngineChannel->pOutputLeft,
170 Voice.pEngineChannel->pOutputRight,
171 i,
172 PanLeft,
173 PanRight,
174 Voice.FilterLeft,
175 Voice.FilterRight);
176 }
177
178 /**
179 * Returns the difference to the sample's loop end.
180 */
181 inline static int DiffToLoopEnd(const float& LoopEnd, const void* Pos, const float& Pitch) {
182 switch (IMPLEMENTATION) {
183 #if CONFIG_ASM && ARCH_X86
184 case ASM_X86_MMX_SSE: {
185 int result;
186 __asm__ __volatile__ (
187 "movss (%1), %%xmm0 #read loopend\n\t"
188 "subss (%2), %%xmm0 #sub pos\n\t"
189 "divss (%3), %%xmm0 #div by pitch\n\t"
190 "cvtss2si %%xmm0, %0 #convert to int\n\t"
191 : "=r" (result) /* %0 */
192 : "r" (&LoopEnd), /* %1 */
193 "r" (Pos), /* %2 */
194 "r" (&Pitch) /* %3 */
195 );
196 return result;
197 }
198 #endif // CONFIG_ASM && ARCH_X86
199 // pure C++ implementation (thus platform independent)
200 default: {
201 return uint((LoopEnd - *((double *)Pos)) / Pitch);
202 }
203 }
204 }
205
//TODO: this method is not in use yet, it's intended to be used for pitch=x.0f where we could use integer instead of float as playback position variable
/**
 * Integer variant of the loop wrapping: if the playback position
 * reached or passed the loop end, it is moved back into the loop.
 *
 * @param LoopStart - loop start position
 * @param LoopSize  - loop length, must not be zero
 * @param LoopEnd   - loop end position
 * @param Pos       - playback position, updated in place when wrapped
 * @returns 1 if the position was wrapped, 0 if nothing had to be done
 */
inline static int WrapLoop(const int& LoopStart, const int& LoopSize, const int& LoopEnd, int& Pos) {
    // pure C++ implementation (thus platform independent)
    //TODO: we can easily eliminate the branch here
    if (Pos >= LoopEnd) {
        Pos = LoopStart + (Pos - LoopEnd) % LoopSize;
        return 1;
    }
    return 0;
}
217
218 /**
219 * This method handles looping of the RAM playback part of the
220 * sample, thus repositioning the playback position once the
221 * loop limit was reached. Note: looping of the disk streaming
222 * part is handled by libgig (ReadAndLoop() method which will
223 * be called by the DiskThread).
224 */
225 inline static int WrapLoop(const float& LoopStart, const float& LoopSize, const float& LoopEnd, void* vPos) {
226 switch (IMPLEMENTATION) {
227 #if CONFIG_ASM && ARCH_X86
228 case ASM_X86_MMX_SSE: {
229 int result = 0;
230 __asm__ __volatile__ (
231 "movss (%2), %%xmm0 # load LoopEnd\n\t"
232 "movss (%1), %%xmm1 # load Pos\n\t"
233 "comiss %%xmm0, %%xmm1 # LoopEnd <> Pos\n\t"
234 "jb 1f # jump if no work needs to be done\n\t"
235 "movss (%3), %%xmm2 # load LoopSize\n\t"
236 "subss %%xmm0, %%xmm1 # Pos - LoopEnd\n\t"
237 //now the fmodf
238 "movss %%xmm1, %%xmm3 # xmm3 = (Pos - LoopEnd)\n\t"
239 "divss %%xmm2, %%xmm1 # (Pos - LoopEnd) / LoopSize\n\t"
240 "cvttss2si %%xmm1, %2 # convert to int\n\t"
241 "cvtsi2ss %2, %%xmm1 # convert back to float\n\t"
242 "movss (%4), %%xmm0 # load LoopStart\n\t"
243 "mulss %%xmm2, %%xmm1 # LoopSize * int((Pos-LoopEnd)/LoopSize)\n\t"
244 "subss %%xmm1, %%xmm3 # xmm2 = fmodf(Pos - LoopEnd, LoopSize)\n\t"
245 //done with fmodf
246 "addss %%xmm0, %%xmm3 # add LoopStart\n\t"
247 "movss %%xmm3, (%1) # update Pos\n\t"
248 "movl $1, (%0) # result = 1\n\t"
249 ".balign 16 \n\t"
250 "1:\n\t"
251 :: "r" (&result), /* %0 */
252 "r" (vPos), /* %1 */
253 "r" (&LoopEnd), /* %2 */
254 "r" (&LoopSize), /* %3 */
255 "r" (&LoopStart) /* %4 */
256 );
257 return result;
258 }
259 #endif // CONFIG_ASM && ARCH_X86
260 // pure C++ implementation (thus platform independent)
261 default: {
262 double * Pos = (double *)vPos;
263 if (*Pos < LoopEnd) return 0;
264 *Pos = fmod(*Pos - LoopEnd, LoopSize) + LoopStart;
265 return 1;
266 }
267 }
268 }
269
270 /**
271 * Atomicly render a piece for the voice. For the C++
272 * implementation this means rendering exactly one sample
273 * point, whereas for the MMX/SSE implementation this means
274 * rendering 4 sample points.
275 */
276 inline static void Synthesize(sample_t* pSrc, void* Pos, float& Pitch, float* pOutL, float* pOutR, uint& i, const float* PanL, const float* PanR, Filter& FilterL, Filter& FilterR) {
277 switch (IMPLEMENTATION) {
278 // pure C++ implementation (thus platform independent)
279 case CPP: {
280 switch (CHANNELS) {
281 case MONO: {
282 float samplePoint = GetNextSampleMonoCPP(pSrc, (double *)Pos, Pitch);
283 if (USEFILTER) samplePoint = FilterL.Apply(samplePoint);
284 pOutL[i] += samplePoint * *PanL;
285 pOutR[i] += samplePoint * *PanR;
286 i++;
287 break;
288 }
289 case STEREO: {
290 stereo_sample_t samplePoint = GetNextSampleStereoCPP(pSrc, (double *)Pos, Pitch);
291 if (USEFILTER) {
292 samplePoint.left = FilterL.Apply(samplePoint.left);
293 samplePoint.right = FilterR.Apply(samplePoint.right);
294 }
295 pOutL[i] += samplePoint.left * *PanL;
296 pOutR[i] += samplePoint.right * *PanR;
297 i++;
298 break;
299 }
300 }
301 break;
302 }
303 #if CONFIG_ASM && ARCH_X86
304 // Assembly optimization using the MMX & SSE(1) instruction set (thus only for x86)
305 case ASM_X86_MMX_SSE: {
306 const int ii = i & 0xfffffffc;
307 i += 4;
308 switch (CHANNELS) {
309 case MONO: {
310 GetNext4SamplesMonoMMXSSE(pSrc, (float *)Pos, Pitch); // outputs samples in xmm2
311 if (USEFILTER) {
312 /* prepare filter input */
313 __asm__ __volatile__ (
314 "movaps %xmm2,%xmm0"
315 );
316 FilterL.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output
317 __asm__ __volatile__ (
318 "movaps %xmm7,%xmm2 # mono filter result -> xmm2"
319 );
320 }
321 /* apply panorama and volume factors */
322 __asm__ __volatile__ (
323 "movss (%1),%%xmm0 # load pan left\n\t"
324 "movss (%2),%%xmm1 # load pan right\n\t"
325 "movaps (%0),%%xmm4 # load vca\n\t"
326 "shufps $0x00,%%xmm0,%%xmm0 # copy pan left to the other 3 cells\n\t"
327 "shufps $0x00,%%xmm1,%%xmm1 # copy pan right to the other 3 cells\n\t"
328 "mulps %%xmm2,%%xmm0 # left = sample * pan_left\n\t"
329 "mulps %%xmm2,%%xmm1 # right = sample * pan_right\n\t"
330 "mulps %%xmm4,%%xmm0 # left = vca * (sample * pan_left)\n\t"
331 "mulps %%xmm4,%%xmm1 # right = vca * (sample * pan_right)\n\t"
332 : /* no output */
333 : "r" (&Volume[ii]), /* %0 */
334 "r" (PanL), /* %1 */
335 "r" (PanR) /* %2 */
336 : "xmm0", /* holds final left sample (for the 4 samples) at the end */
337 "xmm1" /* holds final right sample (for the 4 samples) at the end */
338 );
339 break;
340 }
341 case STEREO: {
342 GetNext4SamplesStereoMMXSSE(pSrc, (float *)Pos, Pitch); // outputs samples in xmm2 (left channel) and xmm3 (right channel)
343 if (USEFILTER) {
344 __asm__ __volatile__ (
345 "movaps %xmm2,%xmm0 # prepare left channel for filter\n\t"
346 "movaps %xmm3,%xmm1 # save right channel not to get overwritten by filter algorithms\n\t"
347 );
348 FilterL.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output
349 __asm__ __volatile__ (
350 "movaps %xmm1,%xmm0 # prepare right channel for filter\n\t"
351 "movaps %xmm7,%xmm1 # save filter output for left channel\n\t"
352 );
353 FilterR.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output
354 __asm__ __volatile__ (
355 "movaps %xmm1,%xmm2 # result left channel -> xmm2\n\t"
356 "movaps %xmm7,%xmm3 # result right channel -> xmm3\n\t"
357 );
358 }
359 /* apply panorama and volume factors */
360 __asm__ __volatile__ (
361 "movss (%1),%%xmm0 # load pan left\n\t"
362 "movss (%2),%%xmm1 # load pan right\n\t"
363 "movaps (%0),%%xmm4 # load vca\n\t"
364 "shufps $0x00,%%xmm0,%%xmm0 # copy pan left to the other 3 cells\n\t"
365 "shufps $0x00,%%xmm1,%%xmm1 # copy pan right to the other 3 cells\n\t"
366 "mulps %%xmm2,%%xmm0 # left = sample_left * pan_left\n\t"
367 "mulps %%xmm3,%%xmm1 # right = sample_right * pan_right\n\t"
368 "mulps %%xmm4,%%xmm0 # left = vca * (sample_left * pan_left)\n\t"
369 "mulps %%xmm4,%%xmm1 # right = vca * (sample_right * pan_right)\n\t"
370 : /* no output */
371 : "r" (&Volume[ii]), /* %0 */
372 "r" (PanL), /* %1 */
373 "r" (PanR) /* %2 */
374 );
375 break;
376 }
377 }
378 /* mix the 4 samples to the output channels */
379 __asm__ __volatile__ (
380 "addps (%0),%%xmm0 # mix calculated sample(s) to output left\n\t"
381 "movaps %%xmm0,(%0) # output to left channel\n\t"
382 "addps (%1),%%xmm1 # mix calculated sample(s) to output right\n\t"
383 "movaps %%xmm1,(%1) # output to right channel\n\t"
384 : /* no output */
385 : "r" (&pOutL[ii]), /* %0 - must be 16 byte aligned ! */
386 "r" (&pOutR[ii]) /* %1 - must be 16 byte aligned ! */
387 );
388 }
389 #endif // CONFIG_ASM && ARCH_X86
390 }
391 }
392 };
393
394 }} // namespace LinuxSampler::gig
395
396 #endif // __LS_GIG_SYNTHESIZER_H__

  ViewVC Help
Powered by ViewVC