/[svn]/linuxsampler/trunk/src/engines/gig/Synthesizer.h
ViewVC logotype

Contents of /linuxsampler/trunk/src/engines/gig/Synthesizer.h

Parent Directory | Revision Log


Revision 498 - (show annotations) (download) (as text)
Sun Apr 10 18:37:45 2005 UTC (19 years ago) by schoenebeck
File MIME type: text/x-c++hdr
File size: 21763 byte(s)
* fixed bug in MMX/SSE(1) implementation of the sample loop code (caused
  RAM-only voices to crash when entering the sample's loop section under
  certain conditions)

1 /***************************************************************************
2 * *
3 * LinuxSampler - modular, streaming capable sampler *
4 * *
5 * Copyright (C) 2003, 2004 by Benno Senoner and Christian Schoenebeck *
6 * Copyright (C) 2005 Christian Schoenebeck *
7 * *
8 * This program is free software; you can redistribute it and/or modify *
9 * it under the terms of the GNU General Public License as published by *
10 * the Free Software Foundation; either version 2 of the License, or *
11 * (at your option) any later version. *
12 * *
13 * This program is distributed in the hope that it will be useful, *
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16 * GNU General Public License for more details. *
17 * *
18 * You should have received a copy of the GNU General Public License *
19 * along with this program; if not, write to the Free Software *
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, *
21 * MA 02111-1307 USA *
22 ***************************************************************************/
23
24 #ifndef __LS_GIG_SYNTHESIZER_H__
25 #define __LS_GIG_SYNTHESIZER_H__
26
27 #include "../../common/global.h"
28 #include "../../common/RTMath.h"
29 #include "../common/Resampler.h"
30 #include "../common/BiquadFilter.h"
31 #include "Filter.h"
32 #include "Voice.h"
33
/*
 * Synthesis mode bit setters.
 *
 * Each macro sets (bVal is true) or clears (bVal is false) one bit of the
 * integer lvalue iMode. They expand to a single parenthesized expression of
 * type void, so they can be used anywhere a statement or expression is
 * allowed (un-braced if/else bodies, comma expressions, ...) and accept
 * arbitrary argument expressions.
 */
#define SYNTHESIS_MODE_SET_CONSTPITCH(iMode,bVal)     ((bVal) ? (void)((iMode) |= 0x01) : (void)((iMode) &= ~0x01)) /* (un)set mode bit 0 */
#define SYNTHESIS_MODE_SET_LOOP(iMode,bVal)           ((bVal) ? (void)((iMode) |= 0x02) : (void)((iMode) &= ~0x02)) /* (un)set mode bit 1 */
#define SYNTHESIS_MODE_SET_INTERPOLATE(iMode,bVal)    ((bVal) ? (void)((iMode) |= 0x04) : (void)((iMode) &= ~0x04)) /* (un)set mode bit 2 */
#define SYNTHESIS_MODE_SET_FILTER(iMode,bVal)         ((bVal) ? (void)((iMode) |= 0x08) : (void)((iMode) &= ~0x08)) /* (un)set mode bit 3 */
#define SYNTHESIS_MODE_SET_CHANNELS(iMode,bVal)       ((bVal) ? (void)((iMode) |= 0x10) : (void)((iMode) &= ~0x10)) /* (un)set mode bit 4 */
#define SYNTHESIS_MODE_SET_IMPLEMENTATION(iMode,bVal) ((bVal) ? (void)((iMode) |= 0x20) : (void)((iMode) &= ~0x20)) /* (un)set mode bit 5 */
#define SYNTHESIS_MODE_SET_PROFILING(iMode,bVal)      ((bVal) ? (void)((iMode) |= 0x40) : (void)((iMode) &= ~0x40)) /* (un)set mode bit 6 */
41
/*
 * Synthesis mode bit getters.
 *
 * Each macro yields iMode masked with the respective mode bit, i.e. zero if
 * the bit is cleared and the bit's mask value (not necessarily 1) if it is
 * set. The expansion is fully parenthesized so the result can be combined
 * safely with operators such as !, == or &&.
 */
#define SYNTHESIS_MODE_GET_CONSTPITCH(iMode)     ((iMode) & 0x01) /* mode bit 0 */
#define SYNTHESIS_MODE_GET_LOOP(iMode)           ((iMode) & 0x02) /* mode bit 1 */
#define SYNTHESIS_MODE_GET_INTERPOLATE(iMode)    ((iMode) & 0x04) /* mode bit 2 */
#define SYNTHESIS_MODE_GET_FILTER(iMode)         ((iMode) & 0x08) /* mode bit 3 */
#define SYNTHESIS_MODE_GET_CHANNELS(iMode)       ((iMode) & 0x10) /* mode bit 4 */
#define SYNTHESIS_MODE_GET_IMPLEMENTATION(iMode) ((iMode) & 0x20) /* mode bit 5 */
48
// VOICE is the voice type used by the declarations in this header. It
// defaults to Voice (i.e. usually gig::Voice), but is kept as a macro so
// the synthesis benchmark can include this code with its own fake data
// structures by defining VOICE before including this header.
#if !defined(VOICE)
# define VOICE Voice
#endif // !defined(VOICE)
55
56 namespace LinuxSampler { namespace gig {
57
58 typedef void SynthesizeFragment_Fn(VOICE&, uint, sample_t*, uint);
59
60 void* GetSynthesisFunction(const int SynthesisMode);
61 void RunSynthesisFunction(const int SynthesisMode, VOICE& voice, uint Samples, sample_t* pSrc, uint Skip);
62
/// Channel layout selector; used as template argument of Synthesizer to
/// choose between its mono and stereo code paths.
enum channels_t { MONO = 0, STEREO = 1 };
67
68 template<implementation_t IMPLEMENTATION, channels_t CHANNELS, bool USEFILTER, bool INTERPOLATE, bool DOLOOP, bool CONSTPITCH>
69 class Synthesizer : public __RTMath<IMPLEMENTATION>, public LinuxSampler::Resampler<INTERPOLATE> {
70
71 // declarations of derived functions (see "Name lookup,
72 // templates, and accessing members of base classes" in
73 // the gcc manual for an explanation of why this is
74 // needed).
75 using __RTMath<IMPLEMENTATION>::Mul;
76 using __RTMath<IMPLEMENTATION>::Float;
77 using LinuxSampler::Resampler<INTERPOLATE>::GetNextSampleMonoCPP;
78 using LinuxSampler::Resampler<INTERPOLATE>::GetNextSampleStereoCPP;
79 #if ARCH_X86
80 using LinuxSampler::Resampler<INTERPOLATE>::GetNext4SamplesMonoMMXSSE;
81 using LinuxSampler::Resampler<INTERPOLATE>::GetNext4SamplesStereoMMXSSE;
82 #endif
83
84 public:
85 template<typename VOICE_T>
86 inline static void SynthesizeFragment(VOICE_T& Voice, uint Samples, sample_t* pSrc, uint i) {
87 const float panLeft = Mul(Voice.PanLeft, Voice.pEngineChannel->GlobalPanLeft);
88 const float panRight = Mul(Voice.PanRight, Voice.pEngineChannel->GlobalPanRight);
89 if (IMPLEMENTATION == ASM_X86_MMX_SSE) {
90 float fPos = (float) Voice.Pos;
91 SynthesizeFragment(Voice, Samples, pSrc, i, Voice.pSample->LoopPlayCount,
92 Voice.pSample->LoopStart,
93 Voice.pSample->LoopEnd,
94 Voice.pSample->LoopSize,
95 Voice.LoopCyclesLeft,
96 (void *)&fPos,
97 Voice.PitchBase,
98 Voice.PitchBend,
99 &panLeft, &panRight);
100 #if ARCH_X86
101 if (INTERPOLATE) EMMS;
102 #endif
103 Voice.Pos = (double) fPos;
104 } else {
105 SynthesizeFragment(Voice, Samples, pSrc, i, Voice.pSample->LoopPlayCount,
106 Voice.pSample->LoopStart,
107 Voice.pSample->LoopEnd,
108 Voice.pSample->LoopSize,
109 Voice.LoopCyclesLeft,
110 (void *)&Voice.Pos,
111 Voice.PitchBase,
112 Voice.PitchBend,
113 &panLeft, &panRight);
114 }
115 }
116
117 //protected:
118
119 template<typename VOICE_T>
120 inline static void SynthesizeFragment(VOICE_T& Voice, uint Samples, sample_t* pSrc, uint& i, uint& LoopPlayCount, uint LoopStart, uint LoopEnd, uint LoopSize, uint& LoopCyclesLeft, void* Pos, float& PitchBase, float& PitchBend, const float* PanLeft, const float* PanRight) {
121 const float loopEnd = Float(LoopEnd);
122 const float PBbyPB = Mul(PitchBase, PitchBend);
123 const float f_LoopStart = Float(LoopStart);
124 const float f_LoopSize = Float(LoopSize);
125 if (DOLOOP) {
126 if (LoopPlayCount) {
127 // render loop (loop count limited)
128 while (i < Samples && LoopCyclesLeft) {
129 if (CONSTPITCH) {
130 const uint processEnd = Min(Samples, i + DiffToLoopEnd(loopEnd,Pos, PBbyPB) + 1); //TODO: instead of +1 we could also round up
131 while (i < processEnd) Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight);
132 }
133 else Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight);
134 if (WrapLoop(f_LoopStart, f_LoopSize, loopEnd, Pos)) LoopCyclesLeft--;
135 }
136 // render on without loop
137 while (i < Samples) Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight);
138 }
139 else { // render loop (endless loop)
140 while (i < Samples) {
141 if (CONSTPITCH) {
142 const uint processEnd = Min(Samples, i + DiffToLoopEnd(loopEnd, Pos, PBbyPB) + 1); //TODO: instead of +1 we could also round up
143 while (i < processEnd) Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight);
144 }
145 else Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight);
146 WrapLoop(f_LoopStart, f_LoopSize, loopEnd, Pos);
147 }
148 }
149 }
150 else { // no looping
151 while (i < Samples) { Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight);}
152 }
153 }
154
155 template<typename VOICE_T>
156 inline static void Synthesize(VOICE_T& Voice, void* Pos, sample_t* pSrc, uint& i, const float* PanLeft, const float* PanRight) {
157 Synthesize(pSrc, Pos,
158 Voice.pEngine->pSynthesisParameters[Event::destination_vco][i],
159 Voice.pEngineChannel->pOutputLeft,
160 Voice.pEngineChannel->pOutputRight,
161 i,
162 Voice.pEngine->pSynthesisParameters[Event::destination_vca],
163 PanLeft,
164 PanRight,
165 Voice.FilterLeft,
166 Voice.FilterRight,
167 Voice.pEngine->pBasicFilterParameters[i],
168 Voice.pEngine->pMainFilterParameters[i]);
169 }
170
171 inline static int DiffToLoopEnd(const float& LoopEnd, const void* Pos, const float& Pitch) {
172 switch (IMPLEMENTATION) {
173 // pure C++ implementation (thus platform independent)
174 case CPP: {
175 return uint((LoopEnd - *((double *)Pos)) / Pitch);
176 }
177 #if ARCH_X86
178 case ASM_X86_MMX_SSE: {
179 int result;
180 __asm__ __volatile__ (
181 "movss (%1), %%xmm0 #read loopend\n\t"
182 "subss (%2), %%xmm0 #sub pos\n\t"
183 "divss (%3), %%xmm0 #div by pitch\n\t"
184 "cvtss2si %%xmm0, %0 #convert to int\n\t"
185 : "=r" (result) /* %0 */
186 : "r" (&LoopEnd), /* %1 */
187 "r" (Pos), /* %2 */
188 "r" (&Pitch) /* %3 */
189 );
190 return result;
191 }
192 #endif // ARCH_X86
193 }
194 }
195
196 inline static int WrapLoop(const float& LoopStart, const float& LoopSize, const float& LoopEnd, void* vPos) {
197 switch (IMPLEMENTATION) {
198 // pure C++ implementation (thus platform independent)
199 case CPP: {
200 double * Pos = (double *)vPos;
201 if (*Pos < LoopEnd) return 0;
202 *Pos = fmod(*Pos - LoopEnd, LoopSize) + LoopStart;
203 return 1;
204 }
205 #if ARCH_X86
206 case ASM_X86_MMX_SSE: {
207 int result = 0;
208 __asm__ __volatile__ (
209 "movss (%2), %%xmm0 # load LoopEnd\n\t"
210 "movss (%1), %%xmm1 # load Pos\n\t"
211 "comiss %%xmm0, %%xmm1 # LoopEnd <> Pos\n\t"
212 "jb 1f # jump if no work needs to be done\n\t"
213 "movss (%3), %%xmm2 # load LoopSize\n\t"
214 "subss %%xmm0, %%xmm1 # Pos - LoopEnd\n\t"
215 //now the fmodf
216 "movss %%xmm1, %%xmm3 # xmm3 = (Pos - LoopEnd)\n\t"
217 "divss %%xmm2, %%xmm1 # (Pos - LoopEnd) / LoopSize\n\t"
218 "cvttss2si %%xmm1, %2 # convert to int\n\t"
219 "cvtsi2ss %2, %%xmm1 # convert back to float\n\t"
220 "movss (%4), %%xmm0 # load LoopStart\n\t"
221 "mulss %%xmm2, %%xmm1 # LoopSize * int((Pos-LoopEnd)/LoopSize)\n\t"
222 "subss %%xmm1, %%xmm3 # xmm2 = fmodf(Pos - LoopEnd, LoopSize)\n\t"
223 //done with fmodf
224 "addss %%xmm0, %%xmm3 # add LoopStart\n\t"
225 "movss %%xmm3, (%1) # update Pos\n\t"
226 "movl $1, (%0) # result = 1\n\t"
227 ".balign 16 \n\t"
228 "1:\n\t"
229 :: "r" (&result), /* %0 */
230 "r" (vPos), /* %1 */
231 "r" (&LoopEnd), /* %2 */
232 "r" (&LoopSize), /* %3 */
233 "r" (&LoopStart) /* %4 */
234 );
235 return result;
236 }
237 #endif // ARCH_X86
238 }
239 }
240
241 inline static void Synthesize(sample_t* pSrc, void* Pos, float& Pitch, float* pOutL, float* pOutR, uint& i, float* Volume, const float* PanL, const float* PanR, Filter& FilterL, Filter& FilterR, biquad_param_t& bqBase, biquad_param_t& bqMain) {
242 switch (IMPLEMENTATION) {
243 // pure C++ implementation (thus platform independent)
244 case CPP: {
245 switch (CHANNELS) {
246 case MONO: {
247 float samplePoint = GetNextSampleMonoCPP(pSrc, (double *)Pos, Pitch);
248 if (USEFILTER) samplePoint = FilterL.Apply(&bqBase, &bqMain, samplePoint);
249 pOutL[i] += samplePoint * Volume[i] * *PanL;
250 pOutR[i] += samplePoint * Volume[i] * *PanR;
251 i++;
252 break;
253 }
254 case STEREO: {
255 stereo_sample_t samplePoint = GetNextSampleStereoCPP(pSrc, (double *)Pos, Pitch);
256 if (USEFILTER) {
257 samplePoint.left = FilterL.Apply(&bqBase, &bqMain, samplePoint.left);
258 samplePoint.right = FilterR.Apply(&bqBase, &bqMain, samplePoint.right);
259 }
260 pOutL[i] += samplePoint.left * Volume[i] * *PanL;
261 pOutR[i] += samplePoint.right * Volume[i] * *PanR;
262 i++;
263 break;
264 }
265 }
266 break;
267 }
268 #if ARCH_X86
269 // Assembly optimization using the MMX & SSE(1) instruction set (thus only for x86)
270 case ASM_X86_MMX_SSE: {
271 const int ii = i & 0xfffffffc;
272 i += 4;
273 switch (CHANNELS) {
274 case MONO: {
275 GetNext4SamplesMonoMMXSSE(pSrc, (float *)Pos, Pitch); // outputs samples in xmm2
276 if (USEFILTER) {
277 /* prepare filter input */
278 __asm__ __volatile__ (
279 "movaps %xmm2,%xmm0"
280 );
281 FilterL.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output
282 __asm__ __volatile__ (
283 "movaps %xmm7,%xmm2 # mono filter result -> xmm2"
284 );
285 }
286 /* apply panorama and volume factors */
287 __asm__ __volatile__ (
288 "movss (%1),%%xmm0 # load pan left\n\t"
289 "movss (%2),%%xmm1 # load pan right\n\t"
290 "movaps (%0),%%xmm4 # load vca\n\t"
291 "shufps $0x00,%%xmm0,%%xmm0 # copy pan left to the other 3 cells\n\t"
292 "shufps $0x00,%%xmm1,%%xmm1 # copy pan right to the other 3 cells\n\t"
293 "mulps %%xmm2,%%xmm0 # left = sample * pan_left\n\t"
294 "mulps %%xmm2,%%xmm1 # right = sample * pan_right\n\t"
295 "mulps %%xmm4,%%xmm0 # left = vca * (sample * pan_left)\n\t"
296 "mulps %%xmm4,%%xmm1 # right = vca * (sample * pan_right)\n\t"
297 : /* no output */
298 : "r" (&Volume[ii]), /* %0 */
299 "r" (PanL), /* %1 */
300 "r" (PanR) /* %2 */
301 : "xmm0", /* holds final left sample (for the 4 samples) at the end */
302 "xmm1" /* holds final right sample (for the 4 samples) at the end */
303 );
304 break;
305 }
306 case STEREO: {
307 GetNext4SamplesStereoMMXSSE(pSrc, (float *)Pos, Pitch); // outputs samples in xmm2 (left channel) and xmm3 (right channel)
308 if (USEFILTER) {
309 __asm__ __volatile__ (
310 "movaps %xmm2,%xmm0 # prepare left channel for filter\n\t"
311 "movaps %xmm3,%xmm1 # save right channel not to get overwritten by filter algorithms\n\t"
312 );
313 FilterL.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output
314 __asm__ __volatile__ (
315 "movaps %xmm1,%xmm0 # prepare right channel for filter\n\t"
316 "movaps %xmm7,%xmm1 # save filter output for left channel\n\t"
317 );
318 FilterR.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output
319 __asm__ __volatile__ (
320 "movaps %xmm1,%xmm2 # result left channel -> xmm2\n\t"
321 "movaps %xmm7,%xmm3 # result right channel -> xmm3\n\t"
322 );
323 }
324 /* apply panorama and volume factors */
325 __asm__ __volatile__ (
326 "movss (%1),%%xmm0 # load pan left\n\t"
327 "movss (%2),%%xmm1 # load pan right\n\t"
328 "movaps (%0),%%xmm4 # load vca\n\t"
329 "shufps $0x00,%%xmm0,%%xmm0 # copy pan left to the other 3 cells\n\t"
330 "shufps $0x00,%%xmm1,%%xmm1 # copy pan right to the other 3 cells\n\t"
331 "mulps %%xmm2,%%xmm0 # left = sample_left * pan_left\n\t"
332 "mulps %%xmm3,%%xmm1 # right = sample_right * pan_right\n\t"
333 "mulps %%xmm4,%%xmm0 # left = vca * (sample_left * pan_left)\n\t"
334 "mulps %%xmm4,%%xmm1 # right = vca * (sample_right * pan_right)\n\t"
335 : /* no output */
336 : "r" (&Volume[ii]), /* %0 */
337 "r" (PanL), /* %1 */
338 "r" (PanR) /* %2 */
339 );
340 break;
341 }
342 }
343 /* mix the 4 samples to the output channels */
344 __asm__ __volatile__ (
345 "addps (%0),%%xmm0 # mix calculated sample(s) to output left\n\t"
346 "movaps %%xmm0,(%0) # output to left channel\n\t"
347 "addps (%1),%%xmm1 # mix calculated sample(s) to output right\n\t"
348 "movaps %%xmm1,(%1) # output to right channel\n\t"
349 : /* no output */
350 : "r" (&pOutL[ii]), /* %0 - must be 16 byte aligned ! */
351 "r" (&pOutR[ii]) /* %1 - must be 16 byte aligned ! */
352 );
353 }
354 #endif // ARCH_X86
355 }
356 }
357 };
358
359 }} // namespace LinuxSampler::gig
360
361 #endif // __LS_GIG_SYNTHESIZER_H__

  ViewVC Help
Powered by ViewVC