29 |
#include "../common/Resampler.h" |
#include "../common/Resampler.h" |
30 |
#include "../common/BiquadFilter.h" |
#include "../common/BiquadFilter.h" |
31 |
#include "Filter.h" |
#include "Filter.h" |
32 |
#include "Voice.h" |
#include "SynthesisParam.h" |
|
|
|
33 |
|
|
34 |
#define SYNTHESIS_MODE_SET_INTERPOLATE(iMode,bVal) if (bVal) iMode |= 0x01; else iMode &= ~0x01 /* (un)set mode bit 0 */ |
#define SYNTHESIS_MODE_SET_INTERPOLATE(iMode,bVal) if (bVal) iMode |= 0x01; else iMode &= ~0x01 /* (un)set mode bit 0 */ |
35 |
#define SYNTHESIS_MODE_SET_FILTER(iMode,bVal) if (bVal) iMode |= 0x02; else iMode &= ~0x02 /* (un)set mode bit 1 */ |
#define SYNTHESIS_MODE_SET_FILTER(iMode,bVal) if (bVal) iMode |= 0x02; else iMode &= ~0x02 /* (un)set mode bit 1 */ |
36 |
#define SYNTHESIS_MODE_SET_LOOP(iMode,bVal) if (bVal) iMode |= 0x04; else iMode &= ~0x04 /* (un)set mode bit 2 */ |
#define SYNTHESIS_MODE_SET_LOOP(iMode,bVal) if (bVal) iMode |= 0x04; else iMode &= ~0x04 /* (un)set mode bit 2 */ |
37 |
#define SYNTHESIS_MODE_SET_CHANNELS(iMode,bVal) if (bVal) iMode |= 0x08; else iMode &= ~0x08 /* (un)set mode bit 3 */ |
#define SYNTHESIS_MODE_SET_CHANNELS(iMode,bVal) if (bVal) iMode |= 0x08; else iMode &= ~0x08 /* (un)set mode bit 3 */ |
38 |
#define SYNTHESIS_MODE_SET_IMPLEMENTATION(iMode,bVal) if (bVal) iMode |= 0x10; else iMode &= ~0x10 /* (un)set mode bit 4 */ |
#define SYNTHESIS_MODE_SET_IMPLEMENTATION(iMode,bVal) if (bVal) iMode |= 0x10; else iMode &= ~0x10 /* (un)set mode bit 4 */ |
39 |
#define SYNTHESIS_MODE_SET_PROFILING(iMode,bVal) if (bVal) iMode |= 0x20; else iMode &= ~0x20 /* (un)set mode bit 5 */ |
#define SYNTHESIS_MODE_SET_PROFILING(iMode,bVal) if (bVal) iMode |= 0x20; else iMode &= ~0x20 /* (un)set mode bit 5 */ |
40 |
|
|
41 |
#define SYNTHESIS_MODE_GET_INTERPOLATE(iMode) iMode & 0x01 |
#define SYNTHESIS_MODE_GET_INTERPOLATE(iMode) iMode & 0x01 |
42 |
#define SYNTHESIS_MODE_GET_FILTER(iMode) iMode & 0x02 |
#define SYNTHESIS_MODE_GET_FILTER(iMode) iMode & 0x02 |
44 |
#define SYNTHESIS_MODE_GET_CHANNELS(iMode) iMode & 0x08 |
#define SYNTHESIS_MODE_GET_CHANNELS(iMode) iMode & 0x08 |
45 |
#define SYNTHESIS_MODE_GET_IMPLEMENTATION(iMode) iMode & 0x10 |
#define SYNTHESIS_MODE_GET_IMPLEMENTATION(iMode) iMode & 0x10 |
46 |
|
|
|
// that's usually gig::Voice of course, but we make it a macro so we can |
|
|
// include this code for our synthesis benchmark which uses fake data |
|
|
// structures |
|
|
#ifndef VOICE |
|
|
# define VOICE Voice |
|
|
#endif // VOICE |
|
|
|
|
47 |
namespace LinuxSampler { namespace gig { |
namespace LinuxSampler { namespace gig { |
48 |
|
|
49 |
typedef void SynthesizeFragment_Fn(VOICE&, uint, sample_t*, uint); |
typedef void SynthesizeFragment_Fn(SynthesisParam* pFinalParam, Loop* pLoop); |
50 |
|
|
51 |
void* GetSynthesisFunction(const int SynthesisMode); |
void* GetSynthesisFunction(const int SynthesisMode); |
52 |
void RunSynthesisFunction(const int SynthesisMode, VOICE& voice, uint Samples, sample_t* pSrc, uint Skip); |
void RunSynthesisFunction(const int SynthesisMode, SynthesisParam* pFinalParam, Loop* pLoop); |
53 |
|
|
54 |
enum channels_t { |
enum channels_t { |
55 |
MONO, |
MONO, |
62 |
* format capable sampler engine. This means resampling / interpolation |
* format capable sampler engine. This means resampling / interpolation |
63 |
* for pitching the audio signal, looping, filter and amplification. |
* for pitching the audio signal, looping, filter and amplification. |
64 |
*/ |
*/ |
65 |
template<implementation_t IMPLEMENTATION, channels_t CHANNELS, bool DOLOOP, bool USEFILTER, bool INTERPOLATE> |
template<channels_t CHANNELS, bool DOLOOP, bool USEFILTER, bool INTERPOLATE> |
66 |
class Synthesizer : public __RTMath<IMPLEMENTATION>, public LinuxSampler::Resampler<INTERPOLATE> { |
class Synthesizer : public __RTMath<CPP>, public LinuxSampler::Resampler<INTERPOLATE> { |
67 |
|
|
68 |
// declarations of derived functions (see "Name lookup, |
// declarations of derived functions (see "Name lookup, |
69 |
// templates, and accessing members of base classes" in |
// templates, and accessing members of base classes" in |
70 |
// the gcc manual for an explanation of why this is |
// the gcc manual for an explanation of why this is |
71 |
// needed). |
// needed). |
72 |
using __RTMath<IMPLEMENTATION>::Mul; |
using __RTMath<CPP>::Mul; |
73 |
using __RTMath<IMPLEMENTATION>::Float; |
using __RTMath<CPP>::Float; |
74 |
using LinuxSampler::Resampler<INTERPOLATE>::GetNextSampleMonoCPP; |
//using LinuxSampler::Resampler<INTERPOLATE>::GetNextSampleMonoCPP; |
75 |
using LinuxSampler::Resampler<INTERPOLATE>::GetNextSampleStereoCPP; |
//using LinuxSampler::Resampler<INTERPOLATE>::GetNextSampleStereoCPP; |
76 |
#if CONFIG_ASM && ARCH_X86 |
using LinuxSampler::Resampler<INTERPOLATE>::Interpolate1StepMonoCPP; |
77 |
using LinuxSampler::Resampler<INTERPOLATE>::GetNext4SamplesMonoMMXSSE; |
using LinuxSampler::Resampler<INTERPOLATE>::Interpolate1StepStereoCPP; |
|
using LinuxSampler::Resampler<INTERPOLATE>::GetNext4SamplesStereoMMXSSE; |
|
|
#endif |
|
78 |
|
|
79 |
public: |
public: |
|
/** |
|
|
* Render audio for the current fragment for the given voice. |
|
|
* This is the toplevel method of this class. |
|
|
*/ |
|
|
template<typename VOICE_T> |
|
|
inline static void SynthesizeSubFragment(VOICE_T& Voice, uint Samples, sample_t* pSrc, uint i) { |
|
|
const float panLeft = Mul(Voice.fFinalVolume, Mul(Voice.PanLeft, Voice.pEngineChannel->GlobalPanLeft)); |
|
|
const float panRight = Mul(Voice.fFinalVolume, Mul(Voice.PanRight, Voice.pEngineChannel->GlobalPanRight)); |
|
|
if (IMPLEMENTATION == ASM_X86_MMX_SSE) { |
|
|
float fPos = (float) Voice.Pos; |
|
|
SynthesizeSubFragment(Voice, Samples, pSrc, i, Voice.pSample->LoopPlayCount, |
|
|
Voice.pSample->LoopStart, |
|
|
Voice.pSample->LoopEnd, |
|
|
Voice.pSample->LoopSize, |
|
|
Voice.LoopCyclesLeft, |
|
|
(void *)&fPos, |
|
|
&Voice.fFinalPitch, |
|
|
&panLeft, &panRight); |
|
|
#if CONFIG_ASM && ARCH_X86 |
|
|
if (INTERPOLATE) EMMS; |
|
|
#endif |
|
|
Voice.Pos = (double) fPos; |
|
|
} else { |
|
|
SynthesizeSubFragment(Voice, Samples, pSrc, i, Voice.pSample->LoopPlayCount, |
|
|
Voice.pSample->LoopStart, |
|
|
Voice.pSample->LoopEnd, |
|
|
Voice.pSample->LoopSize, |
|
|
Voice.LoopCyclesLeft, |
|
|
(void *)&Voice.Pos, |
|
|
&Voice.fFinalPitch, |
|
|
&panLeft, &panRight); |
|
|
} |
|
|
} |
|
|
|
|
80 |
//protected: |
//protected: |
81 |
|
|
82 |
/** |
static void SynthesizeSubFragment(SynthesisParam* pFinalParam, Loop* pLoop) { |
|
* Render audio for the current fragment for the given voice. |
|
|
* Will be called by the toplevel SynthesizeFragment() method. |
|
|
*/ |
|
|
template<typename VOICE_T> |
|
|
inline static void SynthesizeSubFragment(VOICE_T& Voice, uint Samples, sample_t* pSrc, uint& i, uint& LoopPlayCount, uint LoopStart, uint LoopEnd, uint LoopSize, uint& LoopCyclesLeft, void* Pos, const float* Pitch, const float* PanLeft, const float* PanRight) { |
|
|
const float loopEnd = Float(LoopEnd); |
|
|
const float f_LoopStart = Float(LoopStart); |
|
|
const float f_LoopSize = Float(LoopSize); |
|
83 |
if (DOLOOP) { |
if (DOLOOP) { |
84 |
if (LoopPlayCount) { |
const float fLoopEnd = Float(pLoop->uiEnd); |
85 |
|
const float fLoopStart = Float(pLoop->uiStart); |
86 |
|
const float fLoopSize = Float(pLoop->uiSize); |
87 |
|
if (pLoop->uiTotalCycles) { |
88 |
// render loop (loop count limited) |
// render loop (loop count limited) |
89 |
while (i < Samples && LoopCyclesLeft) { |
for (; pFinalParam->uiToGo > 0 && pLoop->uiCyclesLeft; pLoop->uiCyclesLeft -= WrapLoop(fLoopStart, fLoopSize, fLoopEnd, &pFinalParam->dPos)) { |
90 |
const uint processEnd = Min(Samples, i + DiffToLoopEnd(loopEnd,Pos, *Pitch) + 1); //TODO: instead of +1 we could also round up |
const uint uiToGo = Min(pFinalParam->uiToGo, DiffToLoopEnd(fLoopEnd, &pFinalParam->dPos, pFinalParam->fFinalPitch) + 1); //TODO: instead of +1 we could also round up |
91 |
while (i < processEnd) Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight); |
SynthesizeSubSubFragment(pFinalParam, uiToGo); |
|
LoopCyclesLeft -= WrapLoop(f_LoopStart, f_LoopSize, loopEnd, Pos); |
|
92 |
} |
} |
93 |
// render on without loop |
// render on without loop |
94 |
while (i < Samples) Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight); |
SynthesizeSubSubFragment(pFinalParam, pFinalParam->uiToGo); |
95 |
} |
} else { // render loop (endless loop) |
96 |
else { // render loop (endless loop) |
for (; pFinalParam->uiToGo > 0; WrapLoop(fLoopStart, fLoopSize, fLoopEnd, &pFinalParam->dPos)) { |
97 |
while (i < Samples) { |
const uint uiToGo = Min(pFinalParam->uiToGo, DiffToLoopEnd(fLoopEnd, &pFinalParam->dPos, pFinalParam->fFinalPitch) + 1); //TODO: instead of +1 we could also round up |
98 |
const uint processEnd = Min(Samples, i + DiffToLoopEnd(loopEnd, Pos, *Pitch) + 1); //TODO: instead of +1 we could also round up |
SynthesizeSubSubFragment(pFinalParam, uiToGo); |
|
while (i < processEnd) Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight); |
|
|
WrapLoop(f_LoopStart, f_LoopSize, loopEnd, Pos); |
|
99 |
} |
} |
100 |
} |
} |
101 |
|
} else { // no looping |
102 |
|
SynthesizeSubSubFragment(pFinalParam, pFinalParam->uiToGo); |
103 |
} |
} |
|
else { // no looping |
|
|
while (i < Samples) { Synthesize(Voice, Pos, pSrc, i, PanLeft, PanRight); } |
|
|
} |
|
|
} |
|
|
|
|
|
/** |
|
|
* Atomically render a piece for the voice. For the C++ |
|
|
* implementation this means rendering exactly one sample |
|
|
* point, whereas for the MMX/SSE implementation this means |
|
|
* rendering 4 sample points. |
|
|
*/ |
|
|
template<typename VOICE_T> |
|
|
inline static void Synthesize(VOICE_T& Voice, void* Pos, sample_t* pSrc, uint& i, const float* PanLeft, const float* PanRight) { |
|
|
Synthesize(pSrc, Pos, |
|
|
Voice.fFinalPitch, |
|
|
Voice.pEngineChannel->pOutputLeft, |
|
|
Voice.pEngineChannel->pOutputRight, |
|
|
i, |
|
|
PanLeft, |
|
|
PanRight, |
|
|
Voice.FilterLeft, |
|
|
Voice.FilterRight); |
|
104 |
} |
} |
105 |
|
|
106 |
/** |
/** |
107 |
* Returns the difference to the sample's loop end. |
* Returns the difference to the sample's loop end. |
108 |
*/ |
*/ |
109 |
inline static int DiffToLoopEnd(const float& LoopEnd, const void* Pos, const float& Pitch) { |
inline static int DiffToLoopEnd(const float& LoopEnd, const void* Pos, const float& Pitch) { |
110 |
switch (IMPLEMENTATION) { |
return uint((LoopEnd - *((double *)Pos)) / Pitch); |
|
#if CONFIG_ASM && ARCH_X86 |
|
|
case ASM_X86_MMX_SSE: { |
|
|
int result; |
|
|
__asm__ __volatile__ ( |
|
|
"movss (%1), %%xmm0 #read loopend\n\t" |
|
|
"subss (%2), %%xmm0 #sub pos\n\t" |
|
|
"divss (%3), %%xmm0 #div by pitch\n\t" |
|
|
"cvtss2si %%xmm0, %0 #convert to int\n\t" |
|
|
: "=r" (result) /* %0 */ |
|
|
: "r" (&LoopEnd), /* %1 */ |
|
|
"r" (Pos), /* %2 */ |
|
|
"r" (&Pitch) /* %3 */ |
|
|
); |
|
|
return result; |
|
|
} |
|
|
#endif // CONFIG_ASM && ARCH_X86 |
|
|
// pure C++ implementation (thus platform independent) |
|
|
default: { |
|
|
return uint((LoopEnd - *((double *)Pos)) / Pitch); |
|
|
} |
|
|
} |
|
111 |
} |
} |
112 |
|
|
113 |
|
#if 0 |
114 |
//TODO: this method is not in use yet, it's intended to be used for pitch=x.0f where we could use integer instead of float as playback position variable |
//TODO: this method is not in use yet, it's intended to be used for pitch=x.0f where we could use integer instead of float as playback position variable |
115 |
inline static int WrapLoop(const int& LoopStart, const int& LoopSize, const int& LoopEnd, int& Pos) { |
inline static int WrapLoop(const int& LoopStart, const int& LoopSize, const int& LoopEnd, int& Pos) { |
116 |
switch (IMPLEMENTATION) { |
//TODO: we can easily eliminate the branch here |
117 |
// pure C++ implementation (thus platform independent) |
if (Pos < LoopEnd) return 0; |
118 |
default: { //TODO: we can easily eliminate the branch here |
Pos = (Pos - LoopEnd) % LoopSize + LoopStart; |
119 |
if (Pos < LoopEnd) return 0; |
return 1; |
|
Pos = (Pos - LoopEnd) % LoopSize + LoopStart; |
|
|
return 1; |
|
|
} |
|
|
} |
|
120 |
} |
} |
121 |
|
#endif |
122 |
|
|
123 |
/** |
/** |
124 |
* This method handles looping of the RAM playback part of the |
* This method handles looping of the RAM playback part of the |
128 |
* be called by the DiskThread). |
* be called by the DiskThread). |
129 |
*/ |
*/ |
130 |
inline static int WrapLoop(const float& LoopStart, const float& LoopSize, const float& LoopEnd, void* vPos) { |
inline static int WrapLoop(const float& LoopStart, const float& LoopSize, const float& LoopEnd, void* vPos) { |
131 |
switch (IMPLEMENTATION) { |
double * Pos = (double *)vPos; |
132 |
#if CONFIG_ASM && ARCH_X86 |
if (*Pos < LoopEnd) return 0; |
133 |
case ASM_X86_MMX_SSE: { |
*Pos = fmod(*Pos - LoopEnd, LoopSize) + LoopStart; |
134 |
int result = 0; |
return 1; |
|
__asm__ __volatile__ ( |
|
|
"movss (%2), %%xmm0 # load LoopEnd\n\t" |
|
|
"movss (%1), %%xmm1 # load Pos\n\t" |
|
|
"comiss %%xmm0, %%xmm1 # LoopEnd <> Pos\n\t" |
|
|
"jb 1f # jump if no work needs to be done\n\t" |
|
|
"movss (%3), %%xmm2 # load LoopSize\n\t" |
|
|
"subss %%xmm0, %%xmm1 # Pos - LoopEnd\n\t" |
|
|
//now the fmodf |
|
|
"movss %%xmm1, %%xmm3 # xmm3 = (Pos - LoopEnd)\n\t" |
|
|
"divss %%xmm2, %%xmm1 # (Pos - LoopEnd) / LoopSize\n\t" |
|
|
"cvttss2si %%xmm1, %2 # convert to int\n\t" |
|
|
"cvtsi2ss %2, %%xmm1 # convert back to float\n\t" |
|
|
"movss (%4), %%xmm0 # load LoopStart\n\t" |
|
|
"mulss %%xmm2, %%xmm1 # LoopSize * int((Pos-LoopEnd)/LoopSize)\n\t" |
|
|
"subss %%xmm1, %%xmm3 # xmm2 = fmodf(Pos - LoopEnd, LoopSize)\n\t" |
|
|
//done with fmodf |
|
|
"addss %%xmm0, %%xmm3 # add LoopStart\n\t" |
|
|
"movss %%xmm3, (%1) # update Pos\n\t" |
|
|
"movl $1, (%0) # result = 1\n\t" |
|
|
".balign 16 \n\t" |
|
|
"1:\n\t" |
|
|
:: "r" (&result), /* %0 */ |
|
|
"r" (vPos), /* %1 */ |
|
|
"r" (&LoopEnd), /* %2 */ |
|
|
"r" (&LoopSize), /* %3 */ |
|
|
"r" (&LoopStart) /* %4 */ |
|
|
); |
|
|
return result; |
|
|
} |
|
|
#endif // CONFIG_ASM && ARCH_X86 |
|
|
// pure C++ implementation (thus platform independent) |
|
|
default: { |
|
|
double * Pos = (double *)vPos; |
|
|
if (*Pos < LoopEnd) return 0; |
|
|
*Pos = fmod(*Pos - LoopEnd, LoopSize) + LoopStart; |
|
|
return 1; |
|
|
} |
|
|
} |
|
135 |
} |
} |
136 |
|
|
137 |
/** |
static void SynthesizeSubSubFragment(SynthesisParam* pFinalParam, uint uiToGo) { |
138 |
* Atomically render a piece for the voice. For the C++ |
switch (CHANNELS) { |
139 |
* implementation this means rendering exactly one sample |
case MONO: { |
140 |
* point, whereas for the MMX/SSE implementation this means |
if (INTERPOLATE) { |
141 |
* rendering 4 sample points. |
if (USEFILTER) { |
142 |
*/ |
Filter filterL = pFinalParam->filterLeft; |
143 |
inline static void Synthesize(sample_t* pSrc, void* Pos, float& Pitch, float* pOutL, float* pOutR, uint& i, const float* PanL, const float* PanR, Filter& FilterL, Filter& FilterR) { |
sample_t* pSrc = pFinalParam->pSrc; |
144 |
switch (IMPLEMENTATION) { |
double dPos = pFinalParam->dPos; |
145 |
// pure C++ implementation (thus platform independent) |
float fPitch = pFinalParam->fFinalPitch; |
146 |
case CPP: { |
float* pOutL = pFinalParam->pOutLeft; |
147 |
switch (CHANNELS) { |
float* pOutR = pFinalParam->pOutRight; |
148 |
case MONO: { |
float fVolumeL = pFinalParam->fFinalVolumeLeft; |
149 |
float samplePoint = GetNextSampleMonoCPP(pSrc, (double *)Pos, Pitch); |
float fVolumeR = pFinalParam->fFinalVolumeRight; |
150 |
if (USEFILTER) samplePoint = FilterL.Apply(samplePoint); |
float samplePoint; |
151 |
pOutL[i] += samplePoint * *PanL; |
for (int i = 0; i < uiToGo; ++i) { |
152 |
pOutR[i] += samplePoint * *PanR; |
samplePoint = Interpolate1StepMonoCPP(pSrc, &dPos, fPitch); |
153 |
i++; |
samplePoint = filterL.Apply(samplePoint); |
154 |
break; |
pOutL[i] += samplePoint * fVolumeL; |
155 |
|
pOutR[i] += samplePoint * fVolumeR; |
156 |
|
} |
157 |
|
pFinalParam->dPos = dPos; |
158 |
|
} else { // no filter needed |
159 |
|
sample_t* pSrc = pFinalParam->pSrc; |
160 |
|
double dPos = pFinalParam->dPos; |
161 |
|
float fPitch = pFinalParam->fFinalPitch; |
162 |
|
float* pOutL = pFinalParam->pOutLeft; |
163 |
|
float* pOutR = pFinalParam->pOutRight; |
164 |
|
float fVolumeL = pFinalParam->fFinalVolumeLeft; |
165 |
|
float fVolumeR = pFinalParam->fFinalVolumeRight; |
166 |
|
float samplePoint; |
167 |
|
for (int i = 0; i < uiToGo; ++i) { |
168 |
|
samplePoint = Interpolate1StepMonoCPP(pSrc, &dPos, fPitch); |
169 |
|
pOutL[i] += samplePoint * fVolumeL; |
170 |
|
pOutR[i] += samplePoint * fVolumeR; |
171 |
|
} |
172 |
|
pFinalParam->dPos = dPos; |
173 |
} |
} |
174 |
case STEREO: { |
} else { // no interpolation |
175 |
stereo_sample_t samplePoint = GetNextSampleStereoCPP(pSrc, (double *)Pos, Pitch); |
if (USEFILTER) { |
176 |
if (USEFILTER) { |
Filter filterL = pFinalParam->filterLeft; |
177 |
samplePoint.left = FilterL.Apply(samplePoint.left); |
sample_t* pSrc = pFinalParam->pSrc; |
178 |
samplePoint.right = FilterR.Apply(samplePoint.right); |
float* pOutL = pFinalParam->pOutLeft; |
179 |
|
float* pOutR = pFinalParam->pOutRight; |
180 |
|
float fVolumeL = pFinalParam->fFinalVolumeLeft; |
181 |
|
float fVolumeR = pFinalParam->fFinalVolumeRight; |
182 |
|
int pos_offset = (int) pFinalParam->dPos; |
183 |
|
float samplePoint; |
184 |
|
for (int i = 0; i < uiToGo; ++i) { |
185 |
|
samplePoint = pSrc[i + pos_offset]; |
186 |
|
samplePoint = filterL.Apply(samplePoint); |
187 |
|
pOutL[i] += samplePoint * fVolumeL; |
188 |
|
pOutR[i] += samplePoint * fVolumeR; |
189 |
|
} |
190 |
|
pFinalParam->dPos += uiToGo; |
191 |
|
} else { // no filter needed |
192 |
|
sample_t* pSrc = pFinalParam->pSrc; |
193 |
|
float* pOutL = pFinalParam->pOutLeft; |
194 |
|
float* pOutR = pFinalParam->pOutRight; |
195 |
|
float fVolumeL = pFinalParam->fFinalVolumeLeft; |
196 |
|
float fVolumeR = pFinalParam->fFinalVolumeRight; |
197 |
|
int pos_offset = (int) pFinalParam->dPos; |
198 |
|
float samplePoint; |
199 |
|
for (int i = 0; i < uiToGo; ++i) { |
200 |
|
samplePoint = pSrc[i + pos_offset]; |
201 |
|
pOutL[i] += samplePoint * fVolumeL; |
202 |
|
pOutR[i] += samplePoint * fVolumeR; |
203 |
} |
} |
204 |
pOutL[i] += samplePoint.left * *PanL; |
pFinalParam->dPos += uiToGo; |
|
pOutR[i] += samplePoint.right * *PanR; |
|
|
i++; |
|
|
break; |
|
205 |
} |
} |
206 |
} |
} |
207 |
break; |
break; |
208 |
} |
} |
209 |
#if CONFIG_ASM && ARCH_X86 |
case STEREO: { |
210 |
// Assembly optimization using the MMX & SSE(1) instruction set (thus only for x86) |
if (INTERPOLATE) { |
211 |
case ASM_X86_MMX_SSE: { |
if (USEFILTER) { |
212 |
const int ii = i & 0xfffffffc; |
Filter filterL = pFinalParam->filterLeft; |
213 |
i += 4; |
Filter filterR = pFinalParam->filterRight; |
214 |
switch (CHANNELS) { |
sample_t* pSrc = pFinalParam->pSrc; |
215 |
case MONO: { |
double dPos = pFinalParam->dPos; |
216 |
GetNext4SamplesMonoMMXSSE(pSrc, (float *)Pos, Pitch); // outputs samples in xmm2 |
float fPitch = pFinalParam->fFinalPitch; |
217 |
if (USEFILTER) { |
float* pOutL = pFinalParam->pOutLeft; |
218 |
/* prepare filter input */ |
float* pOutR = pFinalParam->pOutRight; |
219 |
__asm__ __volatile__ ( |
float fVolumeL = pFinalParam->fFinalVolumeLeft; |
220 |
"movaps %xmm2,%xmm0" |
float fVolumeR = pFinalParam->fFinalVolumeRight; |
221 |
); |
stereo_sample_t samplePoint; |
222 |
FilterL.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output |
for (int i = 0; i < uiToGo; ++i) { |
223 |
__asm__ __volatile__ ( |
samplePoint = Interpolate1StepStereoCPP(pSrc, &dPos, fPitch); |
224 |
"movaps %xmm7,%xmm2 # mono filter result -> xmm2" |
samplePoint.left = filterL.Apply(samplePoint.left); |
225 |
); |
samplePoint.right = filterR.Apply(samplePoint.right); |
226 |
|
pOutL[i] += samplePoint.left * fVolumeL; |
227 |
|
pOutR[i] += samplePoint.right * fVolumeR; |
228 |
} |
} |
229 |
/* apply panorama and volume factors */ |
pFinalParam->dPos = dPos; |
230 |
__asm__ __volatile__ ( |
} else { // no filter needed |
231 |
"movss (%1),%%xmm0 # load pan left\n\t" |
sample_t* pSrc = pFinalParam->pSrc; |
232 |
"movss (%2),%%xmm1 # load pan right\n\t" |
double dPos = pFinalParam->dPos; |
233 |
"movaps (%0),%%xmm4 # load vca\n\t" |
float fPitch = pFinalParam->fFinalPitch; |
234 |
"shufps $0x00,%%xmm0,%%xmm0 # copy pan left to the other 3 cells\n\t" |
float* pOutL = pFinalParam->pOutLeft; |
235 |
"shufps $0x00,%%xmm1,%%xmm1 # copy pan right to the other 3 cells\n\t" |
float* pOutR = pFinalParam->pOutRight; |
236 |
"mulps %%xmm2,%%xmm0 # left = sample * pan_left\n\t" |
float fVolumeL = pFinalParam->fFinalVolumeLeft; |
237 |
"mulps %%xmm2,%%xmm1 # right = sample * pan_right\n\t" |
float fVolumeR = pFinalParam->fFinalVolumeRight; |
238 |
"mulps %%xmm4,%%xmm0 # left = vca * (sample * pan_left)\n\t" |
stereo_sample_t samplePoint; |
239 |
"mulps %%xmm4,%%xmm1 # right = vca * (sample * pan_right)\n\t" |
for (int i = 0; i < uiToGo; ++i) { |
240 |
: /* no output */ |
samplePoint = Interpolate1StepStereoCPP(pSrc, &dPos, fPitch); |
241 |
: "r" (&Volume[ii]), /* %0 */ |
pOutL[i] += samplePoint.left * fVolumeL; |
242 |
"r" (PanL), /* %1 */ |
pOutR[i] += samplePoint.right * fVolumeR; |
243 |
"r" (PanR) /* %2 */ |
} |
244 |
: "xmm0", /* holds final left sample (for the 4 samples) at the end */ |
pFinalParam->dPos = dPos; |
|
"xmm1" /* holds final right sample (for the 4 samples) at the end */ |
|
|
); |
|
|
break; |
|
245 |
} |
} |
246 |
case STEREO: { |
} else { // no interpolation |
247 |
GetNext4SamplesStereoMMXSSE(pSrc, (float *)Pos, Pitch); // outputs samples in xmm2 (left channel) and xmm3 (right channel) |
if (USEFILTER) { |
248 |
if (USEFILTER) { |
Filter filterL = pFinalParam->filterLeft; |
249 |
__asm__ __volatile__ ( |
Filter filterR = pFinalParam->filterRight; |
250 |
"movaps %xmm2,%xmm0 # prepare left channel for filter\n\t" |
sample_t* pSrc = pFinalParam->pSrc; |
251 |
"movaps %xmm3,%xmm1 # save right channel not to get overwritten by filter algorithms\n\t" |
float* pOutL = pFinalParam->pOutLeft; |
252 |
); |
float* pOutR = pFinalParam->pOutRight; |
253 |
FilterL.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output |
float fVolumeL = pFinalParam->fFinalVolumeLeft; |
254 |
__asm__ __volatile__ ( |
float fVolumeR = pFinalParam->fFinalVolumeRight; |
255 |
"movaps %xmm1,%xmm0 # prepare right channel for filter\n\t" |
int pos_offset = ((int) pFinalParam->dPos) << 1; |
256 |
"movaps %xmm7,%xmm1 # save filter output for left channel\n\t" |
stereo_sample_t samplePoint; |
257 |
); |
for (int i = 0, ii = 0; i < uiToGo; ++i, ii+=2) { |
258 |
FilterR.Apply4StepsSSE(&bqBase, &bqMain); // xmm0 input, xmm7 output |
samplePoint.left = pSrc[ii + pos_offset]; |
259 |
__asm__ __volatile__ ( |
samplePoint.right = pSrc[ii + pos_offset + 1]; |
260 |
"movaps %xmm1,%xmm2 # result left channel -> xmm2\n\t" |
samplePoint.left = filterL.Apply(samplePoint.left); |
261 |
"movaps %xmm7,%xmm3 # result right channel -> xmm3\n\t" |
samplePoint.right = filterR.Apply(samplePoint.right); |
262 |
); |
pOutL[i] += samplePoint.left * fVolumeL; |
263 |
|
pOutR[i] += samplePoint.right * fVolumeR; |
264 |
|
} |
265 |
|
pFinalParam->dPos += uiToGo; |
266 |
|
} else { // no filter needed |
267 |
|
sample_t* pSrc = pFinalParam->pSrc; |
268 |
|
float* pOutL = pFinalParam->pOutLeft; |
269 |
|
float* pOutR = pFinalParam->pOutRight; |
270 |
|
float fVolumeL = pFinalParam->fFinalVolumeLeft; |
271 |
|
float fVolumeR = pFinalParam->fFinalVolumeRight; |
272 |
|
int pos_offset = ((int) pFinalParam->dPos) << 1; |
273 |
|
stereo_sample_t samplePoint; |
274 |
|
for (int i = 0, ii = 0; i < uiToGo; ++i, ii+=2) { |
275 |
|
samplePoint.left = pSrc[ii + pos_offset]; |
276 |
|
samplePoint.right = pSrc[ii + pos_offset + 1]; |
277 |
|
pOutL[i] += samplePoint.left * fVolumeL; |
278 |
|
pOutR[i] += samplePoint.right * fVolumeR; |
279 |
} |
} |
280 |
/* apply panorama and volume factors */ |
pFinalParam->dPos += uiToGo; |
|
__asm__ __volatile__ ( |
|
|
"movss (%1),%%xmm0 # load pan left\n\t" |
|
|
"movss (%2),%%xmm1 # load pan right\n\t" |
|
|
"movaps (%0),%%xmm4 # load vca\n\t" |
|
|
"shufps $0x00,%%xmm0,%%xmm0 # copy pan left to the other 3 cells\n\t" |
|
|
"shufps $0x00,%%xmm1,%%xmm1 # copy pan right to the other 3 cells\n\t" |
|
|
"mulps %%xmm2,%%xmm0 # left = sample_left * pan_left\n\t" |
|
|
"mulps %%xmm3,%%xmm1 # right = sample_right * pan_right\n\t" |
|
|
"mulps %%xmm4,%%xmm0 # left = vca * (sample_left * pan_left)\n\t" |
|
|
"mulps %%xmm4,%%xmm1 # right = vca * (sample_right * pan_right)\n\t" |
|
|
: /* no output */ |
|
|
: "r" (&Volume[ii]), /* %0 */ |
|
|
"r" (PanL), /* %1 */ |
|
|
"r" (PanR) /* %2 */ |
|
|
); |
|
|
break; |
|
281 |
} |
} |
282 |
} |
} |
283 |
/* mix the 4 samples to the output channels */ |
break; |
|
__asm__ __volatile__ ( |
|
|
"addps (%0),%%xmm0 # mix calculated sample(s) to output left\n\t" |
|
|
"movaps %%xmm0,(%0) # output to left channel\n\t" |
|
|
"addps (%1),%%xmm1 # mix calculated sample(s) to output right\n\t" |
|
|
"movaps %%xmm1,(%1) # output to right channel\n\t" |
|
|
: /* no output */ |
|
|
: "r" (&pOutL[ii]), /* %0 - must be 16 byte aligned ! */ |
|
|
"r" (&pOutR[ii]) /* %1 - must be 16 byte aligned ! */ |
|
|
); |
|
284 |
} |
} |
|
#endif // CONFIG_ASM && ARCH_X86 |
|
285 |
} |
} |
286 |
|
pFinalParam->pOutRight += uiToGo; |
287 |
|
pFinalParam->pOutLeft += uiToGo; |
288 |
|
pFinalParam->uiToGo -= uiToGo; |
289 |
} |
} |
290 |
}; |
}; |
291 |
|
|