/***************************************************************************
 *                                                                         *
 *   LinuxSampler - modular, streaming capable sampler                     *
 *                                                                         *
 *   Copyright (C) 2003, 2004 by Benno Senoner and Christian Schoenebeck   *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the Free Software           *
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston,                 *
 *   MA 02111-1307 USA                                                     *
 *                                                                         *
 ***************************************************************************/
|
23 |
#ifndef __LS_RESAMPLER_H__ |
24 |
#define __LS_RESAMPLER_H__ |
25 |
|
26 |
#include "../../common/global.h" |
27 |
|
28 |
/*
 * Compile-time choice of the interpolation algorithm used by the resampler.
 * The default can be overridden by defining USE_LINEAR_INTERPOLATION before
 * this header is included (or on the compiler command line).
 */
// TODO: cubic interpolation is not yet supported by the MMX/SSE(1) version though
#ifndef USE_LINEAR_INTERPOLATION
# define USE_LINEAR_INTERPOLATION 1 ///< set to 0 if you prefer cubic interpolation (slower, better quality)
#endif
32 |
|
33 |
namespace LinuxSampler { |
34 |
|
35 |
/**
 * One stereo sample point: a pair of floating point values, one per
 * audio channel.
 */
struct stereo_sample_t {
    float left;  ///< value of the left channel
    float right; ///< value of the right channel
};
39 |
|
40 |
template<bool INTERPOLATE> |
41 |
class Resampler { |
42 |
public: |
43 |
inline static float GetNextSampleMonoCPP(sample_t* pSrc, double* Pos, float& Pitch) { |
44 |
if (INTERPOLATE) return Interpolate1StepMonoCPP(pSrc, Pos, Pitch); |
45 |
else { // no pitch, so no interpolation necessary |
46 |
int pos_int = (int) *Pos; |
47 |
*Pos += 1.0; |
48 |
return pSrc [pos_int]; |
49 |
} |
50 |
} |
51 |
|
52 |
inline static stereo_sample_t GetNextSampleStereoCPP(sample_t* pSrc, double* Pos, float& Pitch) { |
53 |
if (INTERPOLATE) return Interpolate1StepStereoCPP(pSrc, Pos, Pitch); |
54 |
else { // no pitch, so no interpolation necessary |
55 |
int pos_int = (int) *Pos; |
56 |
pos_int <<= 1; |
57 |
*Pos += 1.0; |
58 |
stereo_sample_t samplePoint; |
59 |
samplePoint.left = pSrc[pos_int]; |
60 |
samplePoint.right = pSrc[pos_int+1]; |
61 |
return samplePoint; |
62 |
} |
63 |
} |
64 |
|
65 |
#if ARCH_X86 |
66 |
inline static void GetNext4SamplesMonoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) { |
67 |
if (INTERPOLATE) Interpolate4StepsMonoMMXSSE(pSrc, Pos, Pitch); |
68 |
else { // no pitch, so no interpolation necessary |
69 |
const float __4f = 4.0f; |
70 |
__asm__ __volatile__ ( |
71 |
"movss (%1), %%xmm5 # load Pos\n\t" |
72 |
"cvtss2si %%xmm5, %%edi # int(Pos)\n\t" |
73 |
"addss %2, %%xmm5 # Pos += 4.0f\n\t" |
74 |
"movswl (%0,%%edi,2), %%eax # load sample 0\n\t" |
75 |
"cvtsi2ss %%eax, %%xmm2 # convert to float\n\t" |
76 |
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
77 |
"movswl 2(%0,%%edi,2), %%edx # load sample 1\n\t" |
78 |
"cvtsi2ss %%edx, %%xmm2 # convert to float\n\t" |
79 |
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
80 |
"movss %%xmm5, (%1) # update Pos\n\t" |
81 |
"movswl 4(%0,%%edi,2), %%eax # load sample 2\n\t" |
82 |
"cvtsi2ss %%eax, %%xmm2 # convert to float\n\t" |
83 |
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
84 |
"movswl 6(%0,%%edi,2), %%edx # load sample 3\n\t" |
85 |
"cvtsi2ss %%edx, %%xmm2 # convert to float\n\t" |
86 |
"shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t" |
87 |
:: "r" (pSrc), "r" (Pos), "m" (__4f) |
88 |
: "%eax", "%edx", "%edi" |
89 |
); |
90 |
} |
91 |
} |
92 |
|
93 |
inline static void GetNext4SamplesStereoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) { |
94 |
if (INTERPOLATE) { |
95 |
Interpolate4StepsStereoMMXSSE(pSrc, Pos, Pitch); |
96 |
//EMMS; |
97 |
} else { // no pitch, so no interpolation necessary |
98 |
const float __4f = 4.0f; |
99 |
__asm__ __volatile__ ( |
100 |
"movss (%1), %%xmm5 # load Pos\n\t" |
101 |
"cvtss2si %%xmm5, %%edi # int(Pos)\n\t" |
102 |
"addss %2, %%xmm5 # Pos += 4.0f\n\t" |
103 |
"movswl (%0, %%edi,4), %%eax # load sample 0 (left)\n\t" |
104 |
"cvtsi2ss %%eax, %%xmm2 # convert to float\n\t" |
105 |
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
106 |
"movss %%xmm5, (%1) # update Pos\n\t" |
107 |
"movswl 2(%0, %%edi,4), %%edx # load sample 0 (left)\n\t" |
108 |
"cvtsi2ss %%edx, %%xmm3 # convert to float\n\t" |
109 |
"shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t" |
110 |
"movswl 4(%0, %%edi,4), %%eax # load sample 1 (left)\n\t" |
111 |
"cvtsi2ss %%eax, %%xmm2 # convert to float\n\t" |
112 |
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
113 |
"movswl 6(%0, %%edi,4), %%edx # load sample 1 (right)\n\t" |
114 |
"cvtsi2ss %%edx, %%xmm3 # convert to float\n\t" |
115 |
"shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t" |
116 |
"movswl 8(%0, %%edi,4), %%eax # load sample 2 (left)\n\t" |
117 |
"cvtsi2ss %%eax, %%xmm2 # convert to float\n\t" |
118 |
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
119 |
"movswl 10(%0, %%edi,4), %%edx # load sample 2 (right)\n\t" |
120 |
"cvtsi2ss %%edx, %%xmm3 # convert to float\n\t" |
121 |
"shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t" |
122 |
"movswl 12(%0, %%edi,4), %%eax # load sample 3 (left)\n\t" |
123 |
"cvtsi2ss %%eax, %%xmm2 # convert to float\n\t" |
124 |
"shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t" |
125 |
"movswl 14(%0, %%edi,4), %%edx # load sample 3 (right)\n\t" |
126 |
"cvtsi2ss %%edx, %%xmm3 # convert to float\n\t" |
127 |
"shufps $0x1b, %%xmm3, %%xmm3 # swap to correct order\n\t" |
128 |
:: "r" (pSrc), "r" (Pos), "m" (__4f) |
129 |
: "%eax", "%edx", "%edi" |
130 |
); |
131 |
} |
132 |
} |
133 |
#endif // ARCH_X86 |
134 |
|
135 |
protected: |
136 |
|
137 |
inline static float Interpolate1StepMonoCPP(sample_t* pSrc, double* Pos, float& Pitch) { |
138 |
int pos_int = (int) *Pos; // integer position |
139 |
float pos_fract = *Pos - pos_int; // fractional part of position |
140 |
|
141 |
#if USE_LINEAR_INTERPOLATION |
142 |
float samplePoint = pSrc[pos_int] + pos_fract * (pSrc[pos_int+1] - pSrc[pos_int]); |
143 |
#else // polynomial interpolation |
144 |
float xm1 = pSrc[pos_int]; |
145 |
float x0 = pSrc[pos_int+1]; |
146 |
float x1 = pSrc[pos_int+2]; |
147 |
float x2 = pSrc[pos_int+3]; |
148 |
float a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f; |
149 |
float b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f; |
150 |
float c = (x1 - xm1) * 0.5f; |
151 |
float samplePoint = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0; |
152 |
#endif // USE_LINEAR_INTERPOLATION |
153 |
|
154 |
*Pos += Pitch; |
155 |
return samplePoint; |
156 |
} |
157 |
|
158 |
inline static stereo_sample_t Interpolate1StepStereoCPP(sample_t* pSrc, double* Pos, float& Pitch) { |
159 |
int pos_int = (int) *Pos; // integer position |
160 |
float pos_fract = *Pos - pos_int; // fractional part of position |
161 |
pos_int <<= 1; |
162 |
|
163 |
stereo_sample_t samplePoint; |
164 |
|
165 |
#if USE_LINEAR_INTERPOLATION |
166 |
// left channel |
167 |
samplePoint.left = pSrc[pos_int] + pos_fract * (pSrc[pos_int+2] - pSrc[pos_int]); |
168 |
// right channel |
169 |
samplePoint.right = pSrc[pos_int+1] + pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1]); |
170 |
#else // polynomial interpolation |
171 |
// calculate left channel |
172 |
float xm1 = pSrc[pos_int]; |
173 |
float x0 = pSrc[pos_int+2]; |
174 |
float x1 = pSrc[pos_int+4]; |
175 |
float x2 = pSrc[pos_int+6]; |
176 |
float a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f; |
177 |
float b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f; |
178 |
float c = (x1 - xm1) * 0.5f; |
179 |
samplePoint.left = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0; |
180 |
|
181 |
//calculate right channel |
182 |
xm1 = pSrc[pos_int+1]; |
183 |
x0 = pSrc[pos_int+3]; |
184 |
x1 = pSrc[pos_int+5]; |
185 |
x2 = pSrc[pos_int+7]; |
186 |
a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f; |
187 |
b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f; |
188 |
c = (x1 - xm1) * 0.5f; |
189 |
samplePoint.right = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0; |
190 |
#endif // USE_LINEAR_INTERPOLATION |
191 |
|
192 |
*Pos += Pitch; |
193 |
return samplePoint; |
194 |
} |
195 |
|
196 |
#if ARCH_X86 |
197 |
// TODO: no support for cubic interpolation yet |
198 |
inline static void Interpolate4StepsMonoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) { |
199 |
/* calculate playback position of each of the 4 samples by adding the associated pitch */ |
200 |
__asm__ __volatile__ ( |
201 |
"movss (%0),%%xmm0 # sample position of sample[0] -> xmm0[0]\n\t" |
202 |
"movss %1,%%xmm1 # copy pitch -> xmm1[0]\n\t" |
203 |
"shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t" |
204 |
"addss %%xmm1,%%xmm0 # calculate sample position of sample[1]\n\t" |
205 |
"shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t" |
206 |
"addss %%xmm1,%%xmm0 # calculate sample position of sample[2]\n\t" |
207 |
"shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t" |
208 |
"addss %%xmm1,%%xmm0 # calculate sample position of sample[3]\n\t" |
209 |
"movss %%xmm0,%%xmm2 # xmm0[0] -> xmm2[0]\n\t" |
210 |
"addss %%xmm1,%%xmm2 # calculate initial sample position for the next 4-sample cycle\n\t" |
211 |
"movss %%xmm2,(%0) # update 'Pos'\n\t" |
212 |
"shufps $0x1b,%%xmm0,%%xmm0 # swap, so that xmm0[0]=sample pos 0, xmm0[1]=sample pos 1,...\n\t" |
213 |
"cvttps2pi %%xmm0,%%mm4 # int(xmm0[0-1]) -> mm4\n\t" |
214 |
"shufps $0xe4,%%xmm0,%%xmm1 # xmm0[2-3] -> xmm1[2-3]\n\t" |
215 |
"shufps $0x0e,%%xmm1,%%xmm1 # xmm1[2-3] -> xmm1[0-1]\n\t" |
216 |
"cvttps2pi %%xmm1,%%mm5 # int(xmm1[0-1]) -> mm5\n\t" |
217 |
"cvtpi2ps %%mm5,%%xmm1 # double(mm5) -> xmm1[0-1]\n\t" |
218 |
"shufps $0x44,%%xmm1,%%xmm1 # shift lower 2 FPs up to the upper 2 cells\n\t" |
219 |
"cvtpi2ps %%mm4,%%xmm1 # double(mm4) -> xmm1[0-1]\n\t" |
220 |
"subps %%xmm1,%%xmm0 # xmm0[1-3] = xmm0[1-3] - xmm1[1-3]\n\t" |
221 |
: |
222 |
: "r" (Pos), /* %0 */ |
223 |
"m" (Pitch) /* %1 */ |
224 |
: "%xmm0", /* holds fractional position (0.0 <= x < 1.0) of sample 0-3 at the end */ |
225 |
"%xmm1", /* holds integer position (back converted to SPFP) of sample 0-3 at the end */ |
226 |
"mm4", /* holds integer position of sample 0-1 at the end */ |
227 |
"mm5", /* holds integer position of sample 2-3 at the end */ |
228 |
"st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" |
229 |
); |
230 |
/* get sample values of pSrc[pos_int] and pSrc[pos_int+1] of the 4 samples */ |
231 |
__asm__ __volatile__ ( |
232 |
"movd %%mm4,%%edi # sample position of sample 0\n\t" |
233 |
"psrlq $32,%%mm4 # mm4 >> 32\n\t" |
234 |
"movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 0)\n\t" |
235 |
"movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 0+1)\n\t" |
236 |
"cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t" |
237 |
"cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t" |
238 |
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
239 |
"shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t" |
240 |
"movd %%mm4,%%edi # sample position of sample 1\n\t" |
241 |
"movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 1)\n\t" |
242 |
"movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 1+1)\n\t" |
243 |
"cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t" |
244 |
"cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t" |
245 |
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
246 |
"shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t" |
247 |
"movd %%mm5,%%edi # sample position of sample 2\n\t" |
248 |
"psrlq $32,%%mm5 # mm5 >> 32\n\t" |
249 |
"movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 2)\n\t" |
250 |
"movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 2+1)\n\t" |
251 |
"cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t" |
252 |
"cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t" |
253 |
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
254 |
"shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t" |
255 |
"movd %%mm5,%%edi # sample position of sample 2\n\t" |
256 |
"movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 3)\n\t" |
257 |
"movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 3+1)\n\t" |
258 |
"cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t" |
259 |
"cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t" |
260 |
"shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t" |
261 |
"shufps $0x1b, %%xmm3, %%xmm3 # swap to correct order\n\t" |
262 |
: /* no output */ |
263 |
: "S" (pSrc) /* %0 - sample read position */ |
264 |
: "%eax", "%ecx", /*"%edx",*/ "%edi", |
265 |
"%xmm2", /* holds pSrc[int_pos] of the 4 samples at the end */ |
266 |
"%xmm3", /* holds pSrc[int_pos+1] of the 4 samples at the end */ |
267 |
"mm4", /* holds integer position of sample 0-1 at the end */ |
268 |
"mm5", /* holds integer position of sample 2-3 at the end */ |
269 |
"st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" |
270 |
); |
271 |
/* linear interpolation of the 4 samples simultaniously */ |
272 |
__asm__ __volatile__ ( |
273 |
"subps %%xmm2,%%xmm3 # xmm3 = pSrc[pos_int+1] - pSrc[pos_int]\n\t" |
274 |
"mulps %%xmm0,%%xmm3 # xmm3 = pos_fract * (pSrc[pos_int+1] - pSrc[pos_int])\n\t" |
275 |
"addps %%xmm3,%%xmm2 # xmm2 = pSrc[pos_int] + (pos_fract * (pSrc[pos_int+1] - pSrc[pos_int]))\n\t" |
276 |
: /* no output */ |
277 |
: /* no input */ |
278 |
: "%xmm2" /* holds linear interpolated sample point (of all 4 samples) at the end */ |
279 |
); |
280 |
} |
281 |
|
282 |
// TODO: no support for cubic interpolation yet |
283 |
inline static void Interpolate4StepsStereoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) { |
284 |
/* calculate playback position of each of the 4 samples by adding the associated pitch */ |
285 |
__asm__ __volatile__ ( |
286 |
"movss (%0),%%xmm0 # sample position of sample[0] -> xmm0[0]\n\t" |
287 |
"movss %1,%%xmm1 # copy pitch -> xmm1[0]\n\t" |
288 |
"shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t" |
289 |
"addss %%xmm1,%%xmm0 # calculate sample position of sample[1]\n\t" |
290 |
"shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t" |
291 |
"addss %%xmm1,%%xmm0 # calculate sample position of sample[2]\n\t" |
292 |
"shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t" |
293 |
"addss %%xmm1,%%xmm0 # calculate sample position of sample[3]\n\t" |
294 |
"movss %%xmm0,%%xmm2 # xmm0[0] -> xmm2[0]\n\t" |
295 |
"addss %%xmm1,%%xmm2 # calculate initial sample position for the next 4-sample cycle\n\t" |
296 |
"movss %%xmm2,(%0) # update 'Pos'\n\t" |
297 |
"shufps $0x1b,%%xmm0,%%xmm0 # swap, so that xmm0[0]=sample pos 0, xmm0[1]=sample pos 1,...\n\t" |
298 |
"cvttps2pi %%xmm0,%%mm4 # int(xmm0[0-1]) -> mm4\n\t" |
299 |
"shufps $0xe4,%%xmm0,%%xmm1 # xmm0[2-3] -> xmm1[2-3]\n\t" |
300 |
"shufps $0x0e,%%xmm1,%%xmm1 # xmm1[2-3] -> xmm1[0-1]\n\t" |
301 |
"cvttps2pi %%xmm1,%%mm5 # int(xmm1[0-1]) -> mm5\n\t" |
302 |
"cvtpi2ps %%mm5,%%xmm1 # double(mm5) -> xmm1[0-1]\n\t" |
303 |
"shufps $0x44,%%xmm1,%%xmm1 # shift lower 2 FPs up to the upper 2 cells\n\t" |
304 |
"cvtpi2ps %%mm4,%%xmm1 # double(mm4) -> xmm1[0-1]\n\t" |
305 |
"subps %%xmm1,%%xmm0 # xmm0[1-3] = xmm0[1-3] - xmm1[1-3]\n\t" |
306 |
: |
307 |
: "r" (Pos), /* %0 */ |
308 |
"m" (Pitch) /* %1 */ |
309 |
: "%xmm0", /* holds fractional position (0.0 <= x < 1.0) of sample 0-3 at the end */ |
310 |
"%xmm1", /* holds integer position (back converted to SPFP) of sample 0-3 at the end */ |
311 |
"mm4", /* holds integer position of sample 0-1 at the end */ |
312 |
"mm5", /* holds integer position of sample 2-3 at the end */ |
313 |
"st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" |
314 |
); |
315 |
|
316 |
/* get sample values of pSrc[pos_int], pSrc[pos_int+1], pSrc[pos_int+2] and pSrc[pos_int+3] of the 4 samples */ |
317 |
__asm__ __volatile__ ( |
318 |
"xorl %%eax,%%eax # clear eax\n\t" |
319 |
"xorl %%edx,%%edx # clear edx\n\t" |
320 |
"movd %%mm4,%%edi # sample position of sample 0\n\t" |
321 |
"psrlq $32,%%mm4 # mm4 >> 32\n\t" |
322 |
"movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 0)\n\t" |
323 |
"cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t" |
324 |
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
325 |
"movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 0+1)\n\t" |
326 |
"cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t" |
327 |
"shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t" |
328 |
"movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 0+2)\n\t" |
329 |
"cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t" |
330 |
"shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t" |
331 |
"movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 0+3)\n\t" |
332 |
"cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t" |
333 |
"movd %%mm4,%%edi # sample position of sample 1\n\t" |
334 |
"shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t" |
335 |
"movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 1)\n\t" |
336 |
"cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t" |
337 |
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
338 |
"movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 1+1)\n\t" |
339 |
"cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t" |
340 |
"shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t" |
341 |
"movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 1+2)\n\t" |
342 |
"cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t" |
343 |
"shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t" |
344 |
"movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 1+3)\n\t" |
345 |
"cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t" |
346 |
"movd %%mm5,%%edi # sample position of sample 2\n\t" |
347 |
"shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t" |
348 |
"psrlq $32,%%mm5 # mm5 >> 32\n\t" |
349 |
"movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 2)\n\t" |
350 |
"cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t" |
351 |
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
352 |
"movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 2+1)\n\t" |
353 |
"cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t" |
354 |
"shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t" |
355 |
"movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 2+2)\n\t" |
356 |
"cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t" |
357 |
"shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t" |
358 |
"movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 2+3)\n\t" |
359 |
"cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t" |
360 |
"movd %%mm5,%%edi # sample position of sample 3\n\t" |
361 |
"shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t" |
362 |
"movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 3)\n\t" |
363 |
"cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t" |
364 |
"shufps $0x1b, %%xmm2, %%xmm2 # shift up\n\t" |
365 |
"movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 3+1)\n\t" |
366 |
"cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t" |
367 |
"shufps $0x1b, %%xmm3, %%xmm3 # shift up\n\t" |
368 |
"movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 3+2)\n\t" |
369 |
"cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t" |
370 |
"shufps $0x1b, %%xmm4, %%xmm4 # swap to correct order\n\t" |
371 |
"movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 3+3)\n\t" |
372 |
"cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t" |
373 |
"shufps $0x1b, %%xmm5, %%xmm5 # swap to correct order\n\t" |
374 |
: /* no output */ |
375 |
: "S" (pSrc) /* %0 - sample read position */ |
376 |
: "%eax", "%edx", "%edi", |
377 |
"xmm2", /* holds pSrc[int_pos] of the 4 samples at the end */ |
378 |
"xmm3", /* holds pSrc[int_pos+1] of the 4 samples at the end */ |
379 |
"xmm4", /* holds pSrc[int_pos+2] of the 4 samples at the end */ |
380 |
"xmm5", /* holds pSrc[int_pos+3] of the 4 samples at the end */ |
381 |
"mm4", /* holds integer position of sample 0-1 at the end */ |
382 |
"mm5", /* holds integer position of sample 2-3 at the end */ |
383 |
"st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" |
384 |
); |
385 |
/* linear interpolation of the 4 samples (left & right channel) simultaniously */ |
386 |
__asm__ __volatile__ ( |
387 |
"subps %%xmm2,%%xmm4 # xmm4 = pSrc[pos_int+2] - pSrc[pos_int] (left channel)\n\t" |
388 |
"mulps %%xmm0,%%xmm4 # xmm4 = pos_fract * (pSrc[pos_int+2] - pSrc[pos_int]) (left channel)\n\t" |
389 |
"addps %%xmm4,%%xmm2 # xmm2 = pSrc[pos_int] + (pos_fract * (pSrc[pos_int+2] - pSrc[pos_int])) (left channel)\n\t" |
390 |
"subps %%xmm3,%%xmm5 # xmm5 = pSrc[pos_int+3] - pSrc[pos_int+1] (right channel)\n\t" |
391 |
"mulps %%xmm0,%%xmm5 # xmm5 = pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1]) (right channel)\n\t" |
392 |
"addps %%xmm5,%%xmm3 # xmm3 = pSrc[pos_int+1] + (pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1])) (right channel)\n\t" |
393 |
: /* no output */ |
394 |
: /* no input */ |
395 |
: "%xmm2", /* holds linear interpolated sample of left channel (of all 4 samples) at the end */ |
396 |
"%xmm3" /* holds linear interpolated sample of right channel (of all 4 samples) at the end */ |
397 |
); |
398 |
} |
399 |
#endif // ARCH_X86 |
400 |
}; |
401 |
|
402 |
} // namespace LinuxSampler |
403 |
|
404 |
#endif // __LS_RESAMPLER_H__ |