1 |
/*************************************************************************** |
2 |
* * |
3 |
* LinuxSampler - modular, streaming capable sampler * |
4 |
* * |
5 |
* Copyright (C) 2003, 2004 by Benno Senoner and Christian Schoenebeck * |
6 |
* * |
7 |
* This program is free software; you can redistribute it and/or modify * |
8 |
* it under the terms of the GNU General Public License as published by * |
9 |
* the Free Software Foundation; either version 2 of the License, or * |
10 |
* (at your option) any later version. * |
11 |
* * |
12 |
* This program is distributed in the hope that it will be useful, * |
13 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of * |
14 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
15 |
* GNU General Public License for more details. * |
16 |
* * |
17 |
* You should have received a copy of the GNU General Public License * |
18 |
* along with this program; if not, write to the Free Software * |
19 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, * |
20 |
* MA 02111-1307 USA * |
21 |
***************************************************************************/ |
22 |
|
23 |
#ifndef __LS_RESAMPLER_H__ |
24 |
#define __LS_RESAMPLER_H__ |
25 |
|
26 |
#include "../../common/global.h" |
27 |
|
28 |
// TODO: cubic interpolation is not yet supported by the MMX/SSE(1) version; those code paths always interpolate linearly, regardless of this setting
29 |
#ifndef USE_LINEAR_INTERPOLATION |
30 |
# define USE_LINEAR_INTERPOLATION 1 ///< set to 0 if you prefer cubic interpolation (slower, better quality) |
31 |
#endif |
32 |
|
33 |
namespace LinuxSampler { |
34 |
|
35 |
/** @brief Stereo sample point
 *
 * Holds the signal value of a single sample point for both channels
 * of a stereo signal. It is a plain aggregate without constructors,
 * so its members are not initialized implicitly.
 */
struct stereo_sample_t {
    float left;  ///< signal value of the left channel
    float right; ///< signal value of the right channel
};
44 |
|
45 |
/** @brief Resampler Template |
46 |
* |
47 |
* This template provides pure C++ and MMX/SSE assembly implementations |
48 |
* for linear and cubic interpolation for pitching a mono or stereo |
49 |
* input signal. |
50 |
*/ |
51 |
template<bool INTERPOLATE> |
52 |
class Resampler { |
53 |
public: |
54 |
/** @brief Fetch the next mono sample point (pure C++ version).
 *
 * If the template argument INTERPOLATE is true the call is forwarded
 * to Interpolate1StepMonoCPP(). Otherwise the sample point at the
 * truncated position is returned unmodified and the position is
 * advanced by exactly one sample point.
 *
 * @param pSrc  - source sample data
 * @param Pos   - playback position (index into pSrc), advanced by this call
 * @param Pitch - position increment, only used if INTERPOLATE is true
 * @returns signal value for the position given on entry
 */
inline static float GetNextSampleMonoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
    if (!INTERPOLATE) { // no pitch, so no interpolation necessary
        const int iSample = static_cast<int>(*Pos);
        *Pos += 1.0;
        return pSrc[iSample];
    }
    return Interpolate1StepMonoCPP(pSrc, Pos, Pitch);
}
62 |
|
63 |
/** @brief Fetch the next stereo sample point (pure C++ version).
 *
 * If the template argument INTERPOLATE is true the call is forwarded
 * to Interpolate1StepStereoCPP(). Otherwise the left / right pair at
 * the truncated position is returned unmodified and the position is
 * advanced by exactly one sample point.
 *
 * The source data is read as interleaved pairs: the left value is at
 * index 2 * int(Pos), the right value directly behind it.
 *
 * @param pSrc  - interleaved source sample data
 * @param Pos   - playback position (counted in left/right pairs),
 *                advanced by this call
 * @param Pitch - position increment, only used if INTERPOLATE is true
 * @returns left and right signal value for the position given on entry
 */
inline static stereo_sample_t GetNextSampleStereoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
    if (!INTERPOLATE) { // no pitch, so no interpolation necessary
        const int iLeft = static_cast<int>(*Pos) << 1; // index of the left channel value
        *Pos += 1.0;
        stereo_sample_t frame;
        frame.left  = pSrc[iLeft];
        frame.right = pSrc[iLeft + 1];
        return frame;
    }
    return Interpolate1StepStereoCPP(pSrc, Pos, Pitch);
}
75 |
|
76 |
#if ARCH_X86 |
77 |
/** @brief Fetch the next 4 mono sample points at once (MMX/SSE version).
 *
 * If the template argument INTERPOLATE is true the call is forwarded
 * to Interpolate4StepsMonoMMXSSE(). Otherwise four consecutive sample
 * points, starting at the integer position, are loaded as signed
 * 16 bit words, converted to single precision floats and the position
 * is advanced by 4.0.
 *
 * Nothing is returned the C++ way: the four values are left in
 * register xmm2 (xmm2[0] = first sample point ... xmm2[3] = fourth)
 * for whatever code follows this call.
 *
 * @param pSrc  - source sample data (accessed as 16 bit signed words)
 * @param Pos   - pointer to the playback position; the assembly reads
 *                and writes it as a single precision float
 * @param Pitch - position increment, only used if INTERPOLATE is true
 */
inline static void GetNext4SamplesMonoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
    if (INTERPOLATE) Interpolate4StepsMonoMMXSSE(pSrc, Pos, Pitch);
    else { // no pitch, so no interpolation necessary
        const float posIncrement = 4.0f; // four sample points are consumed per call
        // NOTE(review): cvtss2si rounds according to the current MXCSR
        // rounding mode whereas GetNextSampleMonoCPP() truncates. Both
        // agree as long as Pos holds an integral value - confirm that
        // callers guarantee this.
        __asm__ __volatile__ (
            "movss (%1), %%xmm5 # load Pos\n\t"
            "cvtss2si %%xmm5, %%edi # int(Pos)\n\t"
            "addss %2, %%xmm5 # Pos += 4.0f\n\t"
            "movswl (%0,%%edi,2), %%eax # load sample 0\n\t"
            "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
            "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
            "movswl 2(%0,%%edi,2), %%edx # load sample 1\n\t"
            "cvtsi2ss %%edx, %%xmm2 # convert to float\n\t"
            "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
            "movss %%xmm5, (%1) # update Pos\n\t"
            "movswl 4(%0,%%edi,2), %%eax # load sample 2\n\t"
            "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
            "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
            "movswl 6(%0,%%edi,2), %%edx # load sample 3\n\t"
            "cvtsi2ss %%edx, %%xmm2 # convert to float\n\t"
            "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
            :: "r" (pSrc), "r" (Pos), "m" (posIncrement)
            : "%eax", "%edx", "%edi",
              "%xmm2",  /* holds the 4 sample points at the end */
              "%xmm5",  /* scratch: holds the updated position */
              "memory"  /* *Pos is written and pSrc[] is read behind the compiler's back */
        );
    }
}
103 |
|
104 |
/** @brief Fetch the next 4 stereo sample points at once (MMX/SSE version).
 *
 * If the template argument INTERPOLATE is true the call is forwarded
 * to Interpolate4StepsStereoMMXSSE(). Otherwise four consecutive
 * left / right pairs, starting at the integer position, are loaded as
 * signed 16 bit words, converted to single precision floats and the
 * position is advanced by 4.0.
 *
 * Nothing is returned the C++ way: the values are left in registers
 * for whatever code follows this call, in playback order
 * (element 0 = first sample point):
 *  - xmm2 holds the 4 left channel values
 *  - xmm3 holds the 4 right channel values
 *
 * @param pSrc  - interleaved source sample data (accessed as 16 bit
 *                signed words)
 * @param Pos   - pointer to the playback position; the assembly reads
 *                and writes it as a single precision float
 * @param Pitch - position increment, only used if INTERPOLATE is true
 */
inline static void GetNext4SamplesStereoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
    if (INTERPOLATE) {
        Interpolate4StepsStereoMMXSSE(pSrc, Pos, Pitch);
        //EMMS;
    } else { // no pitch, so no interpolation necessary
        const float posIncrement = 4.0f; // four sample points are consumed per call
        // NOTE(review): cvtss2si rounds according to the current MXCSR
        // rounding mode whereas GetNextSampleStereoCPP() truncates. Both
        // agree as long as Pos holds an integral value - confirm that
        // callers guarantee this.
        __asm__ __volatile__ (
            "movss (%1), %%xmm5 # load Pos\n\t"
            "cvtss2si %%xmm5, %%edi # int(Pos)\n\t"
            "addss %2, %%xmm5 # Pos += 4.0f\n\t"
            "movswl (%0, %%edi,4), %%eax # load sample 0 (left)\n\t"
            "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
            "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
            "movss %%xmm5, (%1) # update Pos\n\t"
            "movswl 2(%0, %%edi,4), %%edx # load sample 0 (right)\n\t"
            "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
            "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
            "movswl 4(%0, %%edi,4), %%eax # load sample 1 (left)\n\t"
            "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
            "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
            "movswl 6(%0, %%edi,4), %%edx # load sample 1 (right)\n\t"
            "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
            "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
            "movswl 8(%0, %%edi,4), %%eax # load sample 2 (left)\n\t"
            "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
            "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
            "movswl 10(%0, %%edi,4), %%edx # load sample 2 (right)\n\t"
            "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
            "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
            "movswl 12(%0, %%edi,4), %%eax # load sample 3 (left)\n\t"
            "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
            "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
            "movswl 14(%0, %%edi,4), %%edx # load sample 3 (right)\n\t"
            "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
            "shufps $0x1b, %%xmm3, %%xmm3 # swap to correct order\n\t"
            :: "r" (pSrc), "r" (Pos), "m" (posIncrement)
            : "%eax", "%edx", "%edi",
              "%xmm2",  /* holds the 4 left channel values at the end */
              "%xmm3",  /* holds the 4 right channel values at the end */
              "%xmm5",  /* scratch: holds the updated position */
              "memory"  /* *Pos is written and pSrc[] is read behind the compiler's back */
        );
    }
}
144 |
#endif // ARCH_X86 |
145 |
|
146 |
protected: |
147 |
|
148 |
/** @brief Interpolate one mono sample point (pure C++ version).
 *
 * Splits the playback position into its integer and fractional part,
 * interpolates the signal value at that position and finally advances
 * the position by Pitch.
 *
 * With USE_LINEAR_INTERPOLATION the result lies on the straight line
 * between pSrc[int(Pos)] and pSrc[int(Pos)+1]. Otherwise a cubic
 * polynomial through the four sample points starting at pSrc[int(Pos)]
 * is evaluated; for a fractional part of 0 it yields pSrc[int(Pos)+1].
 *
 * @param pSrc  - source sample data
 * @param Pos   - playback position (index into pSrc), advanced by Pitch
 * @param Pitch - position increment per call
 * @returns interpolated signal value for the position given on entry
 */
inline static float Interpolate1StepMonoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
    const int   iWhole = static_cast<int>(*Pos);           // integer part of the position
    const float frac   = static_cast<float>(*Pos - iWhole); // fractional part of the position
    const sample_t* const p = pSrc + iWhole;                // first sample point involved

    #if USE_LINEAR_INTERPOLATION
    const float result = p[0] + frac * (p[1] - p[0]);
    #else // polynomial interpolation
    const float y0 = p[0];
    const float y1 = p[1];
    const float y2 = p[2];
    const float y3 = p[3];
    const float cubic     = (3.0f * (y1 - y2) - y0 + y3) * 0.5f;
    const float quadratic = 2.0f * y2 + y0 - (5.0f * y1 + y3) * 0.5f;
    const float linear    = (y2 - y0) * 0.5f;
    const float result    = (((cubic * frac) + quadratic) * frac + linear) * frac + y1;
    #endif // USE_LINEAR_INTERPOLATION

    *Pos += Pitch;
    return result;
}
168 |
|
169 |
/** @brief Interpolate one stereo sample point (pure C++ version).
 *
 * Splits the playback position into its integer and fractional part,
 * interpolates the left and the right channel at that position and
 * finally advances the position by Pitch.
 *
 * The source data is read as interleaved pairs, so successive values
 * of one channel are two array elements apart. The same interpolation
 * scheme as in the mono case is applied to each channel: linear with
 * USE_LINEAR_INTERPOLATION, a cubic polynomial through four successive
 * values of the channel otherwise.
 *
 * @param pSrc  - interleaved source sample data
 * @param Pos   - playback position (counted in left/right pairs),
 *                advanced by Pitch
 * @param Pitch - position increment per call
 * @returns interpolated left and right signal value for the position
 *          given on entry
 */
inline static stereo_sample_t Interpolate1StepStereoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
    const int   iWhole = static_cast<int>(*Pos);           // integer part of the position
    const float frac   = static_cast<float>(*Pos - iWhole); // fractional part of the position
    const sample_t* const p = pSrc + (iWhole << 1);         // left value of the first pair involved

    stereo_sample_t result;

    #if USE_LINEAR_INTERPOLATION
    result.left  = p[0] + frac * (p[2] - p[0]);
    result.right = p[1] + frac * (p[3] - p[1]);
    #else // polynomial interpolation
    // left channel: even offsets
    const float l0 = p[0];
    const float l1 = p[2];
    const float l2 = p[4];
    const float l3 = p[6];
    const float lCubic     = (3.0f * (l1 - l2) - l0 + l3) * 0.5f;
    const float lQuadratic = 2.0f * l2 + l0 - (5.0f * l1 + l3) * 0.5f;
    const float lLinear    = (l2 - l0) * 0.5f;
    result.left = (((lCubic * frac) + lQuadratic) * frac + lLinear) * frac + l1;

    // right channel: odd offsets
    const float r0 = p[1];
    const float r1 = p[3];
    const float r2 = p[5];
    const float r3 = p[7];
    const float rCubic     = (3.0f * (r1 - r2) - r0 + r3) * 0.5f;
    const float rQuadratic = 2.0f * r2 + r0 - (5.0f * r1 + r3) * 0.5f;
    const float rLinear    = (r2 - r0) * 0.5f;
    result.right = (((rCubic * frac) + rQuadratic) * frac + rLinear) * frac + r1;
    #endif // USE_LINEAR_INTERPOLATION

    *Pos += Pitch;
    return result;
}
206 |
|
207 |
#if ARCH_X86 |
208 |
// TODO: no support for cubic interpolation yet |
209 |
/** @brief Linear interpolation of 4 mono sample points at once (MMX/SSE version).
 *
 * Works in three consecutive assembly statements which hand their
 * intermediate results to each other in fixed registers:
 *  -# compute the 4 playback positions Pos, Pos+Pitch, Pos+2*Pitch,
 *     Pos+3*Pitch, store Pos+4*Pitch back and split each position into
 *     integer part (mm4, mm5) and fractional part (xmm0)
 *  -# load pSrc[pos_int] (xmm2) and pSrc[pos_int+1] (xmm3) for each of
 *     the 4 positions as signed 16 bit words and convert them to float
 *  -# xmm2 = xmm2 + xmm0 * (xmm3 - xmm2)
 *
 * Nothing is returned the C++ way: the 4 interpolated values are left
 * in xmm2 (xmm2[0] = first sample point ... xmm2[3] = fourth) for
 * whatever code follows this call.
 *
 * NOTE(review): the MMX registers mm4 / mm5 are used and no EMMS is
 * executed in here, so presumably the caller has to take care of that
 * before any x87 floating point code runs - confirm.
 *
 * @param pSrc  - source sample data (accessed as 16 bit signed words)
 * @param Pos   - pointer to the playback position; the assembly reads
 *                and writes it as a single precision float
 * @param Pitch - position increment per sample point
 */
inline static void Interpolate4StepsMonoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
    /* calculate playback position of each of the 4 samples by adding the associated pitch */
    __asm__ __volatile__ (
        "movss (%0),%%xmm0 # sample position of sample[0] -> xmm0[0]\n\t"
        "movss %1,%%xmm1 # copy pitch -> xmm1[0]\n\t"
        "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
        "addss %%xmm1,%%xmm0 # calculate sample position of sample[1]\n\t"
        "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
        "addss %%xmm1,%%xmm0 # calculate sample position of sample[2]\n\t"
        "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
        "addss %%xmm1,%%xmm0 # calculate sample position of sample[3]\n\t"
        "movss %%xmm0,%%xmm2 # xmm0[0] -> xmm2[0]\n\t"
        "addss %%xmm1,%%xmm2 # calculate initial sample position for the next 4-sample cycle\n\t"
        "movss %%xmm2,(%0) # update 'Pos'\n\t"
        "shufps $0x1b,%%xmm0,%%xmm0 # swap, so that xmm0[0]=sample pos 0, xmm0[1]=sample pos 1,...\n\t"
        "cvttps2pi %%xmm0,%%mm4 # int(xmm0[0-1]) -> mm4\n\t"
        "shufps $0xe4,%%xmm0,%%xmm1 # xmm0[2-3] -> xmm1[2-3]\n\t"
        "shufps $0x0e,%%xmm1,%%xmm1 # xmm1[2-3] -> xmm1[0-1]\n\t"
        "cvttps2pi %%xmm1,%%mm5 # int(xmm1[0-1]) -> mm5\n\t"
        "cvtpi2ps %%mm5,%%xmm1 # double(mm5) -> xmm1[0-1]\n\t"
        "shufps $0x44,%%xmm1,%%xmm1 # shift lower 2 FPs up to the upper 2 cells\n\t"
        "cvtpi2ps %%mm4,%%xmm1 # double(mm4) -> xmm1[0-1]\n\t"
        "subps %%xmm1,%%xmm0 # xmm0[1-3] = xmm0[1-3] - xmm1[1-3]\n\t"
        :
        : "r" (Pos),    /* %0 */
          "m" (Pitch)   /* %1 */
        : "%xmm0",      /* holds fractional position (0.0 <= x < 1.0) of sample 0-3 at the end */
          "%xmm1",      /* holds integer position (back converted to SPFP) of sample 0-3 at the end */
          "%xmm2",      /* scratch: used to compute the position for the next 4-sample cycle */
          "mm4",        /* holds integer position of sample 0-1 at the end */
          "mm5",        /* holds integer position of sample 2-3 at the end */
          "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
          "memory"      /* *Pos is written behind the compiler's back */
    );
    /* get sample values of pSrc[pos_int] and pSrc[pos_int+1] of the 4 samples */
    __asm__ __volatile__ (
        "movd %%mm4,%%edi # sample position of sample 0\n\t"
        "psrlq $32,%%mm4 # mm4 >> 32\n\t"
        "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 0)\n\t"
        "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 0+1)\n\t"
        "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
        "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
        "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
        "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
        "movd %%mm4,%%edi # sample position of sample 1\n\t"
        "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 1)\n\t"
        "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 1+1)\n\t"
        "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
        "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
        "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
        "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
        "movd %%mm5,%%edi # sample position of sample 2\n\t"
        "psrlq $32,%%mm5 # mm5 >> 32\n\t"
        "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 2)\n\t"
        "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 2+1)\n\t"
        "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
        "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
        "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
        "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
        "movd %%mm5,%%edi # sample position of sample 3\n\t"
        "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 3)\n\t"
        "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 3+1)\n\t"
        "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
        "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
        "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
        "shufps $0x1b, %%xmm3, %%xmm3 # swap to correct order\n\t"
        : /* no output */
        : "S" (pSrc)    /* %0 - sample read position */
        : "%eax", "%ecx", "%edi",
          "%xmm2",      /* holds pSrc[int_pos] of the 4 samples at the end */
          "%xmm3",      /* holds pSrc[int_pos+1] of the 4 samples at the end */
          "mm4",        /* consumed: shifted right while extracting the position of sample 1 */
          "mm5",        /* consumed: shifted right while extracting the position of sample 3 */
          "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
          "memory"      /* pSrc[] is read behind the compiler's back */
    );
    /* linear interpolation of the 4 samples simultaneously */
    __asm__ __volatile__ (
        "subps %%xmm2,%%xmm3 # xmm3 = pSrc[pos_int+1] - pSrc[pos_int]\n\t"
        "mulps %%xmm0,%%xmm3 # xmm3 = pos_fract * (pSrc[pos_int+1] - pSrc[pos_int])\n\t"
        "addps %%xmm3,%%xmm2 # xmm2 = pSrc[pos_int] + (pos_fract * (pSrc[pos_int+1] - pSrc[pos_int]))\n\t"
        : /* no output */
        : /* no input */
        : "%xmm2",      /* holds linear interpolated sample point (of all 4 samples) at the end */
          "%xmm3"       /* scratch: overwritten with the weighted difference */
    );
}
292 |
|
293 |
// TODO: no support for cubic interpolation yet |
294 |
/** @brief Linear interpolation of 4 stereo sample points at once (MMX/SSE version).
 *
 * Works in three consecutive assembly statements which hand their
 * intermediate results to each other in fixed registers:
 *  -# compute the 4 playback positions Pos, Pos+Pitch, Pos+2*Pitch,
 *     Pos+3*Pitch, store Pos+4*Pitch back and split each position into
 *     integer part (mm4, mm5) and fractional part (xmm0)
 *  -# for each of the 4 positions load the interleaved values
 *     pSrc[pos_int] (xmm2), pSrc[pos_int+1] (xmm3), pSrc[pos_int+2]
 *     (xmm4) and pSrc[pos_int+3] (xmm5) as signed 16 bit words and
 *     convert them to float (pos_int here being the index of the left
 *     channel value, i.e. 2 * integer position)
 *  -# xmm2 = xmm2 + xmm0 * (xmm4 - xmm2)   (left channel)
 *     xmm3 = xmm3 + xmm0 * (xmm5 - xmm3)   (right channel)
 *
 * Nothing is returned the C++ way: the interpolated values are left
 * in xmm2 (left channel) and xmm3 (right channel), element 0 being the
 * first sample point, for whatever code follows this call.
 *
 * NOTE(review): the MMX registers mm4 / mm5 are used and no EMMS is
 * executed in here, so presumably the caller has to take care of that
 * before any x87 floating point code runs - confirm.
 *
 * @param pSrc  - interleaved source sample data (accessed as 16 bit
 *                signed words)
 * @param Pos   - pointer to the playback position; the assembly reads
 *                and writes it as a single precision float
 * @param Pitch - position increment per sample point
 */
inline static void Interpolate4StepsStereoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
    /* calculate playback position of each of the 4 samples by adding the associated pitch */
    __asm__ __volatile__ (
        "movss (%0),%%xmm0 # sample position of sample[0] -> xmm0[0]\n\t"
        "movss %1,%%xmm1 # copy pitch -> xmm1[0]\n\t"
        "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
        "addss %%xmm1,%%xmm0 # calculate sample position of sample[1]\n\t"
        "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
        "addss %%xmm1,%%xmm0 # calculate sample position of sample[2]\n\t"
        "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
        "addss %%xmm1,%%xmm0 # calculate sample position of sample[3]\n\t"
        "movss %%xmm0,%%xmm2 # xmm0[0] -> xmm2[0]\n\t"
        "addss %%xmm1,%%xmm2 # calculate initial sample position for the next 4-sample cycle\n\t"
        "movss %%xmm2,(%0) # update 'Pos'\n\t"
        "shufps $0x1b,%%xmm0,%%xmm0 # swap, so that xmm0[0]=sample pos 0, xmm0[1]=sample pos 1,...\n\t"
        "cvttps2pi %%xmm0,%%mm4 # int(xmm0[0-1]) -> mm4\n\t"
        "shufps $0xe4,%%xmm0,%%xmm1 # xmm0[2-3] -> xmm1[2-3]\n\t"
        "shufps $0x0e,%%xmm1,%%xmm1 # xmm1[2-3] -> xmm1[0-1]\n\t"
        "cvttps2pi %%xmm1,%%mm5 # int(xmm1[0-1]) -> mm5\n\t"
        "cvtpi2ps %%mm5,%%xmm1 # double(mm5) -> xmm1[0-1]\n\t"
        "shufps $0x44,%%xmm1,%%xmm1 # shift lower 2 FPs up to the upper 2 cells\n\t"
        "cvtpi2ps %%mm4,%%xmm1 # double(mm4) -> xmm1[0-1]\n\t"
        "subps %%xmm1,%%xmm0 # xmm0[1-3] = xmm0[1-3] - xmm1[1-3]\n\t"
        :
        : "r" (Pos),    /* %0 */
          "m" (Pitch)   /* %1 */
        : "%xmm0",      /* holds fractional position (0.0 <= x < 1.0) of sample 0-3 at the end */
          "%xmm1",      /* holds integer position (back converted to SPFP) of sample 0-3 at the end */
          "%xmm2",      /* scratch: used to compute the position for the next 4-sample cycle */
          "mm4",        /* holds integer position of sample 0-1 at the end */
          "mm5",        /* holds integer position of sample 2-3 at the end */
          "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
          "memory"      /* *Pos is written behind the compiler's back */
    );

    /* get sample values of pSrc[pos_int], pSrc[pos_int+1], pSrc[pos_int+2] and pSrc[pos_int+3] of the 4 samples */
    __asm__ __volatile__ (
        "xorl %%eax,%%eax # clear eax\n\t"
        "xorl %%edx,%%edx # clear edx\n\t"
        "movd %%mm4,%%edi # sample position of sample 0\n\t"
        "psrlq $32,%%mm4 # mm4 >> 32\n\t"
        "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 0)\n\t"
        "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
        "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
        "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 0+1)\n\t"
        "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
        "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
        "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 0+2)\n\t"
        "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
        "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
        "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 0+3)\n\t"
        "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
        "movd %%mm4,%%edi # sample position of sample 1\n\t"
        "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
        "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 1)\n\t"
        "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
        "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
        "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 1+1)\n\t"
        "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
        "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
        "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 1+2)\n\t"
        "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
        "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
        "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 1+3)\n\t"
        "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
        "movd %%mm5,%%edi # sample position of sample 2\n\t"
        "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
        "psrlq $32,%%mm5 # mm5 >> 32\n\t"
        "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 2)\n\t"
        "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
        "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
        "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 2+1)\n\t"
        "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
        "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
        "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 2+2)\n\t"
        "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
        "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
        "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 2+3)\n\t"
        "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
        "movd %%mm5,%%edi # sample position of sample 3\n\t"
        "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
        "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 3)\n\t"
        "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
        "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
        "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 3+1)\n\t"
        "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
        "shufps $0x1b, %%xmm3, %%xmm3 # swap to correct order\n\t"
        "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 3+2)\n\t"
        "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
        "shufps $0x1b, %%xmm4, %%xmm4 # swap to correct order\n\t"
        "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 3+3)\n\t"
        "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
        "shufps $0x1b, %%xmm5, %%xmm5 # swap to correct order\n\t"
        : /* no output */
        : "S" (pSrc)    /* %0 - sample read position */
        : "%eax", "%edx", "%edi",
          "xmm2",       /* holds pSrc[int_pos] of the 4 samples at the end */
          "xmm3",       /* holds pSrc[int_pos+1] of the 4 samples at the end */
          "xmm4",       /* holds pSrc[int_pos+2] of the 4 samples at the end */
          "xmm5",       /* holds pSrc[int_pos+3] of the 4 samples at the end */
          "mm4",        /* consumed: shifted right while extracting the position of sample 1 */
          "mm5",        /* consumed: shifted right while extracting the position of sample 3 */
          "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
          "memory"      /* pSrc[] is read behind the compiler's back */
    );
    /* linear interpolation of the 4 samples (left & right channel) simultaneously */
    __asm__ __volatile__ (
        "subps %%xmm2,%%xmm4 # xmm4 = pSrc[pos_int+2] - pSrc[pos_int] (left channel)\n\t"
        "mulps %%xmm0,%%xmm4 # xmm4 = pos_fract * (pSrc[pos_int+2] - pSrc[pos_int]) (left channel)\n\t"
        "addps %%xmm4,%%xmm2 # xmm2 = pSrc[pos_int] + (pos_fract * (pSrc[pos_int+2] - pSrc[pos_int])) (left channel)\n\t"
        "subps %%xmm3,%%xmm5 # xmm5 = pSrc[pos_int+3] - pSrc[pos_int+1] (right channel)\n\t"
        "mulps %%xmm0,%%xmm5 # xmm5 = pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1]) (right channel)\n\t"
        "addps %%xmm5,%%xmm3 # xmm3 = pSrc[pos_int+1] + (pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1])) (right channel)\n\t"
        : /* no output */
        : /* no input */
        : "%xmm2",      /* holds linear interpolated sample of left channel (of all 4 samples) at the end */
          "%xmm3",      /* holds linear interpolated sample of right channel (of all 4 samples) at the end */
          "%xmm4",      /* scratch: overwritten with the weighted left channel difference */
          "%xmm5"       /* scratch: overwritten with the weighted right channel difference */
    );
}
410 |
#endif // ARCH_X86 |
411 |
}; |
412 |
|
413 |
} // namespace LinuxSampler |
414 |
|
415 |
#endif // __LS_RESAMPLER_H__ |