/[svn]/linuxsampler/trunk/src/engines/common/Resampler.h
ViewVC logotype

Contents of /linuxsampler/trunk/src/engines/common/Resampler.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2377 - (show annotations) (download) (as text)
Thu Oct 4 18:16:26 2012 UTC (11 years, 5 months ago) by schoenebeck
File MIME type: text/x-c++hdr
File size: 28731 byte(s)
* Various "const" and "restrict" optimizations.

1 /***************************************************************************
2 * *
3 * LinuxSampler - modular, streaming capable sampler *
4 * *
5 * Copyright (C) 2003, 2004 by Benno Senoner and Christian Schoenebeck *
6 * Copyright (C) 2005 - 2012 Christian Schoenebeck *
7 * *
8 * This program is free software; you can redistribute it and/or modify *
9 * it under the terms of the GNU General Public License as published by *
10 * the Free Software Foundation; either version 2 of the License, or *
11 * (at your option) any later version. *
12 * *
13 * This program is distributed in the hope that it will be useful, *
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16 * GNU General Public License for more details. *
17 * *
18 * You should have received a copy of the GNU General Public License *
19 * along with this program; if not, write to the Free Software *
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, *
21 * MA 02111-1307 USA *
22 ***************************************************************************/
23
24 // Note: the assembly code is currently disabled, as it doesn't fit into
25 // the new synthesis core introduced by LS 0.4.0
26
27 #ifndef __LS_RESAMPLER_H__
28 #define __LS_RESAMPLER_H__
29
#include <string.h>

#include "../../common/global_private.h"
31
32 // TODO: cubic interpolation is not yet supported by the MMX/SSE(1) version though
33 #ifndef USE_LINEAR_INTERPOLATION
34 # define USE_LINEAR_INTERPOLATION 1 ///< set to 0 if you prefer cubic interpolation (slower, better quality)
35 #endif
36
37 namespace LinuxSampler {
38
39 /** @brief Stereo sample point
40 *
41 * Encapsulates one stereo sample point, thus signal value for one
42 * sample point for left and right channel.
43 */
44 struct stereo_sample_t {
45 float left; ///< signal value of the left channel for this sample point
46 float right; ///< signal value of the right channel for this sample point
47 };
48
49 /** @brief Resampler Template
50 *
51 * This template provides pure C++ and MMX/SSE assembly implementations
52 * for linear and cubic interpolation for pitching a mono or stereo
53 * input signal.
54 */
55 template<bool INTERPOLATE,bool BITDEPTH24>
56 class Resampler {
57 public:
58 inline static float GetNextSampleMonoCPP(sample_t* __restrict pSrc, double* __restrict Pos, float& Pitch) {
59 if (INTERPOLATE) return Interpolate1StepMonoCPP(pSrc, Pos, Pitch);
60 else { // no pitch, so no interpolation necessary
61 int pos_int = (int) *Pos;
62 *Pos += 1.0;
63 return pSrc [pos_int];
64 }
65 }
66
67 inline static stereo_sample_t GetNextSampleStereoCPP(sample_t* __restrict pSrc, double* __restrict Pos, float& Pitch) {
68 if (INTERPOLATE) return Interpolate1StepStereoCPP(pSrc, Pos, Pitch);
69 else { // no pitch, so no interpolation necessary
70 int pos_int = (int) *Pos;
71 pos_int <<= 1;
72 *Pos += 1.0;
73 stereo_sample_t samplePoint;
74 samplePoint.left = pSrc[pos_int];
75 samplePoint.right = pSrc[pos_int+1];
76 return samplePoint;
77 }
78 }
79
80 #if 0 // CONFIG_ASM && ARCH_X86
/**
 * Fetch four consecutive mono sample points at once (x86 SSE inline
 * assembly). This function sits inside an "#if 0" region, i.e. it is
 * currently not compiled.
 *
 * With INTERPOLATE enabled the work is delegated to
 * Interpolate4StepsMonoMMXSSE(). Otherwise the assembly below loads a
 * single precision float from @a Pos, converts it to an integer index,
 * writes the value increased by 4.0f back to @a Pos and converts the four
 * 16 bit samples starting at that index to floats which are collected in
 * register xmm2. @a Pitch is not used in that case.
 *
 * The asm statement declares no output operands, so the four values are
 * only available in xmm2 afterwards. xmm2 and xmm5 are written by the
 * code but do not appear in the clobber list.
 *
 * NOTE(review): @a Pos is accessed as a 32 bit float here, while the C++
 * variants of this class take a double* - confirm against the caller
 * before re-enabling.
 * NOTE(review): cvtss2si converts using the current SSE rounding mode,
 * whereas the C++ path truncates with an (int) cast - confirm intended.
 * NOTE(review): the loads are 16 bit (movswl, scale 2); BITDEPTH24 is not
 * considered here.
 *
 * @param pSrc  - sample data to read from
 * @param Pos   - in/out: playback position (read and written as float)
 * @param Pitch - only forwarded to the interpolating implementation
 */
81 inline static void GetNext4SamplesMonoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
82 if (INTERPOLATE) Interpolate4StepsMonoMMXSSE(pSrc, Pos, Pitch);
83 else { // no pitch, so no interpolation necessary
// position increment for one batch of four sample points
84 const float __4f = 4.0f;
85 __asm__ __volatile__ (
86 "movss (%1), %%xmm5 # load Pos\n\t"
87 "cvtss2si %%xmm5, %%edi # int(Pos)\n\t"
88 "addss %2, %%xmm5 # Pos += 4.0f\n\t"
89 "movswl (%0,%%edi,2), %%eax # load sample 0\n\t"
90 "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
91 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
92 "movswl 2(%0,%%edi,2), %%edx # load sample 1\n\t"
93 "cvtsi2ss %%edx, %%xmm2 # convert to float\n\t"
94 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
95 "movss %%xmm5, (%1) # update Pos\n\t"
96 "movswl 4(%0,%%edi,2), %%eax # load sample 2\n\t"
97 "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
98 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
99 "movswl 6(%0,%%edi,2), %%edx # load sample 3\n\t"
100 "cvtsi2ss %%edx, %%xmm2 # convert to float\n\t"
101 "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
/* inputs: %0 = pSrc, %1 = Pos, %2 = constant 4.0f (memory operand) */
102 :: "r" (pSrc), "r" (Pos), "m" (__4f)
103 : "%eax", "%edx", "%edi"
104 );
105 }
106 }
107
/**
 * Fetch four consecutive stereo sample points at once (x86 SSE inline
 * assembly). This function sits inside an "#if 0" region, i.e. it is
 * currently not compiled.
 *
 * With INTERPOLATE enabled the work is delegated to
 * Interpolate4StepsStereoMMXSSE(). Otherwise the assembly below loads a
 * single precision float from @a Pos, converts it to an integer index,
 * writes the value increased by 4.0f back to @a Pos and reads eight
 * interleaved 16 bit values (4 bytes per sample point) starting at that
 * index. Values at even byte offsets (0, 4, 8, 12) are converted to float
 * and collected in xmm2, values at odd offsets (2, 6, 10, 14) in xmm3.
 * @a Pitch is not used in that case.
 *
 * The asm statement declares no output operands, so the results are only
 * available in xmm2 / xmm3 afterwards. xmm2, xmm3 and xmm5 are written by
 * the code but do not appear in the clobber list.
 *
 * NOTE(review): the assembler comment on the load from byte offset 2
 * says "sample 0 (left)"; going by the interleaving used in
 * GetNextSampleStereoCPP() (left at index 2n, right at 2n+1) that value
 * is the right channel of sample 0.
 * NOTE(review): @a Pos is accessed as a 32 bit float here, while the C++
 * variants of this class take a double* - confirm against the caller
 * before re-enabling.
 * NOTE(review): the loads are 16 bit (movswl); BITDEPTH24 is not
 * considered here.
 *
 * @param pSrc  - interleaved sample data to read from
 * @param Pos   - in/out: playback position (read and written as float)
 * @param Pitch - only forwarded to the interpolating implementation
 */
108 inline static void GetNext4SamplesStereoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
109 if (INTERPOLATE) {
110 Interpolate4StepsStereoMMXSSE(pSrc, Pos, Pitch);
111 //EMMS;
112 } else { // no pitch, so no interpolation necessary
// position increment for one batch of four sample points
113 const float __4f = 4.0f;
114 __asm__ __volatile__ (
115 "movss (%1), %%xmm5 # load Pos\n\t"
116 "cvtss2si %%xmm5, %%edi # int(Pos)\n\t"
117 "addss %2, %%xmm5 # Pos += 4.0f\n\t"
118 "movswl (%0, %%edi,4), %%eax # load sample 0 (left)\n\t"
119 "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
120 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
121 "movss %%xmm5, (%1) # update Pos\n\t"
122 "movswl 2(%0, %%edi,4), %%edx # load sample 0 (left)\n\t"
123 "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
124 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
125 "movswl 4(%0, %%edi,4), %%eax # load sample 1 (left)\n\t"
126 "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
127 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
128 "movswl 6(%0, %%edi,4), %%edx # load sample 1 (right)\n\t"
129 "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
130 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
131 "movswl 8(%0, %%edi,4), %%eax # load sample 2 (left)\n\t"
132 "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
133 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
134 "movswl 10(%0, %%edi,4), %%edx # load sample 2 (right)\n\t"
135 "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
136 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
137 "movswl 12(%0, %%edi,4), %%eax # load sample 3 (left)\n\t"
138 "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
139 "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
140 "movswl 14(%0, %%edi,4), %%edx # load sample 3 (right)\n\t"
141 "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
142 "shufps $0x1b, %%xmm3, %%xmm3 # swap to correct order\n\t"
/* inputs: %0 = pSrc, %1 = Pos, %2 = constant 4.0f (memory operand) */
143 :: "r" (pSrc), "r" (Pos), "m" (__4f)
144 : "%eax", "%edx", "%edi"
145 );
146 }
147 }
148 #endif // CONFIG_ASM && ARCH_X86
149
150 protected:
151
152 inline static int32_t getSample(sample_t* __restrict src, int pos) {
153 if (BITDEPTH24) {
154 pos *= 3;
155 #if WORDS_BIGENDIAN
156 unsigned char* p = (unsigned char*)src;
157 return p[pos] << 8 | p[pos + 1] << 16 | p[pos + 2] << 24;
158 #else
159 // 24bit read optimization:
160 // a misaligned 32bit read and subquent 8 bit shift is faster (on x86) than reading 3 single bytes and shifting them
161 return (*((int32_t *)(&((char *)(src))[pos])))<<8;
162 #endif
163 } else {
164 return src[pos];
165 }
166 }
167
168 inline static float Interpolate1StepMonoCPP(sample_t* __restrict pSrc, double* __restrict Pos, float& Pitch) {
169 int pos_int = (int) *Pos; // integer position
170 float pos_fract = *Pos - pos_int; // fractional part of position
171
172 #if USE_LINEAR_INTERPOLATION
173 int x1 = getSample(pSrc, pos_int);
174 int x2 = getSample(pSrc, pos_int + 1);
175 float samplePoint = (x1 + pos_fract * (x2 - x1));
176 #else // polynomial interpolation
177 float xm1 = getSample(pSrc, pos_int);
178 float x0 = getSample(pSrc, pos_int + 1);
179 float x1 = getSample(pSrc, pos_int + 2);
180 float x2 = getSample(pSrc, pos_int + 3);
181 float a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f;
182 float b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f;
183 float c = (x1 - xm1) * 0.5f;
184 float samplePoint = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0;
185 #endif // USE_LINEAR_INTERPOLATION
186
187 *Pos += Pitch;
188 return samplePoint;
189 }
190
191 inline static stereo_sample_t Interpolate1StepStereoCPP(sample_t* __restrict pSrc, double* __restrict Pos, float& Pitch) {
192 int pos_int = (int) *Pos; // integer position
193 float pos_fract = *Pos - pos_int; // fractional part of position
194 pos_int <<= 1;
195
196 stereo_sample_t samplePoint;
197
198 #if USE_LINEAR_INTERPOLATION
199 // left channel
200 int x1 = getSample(pSrc, pos_int);
201 int x2 = getSample(pSrc, pos_int + 2);
202 samplePoint.left = (x1 + pos_fract * (x2 - x1));
203 // right channel
204 x1 = getSample(pSrc, pos_int + 1);
205 x2 = getSample(pSrc, pos_int + 3);
206 samplePoint.right = (x1 + pos_fract * (x2 - x1));
207 #else // polynomial interpolation
208 // calculate left channel
209 float xm1 = getSample(pSrc, pos_int);
210 float x0 = getSample(pSrc, pos_int + 2);
211 float x1 = getSample(pSrc, pos_int + 4);
212 float x2 = getSample(pSrc, pos_int + 6);
213 float a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f;
214 float b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f;
215 float c = (x1 - xm1) * 0.5f;
216 samplePoint.left = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0;
217
218 //calculate right channel
219 xm1 = getSample(pSrc, pos_int + 1);
220 x0 = getSample(pSrc, pos_int + 3);
221 x1 = getSample(pSrc, pos_int + 5);
222 x2 = getSample(pSrc, pos_int + 7);
223 a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f;
224 b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f;
225 c = (x1 - xm1) * 0.5f;
226 samplePoint.right = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0;
227 #endif // USE_LINEAR_INTERPOLATION
228
229 *Pos += Pitch;
230 return samplePoint;
231 }
232
233 #if 0 // CONFIG_ASM && ARCH_X86
234 // TODO: no support for cubic interpolation yet
/**
 * Linear interpolation of four consecutive mono output samples at once
 * (x86 MMX/SSE inline assembly). This function sits inside an "#if 0"
 * region, i.e. it is currently not compiled.
 *
 * The work is split over three separate asm statements:
 *  1. Starting from the float stored at @a Pos, the positions of the four
 *     output samples are built by repeatedly adding @a Pitch. The
 *     position following the fourth sample is written back to @a Pos.
 *     The truncated integer positions end up in mm4 (samples 0-1) and
 *     mm5 (samples 2-3), the fractional parts in xmm0.
 *  2. For each of the four integer positions the 16 bit values at that
 *     position and at the following one are loaded from @a pSrc and
 *     converted to float into xmm2 and xmm3 respectively.
 *  3. xmm2 = xmm2 + xmm0 * (xmm3 - xmm2), i.e. the interpolated samples
 *     are left in xmm2.
 *
 * No asm statement declares output operands; values are handed from one
 * statement to the next (and to the caller) purely through registers.
 *
 * NOTE(review): this relies on the compiler not touching xmm0-xmm3 /
 * mm4 / mm5 between the statements, which the clobber lists do not
 * express - confirm before re-enabling. The third statement also modifies
 * xmm3 while listing only xmm2 as clobbered.
 * NOTE(review): @a Pos is accessed as a 32 bit float here, while the C++
 * variants of this class take a double*.
 * NOTE(review): the loads are 16 bit (movswl, scale 2); BITDEPTH24 is not
 * considered here.
 *
 * @param pSrc  - sample data to read from
 * @param Pos   - in/out: playback position (read and written as float)
 * @param Pitch - position increment between two output samples
 */
235 inline static void Interpolate4StepsMonoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
236 /* calculate playback position of each of the 4 samples by adding the associated pitch */
237 __asm__ __volatile__ (
238 "movss (%0),%%xmm0 # sample position of sample[0] -> xmm0[0]\n\t"
239 "movss %1,%%xmm1 # copy pitch -> xmm1[0]\n\t"
240 "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
241 "addss %%xmm1,%%xmm0 # calculate sample position of sample[1]\n\t"
242 "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
243 "addss %%xmm1,%%xmm0 # calculate sample position of sample[2]\n\t"
244 "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
245 "addss %%xmm1,%%xmm0 # calculate sample position of sample[3]\n\t"
246 "movss %%xmm0,%%xmm2 # xmm0[0] -> xmm2[0]\n\t"
247 "addss %%xmm1,%%xmm2 # calculate initial sample position for the next 4-sample cycle\n\t"
248 "movss %%xmm2,(%0) # update 'Pos'\n\t"
249 "shufps $0x1b,%%xmm0,%%xmm0 # swap, so that xmm0[0]=sample pos 0, xmm0[1]=sample pos 1,...\n\t"
250 "cvttps2pi %%xmm0,%%mm4 # int(xmm0[0-1]) -> mm4\n\t"
251 "shufps $0xe4,%%xmm0,%%xmm1 # xmm0[2-3] -> xmm1[2-3]\n\t"
252 "shufps $0x0e,%%xmm1,%%xmm1 # xmm1[2-3] -> xmm1[0-1]\n\t"
253 "cvttps2pi %%xmm1,%%mm5 # int(xmm1[0-1]) -> mm5\n\t"
254 "cvtpi2ps %%mm5,%%xmm1 # double(mm5) -> xmm1[0-1]\n\t"
255 "shufps $0x44,%%xmm1,%%xmm1 # shift lower 2 FPs up to the upper 2 cells\n\t"
256 "cvtpi2ps %%mm4,%%xmm1 # double(mm4) -> xmm1[0-1]\n\t"
257 "subps %%xmm1,%%xmm0 # xmm0[1-3] = xmm0[1-3] - xmm1[1-3]\n\t"
258 :
259 : "r" (Pos), /* %0 */
260 "m" (Pitch) /* %1 */
261 : "%xmm0", /* holds fractional position (0.0 <= x < 1.0) of sample 0-3 at the end */
262 "%xmm1", /* holds integer position (back converted to SPFP) of sample 0-3 at the end */
263 "mm4", /* holds integer position of sample 0-1 at the end */
264 "mm5", /* holds integer position of sample 2-3 at the end */
265 "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
266 );
267 /* get sample values of pSrc[pos_int] and pSrc[pos_int+1] of the 4 samples */
268 __asm__ __volatile__ (
269 "movd %%mm4,%%edi # sample position of sample 0\n\t"
270 "psrlq $32,%%mm4 # mm4 >> 32\n\t"
271 "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 0)\n\t"
272 "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 0+1)\n\t"
273 "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
274 "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
275 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
276 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
277 "movd %%mm4,%%edi # sample position of sample 1\n\t"
278 "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 1)\n\t"
279 "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 1+1)\n\t"
280 "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
281 "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
282 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
283 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
284 "movd %%mm5,%%edi # sample position of sample 2\n\t"
285 "psrlq $32,%%mm5 # mm5 >> 32\n\t"
286 "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 2)\n\t"
287 "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 2+1)\n\t"
288 "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
289 "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
290 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
291 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
292 "movd %%mm5,%%edi # sample position of sample 2\n\t"
293 "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 3)\n\t"
294 "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 3+1)\n\t"
295 "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
296 "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
297 "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
298 "shufps $0x1b, %%xmm3, %%xmm3 # swap to correct order\n\t"
299 : /* no output */
300 : "S" (pSrc) /* %0 - sample read position */
301 : "%eax", "%ecx", /*"%edx",*/ "%edi",
302 "%xmm2", /* holds pSrc[int_pos] of the 4 samples at the end */
303 "%xmm3", /* holds pSrc[int_pos+1] of the 4 samples at the end */
304 "mm4", /* holds integer position of sample 0-1 at the end */
305 "mm5", /* holds integer position of sample 2-3 at the end */
306 "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
307 );
308 /* linear interpolation of the 4 samples simultaneously */
309 __asm__ __volatile__ (
310 "subps %%xmm2,%%xmm3 # xmm3 = pSrc[pos_int+1] - pSrc[pos_int]\n\t"
311 "mulps %%xmm0,%%xmm3 # xmm3 = pos_fract * (pSrc[pos_int+1] - pSrc[pos_int])\n\t"
312 "addps %%xmm3,%%xmm2 # xmm2 = pSrc[pos_int] + (pos_fract * (pSrc[pos_int+1] - pSrc[pos_int]))\n\t"
313 : /* no output */
314 : /* no input */
315 : "%xmm2" /* holds linear interpolated sample point (of all 4 samples) at the end */
316 );
317 }
318
319 // TODO: no support for cubic interpolation yet
/**
 * Linear interpolation of four consecutive stereo output samples at once
 * (x86 MMX/SSE inline assembly). This function sits inside an "#if 0"
 * region, i.e. it is currently not compiled.
 *
 * The work is split over three separate asm statements:
 *  1. Starting from the float stored at @a Pos, the positions of the four
 *     output samples are built by repeatedly adding @a Pitch. The
 *     position following the fourth sample is written back to @a Pos.
 *     The truncated integer positions end up in mm4 (samples 0-1) and
 *     mm5 (samples 2-3), the fractional parts in xmm0.
 *  2. For each of the four integer positions, four 16 bit values are
 *     loaded from @a pSrc at byte offsets 0, 2, 4 and 6 relative to
 *     position * 4 and converted to float into xmm2, xmm3, xmm4 and xmm5
 *     respectively (per the comments below: xmm2 / xmm4 feed the left
 *     channel, xmm3 / xmm5 the right channel).
 *  3. xmm2 = xmm2 + xmm0 * (xmm4 - xmm2) and
 *     xmm3 = xmm3 + xmm0 * (xmm5 - xmm3), i.e. the interpolated samples
 *     are left in xmm2 (left) and xmm3 (right).
 *
 * No asm statement declares output operands; values are handed from one
 * statement to the next (and to the caller) purely through registers.
 *
 * NOTE(review): this relies on the compiler not touching xmm0-xmm5 /
 * mm4 / mm5 between the statements, which the clobber lists do not
 * express - confirm before re-enabling. The third statement also modifies
 * xmm4 and xmm5 while listing only xmm2 and xmm3 as clobbered.
 * NOTE(review): @a Pos is accessed as a 32 bit float here, while the C++
 * variants of this class take a double*.
 * NOTE(review): the loads are 16 bit (movswl); BITDEPTH24 is not
 * considered here.
 *
 * @param pSrc  - interleaved sample data to read from
 * @param Pos   - in/out: playback position (read and written as float)
 * @param Pitch - position increment between two output samples
 */
320 inline static void Interpolate4StepsStereoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
321 /* calculate playback position of each of the 4 samples by adding the associated pitch */
322 __asm__ __volatile__ (
323 "movss (%0),%%xmm0 # sample position of sample[0] -> xmm0[0]\n\t"
324 "movss %1,%%xmm1 # copy pitch -> xmm1[0]\n\t"
325 "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
326 "addss %%xmm1,%%xmm0 # calculate sample position of sample[1]\n\t"
327 "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
328 "addss %%xmm1,%%xmm0 # calculate sample position of sample[2]\n\t"
329 "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
330 "addss %%xmm1,%%xmm0 # calculate sample position of sample[3]\n\t"
331 "movss %%xmm0,%%xmm2 # xmm0[0] -> xmm2[0]\n\t"
332 "addss %%xmm1,%%xmm2 # calculate initial sample position for the next 4-sample cycle\n\t"
333 "movss %%xmm2,(%0) # update 'Pos'\n\t"
334 "shufps $0x1b,%%xmm0,%%xmm0 # swap, so that xmm0[0]=sample pos 0, xmm0[1]=sample pos 1,...\n\t"
335 "cvttps2pi %%xmm0,%%mm4 # int(xmm0[0-1]) -> mm4\n\t"
336 "shufps $0xe4,%%xmm0,%%xmm1 # xmm0[2-3] -> xmm1[2-3]\n\t"
337 "shufps $0x0e,%%xmm1,%%xmm1 # xmm1[2-3] -> xmm1[0-1]\n\t"
338 "cvttps2pi %%xmm1,%%mm5 # int(xmm1[0-1]) -> mm5\n\t"
339 "cvtpi2ps %%mm5,%%xmm1 # double(mm5) -> xmm1[0-1]\n\t"
340 "shufps $0x44,%%xmm1,%%xmm1 # shift lower 2 FPs up to the upper 2 cells\n\t"
341 "cvtpi2ps %%mm4,%%xmm1 # double(mm4) -> xmm1[0-1]\n\t"
342 "subps %%xmm1,%%xmm0 # xmm0[1-3] = xmm0[1-3] - xmm1[1-3]\n\t"
343 :
344 : "r" (Pos), /* %0 */
345 "m" (Pitch) /* %1 */
346 : "%xmm0", /* holds fractional position (0.0 <= x < 1.0) of sample 0-3 at the end */
347 "%xmm1", /* holds integer position (back converted to SPFP) of sample 0-3 at the end */
348 "mm4", /* holds integer position of sample 0-1 at the end */
349 "mm5", /* holds integer position of sample 2-3 at the end */
350 "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
351 );
352
353 /* get sample values of pSrc[pos_int], pSrc[pos_int+1], pSrc[pos_int+2] and pSrc[pos_int+3] of the 4 samples */
354 __asm__ __volatile__ (
355 "xorl %%eax,%%eax # clear eax\n\t"
356 "xorl %%edx,%%edx # clear edx\n\t"
357 "movd %%mm4,%%edi # sample position of sample 0\n\t"
358 "psrlq $32,%%mm4 # mm4 >> 32\n\t"
359 "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 0)\n\t"
360 "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
361 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
362 "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 0+1)\n\t"
363 "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
364 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
365 "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 0+2)\n\t"
366 "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
367 "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
368 "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 0+3)\n\t"
369 "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
370 "movd %%mm4,%%edi # sample position of sample 1\n\t"
371 "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
372 "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 1)\n\t"
373 "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
374 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
375 "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 1+1)\n\t"
376 "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
377 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
378 "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 1+2)\n\t"
379 "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
380 "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
381 "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 1+3)\n\t"
382 "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
383 "movd %%mm5,%%edi # sample position of sample 2\n\t"
384 "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
385 "psrlq $32,%%mm5 # mm5 >> 32\n\t"
386 "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 2)\n\t"
387 "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
388 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
389 "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 2+1)\n\t"
390 "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
391 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
392 "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 2+2)\n\t"
393 "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
394 "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
395 "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 2+3)\n\t"
396 "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
397 "movd %%mm5,%%edi # sample position of sample 3\n\t"
398 "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
399 "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 3)\n\t"
400 "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
401 "shufps $0x1b, %%xmm2, %%xmm2 # shift up\n\t"
402 "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 3+1)\n\t"
403 "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
404 "shufps $0x1b, %%xmm3, %%xmm3 # shift up\n\t"
405 "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 3+2)\n\t"
406 "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
407 "shufps $0x1b, %%xmm4, %%xmm4 # swap to correct order\n\t"
408 "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 3+3)\n\t"
409 "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
410 "shufps $0x1b, %%xmm5, %%xmm5 # swap to correct order\n\t"
411 : /* no output */
412 : "S" (pSrc) /* %0 - sample read position */
413 : "%eax", "%edx", "%edi",
414 "xmm2", /* holds pSrc[int_pos] of the 4 samples at the end */
415 "xmm3", /* holds pSrc[int_pos+1] of the 4 samples at the end */
416 "xmm4", /* holds pSrc[int_pos+2] of the 4 samples at the end */
417 "xmm5", /* holds pSrc[int_pos+3] of the 4 samples at the end */
418 "mm4", /* holds integer position of sample 0-1 at the end */
419 "mm5", /* holds integer position of sample 2-3 at the end */
420 "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
421 );
422 /* linear interpolation of the 4 samples (left & right channel) simultaneously */
423 __asm__ __volatile__ (
424 "subps %%xmm2,%%xmm4 # xmm4 = pSrc[pos_int+2] - pSrc[pos_int] (left channel)\n\t"
425 "mulps %%xmm0,%%xmm4 # xmm4 = pos_fract * (pSrc[pos_int+2] - pSrc[pos_int]) (left channel)\n\t"
426 "addps %%xmm4,%%xmm2 # xmm2 = pSrc[pos_int] + (pos_fract * (pSrc[pos_int+2] - pSrc[pos_int])) (left channel)\n\t"
427 "subps %%xmm3,%%xmm5 # xmm5 = pSrc[pos_int+3] - pSrc[pos_int+1] (right channel)\n\t"
428 "mulps %%xmm0,%%xmm5 # xmm5 = pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1]) (right channel)\n\t"
429 "addps %%xmm5,%%xmm3 # xmm3 = pSrc[pos_int+1] + (pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1])) (right channel)\n\t"
430 : /* no output */
431 : /* no input */
432 : "%xmm2", /* holds linear interpolated sample of left channel (of all 4 samples) at the end */
433 "%xmm3" /* holds linear interpolated sample of right channel (of all 4 samples) at the end */
434 );
435 }
436 #endif // CONFIG_ASM && ARCH_X86
437 };
438
439 } // namespace LinuxSampler
440
441 #endif // __LS_RESAMPLER_H__

  ViewVC Help
Powered by ViewVC