/[svn]/linuxsampler/trunk/src/engines/common/Resampler.h
ViewVC logotype

Contents of /linuxsampler/trunk/src/engines/common/Resampler.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 563 - (show annotations) (download) (as text)
Sun May 22 20:43:32 2005 UTC (18 years, 11 months ago) by schoenebeck
File MIME type: text/x-c++hdr
File size: 27239 byte(s)
* (re)implemented voice stealing algorithm "oldestvoiceonkey" and made it
  the default voice stealing algorithm
* added new LSCP command "GET SERVER INFO" which currently returns
  description and version about this sampler
* added some API documentation comments
* minor cleanup

1 /***************************************************************************
2 * *
3 * LinuxSampler - modular, streaming capable sampler *
4 * *
5 * Copyright (C) 2003, 2004 by Benno Senoner and Christian Schoenebeck *
6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the Free Software *
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, *
20 * MA 02111-1307 USA *
21 ***************************************************************************/
22
23 #ifndef __LS_RESAMPLER_H__
24 #define __LS_RESAMPLER_H__
25
26 #include "../../common/global.h"
27
// Compile-time choice of the interpolation algorithm used by the pure C++
// resampling code below.
// TODO: the MMX/SSE(1) code path only implements linear interpolation;
//       cubic interpolation is not supported there yet.
#if !defined(USE_LINEAR_INTERPOLATION)
# define USE_LINEAR_INTERPOLATION 1 ///< 1 = linear interpolation (default), 0 = cubic interpolation (slower, better quality)
#endif
32
33 namespace LinuxSampler {
34
/** @brief Stereo sample point
 *
 * Plain value type carrying the signal value of exactly one sample
 * point for each of the two channels (left and right).
 */
struct stereo_sample_t {
    float left;  ///< signal value of the left channel
    float right; ///< signal value of the right channel
};
44
45 /** @brief Resampler Template
46 *
47 * This template provides pure C++ and MMX/SSE assembly implementations
48 * for linear and cubic interpolation for pitching a mono or stereo
49 * input signal.
50 */
51 template<bool INTERPOLATE>
52 class Resampler {
53 public:
54 inline static float GetNextSampleMonoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
55 if (INTERPOLATE) return Interpolate1StepMonoCPP(pSrc, Pos, Pitch);
56 else { // no pitch, so no interpolation necessary
57 int pos_int = (int) *Pos;
58 *Pos += 1.0;
59 return pSrc [pos_int];
60 }
61 }
62
63 inline static stereo_sample_t GetNextSampleStereoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
64 if (INTERPOLATE) return Interpolate1StepStereoCPP(pSrc, Pos, Pitch);
65 else { // no pitch, so no interpolation necessary
66 int pos_int = (int) *Pos;
67 pos_int <<= 1;
68 *Pos += 1.0;
69 stereo_sample_t samplePoint;
70 samplePoint.left = pSrc[pos_int];
71 samplePoint.right = pSrc[pos_int+1];
72 return samplePoint;
73 }
74 }
75
76 #if ARCH_X86
77 inline static void GetNext4SamplesMonoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
78 if (INTERPOLATE) Interpolate4StepsMonoMMXSSE(pSrc, Pos, Pitch);
79 else { // no pitch, so no interpolation necessary
80 const float __4f = 4.0f;
81 __asm__ __volatile__ (
82 "movss (%1), %%xmm5 # load Pos\n\t"
83 "cvtss2si %%xmm5, %%edi # int(Pos)\n\t"
84 "addss %2, %%xmm5 # Pos += 4.0f\n\t"
85 "movswl (%0,%%edi,2), %%eax # load sample 0\n\t"
86 "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
87 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
88 "movswl 2(%0,%%edi,2), %%edx # load sample 1\n\t"
89 "cvtsi2ss %%edx, %%xmm2 # convert to float\n\t"
90 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
91 "movss %%xmm5, (%1) # update Pos\n\t"
92 "movswl 4(%0,%%edi,2), %%eax # load sample 2\n\t"
93 "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
94 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
95 "movswl 6(%0,%%edi,2), %%edx # load sample 3\n\t"
96 "cvtsi2ss %%edx, %%xmm2 # convert to float\n\t"
97 "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
98 :: "r" (pSrc), "r" (Pos), "m" (__4f)
99 : "%eax", "%edx", "%edi"
100 );
101 }
102 }
103
104 inline static void GetNext4SamplesStereoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
105 if (INTERPOLATE) {
106 Interpolate4StepsStereoMMXSSE(pSrc, Pos, Pitch);
107 //EMMS;
108 } else { // no pitch, so no interpolation necessary
109 const float __4f = 4.0f;
110 __asm__ __volatile__ (
111 "movss (%1), %%xmm5 # load Pos\n\t"
112 "cvtss2si %%xmm5, %%edi # int(Pos)\n\t"
113 "addss %2, %%xmm5 # Pos += 4.0f\n\t"
114 "movswl (%0, %%edi,4), %%eax # load sample 0 (left)\n\t"
115 "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
116 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
117 "movss %%xmm5, (%1) # update Pos\n\t"
118 "movswl 2(%0, %%edi,4), %%edx # load sample 0 (left)\n\t"
119 "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
120 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
121 "movswl 4(%0, %%edi,4), %%eax # load sample 1 (left)\n\t"
122 "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
123 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
124 "movswl 6(%0, %%edi,4), %%edx # load sample 1 (right)\n\t"
125 "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
126 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
127 "movswl 8(%0, %%edi,4), %%eax # load sample 2 (left)\n\t"
128 "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
129 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
130 "movswl 10(%0, %%edi,4), %%edx # load sample 2 (right)\n\t"
131 "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
132 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
133 "movswl 12(%0, %%edi,4), %%eax # load sample 3 (left)\n\t"
134 "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
135 "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
136 "movswl 14(%0, %%edi,4), %%edx # load sample 3 (right)\n\t"
137 "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
138 "shufps $0x1b, %%xmm3, %%xmm3 # swap to correct order\n\t"
139 :: "r" (pSrc), "r" (Pos), "m" (__4f)
140 : "%eax", "%edx", "%edi"
141 );
142 }
143 }
144 #endif // ARCH_X86
145
146 protected:
147
148 inline static float Interpolate1StepMonoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
149 int pos_int = (int) *Pos; // integer position
150 float pos_fract = *Pos - pos_int; // fractional part of position
151
152 #if USE_LINEAR_INTERPOLATION
153 float samplePoint = pSrc[pos_int] + pos_fract * (pSrc[pos_int+1] - pSrc[pos_int]);
154 #else // polynomial interpolation
155 float xm1 = pSrc[pos_int];
156 float x0 = pSrc[pos_int+1];
157 float x1 = pSrc[pos_int+2];
158 float x2 = pSrc[pos_int+3];
159 float a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f;
160 float b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f;
161 float c = (x1 - xm1) * 0.5f;
162 float samplePoint = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0;
163 #endif // USE_LINEAR_INTERPOLATION
164
165 *Pos += Pitch;
166 return samplePoint;
167 }
168
169 inline static stereo_sample_t Interpolate1StepStereoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
170 int pos_int = (int) *Pos; // integer position
171 float pos_fract = *Pos - pos_int; // fractional part of position
172 pos_int <<= 1;
173
174 stereo_sample_t samplePoint;
175
176 #if USE_LINEAR_INTERPOLATION
177 // left channel
178 samplePoint.left = pSrc[pos_int] + pos_fract * (pSrc[pos_int+2] - pSrc[pos_int]);
179 // right channel
180 samplePoint.right = pSrc[pos_int+1] + pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1]);
181 #else // polynomial interpolation
182 // calculate left channel
183 float xm1 = pSrc[pos_int];
184 float x0 = pSrc[pos_int+2];
185 float x1 = pSrc[pos_int+4];
186 float x2 = pSrc[pos_int+6];
187 float a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f;
188 float b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f;
189 float c = (x1 - xm1) * 0.5f;
190 samplePoint.left = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0;
191
192 //calculate right channel
193 xm1 = pSrc[pos_int+1];
194 x0 = pSrc[pos_int+3];
195 x1 = pSrc[pos_int+5];
196 x2 = pSrc[pos_int+7];
197 a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f;
198 b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f;
199 c = (x1 - xm1) * 0.5f;
200 samplePoint.right = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0;
201 #endif // USE_LINEAR_INTERPOLATION
202
203 *Pos += Pitch;
204 return samplePoint;
205 }
206
207 #if ARCH_X86
208 // TODO: no support for cubic interpolation yet
209 inline static void Interpolate4StepsMonoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
210 /* calculate playback position of each of the 4 samples by adding the associated pitch */
211 __asm__ __volatile__ (
212 "movss (%0),%%xmm0 # sample position of sample[0] -> xmm0[0]\n\t"
213 "movss %1,%%xmm1 # copy pitch -> xmm1[0]\n\t"
214 "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
215 "addss %%xmm1,%%xmm0 # calculate sample position of sample[1]\n\t"
216 "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
217 "addss %%xmm1,%%xmm0 # calculate sample position of sample[2]\n\t"
218 "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
219 "addss %%xmm1,%%xmm0 # calculate sample position of sample[3]\n\t"
220 "movss %%xmm0,%%xmm2 # xmm0[0] -> xmm2[0]\n\t"
221 "addss %%xmm1,%%xmm2 # calculate initial sample position for the next 4-sample cycle\n\t"
222 "movss %%xmm2,(%0) # update 'Pos'\n\t"
223 "shufps $0x1b,%%xmm0,%%xmm0 # swap, so that xmm0[0]=sample pos 0, xmm0[1]=sample pos 1,...\n\t"
224 "cvttps2pi %%xmm0,%%mm4 # int(xmm0[0-1]) -> mm4\n\t"
225 "shufps $0xe4,%%xmm0,%%xmm1 # xmm0[2-3] -> xmm1[2-3]\n\t"
226 "shufps $0x0e,%%xmm1,%%xmm1 # xmm1[2-3] -> xmm1[0-1]\n\t"
227 "cvttps2pi %%xmm1,%%mm5 # int(xmm1[0-1]) -> mm5\n\t"
228 "cvtpi2ps %%mm5,%%xmm1 # double(mm5) -> xmm1[0-1]\n\t"
229 "shufps $0x44,%%xmm1,%%xmm1 # shift lower 2 FPs up to the upper 2 cells\n\t"
230 "cvtpi2ps %%mm4,%%xmm1 # double(mm4) -> xmm1[0-1]\n\t"
231 "subps %%xmm1,%%xmm0 # xmm0[1-3] = xmm0[1-3] - xmm1[1-3]\n\t"
232 :
233 : "r" (Pos), /* %0 */
234 "m" (Pitch) /* %1 */
235 : "%xmm0", /* holds fractional position (0.0 <= x < 1.0) of sample 0-3 at the end */
236 "%xmm1", /* holds integer position (back converted to SPFP) of sample 0-3 at the end */
237 "mm4", /* holds integer position of sample 0-1 at the end */
238 "mm5", /* holds integer position of sample 2-3 at the end */
239 "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
240 );
241 /* get sample values of pSrc[pos_int] and pSrc[pos_int+1] of the 4 samples */
242 __asm__ __volatile__ (
243 "movd %%mm4,%%edi # sample position of sample 0\n\t"
244 "psrlq $32,%%mm4 # mm4 >> 32\n\t"
245 "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 0)\n\t"
246 "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 0+1)\n\t"
247 "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
248 "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
249 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
250 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
251 "movd %%mm4,%%edi # sample position of sample 1\n\t"
252 "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 1)\n\t"
253 "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 1+1)\n\t"
254 "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
255 "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
256 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
257 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
258 "movd %%mm5,%%edi # sample position of sample 2\n\t"
259 "psrlq $32,%%mm5 # mm5 >> 32\n\t"
260 "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 2)\n\t"
261 "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 2+1)\n\t"
262 "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
263 "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
264 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
265 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
266 "movd %%mm5,%%edi # sample position of sample 2\n\t"
267 "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 3)\n\t"
268 "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 3+1)\n\t"
269 "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
270 "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
271 "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
272 "shufps $0x1b, %%xmm3, %%xmm3 # swap to correct order\n\t"
273 : /* no output */
274 : "S" (pSrc) /* %0 - sample read position */
275 : "%eax", "%ecx", /*"%edx",*/ "%edi",
276 "%xmm2", /* holds pSrc[int_pos] of the 4 samples at the end */
277 "%xmm3", /* holds pSrc[int_pos+1] of the 4 samples at the end */
278 "mm4", /* holds integer position of sample 0-1 at the end */
279 "mm5", /* holds integer position of sample 2-3 at the end */
280 "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
281 );
282 /* linear interpolation of the 4 samples simultaniously */
283 __asm__ __volatile__ (
284 "subps %%xmm2,%%xmm3 # xmm3 = pSrc[pos_int+1] - pSrc[pos_int]\n\t"
285 "mulps %%xmm0,%%xmm3 # xmm3 = pos_fract * (pSrc[pos_int+1] - pSrc[pos_int])\n\t"
286 "addps %%xmm3,%%xmm2 # xmm2 = pSrc[pos_int] + (pos_fract * (pSrc[pos_int+1] - pSrc[pos_int]))\n\t"
287 : /* no output */
288 : /* no input */
289 : "%xmm2" /* holds linear interpolated sample point (of all 4 samples) at the end */
290 );
291 }
292
293 // TODO: no support for cubic interpolation yet
294 inline static void Interpolate4StepsStereoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
295 /* calculate playback position of each of the 4 samples by adding the associated pitch */
296 __asm__ __volatile__ (
297 "movss (%0),%%xmm0 # sample position of sample[0] -> xmm0[0]\n\t"
298 "movss %1,%%xmm1 # copy pitch -> xmm1[0]\n\t"
299 "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
300 "addss %%xmm1,%%xmm0 # calculate sample position of sample[1]\n\t"
301 "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
302 "addss %%xmm1,%%xmm0 # calculate sample position of sample[2]\n\t"
303 "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
304 "addss %%xmm1,%%xmm0 # calculate sample position of sample[3]\n\t"
305 "movss %%xmm0,%%xmm2 # xmm0[0] -> xmm2[0]\n\t"
306 "addss %%xmm1,%%xmm2 # calculate initial sample position for the next 4-sample cycle\n\t"
307 "movss %%xmm2,(%0) # update 'Pos'\n\t"
308 "shufps $0x1b,%%xmm0,%%xmm0 # swap, so that xmm0[0]=sample pos 0, xmm0[1]=sample pos 1,...\n\t"
309 "cvttps2pi %%xmm0,%%mm4 # int(xmm0[0-1]) -> mm4\n\t"
310 "shufps $0xe4,%%xmm0,%%xmm1 # xmm0[2-3] -> xmm1[2-3]\n\t"
311 "shufps $0x0e,%%xmm1,%%xmm1 # xmm1[2-3] -> xmm1[0-1]\n\t"
312 "cvttps2pi %%xmm1,%%mm5 # int(xmm1[0-1]) -> mm5\n\t"
313 "cvtpi2ps %%mm5,%%xmm1 # double(mm5) -> xmm1[0-1]\n\t"
314 "shufps $0x44,%%xmm1,%%xmm1 # shift lower 2 FPs up to the upper 2 cells\n\t"
315 "cvtpi2ps %%mm4,%%xmm1 # double(mm4) -> xmm1[0-1]\n\t"
316 "subps %%xmm1,%%xmm0 # xmm0[1-3] = xmm0[1-3] - xmm1[1-3]\n\t"
317 :
318 : "r" (Pos), /* %0 */
319 "m" (Pitch) /* %1 */
320 : "%xmm0", /* holds fractional position (0.0 <= x < 1.0) of sample 0-3 at the end */
321 "%xmm1", /* holds integer position (back converted to SPFP) of sample 0-3 at the end */
322 "mm4", /* holds integer position of sample 0-1 at the end */
323 "mm5", /* holds integer position of sample 2-3 at the end */
324 "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
325 );
326
327 /* get sample values of pSrc[pos_int], pSrc[pos_int+1], pSrc[pos_int+2] and pSrc[pos_int+3] of the 4 samples */
328 __asm__ __volatile__ (
329 "xorl %%eax,%%eax # clear eax\n\t"
330 "xorl %%edx,%%edx # clear edx\n\t"
331 "movd %%mm4,%%edi # sample position of sample 0\n\t"
332 "psrlq $32,%%mm4 # mm4 >> 32\n\t"
333 "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 0)\n\t"
334 "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
335 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
336 "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 0+1)\n\t"
337 "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
338 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
339 "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 0+2)\n\t"
340 "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
341 "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
342 "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 0+3)\n\t"
343 "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
344 "movd %%mm4,%%edi # sample position of sample 1\n\t"
345 "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
346 "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 1)\n\t"
347 "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
348 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
349 "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 1+1)\n\t"
350 "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
351 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
352 "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 1+2)\n\t"
353 "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
354 "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
355 "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 1+3)\n\t"
356 "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
357 "movd %%mm5,%%edi # sample position of sample 2\n\t"
358 "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
359 "psrlq $32,%%mm5 # mm5 >> 32\n\t"
360 "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 2)\n\t"
361 "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
362 "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
363 "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 2+1)\n\t"
364 "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
365 "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
366 "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 2+2)\n\t"
367 "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
368 "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
369 "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 2+3)\n\t"
370 "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
371 "movd %%mm5,%%edi # sample position of sample 3\n\t"
372 "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
373 "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 3)\n\t"
374 "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
375 "shufps $0x1b, %%xmm2, %%xmm2 # shift up\n\t"
376 "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 3+1)\n\t"
377 "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
378 "shufps $0x1b, %%xmm3, %%xmm3 # shift up\n\t"
379 "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 3+2)\n\t"
380 "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
381 "shufps $0x1b, %%xmm4, %%xmm4 # swap to correct order\n\t"
382 "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 3+3)\n\t"
383 "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
384 "shufps $0x1b, %%xmm5, %%xmm5 # swap to correct order\n\t"
385 : /* no output */
386 : "S" (pSrc) /* %0 - sample read position */
387 : "%eax", "%edx", "%edi",
388 "xmm2", /* holds pSrc[int_pos] of the 4 samples at the end */
389 "xmm3", /* holds pSrc[int_pos+1] of the 4 samples at the end */
390 "xmm4", /* holds pSrc[int_pos+2] of the 4 samples at the end */
391 "xmm5", /* holds pSrc[int_pos+3] of the 4 samples at the end */
392 "mm4", /* holds integer position of sample 0-1 at the end */
393 "mm5", /* holds integer position of sample 2-3 at the end */
394 "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
395 );
396 /* linear interpolation of the 4 samples (left & right channel) simultaniously */
397 __asm__ __volatile__ (
398 "subps %%xmm2,%%xmm4 # xmm4 = pSrc[pos_int+2] - pSrc[pos_int] (left channel)\n\t"
399 "mulps %%xmm0,%%xmm4 # xmm4 = pos_fract * (pSrc[pos_int+2] - pSrc[pos_int]) (left channel)\n\t"
400 "addps %%xmm4,%%xmm2 # xmm2 = pSrc[pos_int] + (pos_fract * (pSrc[pos_int+2] - pSrc[pos_int])) (left channel)\n\t"
401 "subps %%xmm3,%%xmm5 # xmm5 = pSrc[pos_int+3] - pSrc[pos_int+1] (right channel)\n\t"
402 "mulps %%xmm0,%%xmm5 # xmm5 = pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1]) (right channel)\n\t"
403 "addps %%xmm5,%%xmm3 # xmm3 = pSrc[pos_int+1] + (pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1])) (right channel)\n\t"
404 : /* no output */
405 : /* no input */
406 : "%xmm2", /* holds linear interpolated sample of left channel (of all 4 samples) at the end */
407 "%xmm3" /* holds linear interpolated sample of right channel (of all 4 samples) at the end */
408 );
409 }
410 #endif // ARCH_X86
411 };
412
413 } // namespace LinuxSampler
414
415 #endif // __LS_RESAMPLER_H__

  ViewVC Help
Powered by ViewVC