/[svn]/linuxsampler/trunk/src/engines/common/Resampler.h
ViewVC logotype

Annotation of /linuxsampler/trunk/src/engines/common/Resampler.h

Parent Directory | Revision Log


Revision 320 - (hide annotations) (download) (as text)
Mon Dec 13 00:53:16 2004 UTC (19 years, 4 months ago) by schoenebeck
File MIME type: text/x-c++hdr
File size: 26788 byte(s)
* introduced 'synthesis mode' to reduce the amount of code and conditionals
  for the current synthesis case in the main synthesis loop
* support for MMX and SSE(1) in the core synthesis algorithms (CPU feature
  detection at runtime, only x86 so far)

1 schoenebeck 320 /***************************************************************************
2     * *
3     * LinuxSampler - modular, streaming capable sampler *
4     * *
5     * Copyright (C) 2003, 2004 by Benno Senoner and Christian Schoenebeck *
6     * *
7     * This program is free software; you can redistribute it and/or modify *
8     * it under the terms of the GNU General Public License as published by *
9     * the Free Software Foundation; either version 2 of the License, or *
10     * (at your option) any later version. *
11     * *
12     * This program is distributed in the hope that it will be useful, *
13     * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15     * GNU General Public License for more details. *
16     * *
17     * You should have received a copy of the GNU General Public License *
18     * along with this program; if not, write to the Free Software *
19     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, *
20     * MA 02111-1307 USA *
21     ***************************************************************************/
22    
23     #ifndef __LS_RESAMPLER_H__
24     #define __LS_RESAMPLER_H__
25    
26     #include "../../common/global.h"
27    
28     // TODO: cubic interpolation is not yet supported by the MMX/SSE(1) version though
29     #ifndef USE_LINEAR_INTERPOLATION
30     # define USE_LINEAR_INTERPOLATION 1 ///< set to 0 if you prefer cubic interpolation (slower, better quality)
31     #endif
32    
33     namespace LinuxSampler {
34    
35     struct stereo_sample_t {
36     float left;
37     float right;
38     };
39    
40     template<bool INTERPOLATE>
41     class Resampler {
42     public:
43     inline static float GetNextSampleMonoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
44     if (INTERPOLATE) return Interpolate1StepMonoCPP(pSrc, Pos, Pitch);
45     else { // no pitch, so no interpolation necessary
46     int pos_int = (int) *Pos;
47     *Pos += 1.0;
48     return pSrc [pos_int];
49     }
50     }
51    
52     inline static stereo_sample_t GetNextSampleStereoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
53     if (INTERPOLATE) return Interpolate1StepStereoCPP(pSrc, Pos, Pitch);
54     else { // no pitch, so no interpolation necessary
55     int pos_int = (int) *Pos;
56     pos_int <<= 1;
57     *Pos += 1.0;
58     stereo_sample_t samplePoint;
59     samplePoint.left = pSrc[pos_int];
60     samplePoint.right = pSrc[pos_int+1];
61     return samplePoint;
62     }
63     }
64    
65     inline static void GetNext4SamplesMonoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
66     if (INTERPOLATE) Interpolate4StepsMonoMMXSSE(pSrc, Pos, Pitch);
67     else { // no pitch, so no interpolation necessary
68     const float __4f = 4.0f;
69     __asm__ __volatile__ (
70     "movss (%1), %%xmm5 # load Pos\n\t"
71     "cvtss2si %%xmm5, %%edi # int(Pos)\n\t"
72     "addss %2, %%xmm5 # Pos += 4.0f\n\t"
73     "movswl (%0,%%edi,2), %%eax # load sample 0\n\t"
74     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
75     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
76     "movswl 2(%0,%%edi,2), %%edx # load sample 1\n\t"
77     "cvtsi2ss %%edx, %%xmm2 # convert to float\n\t"
78     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
79     "movss %%xmm5, (%1) # update Pos\n\t"
80     "movswl 4(%0,%%edi,2), %%eax # load sample 2\n\t"
81     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
82     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
83     "movswl 6(%0,%%edi,2), %%edx # load sample 3\n\t"
84     "cvtsi2ss %%edx, %%xmm2 # convert to float\n\t"
85     "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
86     :: "r" (pSrc), "r" (Pos), "m" (__4f)
87     : "%eax", "%edx", "%edi"
88     );
89     }
90     }
91    
92     inline static void GetNext4SamplesStereoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
93     if (INTERPOLATE) {
94     Interpolate4StepsStereoMMXSSE(pSrc, Pos, Pitch);
95     //EMMS;
96     } else { // no pitch, so no interpolation necessary
97     const float __4f = 4.0f;
98     __asm__ __volatile__ (
99     "movss (%1), %%xmm5 # load Pos\n\t"
100     "cvtss2si %%xmm5, %%edi # int(Pos)\n\t"
101     "addss %2, %%xmm5 # Pos += 4.0f\n\t"
102     "movswl (%0, %%edi,4), %%eax # load sample 0 (left)\n\t"
103     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
104     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
105     "movss %%xmm5, (%1) # update Pos\n\t"
106     "movswl 2(%0, %%edi,4), %%edx # load sample 0 (left)\n\t"
107     "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
108     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
109     "movswl 4(%0, %%edi,4), %%eax # load sample 1 (left)\n\t"
110     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
111     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
112     "movswl 6(%0, %%edi,4), %%edx # load sample 1 (right)\n\t"
113     "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
114     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
115     "movswl 8(%0, %%edi,4), %%eax # load sample 2 (left)\n\t"
116     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
117     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
118     "movswl 10(%0, %%edi,4), %%edx # load sample 2 (right)\n\t"
119     "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
120     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
121     "movswl 12(%0, %%edi,4), %%eax # load sample 3 (left)\n\t"
122     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
123     "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
124     "movswl 14(%0, %%edi,4), %%edx # load sample 3 (right)\n\t"
125     "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
126     "shufps $0x1b, %%xmm3, %%xmm3 # swap to correct order\n\t"
127     :: "r" (pSrc), "r" (Pos), "m" (__4f)
128     : "%eax", "%edx", "%edi"
129     );
130     }
131     }
132    
133     protected:
134    
135     inline static float Interpolate1StepMonoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
136     int pos_int = (int) *Pos; // integer position
137     float pos_fract = *Pos - pos_int; // fractional part of position
138    
139     #if USE_LINEAR_INTERPOLATION
140     float samplePoint = pSrc[pos_int] + pos_fract * (pSrc[pos_int+1] - pSrc[pos_int]);
141     #else // polynomial interpolation
142     float xm1 = pSrc[pos_int];
143     float x0 = pSrc[pos_int+1];
144     float x1 = pSrc[pos_int+2];
145     float x2 = pSrc[pos_int+3];
146     float a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f;
147     float b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f;
148     float c = (x1 - xm1) * 0.5f;
149     float samplePoint = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0;
150     #endif // USE_LINEAR_INTERPOLATION
151    
152     *Pos += Pitch;
153     return samplePoint;
154     }
155    
156     inline static stereo_sample_t Interpolate1StepStereoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
157     int pos_int = (int) *Pos; // integer position
158     float pos_fract = *Pos - pos_int; // fractional part of position
159     pos_int <<= 1;
160    
161     stereo_sample_t samplePoint;
162    
163     #if USE_LINEAR_INTERPOLATION
164     // left channel
165     samplePoint.left = pSrc[pos_int] + pos_fract * (pSrc[pos_int+2] - pSrc[pos_int]);
166     // right channel
167     samplePoint.right = pSrc[pos_int+1] + pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1]);
168     #else // polynomial interpolation
169     // calculate left channel
170     float xm1 = pSrc[pos_int];
171     float x0 = pSrc[pos_int+2];
172     float x1 = pSrc[pos_int+4];
173     float x2 = pSrc[pos_int+6];
174     float a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f;
175     float b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f;
176     float c = (x1 - xm1) * 0.5f;
177     samplePoint.left = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0;
178    
179     //calculate right channel
180     xm1 = pSrc[pos_int+1];
181     x0 = pSrc[pos_int+3];
182     x1 = pSrc[pos_int+5];
183     x2 = pSrc[pos_int+7];
184     a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f;
185     b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f;
186     c = (x1 - xm1) * 0.5f;
187     samplePoint.right = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0;
188     #endif // USE_LINEAR_INTERPOLATION
189    
190     *Pos += Pitch;
191     return samplePoint;
192     }
193    
194     // TODO: no support for cubic interpolation yet
195     inline static void Interpolate4StepsMonoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
196     /* calculate playback position of each of the 4 samples by adding the associated pitch */
197     __asm__ __volatile__ (
198     "movss (%0),%%xmm0 # sample position of sample[0] -> xmm0[0]\n\t"
199     "movss %1,%%xmm1 # copy pitch -> xmm1[0]\n\t"
200     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
201     "addss %%xmm1,%%xmm0 # calculate sample position of sample[1]\n\t"
202     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
203     "addss %%xmm1,%%xmm0 # calculate sample position of sample[2]\n\t"
204     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
205     "addss %%xmm1,%%xmm0 # calculate sample position of sample[3]\n\t"
206     "movss %%xmm0,%%xmm2 # xmm0[0] -> xmm2[0]\n\t"
207     "addss %%xmm1,%%xmm2 # calculate initial sample position for the next 4-sample cycle\n\t"
208     "movss %%xmm2,(%0) # update 'Pos'\n\t"
209     "shufps $0x1b,%%xmm0,%%xmm0 # swap, so that xmm0[0]=sample pos 0, xmm0[1]=sample pos 1,...\n\t"
210     "cvttps2pi %%xmm0,%%mm4 # int(xmm0[0-1]) -> mm4\n\t"
211     "shufps $0xe4,%%xmm0,%%xmm1 # xmm0[2-3] -> xmm1[2-3]\n\t"
212     "shufps $0x0e,%%xmm1,%%xmm1 # xmm1[2-3] -> xmm1[0-1]\n\t"
213     "cvttps2pi %%xmm1,%%mm5 # int(xmm1[0-1]) -> mm5\n\t"
214     "cvtpi2ps %%mm5,%%xmm1 # double(mm5) -> xmm1[0-1]\n\t"
215     "shufps $0x44,%%xmm1,%%xmm1 # shift lower 2 FPs up to the upper 2 cells\n\t"
216     "cvtpi2ps %%mm4,%%xmm1 # double(mm4) -> xmm1[0-1]\n\t"
217     "subps %%xmm1,%%xmm0 # xmm0[1-3] = xmm0[1-3] - xmm1[1-3]\n\t"
218     :
219     : "r" (Pos), /* %0 */
220     "m" (Pitch) /* %1 */
221     : "%xmm0", /* holds fractional position (0.0 <= x < 1.0) of sample 0-3 at the end */
222     "%xmm1", /* holds integer position (back converted to SPFP) of sample 0-3 at the end */
223     "mm4", /* holds integer position of sample 0-1 at the end */
224     "mm5", /* holds integer position of sample 2-3 at the end */
225     "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
226     );
227     /* get sample values of pSrc[pos_int] and pSrc[pos_int+1] of the 4 samples */
228     __asm__ __volatile__ (
229     "movd %%mm4,%%edi # sample position of sample 0\n\t"
230     "psrlq $32,%%mm4 # mm4 >> 32\n\t"
231     "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 0)\n\t"
232     "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 0+1)\n\t"
233     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
234     "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
235     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
236     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
237     "movd %%mm4,%%edi # sample position of sample 1\n\t"
238     "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 1)\n\t"
239     "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 1+1)\n\t"
240     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
241     "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
242     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
243     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
244     "movd %%mm5,%%edi # sample position of sample 2\n\t"
245     "psrlq $32,%%mm5 # mm5 >> 32\n\t"
246     "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 2)\n\t"
247     "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 2+1)\n\t"
248     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
249     "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
250     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
251     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
252     "movd %%mm5,%%edi # sample position of sample 2\n\t"
253     "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 3)\n\t"
254     "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 3+1)\n\t"
255     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
256     "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
257     "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
258     "shufps $0x1b, %%xmm3, %%xmm3 # swap to correct order\n\t"
259     : /* no output */
260     : "S" (pSrc) /* %0 - sample read position */
261     : "%eax", "%ecx", /*"%edx",*/ "%edi",
262     "%xmm2", /* holds pSrc[int_pos] of the 4 samples at the end */
263     "%xmm3", /* holds pSrc[int_pos+1] of the 4 samples at the end */
264     "mm4", /* holds integer position of sample 0-1 at the end */
265     "mm5", /* holds integer position of sample 2-3 at the end */
266     "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
267     );
268     /* linear interpolation of the 4 samples simultaniously */
269     __asm__ __volatile__ (
270     "subps %%xmm2,%%xmm3 # xmm3 = pSrc[pos_int+1] - pSrc[pos_int]\n\t"
271     "mulps %%xmm0,%%xmm3 # xmm3 = pos_fract * (pSrc[pos_int+1] - pSrc[pos_int])\n\t"
272     "addps %%xmm3,%%xmm2 # xmm2 = pSrc[pos_int] + (pos_fract * (pSrc[pos_int+1] - pSrc[pos_int]))\n\t"
273     : /* no output */
274     : /* no input */
275     : "%xmm2" /* holds linear interpolated sample point (of all 4 samples) at the end */
276     );
277     }
278    
279     // TODO: no support for cubic interpolation yet
280     inline static void Interpolate4StepsStereoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
281     /* calculate playback position of each of the 4 samples by adding the associated pitch */
282     __asm__ __volatile__ (
283     "movss (%0),%%xmm0 # sample position of sample[0] -> xmm0[0]\n\t"
284     "movss %1,%%xmm1 # copy pitch -> xmm1[0]\n\t"
285     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
286     "addss %%xmm1,%%xmm0 # calculate sample position of sample[1]\n\t"
287     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
288     "addss %%xmm1,%%xmm0 # calculate sample position of sample[2]\n\t"
289     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
290     "addss %%xmm1,%%xmm0 # calculate sample position of sample[3]\n\t"
291     "movss %%xmm0,%%xmm2 # xmm0[0] -> xmm2[0]\n\t"
292     "addss %%xmm1,%%xmm2 # calculate initial sample position for the next 4-sample cycle\n\t"
293     "movss %%xmm2,(%0) # update 'Pos'\n\t"
294     "shufps $0x1b,%%xmm0,%%xmm0 # swap, so that xmm0[0]=sample pos 0, xmm0[1]=sample pos 1,...\n\t"
295     "cvttps2pi %%xmm0,%%mm4 # int(xmm0[0-1]) -> mm4\n\t"
296     "shufps $0xe4,%%xmm0,%%xmm1 # xmm0[2-3] -> xmm1[2-3]\n\t"
297     "shufps $0x0e,%%xmm1,%%xmm1 # xmm1[2-3] -> xmm1[0-1]\n\t"
298     "cvttps2pi %%xmm1,%%mm5 # int(xmm1[0-1]) -> mm5\n\t"
299     "cvtpi2ps %%mm5,%%xmm1 # double(mm5) -> xmm1[0-1]\n\t"
300     "shufps $0x44,%%xmm1,%%xmm1 # shift lower 2 FPs up to the upper 2 cells\n\t"
301     "cvtpi2ps %%mm4,%%xmm1 # double(mm4) -> xmm1[0-1]\n\t"
302     "subps %%xmm1,%%xmm0 # xmm0[1-3] = xmm0[1-3] - xmm1[1-3]\n\t"
303     :
304     : "r" (Pos), /* %0 */
305     "m" (Pitch) /* %1 */
306     : "%xmm0", /* holds fractional position (0.0 <= x < 1.0) of sample 0-3 at the end */
307     "%xmm1", /* holds integer position (back converted to SPFP) of sample 0-3 at the end */
308     "mm4", /* holds integer position of sample 0-1 at the end */
309     "mm5", /* holds integer position of sample 2-3 at the end */
310     "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
311     );
312    
313     /* get sample values of pSrc[pos_int], pSrc[pos_int+1], pSrc[pos_int+2] and pSrc[pos_int+3] of the 4 samples */
314     __asm__ __volatile__ (
315     "xorl %%eax,%%eax # clear eax\n\t"
316     "xorl %%edx,%%edx # clear edx\n\t"
317     "movd %%mm4,%%edi # sample position of sample 0\n\t"
318     "psrlq $32,%%mm4 # mm4 >> 32\n\t"
319     "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 0)\n\t"
320     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
321     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
322     "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 0+1)\n\t"
323     "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
324     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
325     "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 0+2)\n\t"
326     "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
327     "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
328     "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 0+3)\n\t"
329     "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
330     "movd %%mm4,%%edi # sample position of sample 1\n\t"
331     "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
332     "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 1)\n\t"
333     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
334     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
335     "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 1+1)\n\t"
336     "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
337     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
338     "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 1+2)\n\t"
339     "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
340     "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
341     "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 1+3)\n\t"
342     "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
343     "movd %%mm5,%%edi # sample position of sample 2\n\t"
344     "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
345     "psrlq $32,%%mm5 # mm5 >> 32\n\t"
346     "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 2)\n\t"
347     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
348     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
349     "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 2+1)\n\t"
350     "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
351     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
352     "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 2+2)\n\t"
353     "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
354     "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
355     "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 2+3)\n\t"
356     "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
357     "movd %%mm5,%%edi # sample position of sample 3\n\t"
358     "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
359     "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 3)\n\t"
360     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
361     "shufps $0x1b, %%xmm2, %%xmm2 # shift up\n\t"
362     "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 3+1)\n\t"
363     "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
364     "shufps $0x1b, %%xmm3, %%xmm3 # shift up\n\t"
365     "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 3+2)\n\t"
366     "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
367     "shufps $0x1b, %%xmm4, %%xmm4 # swap to correct order\n\t"
368     "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 3+3)\n\t"
369     "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
370     "shufps $0x1b, %%xmm5, %%xmm5 # swap to correct order\n\t"
371     : /* no output */
372     : "S" (pSrc) /* %0 - sample read position */
373     : "%eax", "%edx", "%edi",
374     "xmm2", /* holds pSrc[int_pos] of the 4 samples at the end */
375     "xmm3", /* holds pSrc[int_pos+1] of the 4 samples at the end */
376     "xmm4", /* holds pSrc[int_pos+2] of the 4 samples at the end */
377     "xmm5", /* holds pSrc[int_pos+3] of the 4 samples at the end */
378     "mm4", /* holds integer position of sample 0-1 at the end */
379     "mm5", /* holds integer position of sample 2-3 at the end */
380     "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
381     );
382     /* linear interpolation of the 4 samples (left & right channel) simultaniously */
383     __asm__ __volatile__ (
384     "subps %%xmm2,%%xmm4 # xmm4 = pSrc[pos_int+2] - pSrc[pos_int] (left channel)\n\t"
385     "mulps %%xmm0,%%xmm4 # xmm4 = pos_fract * (pSrc[pos_int+2] - pSrc[pos_int]) (left channel)\n\t"
386     "addps %%xmm4,%%xmm2 # xmm2 = pSrc[pos_int] + (pos_fract * (pSrc[pos_int+2] - pSrc[pos_int])) (left channel)\n\t"
387     "subps %%xmm3,%%xmm5 # xmm5 = pSrc[pos_int+3] - pSrc[pos_int+1] (right channel)\n\t"
388     "mulps %%xmm0,%%xmm5 # xmm5 = pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1]) (right channel)\n\t"
389     "addps %%xmm5,%%xmm3 # xmm3 = pSrc[pos_int+1] + (pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1])) (right channel)\n\t"
390     : /* no output */
391     : /* no input */
392     : "%xmm2", /* holds linear interpolated sample of left channel (of all 4 samples) at the end */
393     "%xmm3" /* holds linear interpolated sample of right channel (of all 4 samples) at the end */
394     );
395     }
396     };
397    
398     } // namespace LinuxSampler
399    
400     #endif // __LS_RESAMPLER_H__

  ViewVC Help
Powered by ViewVC