/[svn]/linuxsampler/trunk/src/engines/common/Resampler.h
ViewVC logotype

Annotation of /linuxsampler/trunk/src/engines/common/Resampler.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 617 - (hide annotations) (download) (as text)
Wed Jun 8 21:00:06 2005 UTC (18 years, 10 months ago) by schoenebeck
File MIME type: text/x-c++hdr
File size: 27372 byte(s)
* hand-crafted assembly optimization code can be disabled with
  './configure --disable-asm' (definitely not recommended)

1 schoenebeck 320 /***************************************************************************
2     * *
3     * LinuxSampler - modular, streaming capable sampler *
4     * *
5     * Copyright (C) 2003, 2004 by Benno Senoner and Christian Schoenebeck *
6 schoenebeck 617 * Copyright (C) 2005 Christian Schoenebeck *
7 schoenebeck 320 * *
8     * This program is free software; you can redistribute it and/or modify *
9     * it under the terms of the GNU General Public License as published by *
10     * the Free Software Foundation; either version 2 of the License, or *
11     * (at your option) any later version. *
12     * *
13     * This program is distributed in the hope that it will be useful, *
14     * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16     * GNU General Public License for more details. *
17     * *
18     * You should have received a copy of the GNU General Public License *
19     * along with this program; if not, write to the Free Software *
20     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, *
21     * MA 02111-1307 USA *
22     ***************************************************************************/
23    
24     #ifndef __LS_RESAMPLER_H__
25     #define __LS_RESAMPLER_H__
26    
27     #include "../../common/global.h"
28    
29     // TODO: cubic interpolation is not yet supported by the MMX/SSE(1) version though
30     #ifndef USE_LINEAR_INTERPOLATION
31     # define USE_LINEAR_INTERPOLATION 1 ///< set to 0 if you prefer cubic interpolation (slower, better quality)
32     #endif
33    
34     namespace LinuxSampler {
35    
/** @brief One stereo frame
 *
 * Bundles the signal value of a single sample point for both channels,
 * so that a left/right pair can be handed around (and returned) as one
 * value.
 */
struct stereo_sample_t {
    float left;  ///< signal value of the left channel
    float right; ///< signal value of the right channel
};
45    
46 schoenebeck 563 /** @brief Resampler Template
47     *
48     * This template provides pure C++ and MMX/SSE assembly implementations
49     * for linear and cubic interpolation for pitching a mono or stereo
50     * input signal.
51     */
52 schoenebeck 320 template<bool INTERPOLATE>
53     class Resampler {
54     public:
55     inline static float GetNextSampleMonoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
56     if (INTERPOLATE) return Interpolate1StepMonoCPP(pSrc, Pos, Pitch);
57     else { // no pitch, so no interpolation necessary
58     int pos_int = (int) *Pos;
59     *Pos += 1.0;
60     return pSrc [pos_int];
61     }
62     }
63    
64     inline static stereo_sample_t GetNextSampleStereoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
65     if (INTERPOLATE) return Interpolate1StepStereoCPP(pSrc, Pos, Pitch);
66     else { // no pitch, so no interpolation necessary
67     int pos_int = (int) *Pos;
68     pos_int <<= 1;
69     *Pos += 1.0;
70     stereo_sample_t samplePoint;
71     samplePoint.left = pSrc[pos_int];
72     samplePoint.right = pSrc[pos_int+1];
73     return samplePoint;
74     }
75     }
76    
77 schoenebeck 617 #if CONFIG_ASM && ARCH_X86
78 schoenebeck 320 inline static void GetNext4SamplesMonoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
79     if (INTERPOLATE) Interpolate4StepsMonoMMXSSE(pSrc, Pos, Pitch);
80     else { // no pitch, so no interpolation necessary
81     const float __4f = 4.0f;
82     __asm__ __volatile__ (
83     "movss (%1), %%xmm5 # load Pos\n\t"
84     "cvtss2si %%xmm5, %%edi # int(Pos)\n\t"
85     "addss %2, %%xmm5 # Pos += 4.0f\n\t"
86     "movswl (%0,%%edi,2), %%eax # load sample 0\n\t"
87     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
88     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
89     "movswl 2(%0,%%edi,2), %%edx # load sample 1\n\t"
90     "cvtsi2ss %%edx, %%xmm2 # convert to float\n\t"
91     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
92     "movss %%xmm5, (%1) # update Pos\n\t"
93     "movswl 4(%0,%%edi,2), %%eax # load sample 2\n\t"
94     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
95     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
96     "movswl 6(%0,%%edi,2), %%edx # load sample 3\n\t"
97     "cvtsi2ss %%edx, %%xmm2 # convert to float\n\t"
98     "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
99     :: "r" (pSrc), "r" (Pos), "m" (__4f)
100     : "%eax", "%edx", "%edi"
101     );
102     }
103     }
104    
105     inline static void GetNext4SamplesStereoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
106     if (INTERPOLATE) {
107     Interpolate4StepsStereoMMXSSE(pSrc, Pos, Pitch);
108     //EMMS;
109     } else { // no pitch, so no interpolation necessary
110     const float __4f = 4.0f;
111     __asm__ __volatile__ (
112     "movss (%1), %%xmm5 # load Pos\n\t"
113     "cvtss2si %%xmm5, %%edi # int(Pos)\n\t"
114     "addss %2, %%xmm5 # Pos += 4.0f\n\t"
115     "movswl (%0, %%edi,4), %%eax # load sample 0 (left)\n\t"
116     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
117     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
118     "movss %%xmm5, (%1) # update Pos\n\t"
119     "movswl 2(%0, %%edi,4), %%edx # load sample 0 (left)\n\t"
120     "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
121     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
122     "movswl 4(%0, %%edi,4), %%eax # load sample 1 (left)\n\t"
123     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
124     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
125     "movswl 6(%0, %%edi,4), %%edx # load sample 1 (right)\n\t"
126     "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
127     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
128     "movswl 8(%0, %%edi,4), %%eax # load sample 2 (left)\n\t"
129     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
130     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
131     "movswl 10(%0, %%edi,4), %%edx # load sample 2 (right)\n\t"
132     "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
133     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
134     "movswl 12(%0, %%edi,4), %%eax # load sample 3 (left)\n\t"
135     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
136     "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
137     "movswl 14(%0, %%edi,4), %%edx # load sample 3 (right)\n\t"
138     "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
139     "shufps $0x1b, %%xmm3, %%xmm3 # swap to correct order\n\t"
140     :: "r" (pSrc), "r" (Pos), "m" (__4f)
141     : "%eax", "%edx", "%edi"
142     );
143     }
144     }
145 schoenebeck 617 #endif // CONFIG_ASM && ARCH_X86
146 schoenebeck 320
147     protected:
148    
149     inline static float Interpolate1StepMonoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
150     int pos_int = (int) *Pos; // integer position
151     float pos_fract = *Pos - pos_int; // fractional part of position
152    
153     #if USE_LINEAR_INTERPOLATION
154     float samplePoint = pSrc[pos_int] + pos_fract * (pSrc[pos_int+1] - pSrc[pos_int]);
155     #else // polynomial interpolation
156     float xm1 = pSrc[pos_int];
157     float x0 = pSrc[pos_int+1];
158     float x1 = pSrc[pos_int+2];
159     float x2 = pSrc[pos_int+3];
160     float a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f;
161     float b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f;
162     float c = (x1 - xm1) * 0.5f;
163     float samplePoint = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0;
164     #endif // USE_LINEAR_INTERPOLATION
165    
166     *Pos += Pitch;
167     return samplePoint;
168     }
169    
170     inline static stereo_sample_t Interpolate1StepStereoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
171     int pos_int = (int) *Pos; // integer position
172     float pos_fract = *Pos - pos_int; // fractional part of position
173     pos_int <<= 1;
174    
175     stereo_sample_t samplePoint;
176    
177     #if USE_LINEAR_INTERPOLATION
178     // left channel
179     samplePoint.left = pSrc[pos_int] + pos_fract * (pSrc[pos_int+2] - pSrc[pos_int]);
180     // right channel
181     samplePoint.right = pSrc[pos_int+1] + pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1]);
182     #else // polynomial interpolation
183     // calculate left channel
184     float xm1 = pSrc[pos_int];
185     float x0 = pSrc[pos_int+2];
186     float x1 = pSrc[pos_int+4];
187     float x2 = pSrc[pos_int+6];
188     float a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f;
189     float b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f;
190     float c = (x1 - xm1) * 0.5f;
191     samplePoint.left = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0;
192    
193     //calculate right channel
194     xm1 = pSrc[pos_int+1];
195     x0 = pSrc[pos_int+3];
196     x1 = pSrc[pos_int+5];
197     x2 = pSrc[pos_int+7];
198     a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f;
199     b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f;
200     c = (x1 - xm1) * 0.5f;
201     samplePoint.right = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0;
202     #endif // USE_LINEAR_INTERPOLATION
203    
204     *Pos += Pitch;
205     return samplePoint;
206     }
207    
208 schoenebeck 617 #if CONFIG_ASM && ARCH_X86
209 schoenebeck 320 // TODO: no support for cubic interpolation yet
210     inline static void Interpolate4StepsMonoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
211     /* calculate playback position of each of the 4 samples by adding the associated pitch */
212     __asm__ __volatile__ (
213     "movss (%0),%%xmm0 # sample position of sample[0] -> xmm0[0]\n\t"
214     "movss %1,%%xmm1 # copy pitch -> xmm1[0]\n\t"
215     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
216     "addss %%xmm1,%%xmm0 # calculate sample position of sample[1]\n\t"
217     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
218     "addss %%xmm1,%%xmm0 # calculate sample position of sample[2]\n\t"
219     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
220     "addss %%xmm1,%%xmm0 # calculate sample position of sample[3]\n\t"
221     "movss %%xmm0,%%xmm2 # xmm0[0] -> xmm2[0]\n\t"
222     "addss %%xmm1,%%xmm2 # calculate initial sample position for the next 4-sample cycle\n\t"
223     "movss %%xmm2,(%0) # update 'Pos'\n\t"
224     "shufps $0x1b,%%xmm0,%%xmm0 # swap, so that xmm0[0]=sample pos 0, xmm0[1]=sample pos 1,...\n\t"
225     "cvttps2pi %%xmm0,%%mm4 # int(xmm0[0-1]) -> mm4\n\t"
226     "shufps $0xe4,%%xmm0,%%xmm1 # xmm0[2-3] -> xmm1[2-3]\n\t"
227     "shufps $0x0e,%%xmm1,%%xmm1 # xmm1[2-3] -> xmm1[0-1]\n\t"
228     "cvttps2pi %%xmm1,%%mm5 # int(xmm1[0-1]) -> mm5\n\t"
229     "cvtpi2ps %%mm5,%%xmm1 # double(mm5) -> xmm1[0-1]\n\t"
230     "shufps $0x44,%%xmm1,%%xmm1 # shift lower 2 FPs up to the upper 2 cells\n\t"
231     "cvtpi2ps %%mm4,%%xmm1 # double(mm4) -> xmm1[0-1]\n\t"
232     "subps %%xmm1,%%xmm0 # xmm0[1-3] = xmm0[1-3] - xmm1[1-3]\n\t"
233     :
234     : "r" (Pos), /* %0 */
235     "m" (Pitch) /* %1 */
236     : "%xmm0", /* holds fractional position (0.0 <= x < 1.0) of sample 0-3 at the end */
237     "%xmm1", /* holds integer position (back converted to SPFP) of sample 0-3 at the end */
238     "mm4", /* holds integer position of sample 0-1 at the end */
239     "mm5", /* holds integer position of sample 2-3 at the end */
240     "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
241     );
242     /* get sample values of pSrc[pos_int] and pSrc[pos_int+1] of the 4 samples */
243     __asm__ __volatile__ (
244     "movd %%mm4,%%edi # sample position of sample 0\n\t"
245     "psrlq $32,%%mm4 # mm4 >> 32\n\t"
246     "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 0)\n\t"
247     "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 0+1)\n\t"
248     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
249     "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
250     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
251     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
252     "movd %%mm4,%%edi # sample position of sample 1\n\t"
253     "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 1)\n\t"
254     "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 1+1)\n\t"
255     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
256     "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
257     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
258     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
259     "movd %%mm5,%%edi # sample position of sample 2\n\t"
260     "psrlq $32,%%mm5 # mm5 >> 32\n\t"
261     "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 2)\n\t"
262     "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 2+1)\n\t"
263     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
264     "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
265     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
266     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
267     "movd %%mm5,%%edi # sample position of sample 2\n\t"
268     "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 3)\n\t"
269     "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 3+1)\n\t"
270     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
271     "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
272     "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
273     "shufps $0x1b, %%xmm3, %%xmm3 # swap to correct order\n\t"
274     : /* no output */
275     : "S" (pSrc) /* %0 - sample read position */
276     : "%eax", "%ecx", /*"%edx",*/ "%edi",
277     "%xmm2", /* holds pSrc[int_pos] of the 4 samples at the end */
278     "%xmm3", /* holds pSrc[int_pos+1] of the 4 samples at the end */
279     "mm4", /* holds integer position of sample 0-1 at the end */
280     "mm5", /* holds integer position of sample 2-3 at the end */
281     "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
282     );
283     /* linear interpolation of the 4 samples simultaniously */
284     __asm__ __volatile__ (
285     "subps %%xmm2,%%xmm3 # xmm3 = pSrc[pos_int+1] - pSrc[pos_int]\n\t"
286     "mulps %%xmm0,%%xmm3 # xmm3 = pos_fract * (pSrc[pos_int+1] - pSrc[pos_int])\n\t"
287     "addps %%xmm3,%%xmm2 # xmm2 = pSrc[pos_int] + (pos_fract * (pSrc[pos_int+1] - pSrc[pos_int]))\n\t"
288     : /* no output */
289     : /* no input */
290     : "%xmm2" /* holds linear interpolated sample point (of all 4 samples) at the end */
291     );
292     }
293    
294     // TODO: no support for cubic interpolation yet
295     inline static void Interpolate4StepsStereoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
296     /* calculate playback position of each of the 4 samples by adding the associated pitch */
297     __asm__ __volatile__ (
298     "movss (%0),%%xmm0 # sample position of sample[0] -> xmm0[0]\n\t"
299     "movss %1,%%xmm1 # copy pitch -> xmm1[0]\n\t"
300     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
301     "addss %%xmm1,%%xmm0 # calculate sample position of sample[1]\n\t"
302     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
303     "addss %%xmm1,%%xmm0 # calculate sample position of sample[2]\n\t"
304     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
305     "addss %%xmm1,%%xmm0 # calculate sample position of sample[3]\n\t"
306     "movss %%xmm0,%%xmm2 # xmm0[0] -> xmm2[0]\n\t"
307     "addss %%xmm1,%%xmm2 # calculate initial sample position for the next 4-sample cycle\n\t"
308     "movss %%xmm2,(%0) # update 'Pos'\n\t"
309     "shufps $0x1b,%%xmm0,%%xmm0 # swap, so that xmm0[0]=sample pos 0, xmm0[1]=sample pos 1,...\n\t"
310     "cvttps2pi %%xmm0,%%mm4 # int(xmm0[0-1]) -> mm4\n\t"
311     "shufps $0xe4,%%xmm0,%%xmm1 # xmm0[2-3] -> xmm1[2-3]\n\t"
312     "shufps $0x0e,%%xmm1,%%xmm1 # xmm1[2-3] -> xmm1[0-1]\n\t"
313     "cvttps2pi %%xmm1,%%mm5 # int(xmm1[0-1]) -> mm5\n\t"
314     "cvtpi2ps %%mm5,%%xmm1 # double(mm5) -> xmm1[0-1]\n\t"
315     "shufps $0x44,%%xmm1,%%xmm1 # shift lower 2 FPs up to the upper 2 cells\n\t"
316     "cvtpi2ps %%mm4,%%xmm1 # double(mm4) -> xmm1[0-1]\n\t"
317     "subps %%xmm1,%%xmm0 # xmm0[1-3] = xmm0[1-3] - xmm1[1-3]\n\t"
318     :
319     : "r" (Pos), /* %0 */
320     "m" (Pitch) /* %1 */
321     : "%xmm0", /* holds fractional position (0.0 <= x < 1.0) of sample 0-3 at the end */
322     "%xmm1", /* holds integer position (back converted to SPFP) of sample 0-3 at the end */
323     "mm4", /* holds integer position of sample 0-1 at the end */
324     "mm5", /* holds integer position of sample 2-3 at the end */
325     "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
326     );
327    
328     /* get sample values of pSrc[pos_int], pSrc[pos_int+1], pSrc[pos_int+2] and pSrc[pos_int+3] of the 4 samples */
329     __asm__ __volatile__ (
330     "xorl %%eax,%%eax # clear eax\n\t"
331     "xorl %%edx,%%edx # clear edx\n\t"
332     "movd %%mm4,%%edi # sample position of sample 0\n\t"
333     "psrlq $32,%%mm4 # mm4 >> 32\n\t"
334     "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 0)\n\t"
335     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
336     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
337     "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 0+1)\n\t"
338     "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
339     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
340     "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 0+2)\n\t"
341     "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
342     "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
343     "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 0+3)\n\t"
344     "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
345     "movd %%mm4,%%edi # sample position of sample 1\n\t"
346     "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
347     "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 1)\n\t"
348     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
349     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
350     "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 1+1)\n\t"
351     "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
352     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
353     "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 1+2)\n\t"
354     "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
355     "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
356     "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 1+3)\n\t"
357     "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
358     "movd %%mm5,%%edi # sample position of sample 2\n\t"
359     "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
360     "psrlq $32,%%mm5 # mm5 >> 32\n\t"
361     "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 2)\n\t"
362     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
363     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
364     "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 2+1)\n\t"
365     "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
366     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
367     "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 2+2)\n\t"
368     "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
369     "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
370     "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 2+3)\n\t"
371     "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
372     "movd %%mm5,%%edi # sample position of sample 3\n\t"
373     "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
374     "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 3)\n\t"
375     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
376     "shufps $0x1b, %%xmm2, %%xmm2 # shift up\n\t"
377     "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 3+1)\n\t"
378     "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
379     "shufps $0x1b, %%xmm3, %%xmm3 # shift up\n\t"
380     "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 3+2)\n\t"
381     "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
382     "shufps $0x1b, %%xmm4, %%xmm4 # swap to correct order\n\t"
383     "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 3+3)\n\t"
384     "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
385     "shufps $0x1b, %%xmm5, %%xmm5 # swap to correct order\n\t"
386     : /* no output */
387     : "S" (pSrc) /* %0 - sample read position */
388     : "%eax", "%edx", "%edi",
389     "xmm2", /* holds pSrc[int_pos] of the 4 samples at the end */
390     "xmm3", /* holds pSrc[int_pos+1] of the 4 samples at the end */
391     "xmm4", /* holds pSrc[int_pos+2] of the 4 samples at the end */
392     "xmm5", /* holds pSrc[int_pos+3] of the 4 samples at the end */
393     "mm4", /* holds integer position of sample 0-1 at the end */
394     "mm5", /* holds integer position of sample 2-3 at the end */
395     "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
396     );
397     /* linear interpolation of the 4 samples (left & right channel) simultaniously */
398     __asm__ __volatile__ (
399     "subps %%xmm2,%%xmm4 # xmm4 = pSrc[pos_int+2] - pSrc[pos_int] (left channel)\n\t"
400     "mulps %%xmm0,%%xmm4 # xmm4 = pos_fract * (pSrc[pos_int+2] - pSrc[pos_int]) (left channel)\n\t"
401     "addps %%xmm4,%%xmm2 # xmm2 = pSrc[pos_int] + (pos_fract * (pSrc[pos_int+2] - pSrc[pos_int])) (left channel)\n\t"
402     "subps %%xmm3,%%xmm5 # xmm5 = pSrc[pos_int+3] - pSrc[pos_int+1] (right channel)\n\t"
403     "mulps %%xmm0,%%xmm5 # xmm5 = pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1]) (right channel)\n\t"
404     "addps %%xmm5,%%xmm3 # xmm3 = pSrc[pos_int+1] + (pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1])) (right channel)\n\t"
405     : /* no output */
406     : /* no input */
407     : "%xmm2", /* holds linear interpolated sample of left channel (of all 4 samples) at the end */
408     "%xmm3" /* holds linear interpolated sample of right channel (of all 4 samples) at the end */
409     );
410     }
411 schoenebeck 617 #endif // CONFIG_ASM && ARCH_X86
412 schoenebeck 320 };
413    
414     } // namespace LinuxSampler
415    
416     #endif // __LS_RESAMPLER_H__

  ViewVC Help
Powered by ViewVC