/[svn]/linuxsampler/trunk/src/engines/common/Resampler.h
ViewVC logotype

Annotation of /linuxsampler/trunk/src/engines/common/Resampler.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1484 - (hide annotations) (download) (as text)
Thu Nov 15 13:24:41 2007 UTC (16 years, 5 months ago) by senoner
File MIME type: text/x-c++hdr
File size: 28624 byte(s)
* Stream.h: added a missing divide by BytesPerSample in GetWriteSpace()
* since this function is currently only used in the stream's
* qsort() compare function, it did not trigger any bugs
* Resampler.h, Synthesizer.h:
* cubic interpolation now works in 24bit mode too
* faster method to read 24bit words on little endian machines (x86):
*  replaced 3 byte reads + shifts with one unaligned 32bit read and shift

1 schoenebeck 320 /***************************************************************************
2     * *
3     * LinuxSampler - modular, streaming capable sampler *
4     * *
5     * Copyright (C) 2003, 2004 by Benno Senoner and Christian Schoenebeck *
6 schoenebeck 1259 * Copyright (C) 2005 - 2007 Christian Schoenebeck *
7 schoenebeck 320 * *
8     * This program is free software; you can redistribute it and/or modify *
9     * it under the terms of the GNU General Public License as published by *
10     * the Free Software Foundation; either version 2 of the License, or *
11     * (at your option) any later version. *
12     * *
13     * This program is distributed in the hope that it will be useful, *
14     * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
16     * GNU General Public License for more details. *
17     * *
18     * You should have received a copy of the GNU General Public License *
19     * along with this program; if not, write to the Free Software *
20     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, *
21     * MA 02111-1307 USA *
22     ***************************************************************************/
23    
24 schoenebeck 1259 // Note: the assembly code is currently disabled, as it doesn't fit into
25     // the new synthesis core introduced by LS 0.4.0
26    
27 schoenebeck 320 #ifndef __LS_RESAMPLER_H__
28     #define __LS_RESAMPLER_H__
29    
30 schoenebeck 1424 #include "../../common/global_private.h"
31 schoenebeck 320
32     // TODO: cubic interpolation is not yet supported by the MMX/SSE(1) version though
33     #ifndef USE_LINEAR_INTERPOLATION
34     # define USE_LINEAR_INTERPOLATION 1 ///< set to 0 if you prefer cubic interpolation (slower, better quality)
35     #endif
36    
37     namespace LinuxSampler {
38    
39 schoenebeck 563 /** @brief Stereo sample point
40     *
41     * Encapsulates one stereo sample point, thus signal value for one
42     * sample point for left and right channel.
43     */
44 schoenebeck 320 struct stereo_sample_t {
45     float left; ///< signal value of the left channel
46     float right; ///< signal value of the right channel
47     };
48    
49 schoenebeck 563 /** @brief Resampler Template
50     *
51     * This template provides pure C++ and MMX/SSE assembly implementations
52     * for linear and cubic interpolation for pitching a mono or stereo
53     * input signal.
54     */
55 persson 903 template<bool INTERPOLATE,bool BITDEPTH24>
56 schoenebeck 320 class Resampler {
57     public:
58     inline static float GetNextSampleMonoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
59     if (INTERPOLATE) return Interpolate1StepMonoCPP(pSrc, Pos, Pitch);
60     else { // no pitch, so no interpolation necessary
61     int pos_int = (int) *Pos;
62     *Pos += 1.0;
63     return pSrc [pos_int];
64     }
65     }
66    
67     inline static stereo_sample_t GetNextSampleStereoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
68     if (INTERPOLATE) return Interpolate1StepStereoCPP(pSrc, Pos, Pitch);
69     else { // no pitch, so no interpolation necessary
70     int pos_int = (int) *Pos;
71     pos_int <<= 1;
72     *Pos += 1.0;
73     stereo_sample_t samplePoint;
74     samplePoint.left = pSrc[pos_int];
75     samplePoint.right = pSrc[pos_int+1];
76     return samplePoint;
77     }
78     }
79    
80 schoenebeck 1259 #if 0 // CONFIG_ASM && ARCH_X86
/**
 * MMX/SSE variant for fetching 4 consecutive mono sample points at once.
 * This code is currently compiled out (it sits inside an "#if 0" block).
 *
 * With INTERPOLATE set, the call is forwarded to
 * Interpolate4StepsMonoMMXSSE(). Otherwise the asm statement below
 * loads a single precision float from @a Pos, converts it to an integer
 * index, adds 4.0f to the position and stores it back to @a Pos, then
 * loads the four 16 bit values at that index (byte offsets 0, 2, 4, 6)
 * and converts each one to float in xmm2.
 *
 * The asm statement declares no output operands; the converted samples
 * are simply left in xmm2. Its clobber list names eax, edx and edi only.
 *
 * NOTE(review): @a Pos is accessed with movss (32 bit float), whereas the
 * C++ code path of this class takes a double* - confirm what callers
 * are expected to pass here before re-enabling this code.
 *
 * @param pSrc  - 16 bit sample data to read from
 * @param Pos   - in/out: playback position, read and written as float
 * @param Pitch - only passed on to the interpolating variant
 */
81 schoenebeck 320 inline static void GetNext4SamplesMonoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
82     if (INTERPOLATE) Interpolate4StepsMonoMMXSSE(pSrc, Pos, Pitch);
83     else { // no pitch, so no interpolation necessary
84     const float __4f = 4.0f;
85     __asm__ __volatile__ (
86     "movss (%1), %%xmm5 # load Pos\n\t"
87     "cvtss2si %%xmm5, %%edi # int(Pos)\n\t"
88     "addss %2, %%xmm5 # Pos += 4.0f\n\t"
89     "movswl (%0,%%edi,2), %%eax # load sample 0\n\t"
90     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
91     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
92     "movswl 2(%0,%%edi,2), %%edx # load sample 1\n\t"
93     "cvtsi2ss %%edx, %%xmm2 # convert to float\n\t"
94     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
95     "movss %%xmm5, (%1) # update Pos\n\t"
96     "movswl 4(%0,%%edi,2), %%eax # load sample 2\n\t"
97     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
98     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
99     "movswl 6(%0,%%edi,2), %%edx # load sample 3\n\t"
100     "cvtsi2ss %%edx, %%xmm2 # convert to float\n\t"
101     "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
// operands: %0 = pSrc, %1 = Pos, %2 = the constant 4.0f (memory operand)
102     :: "r" (pSrc), "r" (Pos), "m" (__4f)
103     : "%eax", "%edx", "%edi"
104     );
105     }
106     }
107    
/**
 * MMX/SSE variant for fetching 4 consecutive stereo sample points at
 * once. This code is currently compiled out (it sits inside an "#if 0"
 * block).
 *
 * With INTERPOLATE set, the call is forwarded to
 * Interpolate4StepsStereoMMXSSE(). Otherwise the asm statement below
 * loads a single precision float from @a Pos, converts it to an integer
 * index, adds 4.0f to the position and stores it back to @a Pos, then
 * loads eight interleaved 16 bit values (index scaled by 4 bytes, byte
 * offsets 0 - 14). Values at offsets 0, 4, 8, 12 are converted into
 * xmm2, values at offsets 2, 6, 10, 14 into xmm3.
 *
 * The asm statement declares no output operands; the converted samples
 * are simply left in xmm2 / xmm3. Its clobber list names eax, edx and
 * edi only.
 *
 * NOTE(review): the asm comment on the load at offset 2 says "left",
 * but that value goes to xmm3 like the loads labelled "right".
 *
 * NOTE(review): @a Pos is accessed with movss (32 bit float), whereas the
 * C++ code path of this class takes a double* - confirm what callers
 * are expected to pass here before re-enabling this code.
 *
 * @param pSrc  - interleaved 16 bit sample data to read from
 * @param Pos   - in/out: playback position, read and written as float
 * @param Pitch - only passed on to the interpolating variant
 */
108     inline static void GetNext4SamplesStereoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
109     if (INTERPOLATE) {
110     Interpolate4StepsStereoMMXSSE(pSrc, Pos, Pitch);
111     //EMMS;
112     } else { // no pitch, so no interpolation necessary
113     const float __4f = 4.0f;
114     __asm__ __volatile__ (
115     "movss (%1), %%xmm5 # load Pos\n\t"
116     "cvtss2si %%xmm5, %%edi # int(Pos)\n\t"
117     "addss %2, %%xmm5 # Pos += 4.0f\n\t"
118     "movswl (%0, %%edi,4), %%eax # load sample 0 (left)\n\t"
119     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
120     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
121     "movss %%xmm5, (%1) # update Pos\n\t"
122     "movswl 2(%0, %%edi,4), %%edx # load sample 0 (left)\n\t"
123     "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
124     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
125     "movswl 4(%0, %%edi,4), %%eax # load sample 1 (left)\n\t"
126     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
127     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
128     "movswl 6(%0, %%edi,4), %%edx # load sample 1 (right)\n\t"
129     "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
130     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
131     "movswl 8(%0, %%edi,4), %%eax # load sample 2 (left)\n\t"
132     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
133     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
134     "movswl 10(%0, %%edi,4), %%edx # load sample 2 (right)\n\t"
135     "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
136     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
137     "movswl 12(%0, %%edi,4), %%eax # load sample 3 (left)\n\t"
138     "cvtsi2ss %%eax, %%xmm2 # convert to float\n\t"
139     "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
140     "movswl 14(%0, %%edi,4), %%edx # load sample 3 (right)\n\t"
141     "cvtsi2ss %%edx, %%xmm3 # convert to float\n\t"
142     "shufps $0x1b, %%xmm3, %%xmm3 # swap to correct order\n\t"
// operands: %0 = pSrc, %1 = Pos, %2 = the constant 4.0f (memory operand)
143     :: "r" (pSrc), "r" (Pos), "m" (__4f)
144     : "%eax", "%edx", "%edi"
145     );
146     }
147     }
148 schoenebeck 617 #endif // CONFIG_ASM && ARCH_X86
149 schoenebeck 320
150     protected:
151    
152 senoner 1484 inline static int getSample(sample_t* src, int pos) {
153 persson 903 if (BITDEPTH24) {
154     pos *= 3;
155 senoner 1484 #if WORDS_BIGENDIAN
156 persson 903 unsigned char* p = (unsigned char*)src;
157     return p[pos] << 8 | p[pos + 1] << 16 | p[pos + 2] << 24;
158 senoner 1484 #else
159     // 24bit read optimization:
160     // a misaligned 32bit read and subquent 8 bit shift is faster (on x86) than reading 3 single bytes and shifting them
161     return (*((int *)(&((char *)(src))[pos])))<<8;
162     #endif
163 persson 903 } else {
164     return src[pos];
165     }
166     }
167    
168 schoenebeck 320 inline static float Interpolate1StepMonoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
169     int pos_int = (int) *Pos; // integer position
170     float pos_fract = *Pos - pos_int; // fractional part of position
171    
172     #if USE_LINEAR_INTERPOLATION
173 persson 903 int x1 = getSample(pSrc, pos_int);
174     int x2 = getSample(pSrc, pos_int + 1);
175     float samplePoint = (x1 + pos_fract * (x2 - x1));
176 schoenebeck 320 #else // polynomial interpolation
177 senoner 1484 float xm1 = getSample(pSrc, pos_int);
178     float x0 = getSample(pSrc, pos_int + 1);
179     float x1 = getSample(pSrc, pos_int + 2);
180     float x2 = getSample(pSrc, pos_int + 3);
181 schoenebeck 320 float a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f;
182     float b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f;
183     float c = (x1 - xm1) * 0.5f;
184     float samplePoint = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0;
185     #endif // USE_LINEAR_INTERPOLATION
186    
187     *Pos += Pitch;
188     return samplePoint;
189     }
190    
191     inline static stereo_sample_t Interpolate1StepStereoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
192     int pos_int = (int) *Pos; // integer position
193     float pos_fract = *Pos - pos_int; // fractional part of position
194     pos_int <<= 1;
195    
196     stereo_sample_t samplePoint;
197    
198     #if USE_LINEAR_INTERPOLATION
199     // left channel
200 persson 903 int x1 = getSample(pSrc, pos_int);
201     int x2 = getSample(pSrc, pos_int + 2);
202     samplePoint.left = (x1 + pos_fract * (x2 - x1));
203 schoenebeck 320 // right channel
204 persson 903 x1 = getSample(pSrc, pos_int + 1);
205     x2 = getSample(pSrc, pos_int + 3);
206     samplePoint.right = (x1 + pos_fract * (x2 - x1));
207 schoenebeck 320 #else // polynomial interpolation
208     // calculate left channel
209 senoner 1484 float xm1 = getSample(pSrc, pos_int);
210     float x0 = getSample(pSrc, pos_int + 2);
211     float x1 = getSample(pSrc, pos_int + 4);
212     float x2 = getSample(pSrc, pos_int + 6);
213 schoenebeck 320 float a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f;
214     float b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f;
215     float c = (x1 - xm1) * 0.5f;
216     samplePoint.left = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0;
217    
218     //calculate right channel
219 senoner 1484 xm1 = getSample(pSrc, pos_int + 1);
220     x0 = getSample(pSrc, pos_int + 3);
221     x1 = getSample(pSrc, pos_int + 5);
222     x2 = getSample(pSrc, pos_int + 7);
223 schoenebeck 320 a = (3.0f * (x0 - x1) - xm1 + x2) * 0.5f;
224     b = 2.0f * x1 + xm1 - (5.0f * x0 + x2) * 0.5f;
225     c = (x1 - xm1) * 0.5f;
226     samplePoint.right = (((a * pos_fract) + b) * pos_fract + c) * pos_fract + x0;
227     #endif // USE_LINEAR_INTERPOLATION
228    
229     *Pos += Pitch;
230     return samplePoint;
231     }
232    
233 schoenebeck 1259 #if 0 // CONFIG_ASM && ARCH_X86
234 schoenebeck 320 // TODO: no support for cubic interpolation yet
/**
 * Linear interpolation of 4 consecutive mono sample points using
 * MMX/SSE inline assembly. This code is currently compiled out (it sits
 * inside an "#if 0" block).
 *
 * The function consists of three separate asm statements which hand
 * their intermediate results to each other through registers; none of
 * them declares an output operand:
 *
 *  1. derive the 4 playback positions from @a Pos and @a Pitch, write
 *     the start position of the next 4-sample cycle back to @a Pos and
 *     split the positions into integer parts (mm4, mm5) and fractional
 *     parts (xmm0)
 *  2. load the 16 bit samples at each integer position (-> xmm2) and at
 *     the respective following index (-> xmm3), converted to float
 *  3. xmm2 = xmm2 + xmm0 * (xmm3 - xmm2)
 *
 * The 4 interpolated sample points are left in xmm2.
 *
 * NOTE(review): @a Pos is accessed with movss (32 bit float), whereas the
 * C++ code path of this class takes a double* - confirm what callers
 * are expected to pass here before re-enabling this code.
 *
 * NOTE(review): the asm comment on the last "movd %%mm5,%%edi" says
 * "sample 2", the loads that follow it are labelled "sample 3".
 *
 * @param pSrc  - 16 bit sample data to read from
 * @param Pos   - in/out: playback position, read and written as float
 * @param Pitch - position increment between two sample points
 */
235     inline static void Interpolate4StepsMonoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
236     /* calculate playback position of each of the 4 samples by adding the associated pitch */
237     __asm__ __volatile__ (
238     "movss (%0),%%xmm0 # sample position of sample[0] -> xmm0[0]\n\t"
239     "movss %1,%%xmm1 # copy pitch -> xmm1[0]\n\t"
240     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
241     "addss %%xmm1,%%xmm0 # calculate sample position of sample[1]\n\t"
242     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
243     "addss %%xmm1,%%xmm0 # calculate sample position of sample[2]\n\t"
244     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
245     "addss %%xmm1,%%xmm0 # calculate sample position of sample[3]\n\t"
246     "movss %%xmm0,%%xmm2 # xmm0[0] -> xmm2[0]\n\t"
247     "addss %%xmm1,%%xmm2 # calculate initial sample position for the next 4-sample cycle\n\t"
248     "movss %%xmm2,(%0) # update 'Pos'\n\t"
249     "shufps $0x1b,%%xmm0,%%xmm0 # swap, so that xmm0[0]=sample pos 0, xmm0[1]=sample pos 1,...\n\t"
250     "cvttps2pi %%xmm0,%%mm4 # int(xmm0[0-1]) -> mm4\n\t"
251     "shufps $0xe4,%%xmm0,%%xmm1 # xmm0[2-3] -> xmm1[2-3]\n\t"
252     "shufps $0x0e,%%xmm1,%%xmm1 # xmm1[2-3] -> xmm1[0-1]\n\t"
253     "cvttps2pi %%xmm1,%%mm5 # int(xmm1[0-1]) -> mm5\n\t"
254     "cvtpi2ps %%mm5,%%xmm1 # double(mm5) -> xmm1[0-1]\n\t"
255     "shufps $0x44,%%xmm1,%%xmm1 # shift lower 2 FPs up to the upper 2 cells\n\t"
256     "cvtpi2ps %%mm4,%%xmm1 # double(mm4) -> xmm1[0-1]\n\t"
257     "subps %%xmm1,%%xmm0 # xmm0[1-3] = xmm0[1-3] - xmm1[1-3]\n\t"
258     :
259     : "r" (Pos), /* %0 */
260     "m" (Pitch) /* %1 */
261     : "%xmm0", /* holds fractional position (0.0 <= x < 1.0) of sample 0-3 at the end */
262     "%xmm1", /* holds integer position (back converted to SPFP) of sample 0-3 at the end */
263     "mm4", /* holds integer position of sample 0-1 at the end */
264     "mm5", /* holds integer position of sample 2-3 at the end */
265     "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
266     );
267     /* get sample values of pSrc[pos_int] and pSrc[pos_int+1] of the 4 samples */
268     __asm__ __volatile__ (
269     "movd %%mm4,%%edi # sample position of sample 0\n\t"
270     "psrlq $32,%%mm4 # mm4 >> 32\n\t"
271     "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 0)\n\t"
272     "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 0+1)\n\t"
273     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
274     "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
275     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
276     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
277     "movd %%mm4,%%edi # sample position of sample 1\n\t"
278     "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 1)\n\t"
279     "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 1+1)\n\t"
280     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
281     "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
282     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
283     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
284     "movd %%mm5,%%edi # sample position of sample 2\n\t"
285     "psrlq $32,%%mm5 # mm5 >> 32\n\t"
286     "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 2)\n\t"
287     "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 2+1)\n\t"
288     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
289     "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
290     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
291     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
292     "movd %%mm5,%%edi # sample position of sample 2\n\t"
293     "movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 3)\n\t"
294     "movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 3+1)\n\t"
295     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
296     "cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
297     "shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
298     "shufps $0x1b, %%xmm3, %%xmm3 # swap to correct order\n\t"
299     : /* no output */
300     : "S" (pSrc) /* %0 - sample read position */
301     : "%eax", "%ecx", /*"%edx",*/ "%edi",
302     "%xmm2", /* holds pSrc[int_pos] of the 4 samples at the end */
303     "%xmm3", /* holds pSrc[int_pos+1] of the 4 samples at the end */
304     "mm4", /* holds integer position of sample 0-1 at the end */
305     "mm5", /* holds integer position of sample 2-3 at the end */
306     "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
307     );
308     /* linear interpolation of the 4 samples simultaneously */
309     __asm__ __volatile__ (
310     "subps %%xmm2,%%xmm3 # xmm3 = pSrc[pos_int+1] - pSrc[pos_int]\n\t"
311     "mulps %%xmm0,%%xmm3 # xmm3 = pos_fract * (pSrc[pos_int+1] - pSrc[pos_int])\n\t"
312     "addps %%xmm3,%%xmm2 # xmm2 = pSrc[pos_int] + (pos_fract * (pSrc[pos_int+1] - pSrc[pos_int]))\n\t"
313     : /* no output */
314     : /* no input */
315     : "%xmm2" /* holds linear interpolated sample point (of all 4 samples) at the end */
316     );
317     }
318    
319     // TODO: no support for cubic interpolation yet
/**
 * Linear interpolation of 4 consecutive stereo sample points using
 * MMX/SSE inline assembly. This code is currently compiled out (it sits
 * inside an "#if 0" block).
 *
 * The function consists of three separate asm statements which hand
 * their intermediate results to each other through registers; none of
 * them declares an output operand:
 *
 *  1. derive the 4 playback positions from @a Pos and @a Pitch, write
 *     the start position of the next 4-sample cycle back to @a Pos and
 *     split the positions into integer parts (mm4, mm5) and fractional
 *     parts (xmm0)
 *  2. for each integer position load four interleaved 16 bit values
 *     (index scaled by 4 bytes, byte offsets 0, 2, 4, 6) and convert
 *     them to float: offset 0 -> xmm2, 2 -> xmm3, 4 -> xmm4, 6 -> xmm5
 *  3. xmm2 = xmm2 + xmm0 * (xmm4 - xmm2)   (left channel)
 *     xmm3 = xmm3 + xmm0 * (xmm5 - xmm3)   (right channel)
 *
 * The 4 interpolated sample points are left in xmm2 (left channel) and
 * xmm3 (right channel).
 *
 * NOTE(review): @a Pos is accessed with movss (32 bit float), whereas the
 * C++ code path of this class takes a double* - confirm what callers
 * are expected to pass here before re-enabling this code.
 *
 * @param pSrc  - interleaved 16 bit sample data to read from
 * @param Pos   - in/out: playback position, read and written as float
 * @param Pitch - position increment between two sample points
 */
320     inline static void Interpolate4StepsStereoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
321     /* calculate playback position of each of the 4 samples by adding the associated pitch */
322     __asm__ __volatile__ (
323     "movss (%0),%%xmm0 # sample position of sample[0] -> xmm0[0]\n\t"
324     "movss %1,%%xmm1 # copy pitch -> xmm1[0]\n\t"
325     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
326     "addss %%xmm1,%%xmm0 # calculate sample position of sample[1]\n\t"
327     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
328     "addss %%xmm1,%%xmm0 # calculate sample position of sample[2]\n\t"
329     "shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t"
330     "addss %%xmm1,%%xmm0 # calculate sample position of sample[3]\n\t"
331     "movss %%xmm0,%%xmm2 # xmm0[0] -> xmm2[0]\n\t"
332     "addss %%xmm1,%%xmm2 # calculate initial sample position for the next 4-sample cycle\n\t"
333     "movss %%xmm2,(%0) # update 'Pos'\n\t"
334     "shufps $0x1b,%%xmm0,%%xmm0 # swap, so that xmm0[0]=sample pos 0, xmm0[1]=sample pos 1,...\n\t"
335     "cvttps2pi %%xmm0,%%mm4 # int(xmm0[0-1]) -> mm4\n\t"
336     "shufps $0xe4,%%xmm0,%%xmm1 # xmm0[2-3] -> xmm1[2-3]\n\t"
337     "shufps $0x0e,%%xmm1,%%xmm1 # xmm1[2-3] -> xmm1[0-1]\n\t"
338     "cvttps2pi %%xmm1,%%mm5 # int(xmm1[0-1]) -> mm5\n\t"
339     "cvtpi2ps %%mm5,%%xmm1 # double(mm5) -> xmm1[0-1]\n\t"
340     "shufps $0x44,%%xmm1,%%xmm1 # shift lower 2 FPs up to the upper 2 cells\n\t"
341     "cvtpi2ps %%mm4,%%xmm1 # double(mm4) -> xmm1[0-1]\n\t"
342     "subps %%xmm1,%%xmm0 # xmm0[1-3] = xmm0[1-3] - xmm1[1-3]\n\t"
343     :
344     : "r" (Pos), /* %0 */
345     "m" (Pitch) /* %1 */
346     : "%xmm0", /* holds fractional position (0.0 <= x < 1.0) of sample 0-3 at the end */
347     "%xmm1", /* holds integer position (back converted to SPFP) of sample 0-3 at the end */
348     "mm4", /* holds integer position of sample 0-1 at the end */
349     "mm5", /* holds integer position of sample 2-3 at the end */
350     "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
351     );
352    
353     /* get sample values of pSrc[pos_int], pSrc[pos_int+1], pSrc[pos_int+2] and pSrc[pos_int+3] of the 4 samples */
354     __asm__ __volatile__ (
355     "xorl %%eax,%%eax # clear eax\n\t"
356     "xorl %%edx,%%edx # clear edx\n\t"
357     "movd %%mm4,%%edi # sample position of sample 0\n\t"
358     "psrlq $32,%%mm4 # mm4 >> 32\n\t"
359     "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 0)\n\t"
360     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
361     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
362     "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 0+1)\n\t"
363     "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
364     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
365     "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 0+2)\n\t"
366     "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
367     "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
368     "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 0+3)\n\t"
369     "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
370     "movd %%mm4,%%edi # sample position of sample 1\n\t"
371     "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
372     "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 1)\n\t"
373     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
374     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
375     "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 1+1)\n\t"
376     "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
377     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
378     "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 1+2)\n\t"
379     "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
380     "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
381     "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 1+3)\n\t"
382     "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
383     "movd %%mm5,%%edi # sample position of sample 2\n\t"
384     "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
385     "psrlq $32,%%mm5 # mm5 >> 32\n\t"
386     "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 2)\n\t"
387     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
388     "shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t"
389     "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 2+1)\n\t"
390     "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
391     "shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t"
392     "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 2+2)\n\t"
393     "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
394     "shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t"
395     "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 2+3)\n\t"
396     "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
397     "movd %%mm5,%%edi # sample position of sample 3\n\t"
398     "shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t"
399     "movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 3)\n\t"
400     "cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t"
401     "shufps $0x1b, %%xmm2, %%xmm2 # shift up\n\t"
402     "movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 3+1)\n\t"
403     "cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t"
404     "shufps $0x1b, %%xmm3, %%xmm3 # shift up\n\t"
405     "movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 3+2)\n\t"
406     "cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t"
407     "shufps $0x1b, %%xmm4, %%xmm4 # swap to correct order\n\t"
408     "movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 3+3)\n\t"
409     "cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t"
410     "shufps $0x1b, %%xmm5, %%xmm5 # swap to correct order\n\t"
411     : /* no output */
412     : "S" (pSrc) /* %0 - sample read position */
413     : "%eax", "%edx", "%edi",
414     "xmm2", /* holds pSrc[int_pos] of the 4 samples at the end */
415     "xmm3", /* holds pSrc[int_pos+1] of the 4 samples at the end */
416     "xmm4", /* holds pSrc[int_pos+2] of the 4 samples at the end */
417     "xmm5", /* holds pSrc[int_pos+3] of the 4 samples at the end */
418     "mm4", /* holds integer position of sample 0-1 at the end */
419     "mm5", /* holds integer position of sample 2-3 at the end */
420     "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
421     );
422     /* linear interpolation of the 4 samples (left & right channel) simultaneously */
423     __asm__ __volatile__ (
424     "subps %%xmm2,%%xmm4 # xmm4 = pSrc[pos_int+2] - pSrc[pos_int] (left channel)\n\t"
425     "mulps %%xmm0,%%xmm4 # xmm4 = pos_fract * (pSrc[pos_int+2] - pSrc[pos_int]) (left channel)\n\t"
426     "addps %%xmm4,%%xmm2 # xmm2 = pSrc[pos_int] + (pos_fract * (pSrc[pos_int+2] - pSrc[pos_int])) (left channel)\n\t"
427     "subps %%xmm3,%%xmm5 # xmm5 = pSrc[pos_int+3] - pSrc[pos_int+1] (right channel)\n\t"
428     "mulps %%xmm0,%%xmm5 # xmm5 = pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1]) (right channel)\n\t"
429     "addps %%xmm5,%%xmm3 # xmm3 = pSrc[pos_int+1] + (pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1])) (right channel)\n\t"
430     : /* no output */
431     : /* no input */
432     : "%xmm2", /* holds linear interpolated sample of left channel (of all 4 samples) at the end */
433     "%xmm3" /* holds linear interpolated sample of right channel (of all 4 samples) at the end */
434     );
435     }
436 schoenebeck 617 #endif // CONFIG_ASM && ARCH_X86
437 schoenebeck 320 };
438    
439     } // namespace LinuxSampler
440    
441     #endif // __LS_RESAMPLER_H__

  ViewVC Help
Powered by ViewVC