1 |
schoenebeck |
320 |
/*************************************************************************** |
2 |
|
|
* * |
3 |
|
|
* LinuxSampler - modular, streaming capable sampler * |
4 |
|
|
* * |
5 |
|
|
* Copyright (C) 2003, 2004 by Benno Senoner and Christian Schoenebeck * |
6 |
schoenebeck |
617 |
* Copyright (C) 2005 Christian Schoenebeck * |
7 |
schoenebeck |
320 |
* * |
8 |
|
|
* This program is free software; you can redistribute it and/or modify * |
9 |
|
|
* it under the terms of the GNU General Public License as published by * |
10 |
|
|
* the Free Software Foundation; either version 2 of the License, or * |
11 |
|
|
* (at your option) any later version. * |
12 |
|
|
* * |
13 |
|
|
* This program is distributed in the hope that it will be useful, * |
14 |
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of * |
15 |
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
16 |
|
|
* GNU General Public License for more details. * |
17 |
|
|
* * |
18 |
|
|
* You should have received a copy of the GNU General Public License * |
19 |
|
|
* along with this program; if not, write to the Free Software * |
20 |
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, * |
21 |
|
|
* MA 02111-1307 USA * |
22 |
|
|
***************************************************************************/ |
23 |
|
|
|
24 |
|
|
#ifndef __LS_RESAMPLER_H__ |
25 |
|
|
#define __LS_RESAMPLER_H__ |
26 |
|
|
|
27 |
|
|
#include "../../common/global.h" |
28 |
|
|
|
29 |
|
|
// TODO: cubic interpolation is not yet supported by the MMX/SSE(1) version though |
30 |
|
|
#ifndef USE_LINEAR_INTERPOLATION |
31 |
|
|
# define USE_LINEAR_INTERPOLATION 1 ///< set to 0 if you prefer cubic interpolation (slower, better quality) |
32 |
|
|
#endif |
33 |
|
|
|
34 |
|
|
namespace LinuxSampler { |
35 |
|
|
|
36 |
schoenebeck |
563 |
/** @brief Stereo sample point
 *
 * Plain aggregate carrying the signal value of a single sample point for
 * both channels of a stereo signal.
 */
struct stereo_sample_t {
    float left;  ///< signal value of the left channel
    float right; ///< signal value of the right channel
};
45 |
|
|
|
46 |
schoenebeck |
563 |
/** @brief Resampler Template
 *
 * This template provides pure C++ implementations of linear and cubic
 * interpolation, as well as MMX/SSE assembly implementations of linear
 * interpolation (cubic interpolation is not yet supported by the
 * assembly versions), for pitching a mono or stereo input signal.
 */
52 |
schoenebeck |
320 |
template<bool INTERPOLATE> |
53 |
|
|
class Resampler { |
54 |
|
|
public: |
55 |
|
|
/**
 * Fetches the next mono sample point (pure C++ implementation).
 *
 * When the template argument INTERPOLATE is true the work is delegated
 * to Interpolate1StepMonoCPP(). Otherwise the sample at the truncated
 * integer position is returned unmodified and the position is moved
 * forward by exactly one sample; @a Pitch is not used in that case.
 *
 * @param pSrc  - mono sample data
 * @param Pos   - in/out: current playback position within @a pSrc
 * @param Pitch - playback pitch, only used when interpolating
 * @returns signal value of the next sample point
 */
inline static float GetNextSampleMonoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
    if (!INTERPOLATE) { // no pitch, so no interpolation necessary
        const int readIndex = static_cast<int>(*Pos);
        *Pos += 1.0;
        return pSrc[readIndex];
    }
    return Interpolate1StepMonoCPP(pSrc, Pos, Pitch);
}
63 |
|
|
|
64 |
|
|
/**
 * Fetches the next stereo sample point (pure C++ implementation).
 *
 * The input is read as interleaved data: the value at index 2*n is used
 * as left channel and the value at index 2*n+1 as right channel of
 * sample point n.
 *
 * When the template argument INTERPOLATE is true the work is delegated
 * to Interpolate1StepStereoCPP(). Otherwise the sample point at the
 * truncated integer position is returned unmodified and the position is
 * moved forward by exactly one sample point; @a Pitch is not used in
 * that case.
 *
 * @param pSrc  - interleaved stereo sample data
 * @param Pos   - in/out: current playback position (in sample points)
 * @param Pitch - playback pitch, only used when interpolating
 * @returns signal values of the next sample point for both channels
 */
inline static stereo_sample_t GetNextSampleStereoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
    if (!INTERPOLATE) { // no pitch, so no interpolation necessary
        // two interleaved values per sample point
        const int leftIndex = static_cast<int>(*Pos) << 1;
        *Pos += 1.0;
        stereo_sample_t result;
        result.left  = pSrc[leftIndex];
        result.right = pSrc[leftIndex + 1];
        return result;
    }
    return Interpolate1StepStereoCPP(pSrc, Pos, Pitch);
}
76 |
|
|
|
77 |
schoenebeck |
617 |
#if CONFIG_ASM && ARCH_X86 |
78 |
schoenebeck |
320 |
/**
 * Fetches the next 4 mono samples at once (SSE assembly implementation).
 *
 * When the template argument INTERPOLATE is true the work is delegated
 * to Interpolate4StepsMonoMMXSSE(). Otherwise 4 consecutive 16 bit
 * signed samples starting at the truncated integer position are
 * converted to single precision floats and the position is advanced by
 * 4.0; @a Pitch is not used in that case.
 *
 * The function has no C++ return value: the 4 samples are left in
 * register xmm2 (xmm2[0] = first sample ... xmm2[3] = fourth sample).
 * NOTE(review): presumably the caller picks the result up from xmm2 in
 * its own inline assembly - verify against the callers.
 *
 * @param pSrc  - mono sample data (read as 16 bit signed values)
 * @param Pos   - in/out: playback position, accessed as a single
 *                precision float
 * @param Pitch - playback pitch, only used when interpolating
 */
inline static void GetNext4SamplesMonoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
    if (INTERPOLATE) Interpolate4StepsMonoMMXSSE(pSrc, Pos, Pitch);
    else { // no pitch, so no interpolation necessary
        const float posAdvance = 4.0f;
        __asm__ __volatile__ (
            // cvttss2si (truncation) instead of cvtss2si (rounding), so
            // the integer position matches the '(int) *Pos' of the C++
            // implementation and the cvttps2pi of the interpolating one
            "movss    (%1), %%xmm5          # load Pos\n\t"
            "cvttss2si %%xmm5, %%edi        # int(Pos), truncated\n\t"
            "addss    %2, %%xmm5            # Pos += 4.0f\n\t"
            "movswl   (%0,%%edi,2), %%eax   # load sample 0\n\t"
            "cvtsi2ss %%eax, %%xmm2         # convert to float\n\t"
            "shufps   $0x93, %%xmm2, %%xmm2 # shift up\n\t"
            "movswl   2(%0,%%edi,2), %%edx  # load sample 1\n\t"
            "cvtsi2ss %%edx, %%xmm2         # convert to float\n\t"
            "shufps   $0x93, %%xmm2, %%xmm2 # shift up\n\t"
            "movss    %%xmm5, (%1)          # update Pos\n\t"
            "movswl   4(%0,%%edi,2), %%eax  # load sample 2\n\t"
            "cvtsi2ss %%eax, %%xmm2         # convert to float\n\t"
            "shufps   $0x93, %%xmm2, %%xmm2 # shift up\n\t"
            "movswl   6(%0,%%edi,2), %%edx  # load sample 3\n\t"
            "cvtsi2ss %%edx, %%xmm2         # convert to float\n\t"
            "shufps   $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t"
            :: "r" (pSrc), "r" (Pos), "m" (posAdvance)
            : "%eax", "%edx", "%edi",
              "%xmm2",  /* holds the 4 samples at the end */
              "%xmm5",  /* scratch: holds the updated position */
              "memory"  /* reads pSrc[...] and writes *Pos through pointers */
        );
    }
}
104 |
|
|
|
105 |
|
|
/**
 * Fetches the next 4 stereo sample points at once (SSE assembly
 * implementation).
 *
 * When the template argument INTERPOLATE is true the work is delegated
 * to Interpolate4StepsStereoMMXSSE(). Otherwise 4 consecutive
 * interleaved (left, right) pairs of 16 bit signed samples starting at
 * the truncated integer position are converted to single precision
 * floats and the position is advanced by 4.0; @a Pitch is not used in
 * that case.
 *
 * The function has no C++ return value: the left channel values are
 * left in register xmm2 and the right channel values in register xmm3
 * (element 0 = first sample point ... element 3 = fourth sample point).
 * NOTE(review): presumably the caller picks the result up from these
 * registers in its own inline assembly - verify against the callers.
 *
 * @param pSrc  - interleaved stereo sample data (16 bit signed values)
 * @param Pos   - in/out: playback position (in sample points),
 *                accessed as a single precision float
 * @param Pitch - playback pitch, only used when interpolating
 */
inline static void GetNext4SamplesStereoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) {
    if (INTERPOLATE) {
        Interpolate4StepsStereoMMXSSE(pSrc, Pos, Pitch);
        //EMMS;
    } else { // no pitch, so no interpolation necessary
        const float posAdvance = 4.0f;
        __asm__ __volatile__ (
            // cvttss2si (truncation) instead of cvtss2si (rounding), so
            // the integer position matches the '(int) *Pos' of the C++
            // implementation and the cvttps2pi of the interpolating one
            "movss    (%1), %%xmm5           # load Pos\n\t"
            "cvttss2si %%xmm5, %%edi         # int(Pos), truncated\n\t"
            "addss    %2, %%xmm5             # Pos += 4.0f\n\t"
            "movswl   (%0, %%edi,4), %%eax   # load sample 0 (left)\n\t"
            "cvtsi2ss %%eax, %%xmm2          # convert to float\n\t"
            "shufps   $0x93, %%xmm2, %%xmm2  # shift up\n\t"
            "movss    %%xmm5, (%1)           # update Pos\n\t"
            "movswl   2(%0, %%edi,4), %%edx  # load sample 0 (right)\n\t"
            "cvtsi2ss %%edx, %%xmm3          # convert to float\n\t"
            "shufps   $0x93, %%xmm3, %%xmm3  # shift up\n\t"
            "movswl   4(%0, %%edi,4), %%eax  # load sample 1 (left)\n\t"
            "cvtsi2ss %%eax, %%xmm2          # convert to float\n\t"
            "shufps   $0x93, %%xmm2, %%xmm2  # shift up\n\t"
            "movswl   6(%0, %%edi,4), %%edx  # load sample 1 (right)\n\t"
            "cvtsi2ss %%edx, %%xmm3          # convert to float\n\t"
            "shufps   $0x93, %%xmm3, %%xmm3  # shift up\n\t"
            "movswl   8(%0, %%edi,4), %%eax  # load sample 2 (left)\n\t"
            "cvtsi2ss %%eax, %%xmm2          # convert to float\n\t"
            "shufps   $0x93, %%xmm2, %%xmm2  # shift up\n\t"
            "movswl   10(%0, %%edi,4), %%edx # load sample 2 (right)\n\t"
            "cvtsi2ss %%edx, %%xmm3          # convert to float\n\t"
            "shufps   $0x93, %%xmm3, %%xmm3  # shift up\n\t"
            "movswl   12(%0, %%edi,4), %%eax # load sample 3 (left)\n\t"
            "cvtsi2ss %%eax, %%xmm2          # convert to float\n\t"
            "shufps   $0x1b, %%xmm2, %%xmm2  # swap to correct order\n\t"
            "movswl   14(%0, %%edi,4), %%edx # load sample 3 (right)\n\t"
            "cvtsi2ss %%edx, %%xmm3          # convert to float\n\t"
            "shufps   $0x1b, %%xmm3, %%xmm3  # swap to correct order\n\t"
            :: "r" (pSrc), "r" (Pos), "m" (posAdvance)
            : "%eax", "%edx", "%edi",
              "%xmm2",  /* holds the 4 left channel samples at the end */
              "%xmm3",  /* holds the 4 right channel samples at the end */
              "%xmm5",  /* scratch: holds the updated position */
              "memory"  /* reads pSrc[...] and writes *Pos through pointers */
        );
    }
}
145 |
schoenebeck |
617 |
#endif // CONFIG_ASM && ARCH_X86 |
146 |
schoenebeck |
320 |
|
147 |
|
|
protected: |
148 |
|
|
|
149 |
|
|
/**
 * Calculates one interpolated mono sample point (pure C++
 * implementation) and advances the playback position by @a Pitch.
 *
 * With USE_LINEAR_INTERPOLATION enabled the result is linearly
 * interpolated between the two samples surrounding the position.
 * Otherwise a cubic polynomial through the 4 samples starting at the
 * integer position is evaluated.
 *
 * @param pSrc  - mono sample data
 * @param Pos   - in/out: current playback position within @a pSrc
 * @param Pitch - amount by which @a Pos is advanced afterwards
 * @returns interpolated signal value
 */
inline static float Interpolate1StepMonoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
    const int   idx  = static_cast<int>(*Pos); // integer part of position
    const float frac = *Pos - idx;             // fractional part of position

    #if USE_LINEAR_INTERPOLATION
    const sample_t* const p = pSrc + idx;
    float result = p[0] + frac * (p[1] - p[0]);
    #else // polynomial interpolation
    const float y0 = pSrc[idx];
    const float y1 = pSrc[idx + 1];
    const float y2 = pSrc[idx + 2];
    const float y3 = pSrc[idx + 3];
    // coefficients of the cubic polynomial
    const float k3 = (3.0f * (y1 - y2) - y0 + y3) * 0.5f;
    const float k2 = 2.0f * y2 + y0 - (5.0f * y1 + y3) * 0.5f;
    const float k1 = (y2 - y0) * 0.5f;
    float result = (((k3 * frac) + k2) * frac + k1) * frac + y1;
    #endif // USE_LINEAR_INTERPOLATION

    *Pos += Pitch;
    return result;
}
169 |
|
|
|
170 |
|
|
/**
 * Calculates one interpolated stereo sample point (pure C++
 * implementation) and advances the playback position by @a Pitch.
 *
 * The input is read as interleaved data (left value followed by right
 * value for each sample point). With USE_LINEAR_INTERPOLATION enabled
 * each channel is linearly interpolated between the two sample points
 * surrounding the position. Otherwise a cubic polynomial through the 4
 * sample points starting at the integer position is evaluated per
 * channel.
 *
 * @param pSrc  - interleaved stereo sample data
 * @param Pos   - in/out: current playback position (in sample points)
 * @param Pitch - amount by which @a Pos is advanced afterwards
 * @returns interpolated signal values for both channels
 */
inline static stereo_sample_t Interpolate1StepStereoCPP(sample_t* pSrc, double* Pos, float& Pitch) {
    const int   idx  = static_cast<int>(*Pos); // integer part of position
    const float frac = *Pos - idx;             // fractional part of position

    // first value of the interleaved sample point at the integer position
    const sample_t* const p = pSrc + (idx << 1);

    stereo_sample_t result;

    #if USE_LINEAR_INTERPOLATION
    result.left  = p[0] + frac * (p[2] - p[0]); // even offsets: left channel
    result.right = p[1] + frac * (p[3] - p[1]); // odd offsets: right channel
    #else // polynomial interpolation
    { // left channel
        const float y0 = p[0];
        const float y1 = p[2];
        const float y2 = p[4];
        const float y3 = p[6];
        const float k3 = (3.0f * (y1 - y2) - y0 + y3) * 0.5f;
        const float k2 = 2.0f * y2 + y0 - (5.0f * y1 + y3) * 0.5f;
        const float k1 = (y2 - y0) * 0.5f;
        result.left = (((k3 * frac) + k2) * frac + k1) * frac + y1;
    }
    { // right channel
        const float y0 = p[1];
        const float y1 = p[3];
        const float y2 = p[5];
        const float y3 = p[7];
        const float k3 = (3.0f * (y1 - y2) - y0 + y3) * 0.5f;
        const float k2 = 2.0f * y2 + y0 - (5.0f * y1 + y3) * 0.5f;
        const float k1 = (y2 - y0) * 0.5f;
        result.right = (((k3 * frac) + k2) * frac + k1) * frac + y1;
    }
    #endif // USE_LINEAR_INTERPOLATION

    *Pos += Pitch;
    return result;
}
207 |
|
|
|
208 |
schoenebeck |
617 |
#if CONFIG_ASM && ARCH_X86 |
209 |
schoenebeck |
320 |
// TODO: no support for cubic interpolation yet |
210 |
|
|
inline static void Interpolate4StepsMonoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) { |
211 |
|
|
/* calculate playback position of each of the 4 samples by adding the associated pitch */ |
212 |
|
|
__asm__ __volatile__ ( |
213 |
|
|
"movss (%0),%%xmm0 # sample position of sample[0] -> xmm0[0]\n\t" |
214 |
|
|
"movss %1,%%xmm1 # copy pitch -> xmm1[0]\n\t" |
215 |
|
|
"shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t" |
216 |
|
|
"addss %%xmm1,%%xmm0 # calculate sample position of sample[1]\n\t" |
217 |
|
|
"shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t" |
218 |
|
|
"addss %%xmm1,%%xmm0 # calculate sample position of sample[2]\n\t" |
219 |
|
|
"shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t" |
220 |
|
|
"addss %%xmm1,%%xmm0 # calculate sample position of sample[3]\n\t" |
221 |
|
|
"movss %%xmm0,%%xmm2 # xmm0[0] -> xmm2[0]\n\t" |
222 |
|
|
"addss %%xmm1,%%xmm2 # calculate initial sample position for the next 4-sample cycle\n\t" |
223 |
|
|
"movss %%xmm2,(%0) # update 'Pos'\n\t" |
224 |
|
|
"shufps $0x1b,%%xmm0,%%xmm0 # swap, so that xmm0[0]=sample pos 0, xmm0[1]=sample pos 1,...\n\t" |
225 |
|
|
"cvttps2pi %%xmm0,%%mm4 # int(xmm0[0-1]) -> mm4\n\t" |
226 |
|
|
"shufps $0xe4,%%xmm0,%%xmm1 # xmm0[2-3] -> xmm1[2-3]\n\t" |
227 |
|
|
"shufps $0x0e,%%xmm1,%%xmm1 # xmm1[2-3] -> xmm1[0-1]\n\t" |
228 |
|
|
"cvttps2pi %%xmm1,%%mm5 # int(xmm1[0-1]) -> mm5\n\t" |
229 |
|
|
"cvtpi2ps %%mm5,%%xmm1 # double(mm5) -> xmm1[0-1]\n\t" |
230 |
|
|
"shufps $0x44,%%xmm1,%%xmm1 # shift lower 2 FPs up to the upper 2 cells\n\t" |
231 |
|
|
"cvtpi2ps %%mm4,%%xmm1 # double(mm4) -> xmm1[0-1]\n\t" |
232 |
|
|
"subps %%xmm1,%%xmm0 # xmm0[1-3] = xmm0[1-3] - xmm1[1-3]\n\t" |
233 |
|
|
: |
234 |
|
|
: "r" (Pos), /* %0 */ |
235 |
|
|
"m" (Pitch) /* %1 */ |
236 |
|
|
: "%xmm0", /* holds fractional position (0.0 <= x < 1.0) of sample 0-3 at the end */ |
237 |
|
|
"%xmm1", /* holds integer position (back converted to SPFP) of sample 0-3 at the end */ |
238 |
|
|
"mm4", /* holds integer position of sample 0-1 at the end */ |
239 |
|
|
"mm5", /* holds integer position of sample 2-3 at the end */ |
240 |
|
|
"st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" |
241 |
|
|
); |
242 |
|
|
/* get sample values of pSrc[pos_int] and pSrc[pos_int+1] of the 4 samples */ |
243 |
|
|
__asm__ __volatile__ ( |
244 |
|
|
"movd %%mm4,%%edi # sample position of sample 0\n\t" |
245 |
|
|
"psrlq $32,%%mm4 # mm4 >> 32\n\t" |
246 |
|
|
"movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 0)\n\t" |
247 |
|
|
"movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 0+1)\n\t" |
248 |
|
|
"cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t" |
249 |
|
|
"cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t" |
250 |
|
|
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
251 |
|
|
"shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t" |
252 |
|
|
"movd %%mm4,%%edi # sample position of sample 1\n\t" |
253 |
|
|
"movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 1)\n\t" |
254 |
|
|
"movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 1+1)\n\t" |
255 |
|
|
"cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t" |
256 |
|
|
"cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t" |
257 |
|
|
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
258 |
|
|
"shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t" |
259 |
|
|
"movd %%mm5,%%edi # sample position of sample 2\n\t" |
260 |
|
|
"psrlq $32,%%mm5 # mm5 >> 32\n\t" |
261 |
|
|
"movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 2)\n\t" |
262 |
|
|
"movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 2+1)\n\t" |
263 |
|
|
"cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t" |
264 |
|
|
"cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t" |
265 |
|
|
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
266 |
|
|
"shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t" |
267 |
|
|
"movd %%mm5,%%edi # sample position of sample 2\n\t" |
268 |
|
|
"movswl (%0,%%edi,2),%%eax # pSrc[pos_int] (sample 3)\n\t" |
269 |
|
|
"movswl 2(%0,%%edi,2),%%ecx # pSrc[pos_int] (sample 3+1)\n\t" |
270 |
|
|
"cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t" |
271 |
|
|
"cvtsi2ss %%ecx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t" |
272 |
|
|
"shufps $0x1b, %%xmm2, %%xmm2 # swap to correct order\n\t" |
273 |
|
|
"shufps $0x1b, %%xmm3, %%xmm3 # swap to correct order\n\t" |
274 |
|
|
: /* no output */ |
275 |
|
|
: "S" (pSrc) /* %0 - sample read position */ |
276 |
|
|
: "%eax", "%ecx", /*"%edx",*/ "%edi", |
277 |
|
|
"%xmm2", /* holds pSrc[int_pos] of the 4 samples at the end */ |
278 |
|
|
"%xmm3", /* holds pSrc[int_pos+1] of the 4 samples at the end */ |
279 |
|
|
"mm4", /* holds integer position of sample 0-1 at the end */ |
280 |
|
|
"mm5", /* holds integer position of sample 2-3 at the end */ |
281 |
|
|
"st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" |
282 |
|
|
); |
283 |
|
|
/* linear interpolation of the 4 samples simultaniously */ |
284 |
|
|
__asm__ __volatile__ ( |
285 |
|
|
"subps %%xmm2,%%xmm3 # xmm3 = pSrc[pos_int+1] - pSrc[pos_int]\n\t" |
286 |
|
|
"mulps %%xmm0,%%xmm3 # xmm3 = pos_fract * (pSrc[pos_int+1] - pSrc[pos_int])\n\t" |
287 |
|
|
"addps %%xmm3,%%xmm2 # xmm2 = pSrc[pos_int] + (pos_fract * (pSrc[pos_int+1] - pSrc[pos_int]))\n\t" |
288 |
|
|
: /* no output */ |
289 |
|
|
: /* no input */ |
290 |
|
|
: "%xmm2" /* holds linear interpolated sample point (of all 4 samples) at the end */ |
291 |
|
|
); |
292 |
|
|
} |
293 |
|
|
|
294 |
|
|
// TODO: no support for cubic interpolation yet |
295 |
|
|
inline static void Interpolate4StepsStereoMMXSSE(sample_t* pSrc, void* Pos, float& Pitch) { |
296 |
|
|
/* calculate playback position of each of the 4 samples by adding the associated pitch */ |
297 |
|
|
__asm__ __volatile__ ( |
298 |
|
|
"movss (%0),%%xmm0 # sample position of sample[0] -> xmm0[0]\n\t" |
299 |
|
|
"movss %1,%%xmm1 # copy pitch -> xmm1[0]\n\t" |
300 |
|
|
"shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t" |
301 |
|
|
"addss %%xmm1,%%xmm0 # calculate sample position of sample[1]\n\t" |
302 |
|
|
"shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t" |
303 |
|
|
"addss %%xmm1,%%xmm0 # calculate sample position of sample[2]\n\t" |
304 |
|
|
"shufps $0x90,%%xmm0,%%xmm0 # shift up, but keep xmm0[0]\n\t" |
305 |
|
|
"addss %%xmm1,%%xmm0 # calculate sample position of sample[3]\n\t" |
306 |
|
|
"movss %%xmm0,%%xmm2 # xmm0[0] -> xmm2[0]\n\t" |
307 |
|
|
"addss %%xmm1,%%xmm2 # calculate initial sample position for the next 4-sample cycle\n\t" |
308 |
|
|
"movss %%xmm2,(%0) # update 'Pos'\n\t" |
309 |
|
|
"shufps $0x1b,%%xmm0,%%xmm0 # swap, so that xmm0[0]=sample pos 0, xmm0[1]=sample pos 1,...\n\t" |
310 |
|
|
"cvttps2pi %%xmm0,%%mm4 # int(xmm0[0-1]) -> mm4\n\t" |
311 |
|
|
"shufps $0xe4,%%xmm0,%%xmm1 # xmm0[2-3] -> xmm1[2-3]\n\t" |
312 |
|
|
"shufps $0x0e,%%xmm1,%%xmm1 # xmm1[2-3] -> xmm1[0-1]\n\t" |
313 |
|
|
"cvttps2pi %%xmm1,%%mm5 # int(xmm1[0-1]) -> mm5\n\t" |
314 |
|
|
"cvtpi2ps %%mm5,%%xmm1 # double(mm5) -> xmm1[0-1]\n\t" |
315 |
|
|
"shufps $0x44,%%xmm1,%%xmm1 # shift lower 2 FPs up to the upper 2 cells\n\t" |
316 |
|
|
"cvtpi2ps %%mm4,%%xmm1 # double(mm4) -> xmm1[0-1]\n\t" |
317 |
|
|
"subps %%xmm1,%%xmm0 # xmm0[1-3] = xmm0[1-3] - xmm1[1-3]\n\t" |
318 |
|
|
: |
319 |
|
|
: "r" (Pos), /* %0 */ |
320 |
|
|
"m" (Pitch) /* %1 */ |
321 |
|
|
: "%xmm0", /* holds fractional position (0.0 <= x < 1.0) of sample 0-3 at the end */ |
322 |
|
|
"%xmm1", /* holds integer position (back converted to SPFP) of sample 0-3 at the end */ |
323 |
|
|
"mm4", /* holds integer position of sample 0-1 at the end */ |
324 |
|
|
"mm5", /* holds integer position of sample 2-3 at the end */ |
325 |
|
|
"st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" |
326 |
|
|
); |
327 |
|
|
|
328 |
|
|
/* get sample values of pSrc[pos_int], pSrc[pos_int+1], pSrc[pos_int+2] and pSrc[pos_int+3] of the 4 samples */ |
329 |
|
|
__asm__ __volatile__ ( |
330 |
|
|
"xorl %%eax,%%eax # clear eax\n\t" |
331 |
|
|
"xorl %%edx,%%edx # clear edx\n\t" |
332 |
|
|
"movd %%mm4,%%edi # sample position of sample 0\n\t" |
333 |
|
|
"psrlq $32,%%mm4 # mm4 >> 32\n\t" |
334 |
|
|
"movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 0)\n\t" |
335 |
|
|
"cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t" |
336 |
|
|
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
337 |
|
|
"movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 0+1)\n\t" |
338 |
|
|
"cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t" |
339 |
|
|
"shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t" |
340 |
|
|
"movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 0+2)\n\t" |
341 |
|
|
"cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t" |
342 |
|
|
"shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t" |
343 |
|
|
"movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 0+3)\n\t" |
344 |
|
|
"cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t" |
345 |
|
|
"movd %%mm4,%%edi # sample position of sample 1\n\t" |
346 |
|
|
"shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t" |
347 |
|
|
"movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 1)\n\t" |
348 |
|
|
"cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t" |
349 |
|
|
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
350 |
|
|
"movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 1+1)\n\t" |
351 |
|
|
"cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t" |
352 |
|
|
"shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t" |
353 |
|
|
"movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 1+2)\n\t" |
354 |
|
|
"cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t" |
355 |
|
|
"shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t" |
356 |
|
|
"movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 1+3)\n\t" |
357 |
|
|
"cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t" |
358 |
|
|
"movd %%mm5,%%edi # sample position of sample 2\n\t" |
359 |
|
|
"shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t" |
360 |
|
|
"psrlq $32,%%mm5 # mm5 >> 32\n\t" |
361 |
|
|
"movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 2)\n\t" |
362 |
|
|
"cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t" |
363 |
|
|
"shufps $0x93, %%xmm2, %%xmm2 # shift up\n\t" |
364 |
|
|
"movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 2+1)\n\t" |
365 |
|
|
"cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t" |
366 |
|
|
"shufps $0x93, %%xmm3, %%xmm3 # shift up\n\t" |
367 |
|
|
"movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 2+2)\n\t" |
368 |
|
|
"cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t" |
369 |
|
|
"shufps $0x93, %%xmm4, %%xmm4 # shift up\n\t" |
370 |
|
|
"movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 2+3)\n\t" |
371 |
|
|
"cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t" |
372 |
|
|
"movd %%mm5,%%edi # sample position of sample 3\n\t" |
373 |
|
|
"shufps $0x93, %%xmm5, %%xmm5 # shift up\n\t" |
374 |
|
|
"movswl (%0,%%edi,4),%%eax # pSrc[pos_int] (sample 3)\n\t" |
375 |
|
|
"cvtsi2ss %%eax, %%xmm2 # pSrc[pos_int] -> xmm2[0]\n\t" |
376 |
|
|
"shufps $0x1b, %%xmm2, %%xmm2 # shift up\n\t" |
377 |
|
|
"movswl 2(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 3+1)\n\t" |
378 |
|
|
"cvtsi2ss %%edx, %%xmm3 # pSrc[pos_int] -> xmm3[0]\n\t" |
379 |
|
|
"shufps $0x1b, %%xmm3, %%xmm3 # shift up\n\t" |
380 |
|
|
"movswl 4(%0,%%edi,4),%%eax # pSrc[pos_int] (sample 3+2)\n\t" |
381 |
|
|
"cvtsi2ss %%eax, %%xmm4 # pSrc[pos_int] -> xmm4[0]\n\t" |
382 |
|
|
"shufps $0x1b, %%xmm4, %%xmm4 # swap to correct order\n\t" |
383 |
|
|
"movswl 6(%0,%%edi,4),%%edx # pSrc[pos_int] (sample 3+3)\n\t" |
384 |
|
|
"cvtsi2ss %%edx, %%xmm5 # pSrc[pos_int] -> xmm5[0]\n\t" |
385 |
|
|
"shufps $0x1b, %%xmm5, %%xmm5 # swap to correct order\n\t" |
386 |
|
|
: /* no output */ |
387 |
|
|
: "S" (pSrc) /* %0 - sample read position */ |
388 |
|
|
: "%eax", "%edx", "%edi", |
389 |
|
|
"xmm2", /* holds pSrc[int_pos] of the 4 samples at the end */ |
390 |
|
|
"xmm3", /* holds pSrc[int_pos+1] of the 4 samples at the end */ |
391 |
|
|
"xmm4", /* holds pSrc[int_pos+2] of the 4 samples at the end */ |
392 |
|
|
"xmm5", /* holds pSrc[int_pos+3] of the 4 samples at the end */ |
393 |
|
|
"mm4", /* holds integer position of sample 0-1 at the end */ |
394 |
|
|
"mm5", /* holds integer position of sample 2-3 at the end */ |
395 |
|
|
"st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)" |
396 |
|
|
); |
397 |
|
|
/* linear interpolation of the 4 samples (left & right channel) simultaniously */ |
398 |
|
|
__asm__ __volatile__ ( |
399 |
|
|
"subps %%xmm2,%%xmm4 # xmm4 = pSrc[pos_int+2] - pSrc[pos_int] (left channel)\n\t" |
400 |
|
|
"mulps %%xmm0,%%xmm4 # xmm4 = pos_fract * (pSrc[pos_int+2] - pSrc[pos_int]) (left channel)\n\t" |
401 |
|
|
"addps %%xmm4,%%xmm2 # xmm2 = pSrc[pos_int] + (pos_fract * (pSrc[pos_int+2] - pSrc[pos_int])) (left channel)\n\t" |
402 |
|
|
"subps %%xmm3,%%xmm5 # xmm5 = pSrc[pos_int+3] - pSrc[pos_int+1] (right channel)\n\t" |
403 |
|
|
"mulps %%xmm0,%%xmm5 # xmm5 = pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1]) (right channel)\n\t" |
404 |
|
|
"addps %%xmm5,%%xmm3 # xmm3 = pSrc[pos_int+1] + (pos_fract * (pSrc[pos_int+3] - pSrc[pos_int+1])) (right channel)\n\t" |
405 |
|
|
: /* no output */ |
406 |
|
|
: /* no input */ |
407 |
|
|
: "%xmm2", /* holds linear interpolated sample of left channel (of all 4 samples) at the end */ |
408 |
|
|
"%xmm3" /* holds linear interpolated sample of right channel (of all 4 samples) at the end */ |
409 |
|
|
); |
410 |
|
|
} |
411 |
schoenebeck |
617 |
#endif // CONFIG_ASM && ARCH_X86 |
412 |
schoenebeck |
320 |
}; |
413 |
|
|
|
414 |
|
|
} // namespace LinuxSampler |
415 |
|
|
|
416 |
|
|
#endif // __LS_RESAMPLER_H__ |