|
1 /* Copyright (c) 2007-2008 CSIRO |
|
2 Copyright (c) 2007-2008 Xiph.Org Foundation |
|
3 Written by Jean-Marc Valin */ |
|
4 /* |
|
5 Redistribution and use in source and binary forms, with or without |
|
6 modification, are permitted provided that the following conditions |
|
7 are met: |
|
8 |
|
9 - Redistributions of source code must retain the above copyright |
|
10 notice, this list of conditions and the following disclaimer. |
|
11 |
|
12 - Redistributions in binary form must reproduce the above copyright |
|
13 notice, this list of conditions and the following disclaimer in the |
|
14 documentation and/or other materials provided with the distribution. |
|
15 |
|
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
|
20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|
23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
|
24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|
25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|
26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
27 */ |
|
28 |
|
29 /* This is a simple MDCT implementation that uses a N/4 complex FFT |
|
30 to do most of the work. It should be relatively straightforward to |
|
31 plug in pretty much and FFT here. |
|
32 |
|
33 This replaces the Vorbis FFT (and uses the exact same API), which |
|
34 was a bit too messy and that was ending up duplicating code |
|
35 (might as well use the same FFT everywhere). |
|
36 |
|
37 The algorithm is similar to (and inspired from) Fabrice Bellard's |
|
38 MDCT implementation in FFMPEG, but has differences in signs, ordering |
|
39 and scaling in many places. |
|
40 */ |
|
41 |
|
42 #ifndef SKIP_CONFIG_H |
|
43 #ifdef HAVE_CONFIG_H |
|
44 #include "config.h" |
|
45 #endif |
|
46 #endif |
|
47 |
|
48 #include "mdct.h" |
|
49 #include "kiss_fft.h" |
|
50 #include "_kiss_fft_guts.h" |
|
51 #include <math.h> |
|
52 #include "os_support.h" |
|
53 #include "mathops.h" |
|
54 #include "stack_alloc.h" |
|
55 |
|
56 #ifdef CUSTOM_MODES |
|
57 |
|
58 int clt_mdct_init(mdct_lookup *l,int N, int maxshift) |
|
59 { |
|
60 int i; |
|
61 int N4; |
|
62 kiss_twiddle_scalar *trig; |
|
63 #if defined(FIXED_POINT) |
|
64 int N2=N>>1; |
|
65 #endif |
|
66 l->n = N; |
|
67 N4 = N>>2; |
|
68 l->maxshift = maxshift; |
|
69 for (i=0;i<=maxshift;i++) |
|
70 { |
|
71 if (i==0) |
|
72 l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0); |
|
73 else |
|
74 l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]); |
|
75 #ifndef ENABLE_TI_DSPLIB55 |
|
76 if (l->kfft[i]==NULL) |
|
77 return 0; |
|
78 #endif |
|
79 } |
|
80 l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar)); |
|
81 if (l->trig==NULL) |
|
82 return 0; |
|
83 /* We have enough points that sine isn't necessary */ |
|
84 #if defined(FIXED_POINT) |
|
85 for (i=0;i<=N4;i++) |
|
86 trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N)); |
|
87 #else |
|
88 for (i=0;i<=N4;i++) |
|
89 trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N); |
|
90 #endif |
|
91 return 1; |
|
92 } |
|
93 |
|
94 void clt_mdct_clear(mdct_lookup *l) |
|
95 { |
|
96 int i; |
|
97 for (i=0;i<=l->maxshift;i++) |
|
98 opus_fft_free(l->kfft[i]); |
|
99 opus_free((kiss_twiddle_scalar*)l->trig); |
|
100 } |
|
101 |
|
102 #endif /* CUSTOM_MODES */ |
|
103 |
|
104 /* Forward MDCT trashes the input array */ |
|
105 void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, |
|
106 const opus_val16 *window, int overlap, int shift, int stride) |
|
107 { |
|
108 int i; |
|
109 int N, N2, N4; |
|
110 kiss_twiddle_scalar sine; |
|
111 VARDECL(kiss_fft_scalar, f); |
|
112 VARDECL(kiss_fft_scalar, f2); |
|
113 SAVE_STACK; |
|
114 N = l->n; |
|
115 N >>= shift; |
|
116 N2 = N>>1; |
|
117 N4 = N>>2; |
|
118 ALLOC(f, N2, kiss_fft_scalar); |
|
119 ALLOC(f2, N2, kiss_fft_scalar); |
|
120 /* sin(x) ~= x here */ |
|
121 #ifdef FIXED_POINT |
|
122 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; |
|
123 #else |
|
124 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; |
|
125 #endif |
|
126 |
|
127 /* Consider the input to be composed of four blocks: [a, b, c, d] */ |
|
128 /* Window, shuffle, fold */ |
|
129 { |
|
130 /* Temp pointers to make it really clear to the compiler what we're doing */ |
|
131 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); |
|
132 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); |
|
133 kiss_fft_scalar * OPUS_RESTRICT yp = f; |
|
134 const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); |
|
135 const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; |
|
136 for(i=0;i<((overlap+3)>>2);i++) |
|
137 { |
|
138 /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ |
|
139 *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); |
|
140 *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); |
|
141 xp1+=2; |
|
142 xp2-=2; |
|
143 wp1+=2; |
|
144 wp2-=2; |
|
145 } |
|
146 wp1 = window; |
|
147 wp2 = window+overlap-1; |
|
148 for(;i<N4-((overlap+3)>>2);i++) |
|
149 { |
|
150 /* Real part arranged as a-bR, Imag part arranged as -c-dR */ |
|
151 *yp++ = *xp2; |
|
152 *yp++ = *xp1; |
|
153 xp1+=2; |
|
154 xp2-=2; |
|
155 } |
|
156 for(;i<N4;i++) |
|
157 { |
|
158 /* Real part arranged as a-bR, Imag part arranged as -c-dR */ |
|
159 *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2); |
|
160 *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]); |
|
161 xp1+=2; |
|
162 xp2-=2; |
|
163 wp1+=2; |
|
164 wp2-=2; |
|
165 } |
|
166 } |
|
167 /* Pre-rotation */ |
|
168 { |
|
169 kiss_fft_scalar * OPUS_RESTRICT yp = f; |
|
170 const kiss_twiddle_scalar *t = &l->trig[0]; |
|
171 for(i=0;i<N4;i++) |
|
172 { |
|
173 kiss_fft_scalar re, im, yr, yi; |
|
174 re = yp[0]; |
|
175 im = yp[1]; |
|
176 yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); |
|
177 yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); |
|
178 /* works because the cos is nearly one */ |
|
179 *yp++ = yr + S_MUL(yi,sine); |
|
180 *yp++ = yi - S_MUL(yr,sine); |
|
181 } |
|
182 } |
|
183 |
|
184 /* N/4 complex FFT, down-scales by 4/N */ |
|
185 opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2); |
|
186 |
|
187 /* Post-rotate */ |
|
188 { |
|
189 /* Temp pointers to make it really clear to the compiler what we're doing */ |
|
190 const kiss_fft_scalar * OPUS_RESTRICT fp = f2; |
|
191 kiss_fft_scalar * OPUS_RESTRICT yp1 = out; |
|
192 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); |
|
193 const kiss_twiddle_scalar *t = &l->trig[0]; |
|
194 /* Temp pointers to make it really clear to the compiler what we're doing */ |
|
195 for(i=0;i<N4;i++) |
|
196 { |
|
197 kiss_fft_scalar yr, yi; |
|
198 yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]); |
|
199 yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]); |
|
200 /* works because the cos is nearly one */ |
|
201 *yp1 = yr - S_MUL(yi,sine); |
|
202 *yp2 = yi + S_MUL(yr,sine);; |
|
203 fp += 2; |
|
204 yp1 += 2*stride; |
|
205 yp2 -= 2*stride; |
|
206 } |
|
207 } |
|
208 RESTORE_STACK; |
|
209 } |
|
210 |
|
211 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, |
|
212 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) |
|
213 { |
|
214 int i; |
|
215 int N, N2, N4; |
|
216 kiss_twiddle_scalar sine; |
|
217 VARDECL(kiss_fft_scalar, f2); |
|
218 SAVE_STACK; |
|
219 N = l->n; |
|
220 N >>= shift; |
|
221 N2 = N>>1; |
|
222 N4 = N>>2; |
|
223 ALLOC(f2, N2, kiss_fft_scalar); |
|
224 /* sin(x) ~= x here */ |
|
225 #ifdef FIXED_POINT |
|
226 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; |
|
227 #else |
|
228 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; |
|
229 #endif |
|
230 |
|
231 /* Pre-rotate */ |
|
232 { |
|
233 /* Temp pointers to make it really clear to the compiler what we're doing */ |
|
234 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; |
|
235 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); |
|
236 kiss_fft_scalar * OPUS_RESTRICT yp = f2; |
|
237 const kiss_twiddle_scalar *t = &l->trig[0]; |
|
238 for(i=0;i<N4;i++) |
|
239 { |
|
240 kiss_fft_scalar yr, yi; |
|
241 yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]); |
|
242 yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]); |
|
243 /* works because the cos is nearly one */ |
|
244 *yp++ = yr - S_MUL(yi,sine); |
|
245 *yp++ = yi + S_MUL(yr,sine); |
|
246 xp1+=2*stride; |
|
247 xp2-=2*stride; |
|
248 } |
|
249 } |
|
250 |
|
251 /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ |
|
252 opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1))); |
|
253 |
|
254 /* Post-rotate and de-shuffle from both ends of the buffer at once to make |
|
255 it in-place. */ |
|
256 { |
|
257 kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); |
|
258 kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; |
|
259 const kiss_twiddle_scalar *t = &l->trig[0]; |
|
260 /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the |
|
261 middle pair will be computed twice. */ |
|
262 for(i=0;i<(N4+1)>>1;i++) |
|
263 { |
|
264 kiss_fft_scalar re, im, yr, yi; |
|
265 kiss_twiddle_scalar t0, t1; |
|
266 re = yp0[0]; |
|
267 im = yp0[1]; |
|
268 t0 = t[i<<shift]; |
|
269 t1 = t[(N4-i)<<shift]; |
|
270 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ |
|
271 yr = S_MUL(re,t0) - S_MUL(im,t1); |
|
272 yi = S_MUL(im,t0) + S_MUL(re,t1); |
|
273 re = yp1[0]; |
|
274 im = yp1[1]; |
|
275 /* works because the cos is nearly one */ |
|
276 yp0[0] = -(yr - S_MUL(yi,sine)); |
|
277 yp1[1] = yi + S_MUL(yr,sine); |
|
278 |
|
279 t0 = t[(N4-i-1)<<shift]; |
|
280 t1 = t[(i+1)<<shift]; |
|
281 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ |
|
282 yr = S_MUL(re,t0) - S_MUL(im,t1); |
|
283 yi = S_MUL(im,t0) + S_MUL(re,t1); |
|
284 /* works because the cos is nearly one */ |
|
285 yp1[0] = -(yr - S_MUL(yi,sine)); |
|
286 yp0[1] = yi + S_MUL(yr,sine); |
|
287 yp0 += 2; |
|
288 yp1 -= 2; |
|
289 } |
|
290 } |
|
291 |
|
292 /* Mirror on both sides for TDAC */ |
|
293 { |
|
294 kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; |
|
295 kiss_fft_scalar * OPUS_RESTRICT yp1 = out; |
|
296 const opus_val16 * OPUS_RESTRICT wp1 = window; |
|
297 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; |
|
298 |
|
299 for(i = 0; i < overlap/2; i++) |
|
300 { |
|
301 kiss_fft_scalar x1, x2; |
|
302 x1 = *xp1; |
|
303 x2 = *yp1; |
|
304 *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); |
|
305 *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); |
|
306 wp1++; |
|
307 wp2--; |
|
308 } |
|
309 } |
|
310 RESTORE_STACK; |
|
311 } |