michael@0: /* Copyright (c) 2007-2008 CSIRO michael@0: Copyright (c) 2007-2008 Xiph.Org Foundation michael@0: Written by Jean-Marc Valin */ michael@0: /* michael@0: Redistribution and use in source and binary forms, with or without michael@0: modification, are permitted provided that the following conditions michael@0: are met: michael@0: michael@0: - Redistributions of source code must retain the above copyright michael@0: notice, this list of conditions and the following disclaimer. michael@0: michael@0: - Redistributions in binary form must reproduce the above copyright michael@0: notice, this list of conditions and the following disclaimer in the michael@0: documentation and/or other materials provided with the distribution. michael@0: michael@0: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS michael@0: ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT michael@0: LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR michael@0: A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER michael@0: OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, michael@0: EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, michael@0: PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR michael@0: PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF michael@0: LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING michael@0: NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS michael@0: SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. michael@0: */ michael@0: michael@0: /* This is a simple MDCT implementation that uses a N/4 complex FFT michael@0: to do most of the work. It should be relatively straightforward to michael@0: plug in pretty much any FFT here. 
michael@0: michael@0: This replaces the Vorbis FFT (and uses the exact same API), which michael@0: was a bit too messy and that was ending up duplicating code michael@0: (might as well use the same FFT everywhere). michael@0: michael@0: The algorithm is similar to (and inspired from) Fabrice Bellard's michael@0: MDCT implementation in FFMPEG, but has differences in signs, ordering michael@0: and scaling in many places. michael@0: */ michael@0: michael@0: #ifndef SKIP_CONFIG_H michael@0: #ifdef HAVE_CONFIG_H michael@0: #include "config.h" michael@0: #endif michael@0: #endif michael@0: michael@0: #include "mdct.h" michael@0: #include "kiss_fft.h" michael@0: #include "_kiss_fft_guts.h" michael@0: #include michael@0: #include "os_support.h" michael@0: #include "mathops.h" michael@0: #include "stack_alloc.h" michael@0: michael@0: #ifdef CUSTOM_MODES michael@0: michael@0: int clt_mdct_init(mdct_lookup *l,int N, int maxshift) michael@0: { michael@0: int i; michael@0: int N4; michael@0: kiss_twiddle_scalar *trig; michael@0: #if defined(FIXED_POINT) michael@0: int N2=N>>1; michael@0: #endif michael@0: l->n = N; michael@0: N4 = N>>2; michael@0: l->maxshift = maxshift; michael@0: for (i=0;i<=maxshift;i++) michael@0: { michael@0: if (i==0) michael@0: l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0); michael@0: else michael@0: l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]); michael@0: #ifndef ENABLE_TI_DSPLIB55 michael@0: if (l->kfft[i]==NULL) michael@0: return 0; michael@0: #endif michael@0: } michael@0: l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar)); michael@0: if (l->trig==NULL) michael@0: return 0; michael@0: /* We have enough points that sine isn't necessary */ michael@0: #if defined(FIXED_POINT) michael@0: for (i=0;i<=N4;i++) michael@0: trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N)); michael@0: #else michael@0: for (i=0;i<=N4;i++) michael@0: trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N); 
michael@0: #endif michael@0: return 1; michael@0: } michael@0: michael@0: void clt_mdct_clear(mdct_lookup *l) michael@0: { michael@0: int i; michael@0: for (i=0;i<=l->maxshift;i++) michael@0: opus_fft_free(l->kfft[i]); michael@0: opus_free((kiss_twiddle_scalar*)l->trig); michael@0: } michael@0: michael@0: #endif /* CUSTOM_MODES */ michael@0: michael@0: /* Forward MDCT trashes the input array */ michael@0: void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, michael@0: const opus_val16 *window, int overlap, int shift, int stride) michael@0: { michael@0: int i; michael@0: int N, N2, N4; michael@0: kiss_twiddle_scalar sine; michael@0: VARDECL(kiss_fft_scalar, f); michael@0: VARDECL(kiss_fft_scalar, f2); michael@0: SAVE_STACK; michael@0: N = l->n; michael@0: N >>= shift; michael@0: N2 = N>>1; michael@0: N4 = N>>2; michael@0: ALLOC(f, N2, kiss_fft_scalar); michael@0: ALLOC(f2, N2, kiss_fft_scalar); michael@0: /* sin(x) ~= x here */ michael@0: #ifdef FIXED_POINT michael@0: sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; michael@0: #else michael@0: sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; michael@0: #endif michael@0: michael@0: /* Consider the input to be composed of four blocks: [a, b, c, d] */ michael@0: /* Window, shuffle, fold */ michael@0: { michael@0: /* Temp pointers to make it really clear to the compiler what we're doing */ michael@0: const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); michael@0: const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); michael@0: kiss_fft_scalar * OPUS_RESTRICT yp = f; michael@0: const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); michael@0: const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; michael@0: for(i=0;i<((overlap+3)>>2);i++) michael@0: { michael@0: /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ michael@0: *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); michael@0: *yp++ = MULT16_32_Q15(*wp1, *xp1) 
- MULT16_32_Q15(*wp2, xp2[-N2]); michael@0: xp1+=2; michael@0: xp2-=2; michael@0: wp1+=2; michael@0: wp2-=2; michael@0: } michael@0: wp1 = window; michael@0: wp2 = window+overlap-1; michael@0: for(;i>2);i++) michael@0: { michael@0: /* Real part arranged as a-bR, Imag part arranged as -c-dR */ michael@0: *yp++ = *xp2; michael@0: *yp++ = *xp1; michael@0: xp1+=2; michael@0: xp2-=2; michael@0: } michael@0: for(;itrig[0]; michael@0: for(i=0;ikfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2); michael@0: michael@0: /* Post-rotate */ michael@0: { michael@0: /* Temp pointers to make it really clear to the compiler what we're doing */ michael@0: const kiss_fft_scalar * OPUS_RESTRICT fp = f2; michael@0: kiss_fft_scalar * OPUS_RESTRICT yp1 = out; michael@0: kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); michael@0: const kiss_twiddle_scalar *t = &l->trig[0]; michael@0: /* Temp pointers to make it really clear to the compiler what we're doing */ michael@0: for(i=0;in; michael@0: N >>= shift; michael@0: N2 = N>>1; michael@0: N4 = N>>2; michael@0: ALLOC(f2, N2, kiss_fft_scalar); michael@0: /* sin(x) ~= x here */ michael@0: #ifdef FIXED_POINT michael@0: sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; michael@0: #else michael@0: sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; michael@0: #endif michael@0: michael@0: /* Pre-rotate */ michael@0: { michael@0: /* Temp pointers to make it really clear to the compiler what we're doing */ michael@0: const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; michael@0: const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); michael@0: kiss_fft_scalar * OPUS_RESTRICT yp = f2; michael@0: const kiss_twiddle_scalar *t = &l->trig[0]; michael@0: for(i=0;ikfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1))); michael@0: michael@0: /* Post-rotate and de-shuffle from both ends of the buffer at once to make michael@0: it in-place. 
*/ michael@0: { michael@0: kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); michael@0: kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; michael@0: const kiss_twiddle_scalar *t = &l->trig[0]; michael@0: /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the michael@0: middle pair will be computed twice. */ michael@0: for(i=0;i<(N4+1)>>1;i++) michael@0: { michael@0: kiss_fft_scalar re, im, yr, yi; michael@0: kiss_twiddle_scalar t0, t1; michael@0: re = yp0[0]; michael@0: im = yp0[1]; michael@0: t0 = t[i<