michael@0: /*Copyright (c) 2003-2004, Mark Borgerding michael@0: Lots of modifications by Jean-Marc Valin michael@0: Copyright (c) 2005-2007, Xiph.Org Foundation michael@0: Copyright (c) 2008, Xiph.Org Foundation, CSIRO michael@0: michael@0: All rights reserved. michael@0: michael@0: Redistribution and use in source and binary forms, with or without michael@0: modification, are permitted provided that the following conditions are met: michael@0: michael@0: * Redistributions of source code must retain the above copyright notice, michael@0: this list of conditions and the following disclaimer. michael@0: * Redistributions in binary form must reproduce the above copyright notice, michael@0: this list of conditions and the following disclaimer in the michael@0: documentation and/or other materials provided with the distribution. michael@0: michael@0: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" michael@0: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE michael@0: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE michael@0: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE michael@0: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR michael@0: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF michael@0: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS michael@0: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN michael@0: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) michael@0: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE michael@0: POSSIBILITY OF SUCH DAMAGE.*/ michael@0: michael@0: /* This code is originally from Mark Borgerding's KISS-FFT but has been michael@0: heavily modified to better suit Opus */ michael@0: michael@0: #ifndef SKIP_CONFIG_H michael@0: # ifdef HAVE_CONFIG_H michael@0: # include "config.h" michael@0: # endif michael@0: #endif michael@0: michael@0: #include "_kiss_fft_guts.h" michael@0: #include "arch.h" michael@0: #include "os_support.h" michael@0: #include "mathops.h" michael@0: #include "stack_alloc.h" michael@0: michael@0: /* The guts header contains all the multiplication and addition macros that are defined for michael@0: complex numbers. It also delares the kf_ internal functions. michael@0: */ michael@0: michael@0: static void kf_bfly2( michael@0: kiss_fft_cpx * Fout, michael@0: const size_t fstride, michael@0: const kiss_fft_state *st, michael@0: int m, michael@0: int N, michael@0: int mm michael@0: ) michael@0: { michael@0: kiss_fft_cpx * Fout2; michael@0: const kiss_twiddle_cpx * tw1; michael@0: int i,j; michael@0: kiss_fft_cpx * Fout_beg = Fout; michael@0: for (i=0;itwiddles; michael@0: for(j=0;jr = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1); michael@0: Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1); michael@0: C_MUL (t, *Fout2 , *tw1); michael@0: tw1 += fstride; michael@0: C_SUB( *Fout2 , *Fout , t ); michael@0: C_ADDTO( *Fout , t ); michael@0: ++Fout2; michael@0: ++Fout; michael@0: } michael@0: } michael@0: } michael@0: michael@0: static void ki_bfly2( michael@0: kiss_fft_cpx * Fout, michael@0: const size_t fstride, michael@0: const kiss_fft_state *st, michael@0: int m, michael@0: int N, michael@0: int mm michael@0: ) michael@0: { michael@0: kiss_fft_cpx * Fout2; michael@0: const kiss_twiddle_cpx * tw1; michael@0: kiss_fft_cpx t; michael@0: int i,j; michael@0: kiss_fft_cpx * Fout_beg = Fout; michael@0: for (i=0;itwiddles; michael@0: for(j=0;jtwiddles; michael@0: for (j=0;jr = PSHR32(Fout->r, 2); michael@0: Fout->i = PSHR32(Fout->i, 2); michael@0: C_SUB( scratch[5] , *Fout, scratch[1] ); michael@0: C_ADDTO(*Fout, scratch[1]); michael@0: C_ADD( scratch[3] , scratch[0] , scratch[2] ); michael@0: C_SUB( scratch[4] , scratch[0] , scratch[2] ); michael@0: C_SUB( Fout[m2], *Fout, scratch[3] ); michael@0: tw1 += fstride; michael@0: tw2 += fstride*2; michael@0: tw3 += fstride*3; michael@0: C_ADDTO( *Fout , scratch[3] ); michael@0: michael@0: Fout[m].r = scratch[5].r + scratch[4].i; michael@0: Fout[m].i = scratch[5].i - scratch[4].r; michael@0: Fout[m3].r = scratch[5].r - scratch[4].i; michael@0: Fout[m3].i = scratch[5].i + scratch[4].r; michael@0: ++Fout; michael@0: } michael@0: } michael@0: } michael@0: michael@0: static void ki_bfly4( michael@0: kiss_fft_cpx * Fout, michael@0: const size_t fstride, michael@0: const kiss_fft_state *st, michael@0: int m, michael@0: int N, michael@0: int mm michael@0: ) michael@0: { michael@0: const kiss_twiddle_cpx *tw1,*tw2,*tw3; michael@0: kiss_fft_cpx scratch[6]; michael@0: const size_t m2=2*m; michael@0: const size_t m3=3*m; michael@0: int i, j; michael@0: michael@0: kiss_fft_cpx * Fout_beg = Fout; michael@0: for (i=0;itwiddles; michael@0: for (j=0;jtwiddles[fstride*m]; michael@0: for (i=0;itwiddles; michael@0: k=m; michael@0: do { michael@0: C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); michael@0: michael@0: C_MUL(scratch[1],Fout[m] , *tw1); michael@0: C_MUL(scratch[2],Fout[m2] , *tw2); michael@0: michael@0: C_ADD(scratch[3],scratch[1],scratch[2]); michael@0: C_SUB(scratch[0],scratch[1],scratch[2]); michael@0: tw1 += fstride; michael@0: tw2 += fstride*2; michael@0: michael@0: Fout[m].r = Fout->r - HALF_OF(scratch[3].r); michael@0: Fout[m].i = Fout->i - HALF_OF(scratch[3].i); michael@0: michael@0: C_MULBYSCALAR( scratch[0] , epi3.i ); michael@0: michael@0: C_ADDTO(*Fout,scratch[3]); michael@0: michael@0: Fout[m2].r = Fout[m].r + scratch[0].i; michael@0: Fout[m2].i = Fout[m].i - scratch[0].r; michael@0: michael@0: Fout[m].r -= scratch[0].i; michael@0: Fout[m].i += scratch[0].r; michael@0: michael@0: ++Fout; michael@0: } while(--k); michael@0: } michael@0: } michael@0: michael@0: static void ki_bfly3( michael@0: kiss_fft_cpx * Fout, michael@0: const size_t fstride, michael@0: const kiss_fft_state *st, michael@0: int m, michael@0: int N, michael@0: int mm michael@0: ) michael@0: { michael@0: int i, k; michael@0: const size_t m2 = 2*m; michael@0: const kiss_twiddle_cpx *tw1,*tw2; michael@0: kiss_fft_cpx scratch[5]; michael@0: kiss_twiddle_cpx epi3; michael@0: michael@0: kiss_fft_cpx * Fout_beg = Fout; michael@0: epi3 = st->twiddles[fstride*m]; michael@0: for (i=0;itwiddles; michael@0: k=m; michael@0: do{ michael@0: michael@0: C_MULC(scratch[1],Fout[m] , *tw1); michael@0: C_MULC(scratch[2],Fout[m2] , *tw2); michael@0: michael@0: C_ADD(scratch[3],scratch[1],scratch[2]); michael@0: C_SUB(scratch[0],scratch[1],scratch[2]); michael@0: tw1 += fstride; michael@0: tw2 += fstride*2; michael@0: michael@0: Fout[m].r = Fout->r - HALF_OF(scratch[3].r); michael@0: Fout[m].i = Fout->i - HALF_OF(scratch[3].i); michael@0: michael@0: C_MULBYSCALAR( scratch[0] , -epi3.i ); michael@0: michael@0: C_ADDTO(*Fout,scratch[3]); michael@0: michael@0: Fout[m2].r = Fout[m].r + scratch[0].i; michael@0: Fout[m2].i = Fout[m].i - scratch[0].r; michael@0: michael@0: Fout[m].r -= scratch[0].i; michael@0: Fout[m].i += scratch[0].r; michael@0: michael@0: ++Fout; michael@0: }while(--k); michael@0: } michael@0: } michael@0: michael@0: static void kf_bfly5( michael@0: kiss_fft_cpx * Fout, michael@0: const size_t fstride, michael@0: const kiss_fft_state *st, michael@0: int m, michael@0: int N, michael@0: int mm michael@0: ) michael@0: { michael@0: kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; michael@0: int i, u; michael@0: kiss_fft_cpx scratch[13]; michael@0: const kiss_twiddle_cpx * twiddles = st->twiddles; michael@0: const kiss_twiddle_cpx *tw; michael@0: kiss_twiddle_cpx ya,yb; michael@0: kiss_fft_cpx * Fout_beg = Fout; michael@0: michael@0: ya = twiddles[fstride*m]; michael@0: yb = twiddles[fstride*2*m]; michael@0: tw=st->twiddles; michael@0: michael@0: for (i=0;ir += scratch[7].r + scratch[8].r; michael@0: Fout0->i += scratch[7].i + scratch[8].i; michael@0: michael@0: scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); michael@0: scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); michael@0: michael@0: scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); michael@0: scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); michael@0: michael@0: C_SUB(*Fout1,scratch[5],scratch[6]); michael@0: C_ADD(*Fout4,scratch[5],scratch[6]); michael@0: michael@0: scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); michael@0: scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); michael@0: scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); michael@0: scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); michael@0: michael@0: C_ADD(*Fout2,scratch[11],scratch[12]); michael@0: C_SUB(*Fout3,scratch[11],scratch[12]); michael@0: michael@0: ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; michael@0: } michael@0: } michael@0: } michael@0: michael@0: static void ki_bfly5( michael@0: kiss_fft_cpx * Fout, michael@0: const size_t fstride, michael@0: const kiss_fft_state *st, michael@0: int m, michael@0: int N, michael@0: int mm michael@0: ) michael@0: { michael@0: kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; michael@0: int i, u; michael@0: kiss_fft_cpx scratch[13]; michael@0: const kiss_twiddle_cpx * twiddles = st->twiddles; michael@0: const kiss_twiddle_cpx *tw; michael@0: kiss_twiddle_cpx ya,yb; michael@0: kiss_fft_cpx * Fout_beg = Fout; michael@0: michael@0: ya = twiddles[fstride*m]; michael@0: yb = twiddles[fstride*2*m]; michael@0: tw=st->twiddles; michael@0: michael@0: for (i=0;ir += scratch[7].r + scratch[8].r; michael@0: Fout0->i += scratch[7].i + scratch[8].i; michael@0: michael@0: scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); michael@0: scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); michael@0: michael@0: scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); michael@0: scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); michael@0: michael@0: C_SUB(*Fout1,scratch[5],scratch[6]); michael@0: C_ADD(*Fout4,scratch[5],scratch[6]); michael@0: michael@0: scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); michael@0: scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); michael@0: scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); michael@0: scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); michael@0: michael@0: C_ADD(*Fout2,scratch[11],scratch[12]); michael@0: C_SUB(*Fout3,scratch[11],scratch[12]); michael@0: michael@0: ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; michael@0: } michael@0: } michael@0: } michael@0: michael@0: #endif michael@0: michael@0: michael@0: #ifdef CUSTOM_MODES michael@0: michael@0: static michael@0: void compute_bitrev_table( michael@0: int Fout, michael@0: opus_int16 *f, michael@0: const size_t fstride, michael@0: int in_stride, michael@0: opus_int16 * factors, michael@0: const kiss_fft_state *st michael@0: ) michael@0: { michael@0: const int p=*factors++; /* the radix */ michael@0: const int m=*factors++; /* stage's fft length/p */ michael@0: michael@0: /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/ michael@0: if (m==1) michael@0: { michael@0: int j; michael@0: for (j=0;j32000 || (opus_int32)p*(opus_int32)p > n) michael@0: p = n; /* no more factors, skip to end */ michael@0: } michael@0: n /= p; michael@0: #ifdef RADIX_TWO_ONLY michael@0: if (p!=2 && p != 4) michael@0: #else michael@0: if (p>5) michael@0: #endif michael@0: { michael@0: return 0; michael@0: } michael@0: *facbuf++ = p; michael@0: *facbuf++ = n; michael@0: } while (n > 1); michael@0: return 1; michael@0: } michael@0: michael@0: static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft) michael@0: { michael@0: int i; michael@0: #ifdef FIXED_POINT michael@0: for (i=0;i= memneeded) michael@0: st = (kiss_fft_state*)mem; michael@0: *lenmem = memneeded; michael@0: } michael@0: if (st) { michael@0: opus_int16 *bitrev; michael@0: kiss_twiddle_cpx *twiddles; michael@0: michael@0: st->nfft=nfft; michael@0: #ifndef FIXED_POINT michael@0: st->scale = 1.f/nfft; michael@0: #endif michael@0: if (base != NULL) michael@0: { michael@0: st->twiddles = base->twiddles; michael@0: st->shift = 0; michael@0: while (nfft<shift != base->nfft && st->shift < 32) michael@0: st->shift++; michael@0: if (st->shift>=32) michael@0: goto fail; michael@0: } else { michael@0: st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft); michael@0: compute_twiddles(twiddles, nfft); michael@0: st->shift = -1; michael@0: } michael@0: if (!kf_factor(nfft,st->factors)) michael@0: { michael@0: goto fail; michael@0: } michael@0: michael@0: /* bitrev */ michael@0: st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft); michael@0: if (st->bitrev==NULL) michael@0: goto fail; michael@0: compute_bitrev_table(0, bitrev, 1,1, st->factors,st); michael@0: } michael@0: return st; michael@0: fail: michael@0: opus_fft_free(st); michael@0: return NULL; michael@0: } michael@0: michael@0: kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem ) michael@0: { michael@0: return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL); michael@0: } michael@0: michael@0: void opus_fft_free(const kiss_fft_state *cfg) michael@0: { michael@0: if (cfg) michael@0: { michael@0: opus_free((opus_int16*)cfg->bitrev); michael@0: if (cfg->shift < 0) michael@0: opus_free((kiss_twiddle_cpx*)cfg->twiddles); michael@0: opus_free((kiss_fft_state*)cfg); michael@0: } michael@0: } michael@0: michael@0: #endif /* CUSTOM_MODES */ michael@0: michael@0: void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) michael@0: { michael@0: int m2, m; michael@0: int p; michael@0: int L; michael@0: int fstride[MAXFACTORS]; michael@0: int i; michael@0: int shift; michael@0: michael@0: /* st->shift can be -1 */ michael@0: shift = st->shift>0 ? st->shift : 0; michael@0: michael@0: celt_assert2 (fin != fout, "In-place FFT not supported"); michael@0: /* Bit-reverse the input */ michael@0: for (i=0;infft;i++) michael@0: { michael@0: fout[st->bitrev[i]] = fin[i]; michael@0: #ifndef FIXED_POINT michael@0: fout[st->bitrev[i]].r *= st->scale; michael@0: fout[st->bitrev[i]].i *= st->scale; michael@0: #endif michael@0: } michael@0: michael@0: fstride[0] = 1; michael@0: L=0; michael@0: do { michael@0: p = st->factors[2*L]; michael@0: m = st->factors[2*L+1]; michael@0: fstride[L+1] = fstride[L]*p; michael@0: L++; michael@0: } while(m!=1); michael@0: m = st->factors[2*L-1]; michael@0: for (i=L-1;i>=0;i--) michael@0: { michael@0: if (i!=0) michael@0: m2 = st->factors[2*i-1]; michael@0: else michael@0: m2 = 1; michael@0: switch (st->factors[2*i]) michael@0: { michael@0: case 2: michael@0: kf_bfly2(fout,fstride[i]<shift can be -1 */ michael@0: shift = st->shift>0 ? st->shift : 0; michael@0: celt_assert2 (fin != fout, "In-place FFT not supported"); michael@0: /* Bit-reverse the input */ michael@0: for (i=0;infft;i++) michael@0: fout[st->bitrev[i]] = fin[i]; michael@0: michael@0: fstride[0] = 1; michael@0: L=0; michael@0: do { michael@0: p = st->factors[2*L]; michael@0: m = st->factors[2*L+1]; michael@0: fstride[L+1] = fstride[L]*p; michael@0: L++; michael@0: } while(m!=1); michael@0: m = st->factors[2*L-1]; michael@0: for (i=L-1;i>=0;i--) michael@0: { michael@0: if (i!=0) michael@0: m2 = st->factors[2*i-1]; michael@0: else michael@0: m2 = 1; michael@0: switch (st->factors[2*i]) michael@0: { michael@0: case 2: michael@0: ki_bfly2(fout,fstride[i]<