media/libopus/celt/kiss_fft.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*Copyright (c) 2003-2004, Mark Borgerding
michael@0 2 Lots of modifications by Jean-Marc Valin
michael@0 3 Copyright (c) 2005-2007, Xiph.Org Foundation
michael@0 4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO
michael@0 5
michael@0 6 All rights reserved.
michael@0 7
michael@0 8 Redistribution and use in source and binary forms, with or without
michael@0 9 modification, are permitted provided that the following conditions are met:
michael@0 10
michael@0 11 * Redistributions of source code must retain the above copyright notice,
michael@0 12 this list of conditions and the following disclaimer.
michael@0 13 * Redistributions in binary form must reproduce the above copyright notice,
michael@0 14 this list of conditions and the following disclaimer in the
michael@0 15 documentation and/or other materials provided with the distribution.
michael@0 16
michael@0 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
michael@0 18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
michael@0 19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
michael@0 20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
michael@0 21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
michael@0 22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
michael@0 23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
michael@0 24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
michael@0 25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
michael@0 26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
michael@0 27 POSSIBILITY OF SUCH DAMAGE.*/
michael@0 28
michael@0 29 /* This code is originally from Mark Borgerding's KISS-FFT but has been
michael@0 30 heavily modified to better suit Opus */
michael@0 31
michael@0 32 #ifndef SKIP_CONFIG_H
michael@0 33 # ifdef HAVE_CONFIG_H
michael@0 34 # include "config.h"
michael@0 35 # endif
michael@0 36 #endif
michael@0 37
michael@0 38 #include "_kiss_fft_guts.h"
michael@0 39 #include "arch.h"
michael@0 40 #include "os_support.h"
michael@0 41 #include "mathops.h"
michael@0 42 #include "stack_alloc.h"
michael@0 43
michael@0 44 /* The guts header contains all the multiplication and addition macros that are defined for
michael@0 45 complex numbers. It also delares the kf_ internal functions.
michael@0 46 */
michael@0 47
michael@0 48 static void kf_bfly2(
michael@0 49 kiss_fft_cpx * Fout,
michael@0 50 const size_t fstride,
michael@0 51 const kiss_fft_state *st,
michael@0 52 int m,
michael@0 53 int N,
michael@0 54 int mm
michael@0 55 )
michael@0 56 {
michael@0 57 kiss_fft_cpx * Fout2;
michael@0 58 const kiss_twiddle_cpx * tw1;
michael@0 59 int i,j;
michael@0 60 kiss_fft_cpx * Fout_beg = Fout;
michael@0 61 for (i=0;i<N;i++)
michael@0 62 {
michael@0 63 Fout = Fout_beg + i*mm;
michael@0 64 Fout2 = Fout + m;
michael@0 65 tw1 = st->twiddles;
michael@0 66 for(j=0;j<m;j++)
michael@0 67 {
michael@0 68 kiss_fft_cpx t;
michael@0 69 Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1);
michael@0 70 Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1);
michael@0 71 C_MUL (t, *Fout2 , *tw1);
michael@0 72 tw1 += fstride;
michael@0 73 C_SUB( *Fout2 , *Fout , t );
michael@0 74 C_ADDTO( *Fout , t );
michael@0 75 ++Fout2;
michael@0 76 ++Fout;
michael@0 77 }
michael@0 78 }
michael@0 79 }
michael@0 80
michael@0 81 static void ki_bfly2(
michael@0 82 kiss_fft_cpx * Fout,
michael@0 83 const size_t fstride,
michael@0 84 const kiss_fft_state *st,
michael@0 85 int m,
michael@0 86 int N,
michael@0 87 int mm
michael@0 88 )
michael@0 89 {
michael@0 90 kiss_fft_cpx * Fout2;
michael@0 91 const kiss_twiddle_cpx * tw1;
michael@0 92 kiss_fft_cpx t;
michael@0 93 int i,j;
michael@0 94 kiss_fft_cpx * Fout_beg = Fout;
michael@0 95 for (i=0;i<N;i++)
michael@0 96 {
michael@0 97 Fout = Fout_beg + i*mm;
michael@0 98 Fout2 = Fout + m;
michael@0 99 tw1 = st->twiddles;
michael@0 100 for(j=0;j<m;j++)
michael@0 101 {
michael@0 102 C_MULC (t, *Fout2 , *tw1);
michael@0 103 tw1 += fstride;
michael@0 104 C_SUB( *Fout2 , *Fout , t );
michael@0 105 C_ADDTO( *Fout , t );
michael@0 106 ++Fout2;
michael@0 107 ++Fout;
michael@0 108 }
michael@0 109 }
michael@0 110 }
michael@0 111
michael@0 112 static void kf_bfly4(
michael@0 113 kiss_fft_cpx * Fout,
michael@0 114 const size_t fstride,
michael@0 115 const kiss_fft_state *st,
michael@0 116 int m,
michael@0 117 int N,
michael@0 118 int mm
michael@0 119 )
michael@0 120 {
michael@0 121 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
michael@0 122 kiss_fft_cpx scratch[6];
michael@0 123 const size_t m2=2*m;
michael@0 124 const size_t m3=3*m;
michael@0 125 int i, j;
michael@0 126
michael@0 127 kiss_fft_cpx * Fout_beg = Fout;
michael@0 128 for (i=0;i<N;i++)
michael@0 129 {
michael@0 130 Fout = Fout_beg + i*mm;
michael@0 131 tw3 = tw2 = tw1 = st->twiddles;
michael@0 132 for (j=0;j<m;j++)
michael@0 133 {
michael@0 134 C_MUL4(scratch[0],Fout[m] , *tw1 );
michael@0 135 C_MUL4(scratch[1],Fout[m2] , *tw2 );
michael@0 136 C_MUL4(scratch[2],Fout[m3] , *tw3 );
michael@0 137
michael@0 138 Fout->r = PSHR32(Fout->r, 2);
michael@0 139 Fout->i = PSHR32(Fout->i, 2);
michael@0 140 C_SUB( scratch[5] , *Fout, scratch[1] );
michael@0 141 C_ADDTO(*Fout, scratch[1]);
michael@0 142 C_ADD( scratch[3] , scratch[0] , scratch[2] );
michael@0 143 C_SUB( scratch[4] , scratch[0] , scratch[2] );
michael@0 144 C_SUB( Fout[m2], *Fout, scratch[3] );
michael@0 145 tw1 += fstride;
michael@0 146 tw2 += fstride*2;
michael@0 147 tw3 += fstride*3;
michael@0 148 C_ADDTO( *Fout , scratch[3] );
michael@0 149
michael@0 150 Fout[m].r = scratch[5].r + scratch[4].i;
michael@0 151 Fout[m].i = scratch[5].i - scratch[4].r;
michael@0 152 Fout[m3].r = scratch[5].r - scratch[4].i;
michael@0 153 Fout[m3].i = scratch[5].i + scratch[4].r;
michael@0 154 ++Fout;
michael@0 155 }
michael@0 156 }
michael@0 157 }
michael@0 158
michael@0 159 static void ki_bfly4(
michael@0 160 kiss_fft_cpx * Fout,
michael@0 161 const size_t fstride,
michael@0 162 const kiss_fft_state *st,
michael@0 163 int m,
michael@0 164 int N,
michael@0 165 int mm
michael@0 166 )
michael@0 167 {
michael@0 168 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
michael@0 169 kiss_fft_cpx scratch[6];
michael@0 170 const size_t m2=2*m;
michael@0 171 const size_t m3=3*m;
michael@0 172 int i, j;
michael@0 173
michael@0 174 kiss_fft_cpx * Fout_beg = Fout;
michael@0 175 for (i=0;i<N;i++)
michael@0 176 {
michael@0 177 Fout = Fout_beg + i*mm;
michael@0 178 tw3 = tw2 = tw1 = st->twiddles;
michael@0 179 for (j=0;j<m;j++)
michael@0 180 {
michael@0 181 C_MULC(scratch[0],Fout[m] , *tw1 );
michael@0 182 C_MULC(scratch[1],Fout[m2] , *tw2 );
michael@0 183 C_MULC(scratch[2],Fout[m3] , *tw3 );
michael@0 184
michael@0 185 C_SUB( scratch[5] , *Fout, scratch[1] );
michael@0 186 C_ADDTO(*Fout, scratch[1]);
michael@0 187 C_ADD( scratch[3] , scratch[0] , scratch[2] );
michael@0 188 C_SUB( scratch[4] , scratch[0] , scratch[2] );
michael@0 189 C_SUB( Fout[m2], *Fout, scratch[3] );
michael@0 190 tw1 += fstride;
michael@0 191 tw2 += fstride*2;
michael@0 192 tw3 += fstride*3;
michael@0 193 C_ADDTO( *Fout , scratch[3] );
michael@0 194
michael@0 195 Fout[m].r = scratch[5].r - scratch[4].i;
michael@0 196 Fout[m].i = scratch[5].i + scratch[4].r;
michael@0 197 Fout[m3].r = scratch[5].r + scratch[4].i;
michael@0 198 Fout[m3].i = scratch[5].i - scratch[4].r;
michael@0 199 ++Fout;
michael@0 200 }
michael@0 201 }
michael@0 202 }
michael@0 203
michael@0 204 #ifndef RADIX_TWO_ONLY
michael@0 205
michael@0 206 static void kf_bfly3(
michael@0 207 kiss_fft_cpx * Fout,
michael@0 208 const size_t fstride,
michael@0 209 const kiss_fft_state *st,
michael@0 210 int m,
michael@0 211 int N,
michael@0 212 int mm
michael@0 213 )
michael@0 214 {
michael@0 215 int i;
michael@0 216 size_t k;
michael@0 217 const size_t m2 = 2*m;
michael@0 218 const kiss_twiddle_cpx *tw1,*tw2;
michael@0 219 kiss_fft_cpx scratch[5];
michael@0 220 kiss_twiddle_cpx epi3;
michael@0 221
michael@0 222 kiss_fft_cpx * Fout_beg = Fout;
michael@0 223 epi3 = st->twiddles[fstride*m];
michael@0 224 for (i=0;i<N;i++)
michael@0 225 {
michael@0 226 Fout = Fout_beg + i*mm;
michael@0 227 tw1=tw2=st->twiddles;
michael@0 228 k=m;
michael@0 229 do {
michael@0 230 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
michael@0 231
michael@0 232 C_MUL(scratch[1],Fout[m] , *tw1);
michael@0 233 C_MUL(scratch[2],Fout[m2] , *tw2);
michael@0 234
michael@0 235 C_ADD(scratch[3],scratch[1],scratch[2]);
michael@0 236 C_SUB(scratch[0],scratch[1],scratch[2]);
michael@0 237 tw1 += fstride;
michael@0 238 tw2 += fstride*2;
michael@0 239
michael@0 240 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
michael@0 241 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
michael@0 242
michael@0 243 C_MULBYSCALAR( scratch[0] , epi3.i );
michael@0 244
michael@0 245 C_ADDTO(*Fout,scratch[3]);
michael@0 246
michael@0 247 Fout[m2].r = Fout[m].r + scratch[0].i;
michael@0 248 Fout[m2].i = Fout[m].i - scratch[0].r;
michael@0 249
michael@0 250 Fout[m].r -= scratch[0].i;
michael@0 251 Fout[m].i += scratch[0].r;
michael@0 252
michael@0 253 ++Fout;
michael@0 254 } while(--k);
michael@0 255 }
michael@0 256 }
michael@0 257
michael@0 258 static void ki_bfly3(
michael@0 259 kiss_fft_cpx * Fout,
michael@0 260 const size_t fstride,
michael@0 261 const kiss_fft_state *st,
michael@0 262 int m,
michael@0 263 int N,
michael@0 264 int mm
michael@0 265 )
michael@0 266 {
michael@0 267 int i, k;
michael@0 268 const size_t m2 = 2*m;
michael@0 269 const kiss_twiddle_cpx *tw1,*tw2;
michael@0 270 kiss_fft_cpx scratch[5];
michael@0 271 kiss_twiddle_cpx epi3;
michael@0 272
michael@0 273 kiss_fft_cpx * Fout_beg = Fout;
michael@0 274 epi3 = st->twiddles[fstride*m];
michael@0 275 for (i=0;i<N;i++)
michael@0 276 {
michael@0 277 Fout = Fout_beg + i*mm;
michael@0 278 tw1=tw2=st->twiddles;
michael@0 279 k=m;
michael@0 280 do{
michael@0 281
michael@0 282 C_MULC(scratch[1],Fout[m] , *tw1);
michael@0 283 C_MULC(scratch[2],Fout[m2] , *tw2);
michael@0 284
michael@0 285 C_ADD(scratch[3],scratch[1],scratch[2]);
michael@0 286 C_SUB(scratch[0],scratch[1],scratch[2]);
michael@0 287 tw1 += fstride;
michael@0 288 tw2 += fstride*2;
michael@0 289
michael@0 290 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
michael@0 291 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
michael@0 292
michael@0 293 C_MULBYSCALAR( scratch[0] , -epi3.i );
michael@0 294
michael@0 295 C_ADDTO(*Fout,scratch[3]);
michael@0 296
michael@0 297 Fout[m2].r = Fout[m].r + scratch[0].i;
michael@0 298 Fout[m2].i = Fout[m].i - scratch[0].r;
michael@0 299
michael@0 300 Fout[m].r -= scratch[0].i;
michael@0 301 Fout[m].i += scratch[0].r;
michael@0 302
michael@0 303 ++Fout;
michael@0 304 }while(--k);
michael@0 305 }
michael@0 306 }
michael@0 307
michael@0 308 static void kf_bfly5(
michael@0 309 kiss_fft_cpx * Fout,
michael@0 310 const size_t fstride,
michael@0 311 const kiss_fft_state *st,
michael@0 312 int m,
michael@0 313 int N,
michael@0 314 int mm
michael@0 315 )
michael@0 316 {
michael@0 317 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
michael@0 318 int i, u;
michael@0 319 kiss_fft_cpx scratch[13];
michael@0 320 const kiss_twiddle_cpx * twiddles = st->twiddles;
michael@0 321 const kiss_twiddle_cpx *tw;
michael@0 322 kiss_twiddle_cpx ya,yb;
michael@0 323 kiss_fft_cpx * Fout_beg = Fout;
michael@0 324
michael@0 325 ya = twiddles[fstride*m];
michael@0 326 yb = twiddles[fstride*2*m];
michael@0 327 tw=st->twiddles;
michael@0 328
michael@0 329 for (i=0;i<N;i++)
michael@0 330 {
michael@0 331 Fout = Fout_beg + i*mm;
michael@0 332 Fout0=Fout;
michael@0 333 Fout1=Fout0+m;
michael@0 334 Fout2=Fout0+2*m;
michael@0 335 Fout3=Fout0+3*m;
michael@0 336 Fout4=Fout0+4*m;
michael@0 337
michael@0 338 for ( u=0; u<m; ++u ) {
michael@0 339 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
michael@0 340 scratch[0] = *Fout0;
michael@0 341
michael@0 342 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
michael@0 343 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
michael@0 344 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
michael@0 345 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
michael@0 346
michael@0 347 C_ADD( scratch[7],scratch[1],scratch[4]);
michael@0 348 C_SUB( scratch[10],scratch[1],scratch[4]);
michael@0 349 C_ADD( scratch[8],scratch[2],scratch[3]);
michael@0 350 C_SUB( scratch[9],scratch[2],scratch[3]);
michael@0 351
michael@0 352 Fout0->r += scratch[7].r + scratch[8].r;
michael@0 353 Fout0->i += scratch[7].i + scratch[8].i;
michael@0 354
michael@0 355 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
michael@0 356 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
michael@0 357
michael@0 358 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
michael@0 359 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
michael@0 360
michael@0 361 C_SUB(*Fout1,scratch[5],scratch[6]);
michael@0 362 C_ADD(*Fout4,scratch[5],scratch[6]);
michael@0 363
michael@0 364 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
michael@0 365 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
michael@0 366 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
michael@0 367 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
michael@0 368
michael@0 369 C_ADD(*Fout2,scratch[11],scratch[12]);
michael@0 370 C_SUB(*Fout3,scratch[11],scratch[12]);
michael@0 371
michael@0 372 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
michael@0 373 }
michael@0 374 }
michael@0 375 }
michael@0 376
michael@0 377 static void ki_bfly5(
michael@0 378 kiss_fft_cpx * Fout,
michael@0 379 const size_t fstride,
michael@0 380 const kiss_fft_state *st,
michael@0 381 int m,
michael@0 382 int N,
michael@0 383 int mm
michael@0 384 )
michael@0 385 {
michael@0 386 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
michael@0 387 int i, u;
michael@0 388 kiss_fft_cpx scratch[13];
michael@0 389 const kiss_twiddle_cpx * twiddles = st->twiddles;
michael@0 390 const kiss_twiddle_cpx *tw;
michael@0 391 kiss_twiddle_cpx ya,yb;
michael@0 392 kiss_fft_cpx * Fout_beg = Fout;
michael@0 393
michael@0 394 ya = twiddles[fstride*m];
michael@0 395 yb = twiddles[fstride*2*m];
michael@0 396 tw=st->twiddles;
michael@0 397
michael@0 398 for (i=0;i<N;i++)
michael@0 399 {
michael@0 400 Fout = Fout_beg + i*mm;
michael@0 401 Fout0=Fout;
michael@0 402 Fout1=Fout0+m;
michael@0 403 Fout2=Fout0+2*m;
michael@0 404 Fout3=Fout0+3*m;
michael@0 405 Fout4=Fout0+4*m;
michael@0 406
michael@0 407 for ( u=0; u<m; ++u ) {
michael@0 408 scratch[0] = *Fout0;
michael@0 409
michael@0 410 C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
michael@0 411 C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
michael@0 412 C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
michael@0 413 C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
michael@0 414
michael@0 415 C_ADD( scratch[7],scratch[1],scratch[4]);
michael@0 416 C_SUB( scratch[10],scratch[1],scratch[4]);
michael@0 417 C_ADD( scratch[8],scratch[2],scratch[3]);
michael@0 418 C_SUB( scratch[9],scratch[2],scratch[3]);
michael@0 419
michael@0 420 Fout0->r += scratch[7].r + scratch[8].r;
michael@0 421 Fout0->i += scratch[7].i + scratch[8].i;
michael@0 422
michael@0 423 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
michael@0 424 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
michael@0 425
michael@0 426 scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
michael@0 427 scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
michael@0 428
michael@0 429 C_SUB(*Fout1,scratch[5],scratch[6]);
michael@0 430 C_ADD(*Fout4,scratch[5],scratch[6]);
michael@0 431
michael@0 432 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
michael@0 433 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
michael@0 434 scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
michael@0 435 scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
michael@0 436
michael@0 437 C_ADD(*Fout2,scratch[11],scratch[12]);
michael@0 438 C_SUB(*Fout3,scratch[11],scratch[12]);
michael@0 439
michael@0 440 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
michael@0 441 }
michael@0 442 }
michael@0 443 }
michael@0 444
michael@0 445 #endif
michael@0 446
michael@0 447
michael@0 448 #ifdef CUSTOM_MODES
michael@0 449
michael@0 450 static
michael@0 451 void compute_bitrev_table(
michael@0 452 int Fout,
michael@0 453 opus_int16 *f,
michael@0 454 const size_t fstride,
michael@0 455 int in_stride,
michael@0 456 opus_int16 * factors,
michael@0 457 const kiss_fft_state *st
michael@0 458 )
michael@0 459 {
michael@0 460 const int p=*factors++; /* the radix */
michael@0 461 const int m=*factors++; /* stage's fft length/p */
michael@0 462
michael@0 463 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
michael@0 464 if (m==1)
michael@0 465 {
michael@0 466 int j;
michael@0 467 for (j=0;j<p;j++)
michael@0 468 {
michael@0 469 *f = Fout+j;
michael@0 470 f += fstride*in_stride;
michael@0 471 }
michael@0 472 } else {
michael@0 473 int j;
michael@0 474 for (j=0;j<p;j++)
michael@0 475 {
michael@0 476 compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
michael@0 477 f += fstride*in_stride;
michael@0 478 Fout += m;
michael@0 479 }
michael@0 480 }
michael@0 481 }
michael@0 482
michael@0 483 /* facbuf is populated by p1,m1,p2,m2, ...
michael@0 484 where
michael@0 485 p[i] * m[i] = m[i-1]
michael@0 486 m0 = n */
michael@0 487 static
michael@0 488 int kf_factor(int n,opus_int16 * facbuf)
michael@0 489 {
michael@0 490 int p=4;
michael@0 491
michael@0 492 /*factor out powers of 4, powers of 2, then any remaining primes */
michael@0 493 do {
michael@0 494 while (n % p) {
michael@0 495 switch (p) {
michael@0 496 case 4: p = 2; break;
michael@0 497 case 2: p = 3; break;
michael@0 498 default: p += 2; break;
michael@0 499 }
michael@0 500 if (p>32000 || (opus_int32)p*(opus_int32)p > n)
michael@0 501 p = n; /* no more factors, skip to end */
michael@0 502 }
michael@0 503 n /= p;
michael@0 504 #ifdef RADIX_TWO_ONLY
michael@0 505 if (p!=2 && p != 4)
michael@0 506 #else
michael@0 507 if (p>5)
michael@0 508 #endif
michael@0 509 {
michael@0 510 return 0;
michael@0 511 }
michael@0 512 *facbuf++ = p;
michael@0 513 *facbuf++ = n;
michael@0 514 } while (n > 1);
michael@0 515 return 1;
michael@0 516 }
michael@0 517
michael@0 518 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
michael@0 519 {
michael@0 520 int i;
michael@0 521 #ifdef FIXED_POINT
michael@0 522 for (i=0;i<nfft;++i) {
michael@0 523 opus_val32 phase = -i;
michael@0 524 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
michael@0 525 }
michael@0 526 #else
michael@0 527 for (i=0;i<nfft;++i) {
michael@0 528 const double pi=3.14159265358979323846264338327;
michael@0 529 double phase = ( -2*pi /nfft ) * i;
michael@0 530 kf_cexp(twiddles+i, phase );
michael@0 531 }
michael@0 532 #endif
michael@0 533 }
michael@0 534
michael@0 535 /*
michael@0 536 *
michael@0 537 * Allocates all necessary storage space for the fft and ifft.
michael@0 538 * The return value is a contiguous block of memory. As such,
michael@0 539 * It can be freed with free().
michael@0 540 * */
michael@0 541 kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base)
michael@0 542 {
michael@0 543 kiss_fft_state *st=NULL;
michael@0 544 size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
michael@0 545
michael@0 546 if ( lenmem==NULL ) {
michael@0 547 st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
michael@0 548 }else{
michael@0 549 if (mem != NULL && *lenmem >= memneeded)
michael@0 550 st = (kiss_fft_state*)mem;
michael@0 551 *lenmem = memneeded;
michael@0 552 }
michael@0 553 if (st) {
michael@0 554 opus_int16 *bitrev;
michael@0 555 kiss_twiddle_cpx *twiddles;
michael@0 556
michael@0 557 st->nfft=nfft;
michael@0 558 #ifndef FIXED_POINT
michael@0 559 st->scale = 1.f/nfft;
michael@0 560 #endif
michael@0 561 if (base != NULL)
michael@0 562 {
michael@0 563 st->twiddles = base->twiddles;
michael@0 564 st->shift = 0;
michael@0 565 while (nfft<<st->shift != base->nfft && st->shift < 32)
michael@0 566 st->shift++;
michael@0 567 if (st->shift>=32)
michael@0 568 goto fail;
michael@0 569 } else {
michael@0 570 st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
michael@0 571 compute_twiddles(twiddles, nfft);
michael@0 572 st->shift = -1;
michael@0 573 }
michael@0 574 if (!kf_factor(nfft,st->factors))
michael@0 575 {
michael@0 576 goto fail;
michael@0 577 }
michael@0 578
michael@0 579 /* bitrev */
michael@0 580 st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
michael@0 581 if (st->bitrev==NULL)
michael@0 582 goto fail;
michael@0 583 compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
michael@0 584 }
michael@0 585 return st;
michael@0 586 fail:
michael@0 587 opus_fft_free(st);
michael@0 588 return NULL;
michael@0 589 }
michael@0 590
michael@0 591 kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem )
michael@0 592 {
michael@0 593 return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL);
michael@0 594 }
michael@0 595
michael@0 596 void opus_fft_free(const kiss_fft_state *cfg)
michael@0 597 {
michael@0 598 if (cfg)
michael@0 599 {
michael@0 600 opus_free((opus_int16*)cfg->bitrev);
michael@0 601 if (cfg->shift < 0)
michael@0 602 opus_free((kiss_twiddle_cpx*)cfg->twiddles);
michael@0 603 opus_free((kiss_fft_state*)cfg);
michael@0 604 }
michael@0 605 }
michael@0 606
michael@0 607 #endif /* CUSTOM_MODES */
michael@0 608
michael@0 609 void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
michael@0 610 {
michael@0 611 int m2, m;
michael@0 612 int p;
michael@0 613 int L;
michael@0 614 int fstride[MAXFACTORS];
michael@0 615 int i;
michael@0 616 int shift;
michael@0 617
michael@0 618 /* st->shift can be -1 */
michael@0 619 shift = st->shift>0 ? st->shift : 0;
michael@0 620
michael@0 621 celt_assert2 (fin != fout, "In-place FFT not supported");
michael@0 622 /* Bit-reverse the input */
michael@0 623 for (i=0;i<st->nfft;i++)
michael@0 624 {
michael@0 625 fout[st->bitrev[i]] = fin[i];
michael@0 626 #ifndef FIXED_POINT
michael@0 627 fout[st->bitrev[i]].r *= st->scale;
michael@0 628 fout[st->bitrev[i]].i *= st->scale;
michael@0 629 #endif
michael@0 630 }
michael@0 631
michael@0 632 fstride[0] = 1;
michael@0 633 L=0;
michael@0 634 do {
michael@0 635 p = st->factors[2*L];
michael@0 636 m = st->factors[2*L+1];
michael@0 637 fstride[L+1] = fstride[L]*p;
michael@0 638 L++;
michael@0 639 } while(m!=1);
michael@0 640 m = st->factors[2*L-1];
michael@0 641 for (i=L-1;i>=0;i--)
michael@0 642 {
michael@0 643 if (i!=0)
michael@0 644 m2 = st->factors[2*i-1];
michael@0 645 else
michael@0 646 m2 = 1;
michael@0 647 switch (st->factors[2*i])
michael@0 648 {
michael@0 649 case 2:
michael@0 650 kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
michael@0 651 break;
michael@0 652 case 4:
michael@0 653 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
michael@0 654 break;
michael@0 655 #ifndef RADIX_TWO_ONLY
michael@0 656 case 3:
michael@0 657 kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
michael@0 658 break;
michael@0 659 case 5:
michael@0 660 kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
michael@0 661 break;
michael@0 662 #endif
michael@0 663 }
michael@0 664 m = m2;
michael@0 665 }
michael@0 666 }
michael@0 667
michael@0 668 void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
michael@0 669 {
michael@0 670 int m2, m;
michael@0 671 int p;
michael@0 672 int L;
michael@0 673 int fstride[MAXFACTORS];
michael@0 674 int i;
michael@0 675 int shift;
michael@0 676
michael@0 677 /* st->shift can be -1 */
michael@0 678 shift = st->shift>0 ? st->shift : 0;
michael@0 679 celt_assert2 (fin != fout, "In-place FFT not supported");
michael@0 680 /* Bit-reverse the input */
michael@0 681 for (i=0;i<st->nfft;i++)
michael@0 682 fout[st->bitrev[i]] = fin[i];
michael@0 683
michael@0 684 fstride[0] = 1;
michael@0 685 L=0;
michael@0 686 do {
michael@0 687 p = st->factors[2*L];
michael@0 688 m = st->factors[2*L+1];
michael@0 689 fstride[L+1] = fstride[L]*p;
michael@0 690 L++;
michael@0 691 } while(m!=1);
michael@0 692 m = st->factors[2*L-1];
michael@0 693 for (i=L-1;i>=0;i--)
michael@0 694 {
michael@0 695 if (i!=0)
michael@0 696 m2 = st->factors[2*i-1];
michael@0 697 else
michael@0 698 m2 = 1;
michael@0 699 switch (st->factors[2*i])
michael@0 700 {
michael@0 701 case 2:
michael@0 702 ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
michael@0 703 break;
michael@0 704 case 4:
michael@0 705 ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
michael@0 706 break;
michael@0 707 #ifndef RADIX_TWO_ONLY
michael@0 708 case 3:
michael@0 709 ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
michael@0 710 break;
michael@0 711 case 5:
michael@0 712 ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
michael@0 713 break;
michael@0 714 #endif
michael@0 715 }
michael@0 716 m = m2;
michael@0 717 }
michael@0 718 }
michael@0 719

mercurial