media/kiss_fft/kiss_fft.c

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /*
     2 Copyright (c) 2003-2010, Mark Borgerding
     4 All rights reserved.
     6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
     8     * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
     9     * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
    10     * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
    12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    13 */
    16 #include "_kiss_fft_guts.h"
    17 /* The guts header contains all the multiplication and addition macros that are defined for
    18  fixed or floating point complex numbers.  It also delares the kf_ internal functions.
    19  */
    21 static void kf_bfly2(
    22         kiss_fft_cpx * Fout,
    23         const size_t fstride,
    24         const kiss_fft_cfg st,
    25         int m
    26         )
    27 {
    28     kiss_fft_cpx * Fout2;
    29     kiss_fft_cpx * tw1 = st->twiddles;
    30     kiss_fft_cpx t;
    31     Fout2 = Fout + m;
    32     do{
    33         C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
    35         C_MUL (t,  *Fout2 , *tw1);
    36         tw1 += fstride;
    37         C_SUB( *Fout2 ,  *Fout , t );
    38         C_ADDTO( *Fout ,  t );
    39         ++Fout2;
    40         ++Fout;
    41     }while (--m);
    42 }
    44 static void kf_bfly4(
    45         kiss_fft_cpx * Fout,
    46         const size_t fstride,
    47         const kiss_fft_cfg st,
    48         const size_t m
    49         )
    50 {
    51     kiss_fft_cpx *tw1,*tw2,*tw3;
    52     kiss_fft_cpx scratch[6];
    53     size_t k=m;
    54     const size_t m2=2*m;
    55     const size_t m3=3*m;
    58     tw3 = tw2 = tw1 = st->twiddles;
    60     do {
    61         C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
    63         C_MUL(scratch[0],Fout[m] , *tw1 );
    64         C_MUL(scratch[1],Fout[m2] , *tw2 );
    65         C_MUL(scratch[2],Fout[m3] , *tw3 );
    67         C_SUB( scratch[5] , *Fout, scratch[1] );
    68         C_ADDTO(*Fout, scratch[1]);
    69         C_ADD( scratch[3] , scratch[0] , scratch[2] );
    70         C_SUB( scratch[4] , scratch[0] , scratch[2] );
    71         C_SUB( Fout[m2], *Fout, scratch[3] );
    72         tw1 += fstride;
    73         tw2 += fstride*2;
    74         tw3 += fstride*3;
    75         C_ADDTO( *Fout , scratch[3] );
    77         if(st->inverse) {
    78             Fout[m].r = scratch[5].r - scratch[4].i;
    79             Fout[m].i = scratch[5].i + scratch[4].r;
    80             Fout[m3].r = scratch[5].r + scratch[4].i;
    81             Fout[m3].i = scratch[5].i - scratch[4].r;
    82         }else{
    83             Fout[m].r = scratch[5].r + scratch[4].i;
    84             Fout[m].i = scratch[5].i - scratch[4].r;
    85             Fout[m3].r = scratch[5].r - scratch[4].i;
    86             Fout[m3].i = scratch[5].i + scratch[4].r;
    87         }
    88         ++Fout;
    89     }while(--k);
    90 }
    92 static void kf_bfly3(
    93          kiss_fft_cpx * Fout,
    94          const size_t fstride,
    95          const kiss_fft_cfg st,
    96          size_t m
    97          )
    98 {
    99      size_t k=m;
   100      const size_t m2 = 2*m;
   101      kiss_fft_cpx *tw1,*tw2;
   102      kiss_fft_cpx scratch[5];
   103      kiss_fft_cpx epi3;
   104      epi3 = st->twiddles[fstride*m];
   106      tw1=tw2=st->twiddles;
   108      do{
   109          C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
   111          C_MUL(scratch[1],Fout[m] , *tw1);
   112          C_MUL(scratch[2],Fout[m2] , *tw2);
   114          C_ADD(scratch[3],scratch[1],scratch[2]);
   115          C_SUB(scratch[0],scratch[1],scratch[2]);
   116          tw1 += fstride;
   117          tw2 += fstride*2;
   119          Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
   120          Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
   122          C_MULBYSCALAR( scratch[0] , epi3.i );
   124          C_ADDTO(*Fout,scratch[3]);
   126          Fout[m2].r = Fout[m].r + scratch[0].i;
   127          Fout[m2].i = Fout[m].i - scratch[0].r;
   129          Fout[m].r -= scratch[0].i;
   130          Fout[m].i += scratch[0].r;
   132          ++Fout;
   133      }while(--k);
   134 }
   136 static void kf_bfly5(
   137         kiss_fft_cpx * Fout,
   138         const size_t fstride,
   139         const kiss_fft_cfg st,
   140         int m
   141         )
   142 {
   143     kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
   144     int u;
   145     kiss_fft_cpx scratch[13];
   146     kiss_fft_cpx * twiddles = st->twiddles;
   147     kiss_fft_cpx *tw;
   148     kiss_fft_cpx ya,yb;
   149     ya = twiddles[fstride*m];
   150     yb = twiddles[fstride*2*m];
   152     Fout0=Fout;
   153     Fout1=Fout0+m;
   154     Fout2=Fout0+2*m;
   155     Fout3=Fout0+3*m;
   156     Fout4=Fout0+4*m;
   158     tw=st->twiddles;
   159     for ( u=0; u<m; ++u ) {
   160         C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
   161         scratch[0] = *Fout0;
   163         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
   164         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
   165         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
   166         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
   168         C_ADD( scratch[7],scratch[1],scratch[4]);
   169         C_SUB( scratch[10],scratch[1],scratch[4]);
   170         C_ADD( scratch[8],scratch[2],scratch[3]);
   171         C_SUB( scratch[9],scratch[2],scratch[3]);
   173         Fout0->r += scratch[7].r + scratch[8].r;
   174         Fout0->i += scratch[7].i + scratch[8].i;
   176         scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
   177         scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
   179         scratch[6].r =  S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
   180         scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
   182         C_SUB(*Fout1,scratch[5],scratch[6]);
   183         C_ADD(*Fout4,scratch[5],scratch[6]);
   185         scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
   186         scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
   187         scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
   188         scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
   190         C_ADD(*Fout2,scratch[11],scratch[12]);
   191         C_SUB(*Fout3,scratch[11],scratch[12]);
   193         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
   194     }
   195 }
   197 /* perform the butterfly for one stage of a mixed radix FFT */
   198 static void kf_bfly_generic(
   199         kiss_fft_cpx * Fout,
   200         const size_t fstride,
   201         const kiss_fft_cfg st,
   202         int m,
   203         int p
   204         )
   205 {
   206     int u,k,q1,q;
   207     kiss_fft_cpx * twiddles = st->twiddles;
   208     kiss_fft_cpx t;
   209     int Norig = st->nfft;
   211     kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
   213     for ( u=0; u<m; ++u ) {
   214         k=u;
   215         for ( q1=0 ; q1<p ; ++q1 ) {
   216             scratch[q1] = Fout[ k  ];
   217             C_FIXDIV(scratch[q1],p);
   218             k += m;
   219         }
   221         k=u;
   222         for ( q1=0 ; q1<p ; ++q1 ) {
   223             int twidx=0;
   224             Fout[ k ] = scratch[0];
   225             for (q=1;q<p;++q ) {
   226                 twidx += fstride * k;
   227                 if (twidx>=Norig) twidx-=Norig;
   228                 C_MUL(t,scratch[q] , twiddles[twidx] );
   229                 C_ADDTO( Fout[ k ] ,t);
   230             }
   231             k += m;
   232         }
   233     }
   234     KISS_FFT_TMP_FREE(scratch);
   235 }
   237 static
   238 void kf_work(
   239         kiss_fft_cpx * Fout,
   240         const kiss_fft_cpx * f,
   241         const size_t fstride,
   242         int in_stride,
   243         int * factors,
   244         const kiss_fft_cfg st
   245         )
   246 {
   247     kiss_fft_cpx * Fout_beg=Fout;
   248     const int p=*factors++; /* the radix  */
   249     const int m=*factors++; /* stage's fft length/p */
   250     const kiss_fft_cpx * Fout_end = Fout + p*m;
   252 #ifdef _OPENMP
   253     // use openmp extensions at the 
   254     // top-level (not recursive)
   255     if (fstride==1 && p<=5)
   256     {
   257         int k;
   259         // execute the p different work units in different threads
   260 #       pragma omp parallel for
   261         for (k=0;k<p;++k) 
   262             kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
   263         // all threads have joined by this point
   265         switch (p) {
   266             case 2: kf_bfly2(Fout,fstride,st,m); break;
   267             case 3: kf_bfly3(Fout,fstride,st,m); break; 
   268             case 4: kf_bfly4(Fout,fstride,st,m); break;
   269             case 5: kf_bfly5(Fout,fstride,st,m); break; 
   270             default: kf_bfly_generic(Fout,fstride,st,m,p); break;
   271         }
   272         return;
   273     }
   274 #endif
   276     if (m==1) {
   277         do{
   278             *Fout = *f;
   279             f += fstride*in_stride;
   280         }while(++Fout != Fout_end );
   281     }else{
   282         do{
   283             // recursive call:
   284             // DFT of size m*p performed by doing
   285             // p instances of smaller DFTs of size m, 
   286             // each one takes a decimated version of the input
   287             kf_work( Fout , f, fstride*p, in_stride, factors,st);
   288             f += fstride*in_stride;
   289         }while( (Fout += m) != Fout_end );
   290     }
   292     Fout=Fout_beg;
   294     // recombine the p smaller DFTs 
   295     switch (p) {
   296         case 2: kf_bfly2(Fout,fstride,st,m); break;
   297         case 3: kf_bfly3(Fout,fstride,st,m); break; 
   298         case 4: kf_bfly4(Fout,fstride,st,m); break;
   299         case 5: kf_bfly5(Fout,fstride,st,m); break; 
   300         default: kf_bfly_generic(Fout,fstride,st,m,p); break;
   301     }
   302 }
   304 /*  facbuf is populated by p1,m1,p2,m2, ...
   305     where 
   306     p[i] * m[i] = m[i-1]
   307     m0 = n                  */
   308 static 
   309 void kf_factor(int n,int * facbuf)
   310 {
   311     int p=4;
   312     double floor_sqrt;
   313     floor_sqrt = floor( sqrt((double)n) );
   315     /*factor out powers of 4, powers of 2, then any remaining primes */
   316     do {
   317         while (n % p) {
   318             switch (p) {
   319                 case 4: p = 2; break;
   320                 case 2: p = 3; break;
   321                 default: p += 2; break;
   322             }
   323             if (p > floor_sqrt)
   324                 p = n;          /* no more factors, skip to end */
   325         }
   326         n /= p;
   327         *facbuf++ = p;
   328         *facbuf++ = n;
   329     } while (n > 1);
   330 }
   332 /*
   333  *
   334  * User-callable function to allocate all necessary storage space for the fft.
   335  *
   336  * The return value is a contiguous block of memory, allocated with malloc.  As such,
   337  * It can be freed with free(), rather than a kiss_fft-specific function.
   338  * */
   339 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
   340 {
   341     kiss_fft_cfg st=NULL;
   342     size_t memneeded = sizeof(struct kiss_fft_state)
   343         + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
   345     if ( lenmem==NULL ) {
   346         st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
   347     }else{
   348         if (mem != NULL && *lenmem >= memneeded)
   349             st = (kiss_fft_cfg)mem;
   350         *lenmem = memneeded;
   351     }
   352     if (st) {
   353         int i;
   354         st->nfft=nfft;
   355         st->inverse = inverse_fft;
   357         for (i=0;i<nfft;++i) {
   358             const double pi=3.141592653589793238462643383279502884197169399375105820974944;
   359             double phase = -2*pi*i / nfft;
   360             if (st->inverse)
   361                 phase *= -1;
   362             kf_cexp(st->twiddles+i, phase );
   363         }
   365         kf_factor(nfft,st->factors);
   366     }
   367     return st;
   368 }
   371 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
   372 {
   373     if (fin == fout) {
   374         //NOTE: this is not really an in-place FFT algorithm.
   375         //It just performs an out-of-place FFT into a temp buffer
   376         kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
   377         kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
   378         memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
   379         KISS_FFT_TMP_FREE(tmpbuf);
   380     }else{
   381         kf_work( fout, fin, 1,in_stride, st->factors,st );
   382     }
   383 }
   385 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
   386 {
   387     kiss_fft_stride(cfg,fin,fout,1);
   388 }
   391 void kiss_fft_cleanup(void)
   392 {
   393     // nothing needed any more
   394 }
   396 int kiss_fft_next_fast_size(int n)
   397 {
   398     while(1) {
   399         int m=n;
   400         while ( (m%2) == 0 ) m/=2;
   401         while ( (m%3) == 0 ) m/=3;
   402         while ( (m%5) == 0 ) m/=5;
   403         if (m<=1)
   404             break; /* n is completely factorable by twos, threes, and fives */
   405         n++;
   406     }
   407     return n;
   408 }

mercurial