media/libopus/celt/mathops.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libopus/celt/mathops.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,208 @@
     1.4 +/* Copyright (c) 2002-2008 Jean-Marc Valin
     1.5 +   Copyright (c) 2007-2008 CSIRO
     1.6 +   Copyright (c) 2007-2009 Xiph.Org Foundation
     1.7 +   Written by Jean-Marc Valin */
     1.8 +/**
     1.9 +   @file mathops.h
    1.10 +   @brief Various math functions
    1.11 +*/
    1.12 +/*
    1.13 +   Redistribution and use in source and binary forms, with or without
    1.14 +   modification, are permitted provided that the following conditions
    1.15 +   are met:
    1.16 +
    1.17 +   - Redistributions of source code must retain the above copyright
    1.18 +   notice, this list of conditions and the following disclaimer.
    1.19 +
    1.20 +   - Redistributions in binary form must reproduce the above copyright
    1.21 +   notice, this list of conditions and the following disclaimer in the
    1.22 +   documentation and/or other materials provided with the distribution.
    1.23 +
    1.24 +   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    1.25 +   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    1.26 +   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    1.27 +   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
    1.28 +   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    1.29 +   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    1.30 +   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    1.31 +   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
    1.32 +   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    1.33 +   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    1.34 +   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    1.35 +*/
    1.36 +
    1.37 +#ifdef HAVE_CONFIG_H
    1.38 +#include "config.h"
    1.39 +#endif
    1.40 +
    1.41 +#include "mathops.h"
    1.42 +
    1.43 +/*Compute floor(sqrt(_val)) with exact arithmetic.
    1.44 +  This has been tested on all possible 32-bit inputs.*/
    1.45 +unsigned isqrt32(opus_uint32 _val){
    1.46 +  unsigned b;
    1.47 +  unsigned g;
    1.48 +  int      bshift;
    1.49 +  /*Uses the second method from
    1.50 +     http://www.azillionmonkeys.com/qed/sqroot.html
    1.51 +    The main idea is to search for the largest binary digit b such that
    1.52 +     (g+b)*(g+b) <= _val, and add it to the solution g.*/
    1.53 +  g=0;
    1.54 +  bshift=(EC_ILOG(_val)-1)>>1;
    1.55 +  b=1U<<bshift;
    1.56 +  do{
    1.57 +    opus_uint32 t;
    1.58 +    t=(((opus_uint32)g<<1)+b)<<bshift;
    1.59 +    if(t<=_val){
    1.60 +      g+=b;
    1.61 +      _val-=t;
    1.62 +    }
    1.63 +    b>>=1;
    1.64 +    bshift--;
    1.65 +  }
    1.66 +  while(bshift>=0);
    1.67 +  return g;
    1.68 +}
    1.69 +
    1.70 +#ifdef FIXED_POINT
    1.71 +
    1.72 +opus_val32 frac_div32(opus_val32 a, opus_val32 b)
    1.73 +{
    1.74 +   opus_val16 rcp;
    1.75 +   opus_val32 result, rem;
    1.76 +   int shift = celt_ilog2(b)-29;
    1.77 +   a = VSHR32(a,shift);
    1.78 +   b = VSHR32(b,shift);
    1.79 +   /* 16-bit reciprocal */
    1.80 +   rcp = ROUND16(celt_rcp(ROUND16(b,16)),3);
    1.81 +   result = MULT16_32_Q15(rcp, a);
    1.82 +   rem = PSHR32(a,2)-MULT32_32_Q31(result, b);
    1.83 +   result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2));
    1.84 +   if (result >= 536870912)       /*  2^29 */
    1.85 +      return 2147483647;          /*  2^31 - 1 */
    1.86 +   else if (result <= -536870912) /* -2^29 */
    1.87 +      return -2147483647;         /* -2^31 */
    1.88 +   else
    1.89 +      return SHL32(result, 2);
    1.90 +}
    1.91 +
    1.92 +/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
    1.93 +opus_val16 celt_rsqrt_norm(opus_val32 x)
    1.94 +{
    1.95 +   opus_val16 n;
    1.96 +   opus_val16 r;
    1.97 +   opus_val16 r2;
    1.98 +   opus_val16 y;
    1.99 +   /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
   1.100 +   n = x-32768;
   1.101 +   /* Get a rough initial guess for the root.
   1.102 +      The optimal minimax quadratic approximation (using relative error) is
   1.103 +       r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
   1.104 +      Coefficients here, and the final result r, are Q14.*/
   1.105 +   r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
   1.106 +   /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.
   1.107 +      We can compute the result from n and r using Q15 multiplies with some
   1.108 +       adjustment, carefully done to avoid overflow.
   1.109 +      Range of y is [-1564,1594]. */
   1.110 +   r2 = MULT16_16_Q15(r, r);
   1.111 +   y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
   1.112 +   /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
   1.113 +      This yields the Q14 reciprocal square root of the Q16 x, with a maximum
   1.114 +       relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
   1.115 +       peak absolute error of 2.26591/16384. */
   1.116 +   return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
   1.117 +              SUB16(MULT16_16_Q15(y, 12288), 16384))));
   1.118 +}
   1.119 +
   1.120 +/** Sqrt approximation (QX input, QX/2 output) */
   1.121 +opus_val32 celt_sqrt(opus_val32 x)
   1.122 +{
   1.123 +   int k;
   1.124 +   opus_val16 n;
   1.125 +   opus_val32 rt;
   1.126 +   static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664};
   1.127 +   if (x==0)
   1.128 +      return 0;
   1.129 +   else if (x>=1073741824)
   1.130 +      return 32767;
   1.131 +   k = (celt_ilog2(x)>>1)-7;
   1.132 +   x = VSHR32(x, 2*k);
   1.133 +   n = x-32768;
   1.134 +   rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
   1.135 +              MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
   1.136 +   rt = VSHR32(rt,7-k);
   1.137 +   return rt;
   1.138 +}
   1.139 +
   1.140 +#define L1 32767
   1.141 +#define L2 -7651
   1.142 +#define L3 8277
   1.143 +#define L4 -626
   1.144 +
   1.145 +static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x)
   1.146 +{
   1.147 +   opus_val16 x2;
   1.148 +
   1.149 +   x2 = MULT16_16_P15(x,x);
   1.150 +   return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2
   1.151 +                                                                                ))))))));
   1.152 +}
   1.153 +
   1.154 +#undef L1
   1.155 +#undef L2
   1.156 +#undef L3
   1.157 +#undef L4
   1.158 +
   1.159 +opus_val16 celt_cos_norm(opus_val32 x)
   1.160 +{
   1.161 +   x = x&0x0001ffff;
   1.162 +   if (x>SHL32(EXTEND32(1), 16))
   1.163 +      x = SUB32(SHL32(EXTEND32(1), 17),x);
   1.164 +   if (x&0x00007fff)
   1.165 +   {
   1.166 +      if (x<SHL32(EXTEND32(1), 15))
   1.167 +      {
   1.168 +         return _celt_cos_pi_2(EXTRACT16(x));
   1.169 +      } else {
   1.170 +         return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x)));
   1.171 +      }
   1.172 +   } else {
   1.173 +      if (x&0x0000ffff)
   1.174 +         return 0;
   1.175 +      else if (x&0x0001ffff)
   1.176 +         return -32767;
   1.177 +      else
   1.178 +         return 32767;
   1.179 +   }
   1.180 +}
   1.181 +
   1.182 +/** Reciprocal approximation (Q15 input, Q16 output) */
   1.183 +opus_val32 celt_rcp(opus_val32 x)
   1.184 +{
   1.185 +   int i;
   1.186 +   opus_val16 n;
   1.187 +   opus_val16 r;
   1.188 +   celt_assert2(x>0, "celt_rcp() only defined for positive values");
   1.189 +   i = celt_ilog2(x);
   1.190 +   /* n is Q15 with range [0,1). */
   1.191 +   n = VSHR32(x,i-15)-32768;
   1.192 +   /* Start with a linear approximation:
   1.193 +      r = 1.8823529411764706-0.9411764705882353*n.
   1.194 +      The coefficients and the result are Q14 in the range [15420,30840].*/
   1.195 +   r = ADD16(30840, MULT16_16_Q15(-15420, n));
   1.196 +   /* Perform two Newton iterations:
   1.197 +      r -= r*((r*n)-1.Q15)
   1.198 +         = r*((r*n)+(r-1.Q15)). */
   1.199 +   r = SUB16(r, MULT16_16_Q15(r,
   1.200 +             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
   1.201 +   /* We subtract an extra 1 in the second iteration to avoid overflow; it also
   1.202 +       neatly compensates for truncation error in the rest of the process. */
   1.203 +   r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
   1.204 +             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
   1.205 +   /* r is now the Q15 solution to 2/(n+1), with a maximum relative error
   1.206 +       of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
   1.207 +       error of 1.24665/32768. */
   1.208 +   return VSHR32(EXTEND32(r),i-16);
   1.209 +}
   1.210 +
   1.211 +#endif

mercurial