1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libopus/celt/mathops.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,208 @@ 1.4 +/* Copyright (c) 2002-2008 Jean-Marc Valin 1.5 + Copyright (c) 2007-2008 CSIRO 1.6 + Copyright (c) 2007-2009 Xiph.Org Foundation 1.7 + Written by Jean-Marc Valin */ 1.8 +/** 1.9 + @file mathops.h 1.10 + @brief Various math functions 1.11 +*/ 1.12 +/* 1.13 + Redistribution and use in source and binary forms, with or without 1.14 + modification, are permitted provided that the following conditions 1.15 + are met: 1.16 + 1.17 + - Redistributions of source code must retain the above copyright 1.18 + notice, this list of conditions and the following disclaimer. 1.19 + 1.20 + - Redistributions in binary form must reproduce the above copyright 1.21 + notice, this list of conditions and the following disclaimer in the 1.22 + documentation and/or other materials provided with the distribution. 1.23 + 1.24 + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 1.25 + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 1.26 + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 1.27 + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 1.28 + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 1.29 + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 1.30 + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 1.31 + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 1.32 + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 1.33 + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 1.34 + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1.35 +*/ 1.36 + 1.37 +#ifdef HAVE_CONFIG_H 1.38 +#include "config.h" 1.39 +#endif 1.40 + 1.41 +#include "mathops.h" 1.42 + 1.43 +/*Compute floor(sqrt(_val)) with exact arithmetic. 1.44 + This has been tested on all possible 32-bit inputs.*/ 1.45 +unsigned isqrt32(opus_uint32 _val){ 1.46 + unsigned b; 1.47 + unsigned g; 1.48 + int bshift; 1.49 + /*Uses the second method from 1.50 + http://www.azillionmonkeys.com/qed/sqroot.html 1.51 + The main idea is to search for the largest binary digit b such that 1.52 + (g+b)*(g+b) <= _val, and add it to the solution g.*/ 1.53 + g=0; 1.54 + bshift=(EC_ILOG(_val)-1)>>1; 1.55 + b=1U<<bshift; 1.56 + do{ 1.57 + opus_uint32 t; 1.58 + t=(((opus_uint32)g<<1)+b)<<bshift; 1.59 + if(t<=_val){ 1.60 + g+=b; 1.61 + _val-=t; 1.62 + } 1.63 + b>>=1; 1.64 + bshift--; 1.65 + } 1.66 + while(bshift>=0); 1.67 + return g; 1.68 +} 1.69 + 1.70 +#ifdef FIXED_POINT 1.71 + 1.72 +opus_val32 frac_div32(opus_val32 a, opus_val32 b) 1.73 +{ 1.74 + opus_val16 rcp; 1.75 + opus_val32 result, rem; 1.76 + int shift = celt_ilog2(b)-29; 1.77 + a = VSHR32(a,shift); 1.78 + b = VSHR32(b,shift); 1.79 + /* 16-bit reciprocal */ 1.80 + rcp = ROUND16(celt_rcp(ROUND16(b,16)),3); 1.81 + result = MULT16_32_Q15(rcp, a); 1.82 + rem = PSHR32(a,2)-MULT32_32_Q31(result, b); 1.83 + result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2)); 1.84 + if (result >= 536870912) /* 2^29 */ 1.85 + return 2147483647; /* 2^31 - 1 */ 1.86 + else if (result <= -536870912) /* -2^29 */ 1.87 + return -2147483647; /* -2^31 */ 1.88 + else 1.89 + return SHL32(result, 2); 1.90 +} 1.91 + 1.92 +/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */ 1.93 +opus_val16 celt_rsqrt_norm(opus_val32 x) 1.94 +{ 1.95 + opus_val16 n; 1.96 + opus_val16 r; 1.97 + opus_val16 r2; 1.98 + opus_val16 y; 1.99 + /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */ 1.100 + n = x-32768; 1.101 + /* Get a rough initial guess for the root. 1.102 + The optimal minimax quadratic approximation (using relative error) is 1.103 + r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485). 1.104 + Coefficients here, and the final result r, are Q14.*/ 1.105 + r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713)))); 1.106 + /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14. 1.107 + We can compute the result from n and r using Q15 multiplies with some 1.108 + adjustment, carefully done to avoid overflow. 1.109 + Range of y is [-1564,1594]. */ 1.110 + r2 = MULT16_16_Q15(r, r); 1.111 + y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1); 1.112 + /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5). 1.113 + This yields the Q14 reciprocal square root of the Q16 x, with a maximum 1.114 + relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a 1.115 + peak absolute error of 2.26591/16384. */ 1.116 + return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y, 1.117 + SUB16(MULT16_16_Q15(y, 12288), 16384)))); 1.118 +} 1.119 + 1.120 +/** Sqrt approximation (QX input, QX/2 output) */ 1.121 +opus_val32 celt_sqrt(opus_val32 x) 1.122 +{ 1.123 + int k; 1.124 + opus_val16 n; 1.125 + opus_val32 rt; 1.126 + static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664}; 1.127 + if (x==0) 1.128 + return 0; 1.129 + else if (x>=1073741824) 1.130 + return 32767; 1.131 + k = (celt_ilog2(x)>>1)-7; 1.132 + x = VSHR32(x, 2*k); 1.133 + n = x-32768; 1.134 + rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], 1.135 + MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4]))))))))); 1.136 + rt = VSHR32(rt,7-k); 1.137 + return rt; 1.138 +} 1.139 + 1.140 +#define L1 32767 1.141 +#define L2 -7651 1.142 +#define L3 8277 1.143 +#define L4 -626 1.144 + 1.145 +static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x) 1.146 +{ 1.147 + opus_val16 x2; 1.148 + 1.149 + x2 = MULT16_16_P15(x,x); 1.150 + return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2 1.151 + )))))))); 1.152 +} 1.153 + 1.154 +#undef L1 1.155 +#undef L2 1.156 +#undef L3 1.157 +#undef L4 1.158 + 1.159 +opus_val16 celt_cos_norm(opus_val32 x) 1.160 +{ 1.161 + x = x&0x0001ffff; 1.162 + if (x>SHL32(EXTEND32(1), 16)) 1.163 + x = SUB32(SHL32(EXTEND32(1), 17),x); 1.164 + if (x&0x00007fff) 1.165 + { 1.166 + if (x<SHL32(EXTEND32(1), 15)) 1.167 + { 1.168 + return _celt_cos_pi_2(EXTRACT16(x)); 1.169 + } else { 1.170 + return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x))); 1.171 + } 1.172 + } else { 1.173 + if (x&0x0000ffff) 1.174 + return 0; 1.175 + else if (x&0x0001ffff) 1.176 + return -32767; 1.177 + else 1.178 + return 32767; 1.179 + } 1.180 +} 1.181 + 1.182 +/** Reciprocal approximation (Q15 input, Q16 output) */ 1.183 +opus_val32 celt_rcp(opus_val32 x) 1.184 +{ 1.185 + int i; 1.186 + opus_val16 n; 1.187 + opus_val16 r; 1.188 + celt_assert2(x>0, "celt_rcp() only defined for positive values"); 1.189 + i = celt_ilog2(x); 1.190 + /* n is Q15 with range [0,1). */ 1.191 + n = VSHR32(x,i-15)-32768; 1.192 + /* Start with a linear approximation: 1.193 + r = 1.8823529411764706-0.9411764705882353*n. 1.194 + The coefficients and the result are Q14 in the range [15420,30840].*/ 1.195 + r = ADD16(30840, MULT16_16_Q15(-15420, n)); 1.196 + /* Perform two Newton iterations: 1.197 + r -= r*((r*n)-1.Q15) 1.198 + = r*((r*n)+(r-1.Q15)). */ 1.199 + r = SUB16(r, MULT16_16_Q15(r, 1.200 + ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))); 1.201 + /* We subtract an extra 1 in the second iteration to avoid overflow; it also 1.202 + neatly compensates for truncation error in the rest of the process. */ 1.203 + r = SUB16(r, ADD16(1, MULT16_16_Q15(r, 1.204 + ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))))); 1.205 + /* r is now the Q15 solution to 2/(n+1), with a maximum relative error 1.206 + of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute 1.207 + error of 1.24665/32768. */ 1.208 + return VSHR32(EXTEND32(r),i-16); 1.209 +} 1.210 + 1.211 +#endif