|
1 /* Copyright (c) 2002-2008 Jean-Marc Valin |
|
2 Copyright (c) 2007-2008 CSIRO |
|
3 Copyright (c) 2007-2009 Xiph.Org Foundation |
|
4 Written by Jean-Marc Valin */ |
|
5 /** |
|
6 @file mathops.h |
|
7 @brief Various math functions |
|
8 */ |
|
9 /* |
|
10 Redistribution and use in source and binary forms, with or without |
|
11 modification, are permitted provided that the following conditions |
|
12 are met: |
|
13 |
|
14 - Redistributions of source code must retain the above copyright |
|
15 notice, this list of conditions and the following disclaimer. |
|
16 |
|
17 - Redistributions in binary form must reproduce the above copyright |
|
18 notice, this list of conditions and the following disclaimer in the |
|
19 documentation and/or other materials provided with the distribution. |
|
20 |
|
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
22 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
|
25 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|
28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
|
29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|
30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|
31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
32 */ |
|
33 |
|
34 #ifdef HAVE_CONFIG_H |
|
35 #include "config.h" |
|
36 #endif |
|
37 |
|
38 #include "mathops.h" |
|
39 |
|
40 /*Compute floor(sqrt(_val)) with exact arithmetic. |
|
41 This has been tested on all possible 32-bit inputs.*/ |
|
42 unsigned isqrt32(opus_uint32 _val){ |
|
43 unsigned b; |
|
44 unsigned g; |
|
45 int bshift; |
|
46 /*Uses the second method from |
|
47 http://www.azillionmonkeys.com/qed/sqroot.html |
|
48 The main idea is to search for the largest binary digit b such that |
|
49 (g+b)*(g+b) <= _val, and add it to the solution g.*/ |
|
50 g=0; |
|
51 bshift=(EC_ILOG(_val)-1)>>1; |
|
52 b=1U<<bshift; |
|
53 do{ |
|
54 opus_uint32 t; |
|
55 t=(((opus_uint32)g<<1)+b)<<bshift; |
|
56 if(t<=_val){ |
|
57 g+=b; |
|
58 _val-=t; |
|
59 } |
|
60 b>>=1; |
|
61 bshift--; |
|
62 } |
|
63 while(bshift>=0); |
|
64 return g; |
|
65 } |
|
66 |
|
67 #ifdef FIXED_POINT |
|
68 |
|
69 opus_val32 frac_div32(opus_val32 a, opus_val32 b) |
|
70 { |
|
71 opus_val16 rcp; |
|
72 opus_val32 result, rem; |
|
73 int shift = celt_ilog2(b)-29; |
|
74 a = VSHR32(a,shift); |
|
75 b = VSHR32(b,shift); |
|
76 /* 16-bit reciprocal */ |
|
77 rcp = ROUND16(celt_rcp(ROUND16(b,16)),3); |
|
78 result = MULT16_32_Q15(rcp, a); |
|
79 rem = PSHR32(a,2)-MULT32_32_Q31(result, b); |
|
80 result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2)); |
|
81 if (result >= 536870912) /* 2^29 */ |
|
82 return 2147483647; /* 2^31 - 1 */ |
|
83 else if (result <= -536870912) /* -2^29 */ |
|
84 return -2147483647; /* -2^31 */ |
|
85 else |
|
86 return SHL32(result, 2); |
|
87 } |
|
88 |
|
89 /** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */ |
|
90 opus_val16 celt_rsqrt_norm(opus_val32 x) |
|
91 { |
|
92 opus_val16 n; |
|
93 opus_val16 r; |
|
94 opus_val16 r2; |
|
95 opus_val16 y; |
|
96 /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */ |
|
97 n = x-32768; |
|
98 /* Get a rough initial guess for the root. |
|
99 The optimal minimax quadratic approximation (using relative error) is |
|
100 r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485). |
|
101 Coefficients here, and the final result r, are Q14.*/ |
|
102 r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713)))); |
|
103 /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14. |
|
104 We can compute the result from n and r using Q15 multiplies with some |
|
105 adjustment, carefully done to avoid overflow. |
|
106 Range of y is [-1564,1594]. */ |
|
107 r2 = MULT16_16_Q15(r, r); |
|
108 y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1); |
|
109 /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5). |
|
110 This yields the Q14 reciprocal square root of the Q16 x, with a maximum |
|
111 relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a |
|
112 peak absolute error of 2.26591/16384. */ |
|
113 return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y, |
|
114 SUB16(MULT16_16_Q15(y, 12288), 16384)))); |
|
115 } |
|
116 |
|
117 /** Sqrt approximation (QX input, QX/2 output) */ |
|
118 opus_val32 celt_sqrt(opus_val32 x) |
|
119 { |
|
120 int k; |
|
121 opus_val16 n; |
|
122 opus_val32 rt; |
|
123 static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664}; |
|
124 if (x==0) |
|
125 return 0; |
|
126 else if (x>=1073741824) |
|
127 return 32767; |
|
128 k = (celt_ilog2(x)>>1)-7; |
|
129 x = VSHR32(x, 2*k); |
|
130 n = x-32768; |
|
131 rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], |
|
132 MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4]))))))))); |
|
133 rt = VSHR32(rt,7-k); |
|
134 return rt; |
|
135 } |
|
136 |
|
137 #define L1 32767 |
|
138 #define L2 -7651 |
|
139 #define L3 8277 |
|
140 #define L4 -626 |
|
141 |
|
142 static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x) |
|
143 { |
|
144 opus_val16 x2; |
|
145 |
|
146 x2 = MULT16_16_P15(x,x); |
|
147 return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2 |
|
148 )))))))); |
|
149 } |
|
150 |
|
151 #undef L1 |
|
152 #undef L2 |
|
153 #undef L3 |
|
154 #undef L4 |
|
155 |
|
156 opus_val16 celt_cos_norm(opus_val32 x) |
|
157 { |
|
158 x = x&0x0001ffff; |
|
159 if (x>SHL32(EXTEND32(1), 16)) |
|
160 x = SUB32(SHL32(EXTEND32(1), 17),x); |
|
161 if (x&0x00007fff) |
|
162 { |
|
163 if (x<SHL32(EXTEND32(1), 15)) |
|
164 { |
|
165 return _celt_cos_pi_2(EXTRACT16(x)); |
|
166 } else { |
|
167 return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x))); |
|
168 } |
|
169 } else { |
|
170 if (x&0x0000ffff) |
|
171 return 0; |
|
172 else if (x&0x0001ffff) |
|
173 return -32767; |
|
174 else |
|
175 return 32767; |
|
176 } |
|
177 } |
|
178 |
|
179 /** Reciprocal approximation (Q15 input, Q16 output) */ |
|
180 opus_val32 celt_rcp(opus_val32 x) |
|
181 { |
|
182 int i; |
|
183 opus_val16 n; |
|
184 opus_val16 r; |
|
185 celt_assert2(x>0, "celt_rcp() only defined for positive values"); |
|
186 i = celt_ilog2(x); |
|
187 /* n is Q15 with range [0,1). */ |
|
188 n = VSHR32(x,i-15)-32768; |
|
189 /* Start with a linear approximation: |
|
190 r = 1.8823529411764706-0.9411764705882353*n. |
|
191 The coefficients and the result are Q14 in the range [15420,30840].*/ |
|
192 r = ADD16(30840, MULT16_16_Q15(-15420, n)); |
|
193 /* Perform two Newton iterations: |
|
194 r -= r*((r*n)-1.Q15) |
|
195 = r*((r*n)+(r-1.Q15)). */ |
|
196 r = SUB16(r, MULT16_16_Q15(r, |
|
197 ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))); |
|
198 /* We subtract an extra 1 in the second iteration to avoid overflow; it also |
|
199 neatly compensates for truncation error in the rest of the process. */ |
|
200 r = SUB16(r, ADD16(1, MULT16_16_Q15(r, |
|
201 ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))))); |
|
202 /* r is now the Q15 solution to 2/(n+1), with a maximum relative error |
|
203 of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute |
|
204 error of 1.24665/32768. */ |
|
205 return VSHR32(EXTEND32(r),i-16); |
|
206 } |
|
207 |
|
208 #endif |