--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/media/libopus/celt/vq.c	Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,415 @@
+/* Copyright (c) 2007-2008 CSIRO
+   Copyright (c) 2007-2009 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mathops.h"
+#include "cwrs.h"
+#include "vq.h"
+#include "arch.h"
+#include "os_support.h"
+#include "bands.h"
+#include "rate.h"
+
+static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
+{
+   int i;
+   celt_norm *Xptr;
+   Xptr = X;
+   for (i=0;i<len-stride;i++)
+   {
+      celt_norm x1, x2;
+      x1 = Xptr[0];
+      x2 = Xptr[stride];
+      Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
+      *Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
+   }
+   Xptr = &X[len-2*stride-1];
+   for (i=len-2*stride-1;i>=0;i--)
+   {
+      celt_norm x1, x2;
+      x1 = Xptr[0];
+      x2 = Xptr[stride];
+      Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
+      *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
+   }
+}
+
+static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
+{
+   static const int SPREAD_FACTOR[3]={15,10,5};
+   int i;
+   opus_val16 c, s;
+   opus_val16 gain, theta;
+   int stride2=0;
+   int factor;
+
+   if (2*K>=len || spread==SPREAD_NONE)
+      return;
+   factor = SPREAD_FACTOR[spread-1];
+
+   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
+   theta = HALF16(MULT16_16_Q15(gain,gain));
+
+   c = celt_cos_norm(EXTEND32(theta));
+   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */
+
+   if (len>=8*stride)
+   {
+      stride2 = 1;
+      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
+         It's basically incrementing stride2 as long as (stride2+0.5)^2 < len/stride. */
+      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
+         stride2++;
+   }
+   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
+      extract_collapse_mask().*/
+   len /= stride;
+   for (i=0;i<stride;i++)
+   {
+      if (dir < 0)
+      {
+         if (stride2)
+            exp_rotation1(X+i*len, len, stride2, s, c);
+         exp_rotation1(X+i*len, len, 1, c, s);
+      } else {
+         exp_rotation1(X+i*len, len, 1, c, -s);
+         if (stride2)
+            exp_rotation1(X+i*len, len, stride2, s, -c);
+      }
+   }
+}
+
+/** Takes the pitch vector and the decoded residual vector, computes the gain
+    that will give ||p+g*y||=1 and mixes the residual with the pitch. */
+static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
+      int N, opus_val32 Ryy, opus_val16 gain)
+{
+   int i;
+#ifdef FIXED_POINT
+   int k;
+#endif
+   opus_val32 t;
+   opus_val16 g;
+
+#ifdef FIXED_POINT
+   k = celt_ilog2(Ryy)>>1;
+#endif
+   t = VSHR32(Ryy, 2*(k-7));
+   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
+
+   i=0;
+   do
+      X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));
+   while (++i < N);
+}
+
+static unsigned extract_collapse_mask(int *iy, int N, int B)
+{
+   unsigned collapse_mask;
+   int N0;
+   int i;
+   if (B<=1)
+      return 1;
+   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
+      exp_rotation().*/
+   N0 = N/B;
+   collapse_mask = 0;
+   i=0; do {
+      int j;
+      j=0; do {
+         collapse_mask |= (iy[i*N0+j]!=0)<<i;
+      } while (++j<N0);
+   } while (++i<B);
+   return collapse_mask;
+}
+
+unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
+#ifdef RESYNTH
+   , opus_val16 gain
+#endif
+   )
+{
+   VARDECL(celt_norm, y);
+   VARDECL(int, iy);
+   VARDECL(opus_val16, signx);
+   int i, j;
+   opus_val16 s;
+   int pulsesLeft;
+   opus_val32 sum;
+   opus_val32 xy;
+   opus_val16 yy;
+   unsigned collapse_mask;
+   SAVE_STACK;
+
+   celt_assert2(K>0, "alg_quant() needs at least one pulse");
+   celt_assert2(N>1, "alg_quant() needs at least two dimensions");
+
+   ALLOC(y, N, celt_norm);
+   ALLOC(iy, N, int);
+   ALLOC(signx, N, opus_val16);
+
+   exp_rotation(X, N, 1, B, K, spread);
+
+   /* Get rid of the sign */
+   sum = 0;
+   j=0; do {
+      if (X[j]>0)
+         signx[j]=1;
+      else {
+         signx[j]=-1;
+         X[j]=-X[j];
+      }
+      iy[j] = 0;
+      y[j] = 0;
+   } while (++j<N);
+
+   xy = yy = 0;
+
+   pulsesLeft = K;
+
+   /* Do a pre-search by projecting on the pyramid */
+   if (K > (N>>1))
+   {
+      opus_val16 rcp;
+      j=0; do {
+         sum += X[j];
+      } while (++j<N);
+
+      /* If X is too small, just replace it with a pulse at 0 */
+#ifdef FIXED_POINT
+      if (sum <= K)
+#else
+      /* Prevents infinities and NaNs from causing too many pulses
+         to be allocated. 64 is an approximation of infinity here. */
+      if (!(sum > EPSILON && sum < 64))
+#endif
+      {
+         X[0] = QCONST16(1.f,14);
+         j=1; do
+            X[j]=0;
+         while (++j<N);
+         sum = QCONST16(1.f,14);
+      }
+      rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
+      j=0; do {
+#ifdef FIXED_POINT
+         /* It's really important to round *towards zero* here */
+         iy[j] = MULT16_16_Q15(X[j],rcp);
+#else
+         iy[j] = (int)floor(rcp*X[j]);
+#endif
+         y[j] = (celt_norm)iy[j];
+         yy = MAC16_16(yy, y[j],y[j]);
+         xy = MAC16_16(xy, X[j],y[j]);
+         y[j] *= 2;
+         pulsesLeft -= iy[j];
+      } while (++j<N);
+   }
+   celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");
+
+   /* This should never happen, but just in case it does (e.g. on silence)
+      we fill the first bin with pulses. */
+#ifdef FIXED_POINT_DEBUG
+   celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");
+#endif
+   if (pulsesLeft > N+3)
+   {
+      opus_val16 tmp = (opus_val16)pulsesLeft;
+      yy = MAC16_16(yy, tmp, tmp);
+      yy = MAC16_16(yy, tmp, y[0]);
+      iy[0] += pulsesLeft;
+      pulsesLeft=0;
+   }
+
+   s = 1;
+   for (i=0;i<pulsesLeft;i++)
+   {
+      int best_id;
+      opus_val32 best_num = -VERY_LARGE16;
+      opus_val16 best_den = 0;
+#ifdef FIXED_POINT
+      int rshift;
+#endif
+#ifdef FIXED_POINT
+      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
+#endif
+      best_id = 0;
+      /* The squared magnitude term gets added anyway, so we might as well
+         add it outside the loop */
+      yy = ADD32(yy, 1);
+      j=0;
+      do {
+         opus_val16 Rxy, Ryy;
+         /* Temporary sums of the new pulse(s) */
+         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
+         /* We're multiplying y[j] by two so we don't have to do it here */
+         Ryy = ADD16(yy, y[j]);
+
+         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
+            Rxy is positive because the sign is pre-computed) */
+         Rxy = MULT16_16_Q15(Rxy,Rxy);
+         /* The idea is to check for num/den >= best_num/best_den, but that way
+            we can do it without any division */
+         /* OPT: Make sure to use conditional moves here */
+         if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
+         {
+            best_den = Ryy;
+            best_num = Rxy;
+            best_id = j;
+         }
+      } while (++j<N);
+
+      /* Updating the sums of the new pulse(s) */
+      xy = ADD32(xy, EXTEND32(X[best_id]));
+      /* We're multiplying y[j] by two so we don't have to do it here */
+      yy = ADD16(yy, y[best_id]);
+
+      /* Only now that we've made the final choice, update y/iy */
+      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
+      y[best_id] += 2*s;
+      iy[best_id]++;
+   }
+
+   /* Put the original sign back */
+   j=0;
+   do {
+      X[j] = MULT16_16(signx[j],X[j]);
+      if (signx[j] < 0)
+         iy[j] = -iy[j];
+   } while (++j<N);
+   encode_pulses(iy, N, K, enc);
+
+#ifdef RESYNTH
+   normalise_residual(iy, X, N, yy, gain);
+   exp_rotation(X, N, -1, B, K, spread);
+#endif
+
+   collapse_mask = extract_collapse_mask(iy, N, B);
+   RESTORE_STACK;
+   return collapse_mask;
+}
+
+/** Decode pulse vector and combine the result with the pitch vector to produce
+    the final normalised signal in the current band. */
+unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
+      ec_dec *dec, opus_val16 gain)
+{
+   int i;
+   opus_val32 Ryy;
+   unsigned collapse_mask;
+   VARDECL(int, iy);
+   SAVE_STACK;
+
+   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
+   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
+   ALLOC(iy, N, int);
+   decode_pulses(iy, N, K, dec);
+   Ryy = 0;
+   i=0;
+   do {
+      Ryy = MAC16_16(Ryy, iy[i], iy[i]);
+   } while (++i < N);
+   normalise_residual(iy, X, N, Ryy, gain);
+   exp_rotation(X, N, -1, B, K, spread);
+   collapse_mask = extract_collapse_mask(iy, N, B);
+   RESTORE_STACK;
+   return collapse_mask;
+}
+
+void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
+{
+   int i;
+#ifdef FIXED_POINT
+   int k;
+#endif
+   opus_val32 E = EPSILON;
+   opus_val16 g;
+   opus_val32 t;
+   celt_norm *xptr = X;
+   for (i=0;i<N;i++)
+   {
+      E = MAC16_16(E, *xptr, *xptr);
+      xptr++;
+   }
+#ifdef FIXED_POINT
+   k = celt_ilog2(E)>>1;
+#endif
+   t = VSHR32(E, 2*(k-7));
+   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
+
+   xptr = X;
+   for (i=0;i<N;i++)
+   {
+      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
+      xptr++;
+   }
+   /*return celt_sqrt(E);*/
+}
+
+int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
+{
+   int i;
+   int itheta;
+   opus_val16 mid, side;
+   opus_val32 Emid, Eside;
+
+   Emid = Eside = EPSILON;
+   if (stereo)
+   {
+      for (i=0;i<N;i++)
+      {
+         celt_norm m, s;
+         m = ADD16(SHR16(X[i],1),SHR16(Y[i],1));
+         s = SUB16(SHR16(X[i],1),SHR16(Y[i],1));
+         Emid = MAC16_16(Emid, m, m);
+         Eside = MAC16_16(Eside, s, s);
+      }
+   } else {
+      for (i=0;i<N;i++)
+      {
+         celt_norm m, s;
+         m = X[i];
+         s = Y[i];
+         Emid = MAC16_16(Emid, m, m);
+         Eside = MAC16_16(Eside, s, s);
+      }
+   }
+   mid = celt_sqrt(Emid);
+   side = celt_sqrt(Eside);
+#ifdef FIXED_POINT
+   /* 0.63662 = 2/pi */
+   itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
+#else
+   itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid));
+#endif
+
+   return itheta;
+}
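
The spreading pass applied by exp_rotation()/exp_rotation1() in the diff above is a cascade of 2-point Givens rotations run forwards and then backwards over the band: each 2x2 rotation is orthonormal, so an isolated pulse gets smeared across neighbouring samples while the band's total energy is unchanged. A minimal stand-alone float sketch of that idea, assuming a hypothetical helper name exp_rotation1_float and a true cos/sin pair where the fixed-point code uses Q15 celt_cos_norm() approximations:

#include <math.h>
#include <stdio.h>

/* Float sketch of exp_rotation1(): forward pass then backward pass of
   2-point rotations with coefficients (c, s).  Each step maps
   (x1, x2) -> (c*x1 - s*x2, s*x1 + c*x2), which preserves x1^2 + x2^2. */
static void exp_rotation1_float(float *X, int len, int stride, float c, float s)
{
   int i;
   float *Xptr = X;
   for (i = 0; i < len - stride; i++)
   {
      float x1 = Xptr[0];
      float x2 = Xptr[stride];
      Xptr[stride] = c*x2 + s*x1;
      *Xptr++      = c*x1 - s*x2;
   }
   Xptr = &X[len - 2*stride - 1];
   for (i = len - 2*stride - 1; i >= 0; i--)
   {
      float x1 = Xptr[0];
      float x2 = Xptr[stride];
      Xptr[stride] = c*x2 + s*x1;
      *Xptr--      = c*x1 - s*x2;
   }
}

int main(void)
{
   float X[8] = {1, 0, 0, 0, 0, 0, 0, 0};  /* a single "pulse" */
   float theta = 0.3f;                     /* illustrative angle only */
   float E = 0.f;
   int i;
   exp_rotation1_float(X, 8, 1, cosf(theta), sinf(theta));
   for (i = 0; i < 8; i++)
      E += X[i]*X[i];
   /* The pulse is now spread over several samples, but the energy is
      still 1 up to float rounding. */
   printf("energy after rotation: %f\n", E);
   return 0;
}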
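The core of alg_quant() is a greedy PVQ search: after an optional projection onto the pyramid sum(|iy[j]|) = K, it adds one pulse at a time at the position that maximises the correlation-to-energy score Rxy^2/Ryy, and the fixed-point code compares candidates by cross-multiplying so no division is needed. A minimal floating-point sketch of just that search, assuming a hypothetical helper name pvq_search_float; it ignores signs, spreading, fixed-point scaling and the pre-search, so it is illustrative rather than bit-exact:

#include <math.h>
#include <stdio.h>

/* Stand-alone sketch of the greedy pulse search: place K unit pulses in iy[]
   so that the correlation with X, normalised by the pulse energy, is
   (greedily) maximised. */
static void pvq_search_float(const float *X, int *iy, int N, int K)
{
   int i, j;
   float xy = 0.f;   /* running correlation  sum_j |X[j]|*iy[j] */
   float yy = 0.f;   /* running energy       sum_j iy[j]*iy[j]  */
   for (j = 0; j < N; j++)
      iy[j] = 0;
   for (i = 0; i < K; i++)
   {
      int best = 0;
      float best_score = -1.f;
      for (j = 0; j < N; j++)
      {
         /* Adding one pulse at position j changes xy by |X[j]| and yy by
            2*iy[j]+1 (signs are restored separately in the real code). */
         float Rxy = xy + fabsf(X[j]);
         float Ryy = yy + 2.f*iy[j] + 1.f;
         float score = Rxy*Rxy/Ryy;   /* maximise Rxy/sqrt(Ryy) */
         if (score > best_score)
         {
            best_score = score;
            best = j;
         }
      }
      xy += fabsf(X[best]);
      yy += 2.f*iy[best] + 1.f;
      iy[best]++;
   }
}

int main(void)
{
   const float X[4] = {0.7f, 0.5f, -0.4f, 0.3f};
   int iy[4];
   int j;
   pvq_search_float(X, iy, 4, 8);
   for (j = 0; j < 4; j++)
      printf("iy[%d] = %d\n", j, iy[j]);
   return 0;
}

With this input the eight pulses end up distributed roughly in proportion to |X[j]|, which is the property the projection pre-search in alg_quant() exploits when K is large relative to N.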
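stereo_itheta() returns the mid/side angle on a scale where 16384 corresponds to pi/2: in the float branch, itheta = floor(0.5 + 16384*(2/pi)*atan2(side, mid)). So a band with only mid energy maps to 0, only side energy maps to 16384, and equal mid and side energy maps to 8192. A small stand-alone check of that mapping, assuming a hypothetical helper name itheta_from_energies:

#include <math.h>
#include <stdio.h>

/* Reproduces the float branch of stereo_itheta(): map the mid/side energy
   ratio to an angle in [0, 16384], where 16384 corresponds to pi/2. */
static int itheta_from_energies(double Emid, double Eside)
{
   double mid = sqrt(Emid), side = sqrt(Eside);
   return (int)floor(.5f + 16384*0.63662f*atan2(side, mid));   /* 0.63662 = 2/pi */
}

int main(void)
{
   printf("mid only  : %d\n", itheta_from_energies(1.0, 0.0));  /* 0     */
   printf("equal     : %d\n", itheta_from_energies(1.0, 1.0));  /* 8192  */
   printf("side only : %d\n", itheta_from_energies(0.0, 1.0));  /* 16384 */
   return 0;
}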