media/libopus/celt/vq.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* Copyright (c) 2007-2008 CSIRO
michael@0 2 Copyright (c) 2007-2009 Xiph.Org Foundation
michael@0 3 Written by Jean-Marc Valin */
michael@0 4 /*
michael@0 5 Redistribution and use in source and binary forms, with or without
michael@0 6 modification, are permitted provided that the following conditions
michael@0 7 are met:
michael@0 8
michael@0 9 - Redistributions of source code must retain the above copyright
michael@0 10 notice, this list of conditions and the following disclaimer.
michael@0 11
michael@0 12 - Redistributions in binary form must reproduce the above copyright
michael@0 13 notice, this list of conditions and the following disclaimer in the
michael@0 14 documentation and/or other materials provided with the distribution.
michael@0 15
michael@0 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
michael@0 17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
michael@0 18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
michael@0 19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
michael@0 20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
michael@0 21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
michael@0 22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
michael@0 23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
michael@0 24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
michael@0 25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
michael@0 26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
michael@0 27 */
michael@0 28
michael@0 29 #ifdef HAVE_CONFIG_H
michael@0 30 #include "config.h"
michael@0 31 #endif
michael@0 32
michael@0 33 #include "mathops.h"
michael@0 34 #include "cwrs.h"
michael@0 35 #include "vq.h"
michael@0 36 #include "arch.h"
michael@0 37 #include "os_support.h"
michael@0 38 #include "bands.h"
michael@0 39 #include "rate.h"
michael@0 40
michael@0 41 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
michael@0 42 {
michael@0 43 int i;
michael@0 44 celt_norm *Xptr;
michael@0 45 Xptr = X;
michael@0 46 for (i=0;i<len-stride;i++)
michael@0 47 {
michael@0 48 celt_norm x1, x2;
michael@0 49 x1 = Xptr[0];
michael@0 50 x2 = Xptr[stride];
michael@0 51 Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
michael@0 52 *Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
michael@0 53 }
michael@0 54 Xptr = &X[len-2*stride-1];
michael@0 55 for (i=len-2*stride-1;i>=0;i--)
michael@0 56 {
michael@0 57 celt_norm x1, x2;
michael@0 58 x1 = Xptr[0];
michael@0 59 x2 = Xptr[stride];
michael@0 60 Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
michael@0 61 *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
michael@0 62 }
michael@0 63 }
michael@0 64
michael@0 65 static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
michael@0 66 {
michael@0 67 static const int SPREAD_FACTOR[3]={15,10,5};
michael@0 68 int i;
michael@0 69 opus_val16 c, s;
michael@0 70 opus_val16 gain, theta;
michael@0 71 int stride2=0;
michael@0 72 int factor;
michael@0 73
michael@0 74 if (2*K>=len || spread==SPREAD_NONE)
michael@0 75 return;
michael@0 76 factor = SPREAD_FACTOR[spread-1];
michael@0 77
michael@0 78 gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
michael@0 79 theta = HALF16(MULT16_16_Q15(gain,gain));
michael@0 80
michael@0 81 c = celt_cos_norm(EXTEND32(theta));
michael@0 82 s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */
michael@0 83
michael@0 84 if (len>=8*stride)
michael@0 85 {
michael@0 86 stride2 = 1;
michael@0 87 /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
michael@0 88 It's basically incrementing long as (stride2+0.5)^2 < len/stride. */
michael@0 89 while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
michael@0 90 stride2++;
michael@0 91 }
michael@0 92 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
michael@0 93 extract_collapse_mask().*/
michael@0 94 len /= stride;
michael@0 95 for (i=0;i<stride;i++)
michael@0 96 {
michael@0 97 if (dir < 0)
michael@0 98 {
michael@0 99 if (stride2)
michael@0 100 exp_rotation1(X+i*len, len, stride2, s, c);
michael@0 101 exp_rotation1(X+i*len, len, 1, c, s);
michael@0 102 } else {
michael@0 103 exp_rotation1(X+i*len, len, 1, c, -s);
michael@0 104 if (stride2)
michael@0 105 exp_rotation1(X+i*len, len, stride2, s, -c);
michael@0 106 }
michael@0 107 }
michael@0 108 }
michael@0 109
michael@0 110 /** Takes the pitch vector and the decoded residual vector, computes the gain
michael@0 111 that will give ||p+g*y||=1 and mixes the residual with the pitch. */
michael@0 112 static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
michael@0 113 int N, opus_val32 Ryy, opus_val16 gain)
michael@0 114 {
michael@0 115 int i;
michael@0 116 #ifdef FIXED_POINT
michael@0 117 int k;
michael@0 118 #endif
michael@0 119 opus_val32 t;
michael@0 120 opus_val16 g;
michael@0 121
michael@0 122 #ifdef FIXED_POINT
michael@0 123 k = celt_ilog2(Ryy)>>1;
michael@0 124 #endif
michael@0 125 t = VSHR32(Ryy, 2*(k-7));
michael@0 126 g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
michael@0 127
michael@0 128 i=0;
michael@0 129 do
michael@0 130 X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));
michael@0 131 while (++i < N);
michael@0 132 }
michael@0 133
michael@0 134 static unsigned extract_collapse_mask(int *iy, int N, int B)
michael@0 135 {
michael@0 136 unsigned collapse_mask;
michael@0 137 int N0;
michael@0 138 int i;
michael@0 139 if (B<=1)
michael@0 140 return 1;
michael@0 141 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
michael@0 142 exp_rotation().*/
michael@0 143 N0 = N/B;
michael@0 144 collapse_mask = 0;
michael@0 145 i=0; do {
michael@0 146 int j;
michael@0 147 j=0; do {
michael@0 148 collapse_mask |= (iy[i*N0+j]!=0)<<i;
michael@0 149 } while (++j<N0);
michael@0 150 } while (++i<B);
michael@0 151 return collapse_mask;
michael@0 152 }
michael@0 153
michael@0 154 unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
michael@0 155 #ifdef RESYNTH
michael@0 156 , opus_val16 gain
michael@0 157 #endif
michael@0 158 )
michael@0 159 {
michael@0 160 VARDECL(celt_norm, y);
michael@0 161 VARDECL(int, iy);
michael@0 162 VARDECL(opus_val16, signx);
michael@0 163 int i, j;
michael@0 164 opus_val16 s;
michael@0 165 int pulsesLeft;
michael@0 166 opus_val32 sum;
michael@0 167 opus_val32 xy;
michael@0 168 opus_val16 yy;
michael@0 169 unsigned collapse_mask;
michael@0 170 SAVE_STACK;
michael@0 171
michael@0 172 celt_assert2(K>0, "alg_quant() needs at least one pulse");
michael@0 173 celt_assert2(N>1, "alg_quant() needs at least two dimensions");
michael@0 174
michael@0 175 ALLOC(y, N, celt_norm);
michael@0 176 ALLOC(iy, N, int);
michael@0 177 ALLOC(signx, N, opus_val16);
michael@0 178
michael@0 179 exp_rotation(X, N, 1, B, K, spread);
michael@0 180
michael@0 181 /* Get rid of the sign */
michael@0 182 sum = 0;
michael@0 183 j=0; do {
michael@0 184 if (X[j]>0)
michael@0 185 signx[j]=1;
michael@0 186 else {
michael@0 187 signx[j]=-1;
michael@0 188 X[j]=-X[j];
michael@0 189 }
michael@0 190 iy[j] = 0;
michael@0 191 y[j] = 0;
michael@0 192 } while (++j<N);
michael@0 193
michael@0 194 xy = yy = 0;
michael@0 195
michael@0 196 pulsesLeft = K;
michael@0 197
michael@0 198 /* Do a pre-search by projecting on the pyramid */
michael@0 199 if (K > (N>>1))
michael@0 200 {
michael@0 201 opus_val16 rcp;
michael@0 202 j=0; do {
michael@0 203 sum += X[j];
michael@0 204 } while (++j<N);
michael@0 205
michael@0 206 /* If X is too small, just replace it with a pulse at 0 */
michael@0 207 #ifdef FIXED_POINT
michael@0 208 if (sum <= K)
michael@0 209 #else
michael@0 210 /* Prevents infinities and NaNs from causing too many pulses
michael@0 211 to be allocated. 64 is an approximation of infinity here. */
michael@0 212 if (!(sum > EPSILON && sum < 64))
michael@0 213 #endif
michael@0 214 {
michael@0 215 X[0] = QCONST16(1.f,14);
michael@0 216 j=1; do
michael@0 217 X[j]=0;
michael@0 218 while (++j<N);
michael@0 219 sum = QCONST16(1.f,14);
michael@0 220 }
michael@0 221 rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
michael@0 222 j=0; do {
michael@0 223 #ifdef FIXED_POINT
michael@0 224 /* It's really important to round *towards zero* here */
michael@0 225 iy[j] = MULT16_16_Q15(X[j],rcp);
michael@0 226 #else
michael@0 227 iy[j] = (int)floor(rcp*X[j]);
michael@0 228 #endif
michael@0 229 y[j] = (celt_norm)iy[j];
michael@0 230 yy = MAC16_16(yy, y[j],y[j]);
michael@0 231 xy = MAC16_16(xy, X[j],y[j]);
michael@0 232 y[j] *= 2;
michael@0 233 pulsesLeft -= iy[j];
michael@0 234 } while (++j<N);
michael@0 235 }
michael@0 236 celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");
michael@0 237
michael@0 238 /* This should never happen, but just in case it does (e.g. on silence)
michael@0 239 we fill the first bin with pulses. */
michael@0 240 #ifdef FIXED_POINT_DEBUG
michael@0 241 celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");
michael@0 242 #endif
michael@0 243 if (pulsesLeft > N+3)
michael@0 244 {
michael@0 245 opus_val16 tmp = (opus_val16)pulsesLeft;
michael@0 246 yy = MAC16_16(yy, tmp, tmp);
michael@0 247 yy = MAC16_16(yy, tmp, y[0]);
michael@0 248 iy[0] += pulsesLeft;
michael@0 249 pulsesLeft=0;
michael@0 250 }
michael@0 251
michael@0 252 s = 1;
michael@0 253 for (i=0;i<pulsesLeft;i++)
michael@0 254 {
michael@0 255 int best_id;
michael@0 256 opus_val32 best_num = -VERY_LARGE16;
michael@0 257 opus_val16 best_den = 0;
michael@0 258 #ifdef FIXED_POINT
michael@0 259 int rshift;
michael@0 260 #endif
michael@0 261 #ifdef FIXED_POINT
michael@0 262 rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
michael@0 263 #endif
michael@0 264 best_id = 0;
michael@0 265 /* The squared magnitude term gets added anyway, so we might as well
michael@0 266 add it outside the loop */
michael@0 267 yy = ADD32(yy, 1);
michael@0 268 j=0;
michael@0 269 do {
michael@0 270 opus_val16 Rxy, Ryy;
michael@0 271 /* Temporary sums of the new pulse(s) */
michael@0 272 Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
michael@0 273 /* We're multiplying y[j] by two so we don't have to do it here */
michael@0 274 Ryy = ADD16(yy, y[j]);
michael@0 275
michael@0 276 /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
michael@0 277 Rxy is positive because the sign is pre-computed) */
michael@0 278 Rxy = MULT16_16_Q15(Rxy,Rxy);
michael@0 279 /* The idea is to check for num/den >= best_num/best_den, but that way
michael@0 280 we can do it without any division */
michael@0 281 /* OPT: Make sure to use conditional moves here */
michael@0 282 if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
michael@0 283 {
michael@0 284 best_den = Ryy;
michael@0 285 best_num = Rxy;
michael@0 286 best_id = j;
michael@0 287 }
michael@0 288 } while (++j<N);
michael@0 289
michael@0 290 /* Updating the sums of the new pulse(s) */
michael@0 291 xy = ADD32(xy, EXTEND32(X[best_id]));
michael@0 292 /* We're multiplying y[j] by two so we don't have to do it here */
michael@0 293 yy = ADD16(yy, y[best_id]);
michael@0 294
michael@0 295 /* Only now that we've made the final choice, update y/iy */
michael@0 296 /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
michael@0 297 y[best_id] += 2*s;
michael@0 298 iy[best_id]++;
michael@0 299 }
michael@0 300
michael@0 301 /* Put the original sign back */
michael@0 302 j=0;
michael@0 303 do {
michael@0 304 X[j] = MULT16_16(signx[j],X[j]);
michael@0 305 if (signx[j] < 0)
michael@0 306 iy[j] = -iy[j];
michael@0 307 } while (++j<N);
michael@0 308 encode_pulses(iy, N, K, enc);
michael@0 309
michael@0 310 #ifdef RESYNTH
michael@0 311 normalise_residual(iy, X, N, yy, gain);
michael@0 312 exp_rotation(X, N, -1, B, K, spread);
michael@0 313 #endif
michael@0 314
michael@0 315 collapse_mask = extract_collapse_mask(iy, N, B);
michael@0 316 RESTORE_STACK;
michael@0 317 return collapse_mask;
michael@0 318 }
michael@0 319
michael@0 320 /** Decode pulse vector and combine the result with the pitch vector to produce
michael@0 321 the final normalised signal in the current band. */
michael@0 322 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
michael@0 323 ec_dec *dec, opus_val16 gain)
michael@0 324 {
michael@0 325 int i;
michael@0 326 opus_val32 Ryy;
michael@0 327 unsigned collapse_mask;
michael@0 328 VARDECL(int, iy);
michael@0 329 SAVE_STACK;
michael@0 330
michael@0 331 celt_assert2(K>0, "alg_unquant() needs at least one pulse");
michael@0 332 celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
michael@0 333 ALLOC(iy, N, int);
michael@0 334 decode_pulses(iy, N, K, dec);
michael@0 335 Ryy = 0;
michael@0 336 i=0;
michael@0 337 do {
michael@0 338 Ryy = MAC16_16(Ryy, iy[i], iy[i]);
michael@0 339 } while (++i < N);
michael@0 340 normalise_residual(iy, X, N, Ryy, gain);
michael@0 341 exp_rotation(X, N, -1, B, K, spread);
michael@0 342 collapse_mask = extract_collapse_mask(iy, N, B);
michael@0 343 RESTORE_STACK;
michael@0 344 return collapse_mask;
michael@0 345 }
michael@0 346
michael@0 347 void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
michael@0 348 {
michael@0 349 int i;
michael@0 350 #ifdef FIXED_POINT
michael@0 351 int k;
michael@0 352 #endif
michael@0 353 opus_val32 E = EPSILON;
michael@0 354 opus_val16 g;
michael@0 355 opus_val32 t;
michael@0 356 celt_norm *xptr = X;
michael@0 357 for (i=0;i<N;i++)
michael@0 358 {
michael@0 359 E = MAC16_16(E, *xptr, *xptr);
michael@0 360 xptr++;
michael@0 361 }
michael@0 362 #ifdef FIXED_POINT
michael@0 363 k = celt_ilog2(E)>>1;
michael@0 364 #endif
michael@0 365 t = VSHR32(E, 2*(k-7));
michael@0 366 g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
michael@0 367
michael@0 368 xptr = X;
michael@0 369 for (i=0;i<N;i++)
michael@0 370 {
michael@0 371 *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
michael@0 372 xptr++;
michael@0 373 }
michael@0 374 /*return celt_sqrt(E);*/
michael@0 375 }
michael@0 376
michael@0 377 int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
michael@0 378 {
michael@0 379 int i;
michael@0 380 int itheta;
michael@0 381 opus_val16 mid, side;
michael@0 382 opus_val32 Emid, Eside;
michael@0 383
michael@0 384 Emid = Eside = EPSILON;
michael@0 385 if (stereo)
michael@0 386 {
michael@0 387 for (i=0;i<N;i++)
michael@0 388 {
michael@0 389 celt_norm m, s;
michael@0 390 m = ADD16(SHR16(X[i],1),SHR16(Y[i],1));
michael@0 391 s = SUB16(SHR16(X[i],1),SHR16(Y[i],1));
michael@0 392 Emid = MAC16_16(Emid, m, m);
michael@0 393 Eside = MAC16_16(Eside, s, s);
michael@0 394 }
michael@0 395 } else {
michael@0 396 for (i=0;i<N;i++)
michael@0 397 {
michael@0 398 celt_norm m, s;
michael@0 399 m = X[i];
michael@0 400 s = Y[i];
michael@0 401 Emid = MAC16_16(Emid, m, m);
michael@0 402 Eside = MAC16_16(Eside, s, s);
michael@0 403 }
michael@0 404 }
michael@0 405 mid = celt_sqrt(Emid);
michael@0 406 side = celt_sqrt(Eside);
michael@0 407 #ifdef FIXED_POINT
michael@0 408 /* 0.63662 = 2/pi */
michael@0 409 itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
michael@0 410 #else
michael@0 411 itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid));
michael@0 412 #endif
michael@0 413
michael@0 414 return itheta;
michael@0 415 }

mercurial