The Tor Browser: media/libopus/celt/vq.c@b8a032363ba2

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* Copyright (c) 2007-2008 CSIRO

     2    Copyright (c) 2007-2009 Xiph.Org Foundation

     3    Written by Jean-Marc Valin */

     4 /*

     5    Redistribution and use in source and binary forms, with or without

     6    modification, are permitted provided that the following conditions

     7    are met:

     9    - Redistributions of source code must retain the above copyright

    10    notice, this list of conditions and the following disclaimer.

    12    - Redistributions in binary form must reproduce the above copyright

    13    notice, this list of conditions and the following disclaimer in the

    14    documentation and/or other materials provided with the distribution.

    16    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

    17    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

    18    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

    19    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER

    20    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

    21    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

    22    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

    23    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF

    24    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING

    25    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

    26    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    27 */

    29 #ifdef HAVE_CONFIG_H

    30 #include "config.h"

    31 #endif

    33 #include "mathops.h"

    34 #include "cwrs.h"

    35 #include "vq.h"

    36 #include "arch.h"

    37 #include "os_support.h"

    38 #include "bands.h"

    39 #include "rate.h"

    41 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)

    42 {

    43    int i;

    44    celt_norm *Xptr;

    45    Xptr = X;

    46    for (i=0;i<len-stride;i++)

    47    {

    48       celt_norm x1, x2;

    49       x1 = Xptr[0];

    50       x2 = Xptr[stride];

    51       Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));

    52       *Xptr++      = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));

    53    }

    54    Xptr = &X[len-2*stride-1];

    55    for (i=len-2*stride-1;i>=0;i--)

    56    {

    57       celt_norm x1, x2;

    58       x1 = Xptr[0];

    59       x2 = Xptr[stride];

    60       Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));

    61       *Xptr--      = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));

    62    }

    63 }

    65 static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)

    66 {

    67    static const int SPREAD_FACTOR[3]={15,10,5};

    68    int i;

    69    opus_val16 c, s;

    70    opus_val16 gain, theta;

    71    int stride2=0;

    72    int factor;

    74    if (2*K>=len || spread==SPREAD_NONE)

    75       return;

    76    factor = SPREAD_FACTOR[spread-1];

    78    gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));

    79    theta = HALF16(MULT16_16_Q15(gain,gain));

    81    c = celt_cos_norm(EXTEND32(theta));

    82    s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */

    84    if (len>=8*stride)

    85    {

    86       stride2 = 1;

    87       /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.

    88          It's basically incrementing long as (stride2+0.5)^2 < len/stride. */

    89       while ((stride2*stride2+stride2)*stride + (stride>>2) < len)

    90          stride2++;

    91    }

    92    /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for

    93       extract_collapse_mask().*/

    94    len /= stride;

    95    for (i=0;i<stride;i++)

    96    {

    97       if (dir < 0)

    98       {

    99          if (stride2)

   100             exp_rotation1(X+i*len, len, stride2, s, c);

   101          exp_rotation1(X+i*len, len, 1, c, s);

   102       } else {

   103          exp_rotation1(X+i*len, len, 1, c, -s);

   104          if (stride2)

   105             exp_rotation1(X+i*len, len, stride2, s, -c);

   106       }

   107    }

   108 }

   110 /** Takes the pitch vector and the decoded residual vector, computes the gain

   111     that will give ||p+g*y||=1 and mixes the residual with the pitch. */

   112 static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,

   113       int N, opus_val32 Ryy, opus_val16 gain)

   114 {

   115    int i;

   116 #ifdef FIXED_POINT

   117    int k;

   118 #endif

   119    opus_val32 t;

   120    opus_val16 g;

   122 #ifdef FIXED_POINT

   123    k = celt_ilog2(Ryy)>>1;

   124 #endif

   125    t = VSHR32(Ryy, 2*(k-7));

   126    g = MULT16_16_P15(celt_rsqrt_norm(t),gain);

   128    i=0;

   129    do

   130       X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));

   131    while (++i < N);

   132 }

   134 static unsigned extract_collapse_mask(int *iy, int N, int B)

   135 {

   136    unsigned collapse_mask;

   137    int N0;

   138    int i;

   139    if (B<=1)

   140       return 1;

   141    /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for

   142       exp_rotation().*/

   143    N0 = N/B;

   144    collapse_mask = 0;

   145    i=0; do {

   146       int j;

   147       j=0; do {

   148          collapse_mask |= (iy[i*N0+j]!=0)<<i;

   149       } while (++j<N0);

   150    } while (++i<B);

   151    return collapse_mask;

   152 }

   154 unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc

   155 #ifdef RESYNTH

   156    , opus_val16 gain

   157 #endif

   158    )

   159 {

   160    VARDECL(celt_norm, y);

   161    VARDECL(int, iy);

   162    VARDECL(opus_val16, signx);

   163    int i, j;

   164    opus_val16 s;

   165    int pulsesLeft;

   166    opus_val32 sum;

   167    opus_val32 xy;

   168    opus_val16 yy;

   169    unsigned collapse_mask;

   170    SAVE_STACK;

   172    celt_assert2(K>0, "alg_quant() needs at least one pulse");

   173    celt_assert2(N>1, "alg_quant() needs at least two dimensions");

   175    ALLOC(y, N, celt_norm);

   176    ALLOC(iy, N, int);

   177    ALLOC(signx, N, opus_val16);

   179    exp_rotation(X, N, 1, B, K, spread);

   181    /* Get rid of the sign */

   182    sum = 0;

   183    j=0; do {

   184       if (X[j]>0)

   185          signx[j]=1;

   186       else {

   187          signx[j]=-1;

   188          X[j]=-X[j];

   189       }

   190       iy[j] = 0;

   191       y[j] = 0;

   192    } while (++j<N);

   194    xy = yy = 0;

   196    pulsesLeft = K;

   198    /* Do a pre-search by projecting on the pyramid */

   199    if (K > (N>>1))

   200    {

   201       opus_val16 rcp;

   202       j=0; do {

   203          sum += X[j];

   204       }  while (++j<N);

   206       /* If X is too small, just replace it with a pulse at 0 */

   207 #ifdef FIXED_POINT

   208       if (sum <= K)

   209 #else

   210       /* Prevents infinities and NaNs from causing too many pulses

   211          to be allocated. 64 is an approximation of infinity here. */

   212       if (!(sum > EPSILON && sum < 64))

   213 #endif

   214       {

   215          X[0] = QCONST16(1.f,14);

   216          j=1; do

   217             X[j]=0;

   218          while (++j<N);

   219          sum = QCONST16(1.f,14);

   220       }

   221       rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));

   222       j=0; do {

   223 #ifdef FIXED_POINT

   224          /* It's really important to round *towards zero* here */

   225          iy[j] = MULT16_16_Q15(X[j],rcp);

   226 #else

   227          iy[j] = (int)floor(rcp*X[j]);

   228 #endif

   229          y[j] = (celt_norm)iy[j];

   230          yy = MAC16_16(yy, y[j],y[j]);

   231          xy = MAC16_16(xy, X[j],y[j]);

   232          y[j] *= 2;

   233          pulsesLeft -= iy[j];

   234       }  while (++j<N);

   235    }

   236    celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");

   238    /* This should never happen, but just in case it does (e.g. on silence)

   239       we fill the first bin with pulses. */

   240 #ifdef FIXED_POINT_DEBUG

   241    celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");

   242 #endif

   243    if (pulsesLeft > N+3)

   244    {

   245       opus_val16 tmp = (opus_val16)pulsesLeft;

   246       yy = MAC16_16(yy, tmp, tmp);

   247       yy = MAC16_16(yy, tmp, y[0]);

   248       iy[0] += pulsesLeft;

   249       pulsesLeft=0;

   250    }

   252    s = 1;

   253    for (i=0;i<pulsesLeft;i++)

   254    {

   255       int best_id;

   256       opus_val32 best_num = -VERY_LARGE16;

   257       opus_val16 best_den = 0;

   258 #ifdef FIXED_POINT

   259       int rshift;

   260 #endif

   261 #ifdef FIXED_POINT

   262       rshift = 1+celt_ilog2(K-pulsesLeft+i+1);

   263 #endif

   264       best_id = 0;

   265       /* The squared magnitude term gets added anyway, so we might as well

   266          add it outside the loop */

   267       yy = ADD32(yy, 1);

   268       j=0;

   269       do {

   270          opus_val16 Rxy, Ryy;

   271          /* Temporary sums of the new pulse(s) */

   272          Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));

   273          /* We're multiplying y[j] by two so we don't have to do it here */

   274          Ryy = ADD16(yy, y[j]);

   276          /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that

   277             Rxy is positive because the sign is pre-computed) */

   278          Rxy = MULT16_16_Q15(Rxy,Rxy);

   279          /* The idea is to check for num/den >= best_num/best_den, but that way

   280             we can do it without any division */

   281          /* OPT: Make sure to use conditional moves here */

   282          if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))

   283          {

   284             best_den = Ryy;

   285             best_num = Rxy;

   286             best_id = j;

   287          }

   288       } while (++j<N);

   290       /* Updating the sums of the new pulse(s) */

   291       xy = ADD32(xy, EXTEND32(X[best_id]));

   292       /* We're multiplying y[j] by two so we don't have to do it here */

   293       yy = ADD16(yy, y[best_id]);

   295       /* Only now that we've made the final choice, update y/iy */

   296       /* Multiplying y[j] by 2 so we don't have to do it everywhere else */

   297       y[best_id] += 2*s;

   298       iy[best_id]++;

   299    }

   301    /* Put the original sign back */

   302    j=0;

   303    do {

   304       X[j] = MULT16_16(signx[j],X[j]);

   305       if (signx[j] < 0)

   306          iy[j] = -iy[j];

   307    } while (++j<N);

   308    encode_pulses(iy, N, K, enc);

   310 #ifdef RESYNTH

   311    normalise_residual(iy, X, N, yy, gain);

   312    exp_rotation(X, N, -1, B, K, spread);

   313 #endif

   315    collapse_mask = extract_collapse_mask(iy, N, B);

   316    RESTORE_STACK;

   317    return collapse_mask;

   318 }

   320 /** Decode pulse vector and combine the result with the pitch vector to produce

   321     the final normalised signal in the current band. */

   322 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,

   323       ec_dec *dec, opus_val16 gain)

   324 {

   325    int i;

   326    opus_val32 Ryy;

   327    unsigned collapse_mask;

   328    VARDECL(int, iy);

   329    SAVE_STACK;

   331    celt_assert2(K>0, "alg_unquant() needs at least one pulse");

   332    celt_assert2(N>1, "alg_unquant() needs at least two dimensions");

   333    ALLOC(iy, N, int);

   334    decode_pulses(iy, N, K, dec);

   335    Ryy = 0;

   336    i=0;

   337    do {

   338       Ryy = MAC16_16(Ryy, iy[i], iy[i]);

   339    } while (++i < N);

   340    normalise_residual(iy, X, N, Ryy, gain);

   341    exp_rotation(X, N, -1, B, K, spread);

   342    collapse_mask = extract_collapse_mask(iy, N, B);

   343    RESTORE_STACK;

   344    return collapse_mask;

   345 }

   347 void renormalise_vector(celt_norm *X, int N, opus_val16 gain)

   348 {

   349    int i;

   350 #ifdef FIXED_POINT

   351    int k;

   352 #endif

   353    opus_val32 E = EPSILON;

   354    opus_val16 g;

   355    opus_val32 t;

   356    celt_norm *xptr = X;

   357    for (i=0;i<N;i++)

   358    {

   359       E = MAC16_16(E, *xptr, *xptr);

   360       xptr++;

   361    }

   362 #ifdef FIXED_POINT

   363    k = celt_ilog2(E)>>1;

   364 #endif

   365    t = VSHR32(E, 2*(k-7));

   366    g = MULT16_16_P15(celt_rsqrt_norm(t),gain);

   368    xptr = X;

   369    for (i=0;i<N;i++)

   370    {

   371       *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));

   372       xptr++;

   373    }

   374    /*return celt_sqrt(E);*/

   375 }

   377 int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)

   378 {

   379    int i;

   380    int itheta;

   381    opus_val16 mid, side;

   382    opus_val32 Emid, Eside;

   384    Emid = Eside = EPSILON;

   385    if (stereo)

   386    {

   387       for (i=0;i<N;i++)

   388       {

   389          celt_norm m, s;

   390          m = ADD16(SHR16(X[i],1),SHR16(Y[i],1));

   391          s = SUB16(SHR16(X[i],1),SHR16(Y[i],1));

   392          Emid = MAC16_16(Emid, m, m);

   393          Eside = MAC16_16(Eside, s, s);

   394       }

   395    } else {

   396       for (i=0;i<N;i++)

   397       {

   398          celt_norm m, s;

   399          m = X[i];

   400          s = Y[i];

   401          Emid = MAC16_16(Emid, m, m);

   402          Eside = MAC16_16(Eside, s, s);

   403       }

   404    }

   405    mid = celt_sqrt(Emid);

   406    side = celt_sqrt(Eside);

   407 #ifdef FIXED_POINT

   408    /* 0.63662 = 2/pi */

   409    itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));

   410 #else

   411    itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid));

   412 #endif

   414    return itheta;

   415 }

The Tor Browser / file revision

media/libopus/celt/vq.c@b8a032363ba2

media/libopus/celt/vq.c