|
1 /* Copyright (c) 2007-2008 CSIRO |
|
2 Copyright (c) 2007-2009 Xiph.Org Foundation |
|
3 Written by Jean-Marc Valin */ |
|
4 /* |
|
5 Redistribution and use in source and binary forms, with or without |
|
6 modification, are permitted provided that the following conditions |
|
7 are met: |
|
8 |
|
9 - Redistributions of source code must retain the above copyright |
|
10 notice, this list of conditions and the following disclaimer. |
|
11 |
|
12 - Redistributions in binary form must reproduce the above copyright |
|
13 notice, this list of conditions and the following disclaimer in the |
|
14 documentation and/or other materials provided with the distribution. |
|
15 |
|
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
|
20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|
23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
|
24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|
25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|
26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
27 */ |
|
28 |
|
29 #ifdef HAVE_CONFIG_H |
|
30 #include "config.h" |
|
31 #endif |
|
32 |
|
33 #include "mathops.h" |
|
34 #include "cwrs.h" |
|
35 #include "vq.h" |
|
36 #include "arch.h" |
|
37 #include "os_support.h" |
|
38 #include "bands.h" |
|
39 #include "rate.h" |
|
40 |
|
41 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) |
|
42 { |
|
43 int i; |
|
44 celt_norm *Xptr; |
|
45 Xptr = X; |
|
46 for (i=0;i<len-stride;i++) |
|
47 { |
|
48 celt_norm x1, x2; |
|
49 x1 = Xptr[0]; |
|
50 x2 = Xptr[stride]; |
|
51 Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); |
|
52 *Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); |
|
53 } |
|
54 Xptr = &X[len-2*stride-1]; |
|
55 for (i=len-2*stride-1;i>=0;i--) |
|
56 { |
|
57 celt_norm x1, x2; |
|
58 x1 = Xptr[0]; |
|
59 x2 = Xptr[stride]; |
|
60 Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); |
|
61 *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); |
|
62 } |
|
63 } |
|
64 |
|
65 static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) |
|
66 { |
|
67 static const int SPREAD_FACTOR[3]={15,10,5}; |
|
68 int i; |
|
69 opus_val16 c, s; |
|
70 opus_val16 gain, theta; |
|
71 int stride2=0; |
|
72 int factor; |
|
73 |
|
74 if (2*K>=len || spread==SPREAD_NONE) |
|
75 return; |
|
76 factor = SPREAD_FACTOR[spread-1]; |
|
77 |
|
78 gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K)); |
|
79 theta = HALF16(MULT16_16_Q15(gain,gain)); |
|
80 |
|
81 c = celt_cos_norm(EXTEND32(theta)); |
|
82 s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */ |
|
83 |
|
84 if (len>=8*stride) |
|
85 { |
|
86 stride2 = 1; |
|
87 /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding. |
|
88 It's basically incrementing long as (stride2+0.5)^2 < len/stride. */ |
|
89 while ((stride2*stride2+stride2)*stride + (stride>>2) < len) |
|
90 stride2++; |
|
91 } |
|
92 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for |
|
93 extract_collapse_mask().*/ |
|
94 len /= stride; |
|
95 for (i=0;i<stride;i++) |
|
96 { |
|
97 if (dir < 0) |
|
98 { |
|
99 if (stride2) |
|
100 exp_rotation1(X+i*len, len, stride2, s, c); |
|
101 exp_rotation1(X+i*len, len, 1, c, s); |
|
102 } else { |
|
103 exp_rotation1(X+i*len, len, 1, c, -s); |
|
104 if (stride2) |
|
105 exp_rotation1(X+i*len, len, stride2, s, -c); |
|
106 } |
|
107 } |
|
108 } |
|
109 |
|
110 /** Takes the pitch vector and the decoded residual vector, computes the gain |
|
111 that will give ||p+g*y||=1 and mixes the residual with the pitch. */ |
|
112 static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X, |
|
113 int N, opus_val32 Ryy, opus_val16 gain) |
|
114 { |
|
115 int i; |
|
116 #ifdef FIXED_POINT |
|
117 int k; |
|
118 #endif |
|
119 opus_val32 t; |
|
120 opus_val16 g; |
|
121 |
|
122 #ifdef FIXED_POINT |
|
123 k = celt_ilog2(Ryy)>>1; |
|
124 #endif |
|
125 t = VSHR32(Ryy, 2*(k-7)); |
|
126 g = MULT16_16_P15(celt_rsqrt_norm(t),gain); |
|
127 |
|
128 i=0; |
|
129 do |
|
130 X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1)); |
|
131 while (++i < N); |
|
132 } |
|
133 |
|
134 static unsigned extract_collapse_mask(int *iy, int N, int B) |
|
135 { |
|
136 unsigned collapse_mask; |
|
137 int N0; |
|
138 int i; |
|
139 if (B<=1) |
|
140 return 1; |
|
141 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for |
|
142 exp_rotation().*/ |
|
143 N0 = N/B; |
|
144 collapse_mask = 0; |
|
145 i=0; do { |
|
146 int j; |
|
147 j=0; do { |
|
148 collapse_mask |= (iy[i*N0+j]!=0)<<i; |
|
149 } while (++j<N0); |
|
150 } while (++i<B); |
|
151 return collapse_mask; |
|
152 } |
|
153 |
|
154 unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc |
|
155 #ifdef RESYNTH |
|
156 , opus_val16 gain |
|
157 #endif |
|
158 ) |
|
159 { |
|
160 VARDECL(celt_norm, y); |
|
161 VARDECL(int, iy); |
|
162 VARDECL(opus_val16, signx); |
|
163 int i, j; |
|
164 opus_val16 s; |
|
165 int pulsesLeft; |
|
166 opus_val32 sum; |
|
167 opus_val32 xy; |
|
168 opus_val16 yy; |
|
169 unsigned collapse_mask; |
|
170 SAVE_STACK; |
|
171 |
|
172 celt_assert2(K>0, "alg_quant() needs at least one pulse"); |
|
173 celt_assert2(N>1, "alg_quant() needs at least two dimensions"); |
|
174 |
|
175 ALLOC(y, N, celt_norm); |
|
176 ALLOC(iy, N, int); |
|
177 ALLOC(signx, N, opus_val16); |
|
178 |
|
179 exp_rotation(X, N, 1, B, K, spread); |
|
180 |
|
181 /* Get rid of the sign */ |
|
182 sum = 0; |
|
183 j=0; do { |
|
184 if (X[j]>0) |
|
185 signx[j]=1; |
|
186 else { |
|
187 signx[j]=-1; |
|
188 X[j]=-X[j]; |
|
189 } |
|
190 iy[j] = 0; |
|
191 y[j] = 0; |
|
192 } while (++j<N); |
|
193 |
|
194 xy = yy = 0; |
|
195 |
|
196 pulsesLeft = K; |
|
197 |
|
198 /* Do a pre-search by projecting on the pyramid */ |
|
199 if (K > (N>>1)) |
|
200 { |
|
201 opus_val16 rcp; |
|
202 j=0; do { |
|
203 sum += X[j]; |
|
204 } while (++j<N); |
|
205 |
|
206 /* If X is too small, just replace it with a pulse at 0 */ |
|
207 #ifdef FIXED_POINT |
|
208 if (sum <= K) |
|
209 #else |
|
210 /* Prevents infinities and NaNs from causing too many pulses |
|
211 to be allocated. 64 is an approximation of infinity here. */ |
|
212 if (!(sum > EPSILON && sum < 64)) |
|
213 #endif |
|
214 { |
|
215 X[0] = QCONST16(1.f,14); |
|
216 j=1; do |
|
217 X[j]=0; |
|
218 while (++j<N); |
|
219 sum = QCONST16(1.f,14); |
|
220 } |
|
221 rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum))); |
|
222 j=0; do { |
|
223 #ifdef FIXED_POINT |
|
224 /* It's really important to round *towards zero* here */ |
|
225 iy[j] = MULT16_16_Q15(X[j],rcp); |
|
226 #else |
|
227 iy[j] = (int)floor(rcp*X[j]); |
|
228 #endif |
|
229 y[j] = (celt_norm)iy[j]; |
|
230 yy = MAC16_16(yy, y[j],y[j]); |
|
231 xy = MAC16_16(xy, X[j],y[j]); |
|
232 y[j] *= 2; |
|
233 pulsesLeft -= iy[j]; |
|
234 } while (++j<N); |
|
235 } |
|
236 celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass"); |
|
237 |
|
238 /* This should never happen, but just in case it does (e.g. on silence) |
|
239 we fill the first bin with pulses. */ |
|
240 #ifdef FIXED_POINT_DEBUG |
|
241 celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass"); |
|
242 #endif |
|
243 if (pulsesLeft > N+3) |
|
244 { |
|
245 opus_val16 tmp = (opus_val16)pulsesLeft; |
|
246 yy = MAC16_16(yy, tmp, tmp); |
|
247 yy = MAC16_16(yy, tmp, y[0]); |
|
248 iy[0] += pulsesLeft; |
|
249 pulsesLeft=0; |
|
250 } |
|
251 |
|
252 s = 1; |
|
253 for (i=0;i<pulsesLeft;i++) |
|
254 { |
|
255 int best_id; |
|
256 opus_val32 best_num = -VERY_LARGE16; |
|
257 opus_val16 best_den = 0; |
|
258 #ifdef FIXED_POINT |
|
259 int rshift; |
|
260 #endif |
|
261 #ifdef FIXED_POINT |
|
262 rshift = 1+celt_ilog2(K-pulsesLeft+i+1); |
|
263 #endif |
|
264 best_id = 0; |
|
265 /* The squared magnitude term gets added anyway, so we might as well |
|
266 add it outside the loop */ |
|
267 yy = ADD32(yy, 1); |
|
268 j=0; |
|
269 do { |
|
270 opus_val16 Rxy, Ryy; |
|
271 /* Temporary sums of the new pulse(s) */ |
|
272 Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift)); |
|
273 /* We're multiplying y[j] by two so we don't have to do it here */ |
|
274 Ryy = ADD16(yy, y[j]); |
|
275 |
|
276 /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that |
|
277 Rxy is positive because the sign is pre-computed) */ |
|
278 Rxy = MULT16_16_Q15(Rxy,Rxy); |
|
279 /* The idea is to check for num/den >= best_num/best_den, but that way |
|
280 we can do it without any division */ |
|
281 /* OPT: Make sure to use conditional moves here */ |
|
282 if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)) |
|
283 { |
|
284 best_den = Ryy; |
|
285 best_num = Rxy; |
|
286 best_id = j; |
|
287 } |
|
288 } while (++j<N); |
|
289 |
|
290 /* Updating the sums of the new pulse(s) */ |
|
291 xy = ADD32(xy, EXTEND32(X[best_id])); |
|
292 /* We're multiplying y[j] by two so we don't have to do it here */ |
|
293 yy = ADD16(yy, y[best_id]); |
|
294 |
|
295 /* Only now that we've made the final choice, update y/iy */ |
|
296 /* Multiplying y[j] by 2 so we don't have to do it everywhere else */ |
|
297 y[best_id] += 2*s; |
|
298 iy[best_id]++; |
|
299 } |
|
300 |
|
301 /* Put the original sign back */ |
|
302 j=0; |
|
303 do { |
|
304 X[j] = MULT16_16(signx[j],X[j]); |
|
305 if (signx[j] < 0) |
|
306 iy[j] = -iy[j]; |
|
307 } while (++j<N); |
|
308 encode_pulses(iy, N, K, enc); |
|
309 |
|
310 #ifdef RESYNTH |
|
311 normalise_residual(iy, X, N, yy, gain); |
|
312 exp_rotation(X, N, -1, B, K, spread); |
|
313 #endif |
|
314 |
|
315 collapse_mask = extract_collapse_mask(iy, N, B); |
|
316 RESTORE_STACK; |
|
317 return collapse_mask; |
|
318 } |
|
319 |
|
320 /** Decode pulse vector and combine the result with the pitch vector to produce |
|
321 the final normalised signal in the current band. */ |
|
322 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, |
|
323 ec_dec *dec, opus_val16 gain) |
|
324 { |
|
325 int i; |
|
326 opus_val32 Ryy; |
|
327 unsigned collapse_mask; |
|
328 VARDECL(int, iy); |
|
329 SAVE_STACK; |
|
330 |
|
331 celt_assert2(K>0, "alg_unquant() needs at least one pulse"); |
|
332 celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); |
|
333 ALLOC(iy, N, int); |
|
334 decode_pulses(iy, N, K, dec); |
|
335 Ryy = 0; |
|
336 i=0; |
|
337 do { |
|
338 Ryy = MAC16_16(Ryy, iy[i], iy[i]); |
|
339 } while (++i < N); |
|
340 normalise_residual(iy, X, N, Ryy, gain); |
|
341 exp_rotation(X, N, -1, B, K, spread); |
|
342 collapse_mask = extract_collapse_mask(iy, N, B); |
|
343 RESTORE_STACK; |
|
344 return collapse_mask; |
|
345 } |
|
346 |
|
347 void renormalise_vector(celt_norm *X, int N, opus_val16 gain) |
|
348 { |
|
349 int i; |
|
350 #ifdef FIXED_POINT |
|
351 int k; |
|
352 #endif |
|
353 opus_val32 E = EPSILON; |
|
354 opus_val16 g; |
|
355 opus_val32 t; |
|
356 celt_norm *xptr = X; |
|
357 for (i=0;i<N;i++) |
|
358 { |
|
359 E = MAC16_16(E, *xptr, *xptr); |
|
360 xptr++; |
|
361 } |
|
362 #ifdef FIXED_POINT |
|
363 k = celt_ilog2(E)>>1; |
|
364 #endif |
|
365 t = VSHR32(E, 2*(k-7)); |
|
366 g = MULT16_16_P15(celt_rsqrt_norm(t),gain); |
|
367 |
|
368 xptr = X; |
|
369 for (i=0;i<N;i++) |
|
370 { |
|
371 *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1)); |
|
372 xptr++; |
|
373 } |
|
374 /*return celt_sqrt(E);*/ |
|
375 } |
|
376 |
|
377 int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) |
|
378 { |
|
379 int i; |
|
380 int itheta; |
|
381 opus_val16 mid, side; |
|
382 opus_val32 Emid, Eside; |
|
383 |
|
384 Emid = Eside = EPSILON; |
|
385 if (stereo) |
|
386 { |
|
387 for (i=0;i<N;i++) |
|
388 { |
|
389 celt_norm m, s; |
|
390 m = ADD16(SHR16(X[i],1),SHR16(Y[i],1)); |
|
391 s = SUB16(SHR16(X[i],1),SHR16(Y[i],1)); |
|
392 Emid = MAC16_16(Emid, m, m); |
|
393 Eside = MAC16_16(Eside, s, s); |
|
394 } |
|
395 } else { |
|
396 for (i=0;i<N;i++) |
|
397 { |
|
398 celt_norm m, s; |
|
399 m = X[i]; |
|
400 s = Y[i]; |
|
401 Emid = MAC16_16(Emid, m, m); |
|
402 Eside = MAC16_16(Eside, s, s); |
|
403 } |
|
404 } |
|
405 mid = celt_sqrt(Emid); |
|
406 side = celt_sqrt(Eside); |
|
407 #ifdef FIXED_POINT |
|
408 /* 0.63662 = 2/pi */ |
|
409 itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid)); |
|
410 #else |
|
411 itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid)); |
|
412 #endif |
|
413 |
|
414 return itheta; |
|
415 } |