|
1 /*********************************************************************** |
|
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved. |
|
3 Redistribution and use in source and binary forms, with or without |
|
4 modification, are permitted provided that the following conditions |
|
5 are met: |
|
6 - Redistributions of source code must retain the above copyright notice, |
|
7 this list of conditions and the following disclaimer. |
|
8 - Redistributions in binary form must reproduce the above copyright |
|
9 notice, this list of conditions and the following disclaimer in the |
|
10 documentation and/or other materials provided with the distribution. |
|
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the |
|
12 names of specific contributors, may be used to endorse or promote |
|
13 products derived from this software without specific prior written |
|
14 permission. |
|
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
|
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
|
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
|
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|
25 POSSIBILITY OF SUCH DAMAGE. |
|
26 ***********************************************************************/ |
|
27 |
|
28 #ifdef HAVE_CONFIG_H |
|
29 #include "config.h" |
|
30 #endif |
|
31 |
|
32 #include "main_FLP.h" |
|
33 #include "tuning_parameters.h" |
|
34 |
|
35 /* Compute gain to make warped filter coefficients have a zero mean log frequency response on a */ |
|
36 /* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */ |
|
37 /* Note: A monic filter is one with the first coefficient equal to 1.0. In Silk we omit the first */ |
|
38 /* coefficient in an array of coefficients, for monic filters. */ |
|
39 static OPUS_INLINE silk_float warped_gain( |
|
40 const silk_float *coefs, |
|
41 silk_float lambda, |
|
42 opus_int order |
|
43 ) { |
|
44 opus_int i; |
|
45 silk_float gain; |
|
46 |
|
47 lambda = -lambda; |
|
48 gain = coefs[ order - 1 ]; |
|
49 for( i = order - 2; i >= 0; i-- ) { |
|
50 gain = lambda * gain + coefs[ i ]; |
|
51 } |
|
52 return (silk_float)( 1.0f / ( 1.0f - lambda * gain ) ); |
|
53 } |
|
54 |
|
55 /* Convert warped filter coefficients to monic pseudo-warped coefficients and limit maximum */ |
|
56 /* amplitude of monic warped coefficients by using bandwidth expansion on the true coefficients */ |
|
57 static OPUS_INLINE void warped_true2monic_coefs( |
|
58 silk_float *coefs_syn, |
|
59 silk_float *coefs_ana, |
|
60 silk_float lambda, |
|
61 silk_float limit, |
|
62 opus_int order |
|
63 ) { |
|
64 opus_int i, iter, ind = 0; |
|
65 silk_float tmp, maxabs, chirp, gain_syn, gain_ana; |
|
66 |
|
67 /* Convert to monic coefficients */ |
|
68 for( i = order - 1; i > 0; i-- ) { |
|
69 coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ]; |
|
70 coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ]; |
|
71 } |
|
72 gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] ); |
|
73 gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] ); |
|
74 for( i = 0; i < order; i++ ) { |
|
75 coefs_syn[ i ] *= gain_syn; |
|
76 coefs_ana[ i ] *= gain_ana; |
|
77 } |
|
78 |
|
79 /* Limit */ |
|
80 for( iter = 0; iter < 10; iter++ ) { |
|
81 /* Find maximum absolute value */ |
|
82 maxabs = -1.0f; |
|
83 for( i = 0; i < order; i++ ) { |
|
84 tmp = silk_max( silk_abs_float( coefs_syn[ i ] ), silk_abs_float( coefs_ana[ i ] ) ); |
|
85 if( tmp > maxabs ) { |
|
86 maxabs = tmp; |
|
87 ind = i; |
|
88 } |
|
89 } |
|
90 if( maxabs <= limit ) { |
|
91 /* Coefficients are within range - done */ |
|
92 return; |
|
93 } |
|
94 |
|
95 /* Convert back to true warped coefficients */ |
|
96 for( i = 1; i < order; i++ ) { |
|
97 coefs_syn[ i - 1 ] += lambda * coefs_syn[ i ]; |
|
98 coefs_ana[ i - 1 ] += lambda * coefs_ana[ i ]; |
|
99 } |
|
100 gain_syn = 1.0f / gain_syn; |
|
101 gain_ana = 1.0f / gain_ana; |
|
102 for( i = 0; i < order; i++ ) { |
|
103 coefs_syn[ i ] *= gain_syn; |
|
104 coefs_ana[ i ] *= gain_ana; |
|
105 } |
|
106 |
|
107 /* Apply bandwidth expansion */ |
|
108 chirp = 0.99f - ( 0.8f + 0.1f * iter ) * ( maxabs - limit ) / ( maxabs * ( ind + 1 ) ); |
|
109 silk_bwexpander_FLP( coefs_syn, order, chirp ); |
|
110 silk_bwexpander_FLP( coefs_ana, order, chirp ); |
|
111 |
|
112 /* Convert to monic warped coefficients */ |
|
113 for( i = order - 1; i > 0; i-- ) { |
|
114 coefs_syn[ i - 1 ] -= lambda * coefs_syn[ i ]; |
|
115 coefs_ana[ i - 1 ] -= lambda * coefs_ana[ i ]; |
|
116 } |
|
117 gain_syn = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_syn[ 0 ] ); |
|
118 gain_ana = ( 1.0f - lambda * lambda ) / ( 1.0f + lambda * coefs_ana[ 0 ] ); |
|
119 for( i = 0; i < order; i++ ) { |
|
120 coefs_syn[ i ] *= gain_syn; |
|
121 coefs_ana[ i ] *= gain_ana; |
|
122 } |
|
123 } |
|
124 silk_assert( 0 ); |
|
125 } |
|
126 |
|
127 /* Compute noise shaping coefficients and initial gain values */ |
|
128 void silk_noise_shape_analysis_FLP( |
|
129 silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ |
|
130 silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ |
|
131 const silk_float *pitch_res, /* I LPC residual from pitch analysis */ |
|
132 const silk_float *x /* I Input signal [frame_length + la_shape] */ |
|
133 ) |
|
134 { |
|
135 silk_shape_state_FLP *psShapeSt = &psEnc->sShape; |
|
136 opus_int k, nSamples; |
|
137 silk_float SNR_adj_dB, HarmBoost, HarmShapeGain, Tilt; |
|
138 silk_float nrg, pre_nrg, log_energy, log_energy_prev, energy_variation; |
|
139 silk_float delta, BWExp1, BWExp2, gain_mult, gain_add, strength, b, warping; |
|
140 silk_float x_windowed[ SHAPE_LPC_WIN_MAX ]; |
|
141 silk_float auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ]; |
|
142 const silk_float *x_ptr, *pitch_res_ptr; |
|
143 |
|
144 /* Point to start of first LPC analysis block */ |
|
145 x_ptr = x - psEnc->sCmn.la_shape; |
|
146 |
|
147 /****************/ |
|
148 /* GAIN CONTROL */ |
|
149 /****************/ |
|
150 SNR_adj_dB = psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ); |
|
151 |
|
152 /* Input quality is the average of the quality in the lowest two VAD bands */ |
|
153 psEncCtrl->input_quality = 0.5f * ( psEnc->sCmn.input_quality_bands_Q15[ 0 ] + psEnc->sCmn.input_quality_bands_Q15[ 1 ] ) * ( 1.0f / 32768.0f ); |
|
154 |
|
155 /* Coding quality level, between 0.0 and 1.0 */ |
|
156 psEncCtrl->coding_quality = silk_sigmoid( 0.25f * ( SNR_adj_dB - 20.0f ) ); |
|
157 |
|
158 if( psEnc->sCmn.useCBR == 0 ) { |
|
159 /* Reduce coding SNR during low speech activity */ |
|
160 b = 1.0f - psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ); |
|
161 SNR_adj_dB -= BG_SNR_DECR_dB * psEncCtrl->coding_quality * ( 0.5f + 0.5f * psEncCtrl->input_quality ) * b * b; |
|
162 } |
|
163 |
|
164 if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { |
|
165 /* Reduce gains for periodic signals */ |
|
166 SNR_adj_dB += HARM_SNR_INCR_dB * psEnc->LTPCorr; |
|
167 } else { |
|
168 /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */ |
|
169 SNR_adj_dB += ( -0.4f * psEnc->sCmn.SNR_dB_Q7 * ( 1 / 128.0f ) + 6.0f ) * ( 1.0f - psEncCtrl->input_quality ); |
|
170 } |
|
171 |
|
172 /*************************/ |
|
173 /* SPARSENESS PROCESSING */ |
|
174 /*************************/ |
|
175 /* Set quantizer offset */ |
|
176 if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { |
|
177 /* Initially set to 0; may be overruled in process_gains(..) */ |
|
178 psEnc->sCmn.indices.quantOffsetType = 0; |
|
179 psEncCtrl->sparseness = 0.0f; |
|
180 } else { |
|
181 /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */ |
|
182 nSamples = 2 * psEnc->sCmn.fs_kHz; |
|
183 energy_variation = 0.0f; |
|
184 log_energy_prev = 0.0f; |
|
185 pitch_res_ptr = pitch_res; |
|
186 for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) { |
|
187 nrg = ( silk_float )nSamples + ( silk_float )silk_energy_FLP( pitch_res_ptr, nSamples ); |
|
188 log_energy = silk_log2( nrg ); |
|
189 if( k > 0 ) { |
|
190 energy_variation += silk_abs_float( log_energy - log_energy_prev ); |
|
191 } |
|
192 log_energy_prev = log_energy; |
|
193 pitch_res_ptr += nSamples; |
|
194 } |
|
195 psEncCtrl->sparseness = silk_sigmoid( 0.4f * ( energy_variation - 5.0f ) ); |
|
196 |
|
197 /* Set quantization offset depending on sparseness measure */ |
|
198 if( psEncCtrl->sparseness > SPARSENESS_THRESHOLD_QNT_OFFSET ) { |
|
199 psEnc->sCmn.indices.quantOffsetType = 0; |
|
200 } else { |
|
201 psEnc->sCmn.indices.quantOffsetType = 1; |
|
202 } |
|
203 |
|
204 /* Increase coding SNR for sparse signals */ |
|
205 SNR_adj_dB += SPARSE_SNR_INCR_dB * ( psEncCtrl->sparseness - 0.5f ); |
|
206 } |
|
207 |
|
208 /*******************************/ |
|
209 /* Control bandwidth expansion */ |
|
210 /*******************************/ |
|
211 /* More BWE for signals with high prediction gain */ |
|
212 strength = FIND_PITCH_WHITE_NOISE_FRACTION * psEncCtrl->predGain; /* between 0.0 and 1.0 */ |
|
213 BWExp1 = BWExp2 = BANDWIDTH_EXPANSION / ( 1.0f + strength * strength ); |
|
214 delta = LOW_RATE_BANDWIDTH_EXPANSION_DELTA * ( 1.0f - 0.75f * psEncCtrl->coding_quality ); |
|
215 BWExp1 -= delta; |
|
216 BWExp2 += delta; |
|
217 /* BWExp1 will be applied after BWExp2, so make it relative */ |
|
218 BWExp1 /= BWExp2; |
|
219 |
|
220 if( psEnc->sCmn.warping_Q16 > 0 ) { |
|
221 /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */ |
|
222 warping = (silk_float)psEnc->sCmn.warping_Q16 / 65536.0f + 0.01f * psEncCtrl->coding_quality; |
|
223 } else { |
|
224 warping = 0.0f; |
|
225 } |
|
226 |
|
227 /********************************************/ |
|
228 /* Compute noise shaping AR coefs and gains */ |
|
229 /********************************************/ |
|
230 for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { |
|
231 /* Apply window: sine slope followed by flat part followed by cosine slope */ |
|
232 opus_int shift, slope_part, flat_part; |
|
233 flat_part = psEnc->sCmn.fs_kHz * 3; |
|
234 slope_part = ( psEnc->sCmn.shapeWinLength - flat_part ) / 2; |
|
235 |
|
236 silk_apply_sine_window_FLP( x_windowed, x_ptr, 1, slope_part ); |
|
237 shift = slope_part; |
|
238 silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(silk_float) ); |
|
239 shift += flat_part; |
|
240 silk_apply_sine_window_FLP( x_windowed + shift, x_ptr + shift, 2, slope_part ); |
|
241 |
|
242 /* Update pointer: next LPC analysis block */ |
|
243 x_ptr += psEnc->sCmn.subfr_length; |
|
244 |
|
245 if( psEnc->sCmn.warping_Q16 > 0 ) { |
|
246 /* Calculate warped auto correlation */ |
|
247 silk_warped_autocorrelation_FLP( auto_corr, x_windowed, warping, |
|
248 psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder ); |
|
249 } else { |
|
250 /* Calculate regular auto correlation */ |
|
251 silk_autocorrelation_FLP( auto_corr, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1 ); |
|
252 } |
|
253 |
|
254 /* Add white noise, as a fraction of energy */ |
|
255 auto_corr[ 0 ] += auto_corr[ 0 ] * SHAPE_WHITE_NOISE_FRACTION; |
|
256 |
|
257 /* Convert correlations to prediction coefficients, and compute residual energy */ |
|
258 nrg = silk_levinsondurbin_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], auto_corr, psEnc->sCmn.shapingLPCOrder ); |
|
259 psEncCtrl->Gains[ k ] = ( silk_float )sqrt( nrg ); |
|
260 |
|
261 if( psEnc->sCmn.warping_Q16 > 0 ) { |
|
262 /* Adjust gain for warping */ |
|
263 psEncCtrl->Gains[ k ] *= warped_gain( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], warping, psEnc->sCmn.shapingLPCOrder ); |
|
264 } |
|
265 |
|
266 /* Bandwidth expansion for synthesis filter shaping */ |
|
267 silk_bwexpander_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp2 ); |
|
268 |
|
269 /* Compute noise shaping filter coefficients */ |
|
270 silk_memcpy( |
|
271 &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], |
|
272 &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], |
|
273 psEnc->sCmn.shapingLPCOrder * sizeof( silk_float ) ); |
|
274 |
|
275 /* Bandwidth expansion for analysis filter shaping */ |
|
276 silk_bwexpander_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder, BWExp1 ); |
|
277 |
|
278 /* Ratio of prediction gains, in energy domain */ |
|
279 pre_nrg = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder ); |
|
280 nrg = silk_LPC_inverse_pred_gain_FLP( &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], psEnc->sCmn.shapingLPCOrder ); |
|
281 psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ); |
|
282 |
|
283 /* Convert to monic warped prediction coefficients and limit absolute values */ |
|
284 warped_true2monic_coefs( &psEncCtrl->AR2[ k * MAX_SHAPE_LPC_ORDER ], &psEncCtrl->AR1[ k * MAX_SHAPE_LPC_ORDER ], |
|
285 warping, 3.999f, psEnc->sCmn.shapingLPCOrder ); |
|
286 } |
|
287 |
|
288 /*****************/ |
|
289 /* Gain tweaking */ |
|
290 /*****************/ |
|
291 /* Increase gains during low speech activity */ |
|
292 gain_mult = (silk_float)pow( 2.0f, -0.16f * SNR_adj_dB ); |
|
293 gain_add = (silk_float)pow( 2.0f, 0.16f * MIN_QGAIN_DB ); |
|
294 for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { |
|
295 psEncCtrl->Gains[ k ] *= gain_mult; |
|
296 psEncCtrl->Gains[ k ] += gain_add; |
|
297 } |
|
298 |
|
299 gain_mult = 1.0f + INPUT_TILT + psEncCtrl->coding_quality * HIGH_RATE_INPUT_TILT; |
|
300 for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { |
|
301 psEncCtrl->GainsPre[ k ] *= gain_mult; |
|
302 } |
|
303 |
|
304 /************************************************/ |
|
305 /* Control low-frequency shaping and noise tilt */ |
|
306 /************************************************/ |
|
307 /* Less low frequency shaping for noisy inputs */ |
|
308 strength = LOW_FREQ_SHAPING * ( 1.0f + LOW_QUALITY_LOW_FREQ_SHAPING_DECR * ( psEnc->sCmn.input_quality_bands_Q15[ 0 ] * ( 1.0f / 32768.0f ) - 1.0f ) ); |
|
309 strength *= psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ); |
|
310 if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) { |
|
311 /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */ |
|
312 /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/ |
|
313 for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { |
|
314 b = 0.2f / psEnc->sCmn.fs_kHz + 3.0f / psEncCtrl->pitchL[ k ]; |
|
315 psEncCtrl->LF_MA_shp[ k ] = -1.0f + b; |
|
316 psEncCtrl->LF_AR_shp[ k ] = 1.0f - b - b * strength; |
|
317 } |
|
318 Tilt = - HP_NOISE_COEF - |
|
319 (1 - HP_NOISE_COEF) * HARM_HP_NOISE_COEF * psEnc->sCmn.speech_activity_Q8 * ( 1.0f / 256.0f ); |
|
320 } else { |
|
321 b = 1.3f / psEnc->sCmn.fs_kHz; |
|
322 psEncCtrl->LF_MA_shp[ 0 ] = -1.0f + b; |
|
323 psEncCtrl->LF_AR_shp[ 0 ] = 1.0f - b - b * strength * 0.6f; |
|
324 for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) { |
|
325 psEncCtrl->LF_MA_shp[ k ] = psEncCtrl->LF_MA_shp[ 0 ]; |
|
326 psEncCtrl->LF_AR_shp[ k ] = psEncCtrl->LF_AR_shp[ 0 ]; |
|
327 } |
|
328 Tilt = -HP_NOISE_COEF; |
|
329 } |
|
330 |
|
331 /****************************/ |
|
332 /* HARMONIC SHAPING CONTROL */ |
|
333 /****************************/ |
|
334 /* Control boosting of harmonic frequencies */ |
|
335 HarmBoost = LOW_RATE_HARMONIC_BOOST * ( 1.0f - psEncCtrl->coding_quality ) * psEnc->LTPCorr; |
|
336 |
|
337 /* More harmonic boost for noisy input signals */ |
|
338 HarmBoost += LOW_INPUT_QUALITY_HARMONIC_BOOST * ( 1.0f - psEncCtrl->input_quality ); |
|
339 |
|
340 if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) { |
|
341 /* Harmonic noise shaping */ |
|
342 HarmShapeGain = HARMONIC_SHAPING; |
|
343 |
|
344 /* More harmonic noise shaping for high bitrates or noisy input */ |
|
345 HarmShapeGain += HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING * |
|
346 ( 1.0f - ( 1.0f - psEncCtrl->coding_quality ) * psEncCtrl->input_quality ); |
|
347 |
|
348 /* Less harmonic noise shaping for less periodic signals */ |
|
349 HarmShapeGain *= ( silk_float )sqrt( psEnc->LTPCorr ); |
|
350 } else { |
|
351 HarmShapeGain = 0.0f; |
|
352 } |
|
353 |
|
354 /*************************/ |
|
355 /* Smooth over subframes */ |
|
356 /*************************/ |
|
357 for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { |
|
358 psShapeSt->HarmBoost_smth += SUBFR_SMTH_COEF * ( HarmBoost - psShapeSt->HarmBoost_smth ); |
|
359 psEncCtrl->HarmBoost[ k ] = psShapeSt->HarmBoost_smth; |
|
360 psShapeSt->HarmShapeGain_smth += SUBFR_SMTH_COEF * ( HarmShapeGain - psShapeSt->HarmShapeGain_smth ); |
|
361 psEncCtrl->HarmShapeGain[ k ] = psShapeSt->HarmShapeGain_smth; |
|
362 psShapeSt->Tilt_smth += SUBFR_SMTH_COEF * ( Tilt - psShapeSt->Tilt_smth ); |
|
363 psEncCtrl->Tilt[ k ] = psShapeSt->Tilt_smth; |
|
364 } |
|
365 } |