1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libopus/silk/dec_API.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,397 @@ 1.4 +/*********************************************************************** 1.5 +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 1.6 +Redistribution and use in source and binary forms, with or without 1.7 +modification, are permitted provided that the following conditions 1.8 +are met: 1.9 +- Redistributions of source code must retain the above copyright notice, 1.10 +this list of conditions and the following disclaimer. 1.11 +- Redistributions in binary form must reproduce the above copyright 1.12 +notice, this list of conditions and the following disclaimer in the 1.13 +documentation and/or other materials provided with the distribution. 1.14 +- Neither the name of Internet Society, IETF or IETF Trust, nor the 1.15 +names of specific contributors, may be used to endorse or promote 1.16 +products derived from this software without specific prior written 1.17 +permission. 1.18 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 1.19 +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1.20 +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1.21 +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 1.22 +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 1.23 +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 1.24 +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 1.25 +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 1.26 +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 1.27 +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 1.28 +POSSIBILITY OF SUCH DAMAGE. 1.29 +***********************************************************************/ 1.30 + 1.31 +#ifdef HAVE_CONFIG_H 1.32 +#include "config.h" 1.33 +#endif 1.34 +#include "API.h" 1.35 +#include "main.h" 1.36 +#include "stack_alloc.h" 1.37 + 1.38 +/************************/ 1.39 +/* Decoder Super Struct */ 1.40 +/************************/ 1.41 +typedef struct { 1.42 + silk_decoder_state channel_state[ DECODER_NUM_CHANNELS ]; 1.43 + stereo_dec_state sStereo; 1.44 + opus_int nChannelsAPI; 1.45 + opus_int nChannelsInternal; 1.46 + opus_int prev_decode_only_middle; 1.47 +} silk_decoder; 1.48 + 1.49 +/*********************/ 1.50 +/* Decoder functions */ 1.51 +/*********************/ 1.52 + 1.53 +opus_int silk_Get_Decoder_Size( /* O Returns error code */ 1.54 + opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */ 1.55 +) 1.56 +{ 1.57 + opus_int ret = SILK_NO_ERROR; 1.58 + 1.59 + *decSizeBytes = sizeof( silk_decoder ); 1.60 + 1.61 + return ret; 1.62 +} 1.63 + 1.64 +/* Reset decoder state */ 1.65 +opus_int silk_InitDecoder( /* O Returns error code */ 1.66 + void *decState /* I/O State */ 1.67 +) 1.68 +{ 1.69 + opus_int n, ret = SILK_NO_ERROR; 1.70 + silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state; 1.71 + 1.72 + for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) { 1.73 + ret = silk_init_decoder( &channel_state[ n ] ); 1.74 + } 1.75 + silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo)); 1.76 + /* Not strictly needed, but it's cleaner that way */ 1.77 + ((silk_decoder *)decState)->prev_decode_only_middle = 0; 1.78 + 1.79 + return ret; 1.80 +} 1.81 + 1.82 +/* Decode a frame */ 1.83 +opus_int silk_Decode( /* O Returns error code */ 1.84 + void* decState, /* I/O State */ 1.85 + silk_DecControlStruct* decControl, /* I/O Control Structure */ 1.86 + opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ 1.87 + opus_int newPacketFlag, /* I Indicates first decoder call for this packet */ 1.88 + ec_dec *psRangeDec, /* I/O Compressor data structure */ 1.89 + opus_int16 *samplesOut, /* O Decoded output speech vector */ 1.90 + opus_int32 *nSamplesOut /* O Number of samples decoded */ 1.91 +) 1.92 +{ 1.93 + opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; 1.94 + opus_int32 nSamplesOutDec, LBRR_symbol; 1.95 + opus_int16 *samplesOut1_tmp[ 2 ]; 1.96 + VARDECL( opus_int16, samplesOut1_tmp_storage ); 1.97 + VARDECL( opus_int16, samplesOut2_tmp ); 1.98 + opus_int32 MS_pred_Q13[ 2 ] = { 0 }; 1.99 + opus_int16 *resample_out_ptr; 1.100 + silk_decoder *psDec = ( silk_decoder * )decState; 1.101 + silk_decoder_state *channel_state = psDec->channel_state; 1.102 + opus_int has_side; 1.103 + opus_int stereo_to_mono; 1.104 + SAVE_STACK; 1.105 + 1.106 + silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); 1.107 + 1.108 + /**********************************/ 1.109 + /* Test if first frame in payload */ 1.110 + /**********************************/ 1.111 + if( newPacketFlag ) { 1.112 + for( n = 0; n < decControl->nChannelsInternal; n++ ) { 1.113 + channel_state[ n ].nFramesDecoded = 0; /* Used to count frames in packet */ 1.114 + } 1.115 + } 1.116 + 1.117 + /* If Mono -> Stereo transition in bitstream: init state of second channel */ 1.118 + if( decControl->nChannelsInternal > psDec->nChannelsInternal ) { 1.119 + ret += silk_init_decoder( &channel_state[ 1 ] ); 1.120 + } 1.121 + 1.122 + stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 && 1.123 + ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz ); 1.124 + 1.125 + if( channel_state[ 0 ].nFramesDecoded == 0 ) { 1.126 + for( n = 0; n < decControl->nChannelsInternal; n++ ) { 1.127 + opus_int fs_kHz_dec; 1.128 + if( decControl->payloadSize_ms == 0 ) { 1.129 + /* Assuming packet loss, use 10 ms */ 1.130 + channel_state[ n ].nFramesPerPacket = 1; 1.131 + channel_state[ n ].nb_subfr = 2; 1.132 + } else if( decControl->payloadSize_ms == 10 ) { 1.133 + channel_state[ n ].nFramesPerPacket = 1; 1.134 + channel_state[ n ].nb_subfr = 2; 1.135 + } else if( decControl->payloadSize_ms == 20 ) { 1.136 + channel_state[ n ].nFramesPerPacket = 1; 1.137 + channel_state[ n ].nb_subfr = 4; 1.138 + } else if( decControl->payloadSize_ms == 40 ) { 1.139 + channel_state[ n ].nFramesPerPacket = 2; 1.140 + channel_state[ n ].nb_subfr = 4; 1.141 + } else if( decControl->payloadSize_ms == 60 ) { 1.142 + channel_state[ n ].nFramesPerPacket = 3; 1.143 + channel_state[ n ].nb_subfr = 4; 1.144 + } else { 1.145 + silk_assert( 0 ); 1.146 + RESTORE_STACK; 1.147 + return SILK_DEC_INVALID_FRAME_SIZE; 1.148 + } 1.149 + fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1; 1.150 + if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) { 1.151 + silk_assert( 0 ); 1.152 + RESTORE_STACK; 1.153 + return SILK_DEC_INVALID_SAMPLING_FREQUENCY; 1.154 + } 1.155 + ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate ); 1.156 + } 1.157 + } 1.158 + 1.159 + if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) { 1.160 + silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) ); 1.161 + silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) ); 1.162 + silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) ); 1.163 + } 1.164 + psDec->nChannelsAPI = decControl->nChannelsAPI; 1.165 + psDec->nChannelsInternal = decControl->nChannelsInternal; 1.166 + 1.167 + if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) { 1.168 + ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY; 1.169 + RESTORE_STACK; 1.170 + return( ret ); 1.171 + } 1.172 + 1.173 + if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) { 1.174 + /* First decoder call for this payload */ 1.175 + /* Decode VAD flags and LBRR flag */ 1.176 + for( n = 0; n < decControl->nChannelsInternal; n++ ) { 1.177 + for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { 1.178 + channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1); 1.179 + } 1.180 + channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1); 1.181 + } 1.182 + /* Decode LBRR flags */ 1.183 + for( n = 0; n < decControl->nChannelsInternal; n++ ) { 1.184 + silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) ); 1.185 + if( channel_state[ n ].LBRR_flag ) { 1.186 + if( channel_state[ n ].nFramesPerPacket == 1 ) { 1.187 + channel_state[ n ].LBRR_flags[ 0 ] = 1; 1.188 + } else { 1.189 + LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1; 1.190 + for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { 1.191 + channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1; 1.192 + } 1.193 + } 1.194 + } 1.195 + } 1.196 + 1.197 + if( lostFlag == FLAG_DECODE_NORMAL ) { 1.198 + /* Regular decoding: skip all LBRR data */ 1.199 + for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { 1.200 + for( n = 0; n < decControl->nChannelsInternal; n++ ) { 1.201 + if( channel_state[ n ].LBRR_flags[ i ] ) { 1.202 + opus_int pulses[ MAX_FRAME_LENGTH ]; 1.203 + opus_int condCoding; 1.204 + 1.205 + if( decControl->nChannelsInternal == 2 && n == 0 ) { 1.206 + silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); 1.207 + if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) { 1.208 + silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); 1.209 + } 1.210 + } 1.211 + /* Use conditional coding if previous frame available */ 1.212 + if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) { 1.213 + condCoding = CODE_CONDITIONALLY; 1.214 + } else { 1.215 + condCoding = CODE_INDEPENDENTLY; 1.216 + } 1.217 + silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding ); 1.218 + silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType, 1.219 + channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length ); 1.220 + } 1.221 + } 1.222 + } 1.223 + } 1.224 + } 1.225 + 1.226 + /* Get MS predictor index */ 1.227 + if( decControl->nChannelsInternal == 2 ) { 1.228 + if( lostFlag == FLAG_DECODE_NORMAL || 1.229 + ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) ) 1.230 + { 1.231 + silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); 1.232 + /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */ 1.233 + if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) || 1.234 + ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ) 1.235 + { 1.236 + silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); 1.237 + } else { 1.238 + decode_only_middle = 0; 1.239 + } 1.240 + } else { 1.241 + for( n = 0; n < 2; n++ ) { 1.242 + MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ]; 1.243 + } 1.244 + } 1.245 + } 1.246 + 1.247 + /* Reset side channel decoder prediction memory for first frame with side coding */ 1.248 + if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) { 1.249 + silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) ); 1.250 + silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) ); 1.251 + psDec->channel_state[ 1 ].lagPrev = 100; 1.252 + psDec->channel_state[ 1 ].LastGainIndex = 10; 1.253 + psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY; 1.254 + psDec->channel_state[ 1 ].first_frame_after_reset = 1; 1.255 + } 1.256 + 1.257 + ALLOC( samplesOut1_tmp_storage, 1.258 + decControl->nChannelsInternal*( 1.259 + channel_state[ 0 ].frame_length + 2 ), 1.260 + opus_int16 ); 1.261 + samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage; 1.262 + samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage 1.263 + + channel_state[ 0 ].frame_length + 2; 1.264 + 1.265 + if( lostFlag == FLAG_DECODE_NORMAL ) { 1.266 + has_side = !decode_only_middle; 1.267 + } else { 1.268 + has_side = !psDec->prev_decode_only_middle 1.269 + || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 ); 1.270 + } 1.271 + /* Call decoder for one frame */ 1.272 + for( n = 0; n < decControl->nChannelsInternal; n++ ) { 1.273 + if( n == 0 || has_side ) { 1.274 + opus_int FrameIndex; 1.275 + opus_int condCoding; 1.276 + 1.277 + FrameIndex = channel_state[ 0 ].nFramesDecoded - n; 1.278 + /* Use independent coding if no previous frame available */ 1.279 + if( FrameIndex <= 0 ) { 1.280 + condCoding = CODE_INDEPENDENTLY; 1.281 + } else if( lostFlag == FLAG_DECODE_LBRR ) { 1.282 + condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY; 1.283 + } else if( n > 0 && psDec->prev_decode_only_middle ) { 1.284 + /* If we skipped a side frame in this packet, we don't 1.285 + need LTP scaling; the LTP state is well-defined. */ 1.286 + condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; 1.287 + } else { 1.288 + condCoding = CODE_CONDITIONALLY; 1.289 + } 1.290 + ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding); 1.291 + } else { 1.292 + silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) ); 1.293 + } 1.294 + channel_state[ n ].nFramesDecoded++; 1.295 + } 1.296 + 1.297 + if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { 1.298 + /* Convert Mid/Side to Left/Right */ 1.299 + silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec ); 1.300 + } else { 1.301 + /* Buffering */ 1.302 + silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) ); 1.303 + silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) ); 1.304 + } 1.305 + 1.306 + /* Number of output samples */ 1.307 + *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) ); 1.308 + 1.309 + /* Set up pointers to temp buffers */ 1.310 + ALLOC( samplesOut2_tmp, 1.311 + decControl->nChannelsAPI == 2 ? *nSamplesOut : ALLOC_NONE, opus_int16 ); 1.312 + if( decControl->nChannelsAPI == 2 ) { 1.313 + resample_out_ptr = samplesOut2_tmp; 1.314 + } else { 1.315 + resample_out_ptr = samplesOut; 1.316 + } 1.317 + 1.318 + for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { 1.319 + 1.320 + /* Resample decoded signal to API_sampleRate */ 1.321 + ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec ); 1.322 + 1.323 + /* Interleave if stereo output and stereo stream */ 1.324 + if( decControl->nChannelsAPI == 2 ) { 1.325 + for( i = 0; i < *nSamplesOut; i++ ) { 1.326 + samplesOut[ n + 2 * i ] = resample_out_ptr[ i ]; 1.327 + } 1.328 + } 1.329 + } 1.330 + 1.331 + /* Create two channel output from mono stream */ 1.332 + if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) { 1.333 + if ( stereo_to_mono ){ 1.334 + /* Resample right channel for newly collapsed stereo just in case 1.335 + we weren't doing collapsing when switching to mono */ 1.336 + ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec ); 1.337 + 1.338 + for( i = 0; i < *nSamplesOut; i++ ) { 1.339 + samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ]; 1.340 + } 1.341 + } else { 1.342 + for( i = 0; i < *nSamplesOut; i++ ) { 1.343 + samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ]; 1.344 + } 1.345 + } 1.346 + } 1.347 + 1.348 + /* Export pitch lag, measured at 48 kHz sampling rate */ 1.349 + if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) { 1.350 + int mult_tab[ 3 ] = { 6, 4, 3 }; 1.351 + decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ]; 1.352 + } else { 1.353 + decControl->prevPitchLag = 0; 1.354 + } 1.355 + 1.356 + if( lostFlag == FLAG_PACKET_LOST ) { 1.357 + /* On packet loss, remove the gain clamping to prevent having the energy "bounce back" 1.358 + if we lose packets when the energy is going down */ 1.359 + for ( i = 0; i < psDec->nChannelsInternal; i++ ) 1.360 + psDec->channel_state[ i ].LastGainIndex = 10; 1.361 + } else { 1.362 + psDec->prev_decode_only_middle = decode_only_middle; 1.363 + } 1.364 + RESTORE_STACK; 1.365 + return ret; 1.366 +} 1.367 + 1.368 +#if 0 1.369 +/* Getting table of contents for a packet */ 1.370 +opus_int silk_get_TOC( 1.371 + const opus_uint8 *payload, /* I Payload data */ 1.372 + const opus_int nBytesIn, /* I Number of input bytes */ 1.373 + const opus_int nFramesPerPayload, /* I Number of SILK frames per payload */ 1.374 + silk_TOC_struct *Silk_TOC /* O Type of content */ 1.375 +) 1.376 +{ 1.377 + opus_int i, flags, ret = SILK_NO_ERROR; 1.378 + 1.379 + if( nBytesIn < 1 ) { 1.380 + return -1; 1.381 + } 1.382 + if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) { 1.383 + return -1; 1.384 + } 1.385 + 1.386 + silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) ); 1.387 + 1.388 + /* For stereo, extract the flags for the mid channel */ 1.389 + flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 ); 1.390 + 1.391 + Silk_TOC->inbandFECFlag = flags & 1; 1.392 + for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) { 1.393 + flags = silk_RSHIFT( flags, 1 ); 1.394 + Silk_TOC->VADFlags[ i ] = flags & 1; 1.395 + Silk_TOC->VADFlag |= flags & 1; 1.396 + } 1.397 + 1.398 + return ret; 1.399 +} 1.400 +#endif