/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
michael@0: ***********************************************************************/ michael@0: michael@0: #ifdef HAVE_CONFIG_H michael@0: #include "config.h" michael@0: #endif michael@0: #include "API.h" michael@0: #include "main.h" michael@0: #include "stack_alloc.h" michael@0: michael@0: /************************/ michael@0: /* Decoder Super Struct */ michael@0: /************************/ michael@0: typedef struct { michael@0: silk_decoder_state channel_state[ DECODER_NUM_CHANNELS ]; michael@0: stereo_dec_state sStereo; michael@0: opus_int nChannelsAPI; michael@0: opus_int nChannelsInternal; michael@0: opus_int prev_decode_only_middle; michael@0: } silk_decoder; michael@0: michael@0: /*********************/ michael@0: /* Decoder functions */ michael@0: /*********************/ michael@0: michael@0: opus_int silk_Get_Decoder_Size( /* O Returns error code */ michael@0: opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */ michael@0: ) michael@0: { michael@0: opus_int ret = SILK_NO_ERROR; michael@0: michael@0: *decSizeBytes = sizeof( silk_decoder ); michael@0: michael@0: return ret; michael@0: } michael@0: michael@0: /* Reset decoder state */ michael@0: opus_int silk_InitDecoder( /* O Returns error code */ michael@0: void *decState /* I/O State */ michael@0: ) michael@0: { michael@0: opus_int n, ret = SILK_NO_ERROR; michael@0: silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state; michael@0: michael@0: for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) { michael@0: ret = silk_init_decoder( &channel_state[ n ] ); michael@0: } michael@0: silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo)); michael@0: /* Not strictly needed, but it's cleaner that way */ michael@0: ((silk_decoder *)decState)->prev_decode_only_middle = 0; michael@0: michael@0: return ret; michael@0: } michael@0: michael@0: /* Decode a frame */ michael@0: opus_int silk_Decode( /* O Returns error code */ michael@0: void* decState, 
/* I/O State */ michael@0: silk_DecControlStruct* decControl, /* I/O Control Structure */ michael@0: opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ michael@0: opus_int newPacketFlag, /* I Indicates first decoder call for this packet */ michael@0: ec_dec *psRangeDec, /* I/O Compressor data structure */ michael@0: opus_int16 *samplesOut, /* O Decoded output speech vector */ michael@0: opus_int32 *nSamplesOut /* O Number of samples decoded */ michael@0: ) michael@0: { michael@0: opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; michael@0: opus_int32 nSamplesOutDec, LBRR_symbol; michael@0: opus_int16 *samplesOut1_tmp[ 2 ]; michael@0: VARDECL( opus_int16, samplesOut1_tmp_storage ); michael@0: VARDECL( opus_int16, samplesOut2_tmp ); michael@0: opus_int32 MS_pred_Q13[ 2 ] = { 0 }; michael@0: opus_int16 *resample_out_ptr; michael@0: silk_decoder *psDec = ( silk_decoder * )decState; michael@0: silk_decoder_state *channel_state = psDec->channel_state; michael@0: opus_int has_side; michael@0: opus_int stereo_to_mono; michael@0: SAVE_STACK; michael@0: michael@0: silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); michael@0: michael@0: /**********************************/ michael@0: /* Test if first frame in payload */ michael@0: /**********************************/ michael@0: if( newPacketFlag ) { michael@0: for( n = 0; n < decControl->nChannelsInternal; n++ ) { michael@0: channel_state[ n ].nFramesDecoded = 0; /* Used to count frames in packet */ michael@0: } michael@0: } michael@0: michael@0: /* If Mono -> Stereo transition in bitstream: init state of second channel */ michael@0: if( decControl->nChannelsInternal > psDec->nChannelsInternal ) { michael@0: ret += silk_init_decoder( &channel_state[ 1 ] ); michael@0: } michael@0: michael@0: stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 && michael@0: ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz ); michael@0: 
michael@0: if( channel_state[ 0 ].nFramesDecoded == 0 ) { michael@0: for( n = 0; n < decControl->nChannelsInternal; n++ ) { michael@0: opus_int fs_kHz_dec; michael@0: if( decControl->payloadSize_ms == 0 ) { michael@0: /* Assuming packet loss, use 10 ms */ michael@0: channel_state[ n ].nFramesPerPacket = 1; michael@0: channel_state[ n ].nb_subfr = 2; michael@0: } else if( decControl->payloadSize_ms == 10 ) { michael@0: channel_state[ n ].nFramesPerPacket = 1; michael@0: channel_state[ n ].nb_subfr = 2; michael@0: } else if( decControl->payloadSize_ms == 20 ) { michael@0: channel_state[ n ].nFramesPerPacket = 1; michael@0: channel_state[ n ].nb_subfr = 4; michael@0: } else if( decControl->payloadSize_ms == 40 ) { michael@0: channel_state[ n ].nFramesPerPacket = 2; michael@0: channel_state[ n ].nb_subfr = 4; michael@0: } else if( decControl->payloadSize_ms == 60 ) { michael@0: channel_state[ n ].nFramesPerPacket = 3; michael@0: channel_state[ n ].nb_subfr = 4; michael@0: } else { michael@0: silk_assert( 0 ); michael@0: RESTORE_STACK; michael@0: return SILK_DEC_INVALID_FRAME_SIZE; michael@0: } michael@0: fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1; michael@0: if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) { michael@0: silk_assert( 0 ); michael@0: RESTORE_STACK; michael@0: return SILK_DEC_INVALID_SAMPLING_FREQUENCY; michael@0: } michael@0: ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate ); michael@0: } michael@0: } michael@0: michael@0: if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) { michael@0: silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) ); michael@0: silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) ); michael@0: silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) ); michael@0: } 
michael@0: psDec->nChannelsAPI = decControl->nChannelsAPI; michael@0: psDec->nChannelsInternal = decControl->nChannelsInternal; michael@0: michael@0: if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) { michael@0: ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY; michael@0: RESTORE_STACK; michael@0: return( ret ); michael@0: } michael@0: michael@0: if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) { michael@0: /* First decoder call for this payload */ michael@0: /* Decode VAD flags and LBRR flag */ michael@0: for( n = 0; n < decControl->nChannelsInternal; n++ ) { michael@0: for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { michael@0: channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1); michael@0: } michael@0: channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1); michael@0: } michael@0: /* Decode LBRR flags */ michael@0: for( n = 0; n < decControl->nChannelsInternal; n++ ) { michael@0: silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) ); michael@0: if( channel_state[ n ].LBRR_flag ) { michael@0: if( channel_state[ n ].nFramesPerPacket == 1 ) { michael@0: channel_state[ n ].LBRR_flags[ 0 ] = 1; michael@0: } else { michael@0: LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1; michael@0: for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { michael@0: channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: if( lostFlag == FLAG_DECODE_NORMAL ) { michael@0: /* Regular decoding: skip all LBRR data */ michael@0: for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { michael@0: for( n = 0; n < decControl->nChannelsInternal; n++ ) { michael@0: if( channel_state[ n ].LBRR_flags[ i ] ) { michael@0: opus_int pulses[ MAX_FRAME_LENGTH ]; michael@0: opus_int condCoding; 
michael@0: michael@0: if( decControl->nChannelsInternal == 2 && n == 0 ) { michael@0: silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); michael@0: if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) { michael@0: silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); michael@0: } michael@0: } michael@0: /* Use conditional coding if previous frame available */ michael@0: if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) { michael@0: condCoding = CODE_CONDITIONALLY; michael@0: } else { michael@0: condCoding = CODE_INDEPENDENTLY; michael@0: } michael@0: silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding ); michael@0: silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType, michael@0: channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length ); michael@0: } michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* Get MS predictor index */ michael@0: if( decControl->nChannelsInternal == 2 ) { michael@0: if( lostFlag == FLAG_DECODE_NORMAL || michael@0: ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) ) michael@0: { michael@0: silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); michael@0: /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */ michael@0: if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) || michael@0: ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ) michael@0: { michael@0: silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); michael@0: } else { michael@0: decode_only_middle = 0; michael@0: } michael@0: } else { michael@0: for( n = 0; n < 2; n++ ) { michael@0: MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ]; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* Reset side channel decoder prediction memory for first frame with side coding 
*/ michael@0: if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) { michael@0: silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) ); michael@0: silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) ); michael@0: psDec->channel_state[ 1 ].lagPrev = 100; michael@0: psDec->channel_state[ 1 ].LastGainIndex = 10; michael@0: psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY; michael@0: psDec->channel_state[ 1 ].first_frame_after_reset = 1; michael@0: } michael@0: michael@0: ALLOC( samplesOut1_tmp_storage, michael@0: decControl->nChannelsInternal*( michael@0: channel_state[ 0 ].frame_length + 2 ), michael@0: opus_int16 ); michael@0: samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage; michael@0: samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage michael@0: + channel_state[ 0 ].frame_length + 2; michael@0: michael@0: if( lostFlag == FLAG_DECODE_NORMAL ) { michael@0: has_side = !decode_only_middle; michael@0: } else { michael@0: has_side = !psDec->prev_decode_only_middle michael@0: || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 ); michael@0: } michael@0: /* Call decoder for one frame */ michael@0: for( n = 0; n < decControl->nChannelsInternal; n++ ) { michael@0: if( n == 0 || has_side ) { michael@0: opus_int FrameIndex; michael@0: opus_int condCoding; michael@0: michael@0: FrameIndex = channel_state[ 0 ].nFramesDecoded - n; michael@0: /* Use independent coding if no previous frame available */ michael@0: if( FrameIndex <= 0 ) { michael@0: condCoding = CODE_INDEPENDENTLY; michael@0: } else if( lostFlag == FLAG_DECODE_LBRR ) { michael@0: condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? 
CODE_CONDITIONALLY : CODE_INDEPENDENTLY; michael@0: } else if( n > 0 && psDec->prev_decode_only_middle ) { michael@0: /* If we skipped a side frame in this packet, we don't michael@0: need LTP scaling; the LTP state is well-defined. */ michael@0: condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; michael@0: } else { michael@0: condCoding = CODE_CONDITIONALLY; michael@0: } michael@0: ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding); michael@0: } else { michael@0: silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) ); michael@0: } michael@0: channel_state[ n ].nFramesDecoded++; michael@0: } michael@0: michael@0: if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { michael@0: /* Convert Mid/Side to Left/Right */ michael@0: silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec ); michael@0: } else { michael@0: /* Buffering */ michael@0: silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) ); michael@0: silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) ); michael@0: } michael@0: michael@0: /* Number of output samples */ michael@0: *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) ); michael@0: michael@0: /* Set up pointers to temp buffers */ michael@0: ALLOC( samplesOut2_tmp, michael@0: decControl->nChannelsAPI == 2 ? 
*nSamplesOut : ALLOC_NONE, opus_int16 ); michael@0: if( decControl->nChannelsAPI == 2 ) { michael@0: resample_out_ptr = samplesOut2_tmp; michael@0: } else { michael@0: resample_out_ptr = samplesOut; michael@0: } michael@0: michael@0: for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { michael@0: michael@0: /* Resample decoded signal to API_sampleRate */ michael@0: ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec ); michael@0: michael@0: /* Interleave if stereo output and stereo stream */ michael@0: if( decControl->nChannelsAPI == 2 ) { michael@0: for( i = 0; i < *nSamplesOut; i++ ) { michael@0: samplesOut[ n + 2 * i ] = resample_out_ptr[ i ]; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* Create two channel output from mono stream */ michael@0: if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) { michael@0: if ( stereo_to_mono ){ michael@0: /* Resample right channel for newly collapsed stereo just in case michael@0: we weren't doing collapsing when switching to mono */ michael@0: ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec ); michael@0: michael@0: for( i = 0; i < *nSamplesOut; i++ ) { michael@0: samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ]; michael@0: } michael@0: } else { michael@0: for( i = 0; i < *nSamplesOut; i++ ) { michael@0: samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ]; michael@0: } michael@0: } michael@0: } michael@0: michael@0: /* Export pitch lag, measured at 48 kHz sampling rate */ michael@0: if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) { michael@0: int mult_tab[ 3 ] = { 6, 4, 3 }; michael@0: decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ]; michael@0: } else { michael@0: decControl->prevPitchLag = 0; michael@0: } michael@0: michael@0: if( lostFlag == 
FLAG_PACKET_LOST ) { michael@0: /* On packet loss, remove the gain clamping to prevent having the energy "bounce back" michael@0: if we lose packets when the energy is going down */ michael@0: for ( i = 0; i < psDec->nChannelsInternal; i++ ) michael@0: psDec->channel_state[ i ].LastGainIndex = 10; michael@0: } else { michael@0: psDec->prev_decode_only_middle = decode_only_middle; michael@0: } michael@0: RESTORE_STACK; michael@0: return ret; michael@0: } michael@0: michael@0: #if 0 michael@0: /* Getting table of contents for a packet */ michael@0: opus_int silk_get_TOC( michael@0: const opus_uint8 *payload, /* I Payload data */ michael@0: const opus_int nBytesIn, /* I Number of input bytes */ michael@0: const opus_int nFramesPerPayload, /* I Number of SILK frames per payload */ michael@0: silk_TOC_struct *Silk_TOC /* O Type of content */ michael@0: ) michael@0: { michael@0: opus_int i, flags, ret = SILK_NO_ERROR; michael@0: michael@0: if( nBytesIn < 1 ) { michael@0: return -1; michael@0: } michael@0: if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) { michael@0: return -1; michael@0: } michael@0: michael@0: silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) ); michael@0: michael@0: /* For stereo, extract the flags for the mid channel */ michael@0: flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 ); michael@0: michael@0: Silk_TOC->inbandFECFlag = flags & 1; michael@0: for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) { michael@0: flags = silk_RSHIFT( flags, 1 ); michael@0: Silk_TOC->VADFlags[ i ] = flags & 1; michael@0: Silk_TOC->VADFlag |= flags & 1; michael@0: } michael@0: michael@0: return ret; michael@0: } michael@0: #endif