Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /*********************************************************************** |
michael@0 | 2 | Copyright (c) 2006-2011, Skype Limited. All rights reserved. |
michael@0 | 3 | Redistribution and use in source and binary forms, with or without |
michael@0 | 4 | modification, are permitted provided that the following conditions |
michael@0 | 5 | are met: |
michael@0 | 6 | - Redistributions of source code must retain the above copyright notice, |
michael@0 | 7 | this list of conditions and the following disclaimer. |
michael@0 | 8 | - Redistributions in binary form must reproduce the above copyright |
michael@0 | 9 | notice, this list of conditions and the following disclaimer in the |
michael@0 | 10 | documentation and/or other materials provided with the distribution. |
michael@0 | 11 | - Neither the name of Internet Society, IETF or IETF Trust, nor the |
michael@0 | 12 | names of specific contributors, may be used to endorse or promote |
michael@0 | 13 | products derived from this software without specific prior written |
michael@0 | 14 | permission. |
michael@0 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
michael@0 | 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
michael@0 | 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
michael@0 | 18 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
michael@0 | 19 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
michael@0 | 20 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
michael@0 | 21 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
michael@0 | 22 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
michael@0 | 23 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
michael@0 | 24 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
michael@0 | 25 | POSSIBILITY OF SUCH DAMAGE. |
michael@0 | 26 | ***********************************************************************/ |
michael@0 | 27 | |
michael@0 | 28 | #ifndef SILK_SIGPROC_FIX_H |
michael@0 | 29 | #define SILK_SIGPROC_FIX_H |
michael@0 | 30 | |
michael@0 | 31 | #ifdef __cplusplus |
michael@0 | 32 | extern "C" |
michael@0 | 33 | { |
michael@0 | 34 | #endif |
michael@0 | 35 | |
michael@0 | 36 | /*#define silk_MACRO_COUNT */ /* Used to enable WMOPS counting */ |
michael@0 | 37 | |
michael@0 | 38 | #define SILK_MAX_ORDER_LPC 16 /* max order of the LPC analysis in schur() and k2a() */ |
michael@0 | 39 | |
michael@0 | 40 | #include <string.h> /* for memset(), memcpy(), memmove() */ |
michael@0 | 41 | #include "typedef.h" |
michael@0 | 42 | #include "resampler_structs.h" |
michael@0 | 43 | #include "macros.h" |
michael@0 | 44 | |
michael@0 | 45 | |
michael@0 | 46 | /********************************************************************/ |
michael@0 | 47 | /* SIGNAL PROCESSING FUNCTIONS */ |
michael@0 | 48 | /********************************************************************/ |
michael@0 | 49 | |
michael@0 | 50 | /*! |
michael@0 | 51 | * Initialize/reset the resampler state for a given pair of input/output sampling rates |
michael@0 | 52 | */ |
michael@0 | 53 | opus_int silk_resampler_init( |
michael@0 | 54 | silk_resampler_state_struct *S, /* I/O Resampler state */ |
michael@0 | 55 | opus_int32 Fs_Hz_in, /* I Input sampling rate (Hz) */ |
michael@0 | 56 | opus_int32 Fs_Hz_out, /* I Output sampling rate (Hz) */ |
michael@0 | 57 | opus_int forEnc /* I If 1: encoder; if 0: decoder */ |
michael@0 | 58 | ); |
michael@0 | 59 | |
michael@0 | 60 | /*! |
michael@0 | 61 | * Resampler: convert from one sampling rate to another |
michael@0 | 62 | */ |
michael@0 | 63 | opus_int silk_resampler( |
michael@0 | 64 | silk_resampler_state_struct *S, /* I/O Resampler state */ |
michael@0 | 65 | opus_int16 out[], /* O Output signal */ |
michael@0 | 66 | const opus_int16 in[], /* I Input signal */ |
michael@0 | 67 | opus_int32 inLen /* I Number of input samples */ |
michael@0 | 68 | ); |
michael@0 | 69 | |
michael@0 | 70 | /*! |
michael@0 | 71 | * Downsample 2x, mediocre quality |
michael@0 | 72 | */ |
michael@0 | 73 | void silk_resampler_down2( |
michael@0 | 74 | opus_int32 *S, /* I/O State vector [ 2 ] */ |
michael@0 | 75 | opus_int16 *out, /* O Output signal [ floor(inLen/2) ] */ |
michael@0 | 76 | const opus_int16 *in, /* I Input signal [ inLen ] */ |
michael@0 | 77 | opus_int32 inLen /* I Number of input samples */ |
michael@0 | 78 | ); |
michael@0 | 79 | |
michael@0 | 80 | /*! |
michael@0 | 81 | * Downsample by a factor 2/3, low quality |
michael@0 | 82 | */ |
michael@0 | 83 | void silk_resampler_down2_3( |
michael@0 | 84 | opus_int32 *S, /* I/O State vector [ 6 ] */ |
michael@0 | 85 | opus_int16 *out, /* O Output signal [ floor(2*inLen/3) ] */ |
michael@0 | 86 | const opus_int16 *in, /* I Input signal [ inLen ] */ |
michael@0 | 87 | opus_int32 inLen /* I Number of input samples */ |
michael@0 | 88 | ); |
michael@0 | 89 | |
michael@0 | 90 | /*! |
michael@0 | 91 | * second order ARMA filter; |
michael@0 | 92 | * slower than biquad() but uses more precise coefficients |
michael@0 | 93 | * can handle (slowly) varying coefficients |
michael@0 | 94 | */ |
michael@0 | 95 | void silk_biquad_alt( |
michael@0 | 96 | const opus_int16 *in, /* I input signal */ |
michael@0 | 97 | const opus_int32 *B_Q28, /* I MA coefficients [3] */ |
michael@0 | 98 | const opus_int32 *A_Q28, /* I AR coefficients [2] */ |
michael@0 | 99 | opus_int32 *S, /* I/O State vector [2] */ |
michael@0 | 100 | opus_int16 *out, /* O output signal */ |
michael@0 | 101 | const opus_int32 len, /* I signal length (must be even) */ |
michael@0 | 102 | opus_int stride /* I Operate on interleaved signal if > 1 */ |
michael@0 | 103 | ); |
michael@0 | 104 | |
michael@0 | 105 | /* Variable order MA prediction error filter. */ |
michael@0 | 106 | void silk_LPC_analysis_filter( |
michael@0 | 107 | opus_int16 *out, /* O Output signal */ |
michael@0 | 108 | const opus_int16 *in, /* I Input signal */ |
michael@0 | 109 | const opus_int16 *B, /* I MA prediction coefficients, Q12 [order] */ |
michael@0 | 110 | const opus_int32 len, /* I Signal length */ |
michael@0 | 111 | const opus_int32 d /* I Filter order */ |
michael@0 | 112 | ); |
michael@0 | 113 | |
michael@0 | 114 | /* Chirp (bandwidth expand) LP AR filter */ |
michael@0 | 115 | void silk_bwexpander( |
michael@0 | 116 | opus_int16 *ar, /* I/O AR filter to be expanded (without leading 1) */ |
michael@0 | 117 | const opus_int d, /* I Length of ar */ |
michael@0 | 118 | opus_int32 chirp_Q16 /* I Chirp factor (typically in the range 0 to 1) */ |
michael@0 | 119 | ); |
michael@0 | 120 | |
michael@0 | 121 | /* Chirp (bandwidth expand) LP AR filter, variant operating on 32-bit coefficients */ |
michael@0 | 122 | void silk_bwexpander_32( |
michael@0 | 123 | opus_int32 *ar, /* I/O AR filter to be expanded (without leading 1) */ |
michael@0 | 124 | const opus_int d, /* I Length of ar */ |
michael@0 | 125 | opus_int32 chirp_Q16 /* I Chirp factor in Q16 */ |
michael@0 | 126 | ); |
michael@0 | 127 | |
michael@0 | 128 | /* Compute inverse of LPC prediction gain, and */ |
michael@0 | 129 | /* test if LPC coefficients are stable (all poles within unit circle) */ |
michael@0 | 130 | opus_int32 silk_LPC_inverse_pred_gain( /* O Returns inverse prediction gain in energy domain, Q30 */ |
michael@0 | 131 | const opus_int16 *A_Q12, /* I Prediction coefficients, Q12 [order] */ |
michael@0 | 132 | const opus_int order /* I Prediction order */ |
michael@0 | 133 | ); |
michael@0 | 134 | |
michael@0 | 135 | /* For input in Q24 domain */ |
michael@0 | 136 | opus_int32 silk_LPC_inverse_pred_gain_Q24( /* O Returns inverse prediction gain in energy domain, Q30 */ |
michael@0 | 137 | const opus_int32 *A_Q24, /* I Prediction coefficients [order] */ |
michael@0 | 138 | const opus_int order /* I Prediction order */ |
michael@0 | 139 | ); |
michael@0 | 140 | |
michael@0 | 141 | /* Split signal in two decimated bands using first-order allpass filters */ |
michael@0 | 142 | void silk_ana_filt_bank_1( |
michael@0 | 143 | const opus_int16 *in, /* I Input signal [N] */ |
michael@0 | 144 | opus_int32 *S, /* I/O State vector [2] */ |
michael@0 | 145 | opus_int16 *outL, /* O Low band [N/2] */ |
michael@0 | 146 | opus_int16 *outH, /* O High band [N/2] */ |
michael@0 | 147 | const opus_int32 N /* I Number of input samples */ |
michael@0 | 148 | ); |
michael@0 | 149 | |
michael@0 | 150 | /********************************************************************/ |
michael@0 | 151 | /* SCALAR FUNCTIONS */ |
michael@0 | 152 | /********************************************************************/ |
michael@0 | 153 | |
michael@0 | 154 | /* Approximation of 128 * log2() (exact inverse of approx 2^() below) */ |
michael@0 | 155 | /* Convert input to a log scale */ |
michael@0 | 156 | opus_int32 silk_lin2log( |
michael@0 | 157 | const opus_int32 inLin /* I input in linear scale */ |
michael@0 | 158 | ); |
michael@0 | 159 | |
michael@0 | 160 | /* Approximation of a sigmoid function */ |
michael@0 | 161 | opus_int silk_sigm_Q15( |
michael@0 | 162 | opus_int in_Q5 /* I */ |
michael@0 | 163 | ); |
michael@0 | 164 | |
michael@0 | 165 | /* Approximation of 2^() (exact inverse of approx log2() above) */ |
michael@0 | 166 | /* Convert input to a linear scale */ |
michael@0 | 167 | opus_int32 silk_log2lin( |
michael@0 | 168 | const opus_int32 inLog_Q7 /* I input on log scale */ |
michael@0 | 169 | ); |
michael@0 | 170 | |
michael@0 | 171 | /* Compute number of bits to right shift the sum of squares of a vector */ |
michael@0 | 172 | /* of int16s to make it fit in an int32 */ |
michael@0 | 173 | void silk_sum_sqr_shift( |
michael@0 | 174 | opus_int32 *energy, /* O Energy of x, after shifting to the right */ |
michael@0 | 175 | opus_int *shift, /* O Number of bits right shift applied to energy */ |
michael@0 | 176 | const opus_int16 *x, /* I Input vector */ |
michael@0 | 177 | opus_int len /* I Length of input vector */ |
michael@0 | 178 | ); |
michael@0 | 179 | |
michael@0 | 180 | /* Calculates the reflection coefficients from the correlation sequence */ |
michael@0 | 181 | /* Faster than schur64(), but much less accurate. */ |
michael@0 | 182 | /* uses SMLAWB(), requiring armv5E and higher. */ |
michael@0 | 183 | opus_int32 silk_schur( /* O Returns residual energy */ |
michael@0 | 184 | opus_int16 *rc_Q15, /* O reflection coefficients [order] Q15 */ |
michael@0 | 185 | const opus_int32 *c, /* I correlations [order+1] */ |
michael@0 | 186 | const opus_int32 order /* I prediction order */ |
michael@0 | 187 | ); |
michael@0 | 188 | |
michael@0 | 189 | /* Calculates the reflection coefficients from the correlation sequence */ |
michael@0 | 190 | /* Slower than schur(), but more accurate. */ |
michael@0 | 191 | /* Uses SMULL(), available on armv4 */ |
michael@0 | 192 | opus_int32 silk_schur64( /* O returns residual energy */ |
michael@0 | 193 | opus_int32 rc_Q16[], /* O Reflection coefficients [order] Q16 */ |
michael@0 | 194 | const opus_int32 c[], /* I Correlations [order+1] */ |
michael@0 | 195 | opus_int32 order /* I Prediction order */ |
michael@0 | 196 | ); |
michael@0 | 197 | |
michael@0 | 198 | /* Step up function, converts reflection coefficients to prediction coefficients */ |
michael@0 | 199 | void silk_k2a( |
michael@0 | 200 | opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */ |
michael@0 | 201 | const opus_int16 *rc_Q15, /* I Reflection coefficients [order] Q15 */ |
michael@0 | 202 | const opus_int32 order /* I Prediction order */ |
michael@0 | 203 | ); |
michael@0 | 204 | |
michael@0 | 205 | /* Step up function, converts Q16 reflection coefficients to prediction coefficients */ |
michael@0 | 206 | void silk_k2a_Q16( |
michael@0 | 207 | opus_int32 *A_Q24, /* O Prediction coefficients [order] Q24 */ |
michael@0 | 208 | const opus_int32 *rc_Q16, /* I Reflection coefficients [order] Q16 */ |
michael@0 | 209 | const opus_int32 order /* I Prediction order */ |
michael@0 | 210 | ); |
michael@0 | 211 | |
michael@0 | 212 | /* Apply sine window to signal vector. */ |
michael@0 | 213 | /* Window types: */ |
michael@0 | 214 | /* 1 -> sine window from 0 to pi/2 */ |
michael@0 | 215 | /* 2 -> sine window from pi/2 to pi */ |
michael@0 | 216 | /* every other sample of window is linearly interpolated, for speed */ |
michael@0 | 217 | void silk_apply_sine_window( |
michael@0 | 218 | opus_int16 px_win[], /* O Pointer to windowed signal */ |
michael@0 | 219 | const opus_int16 px[], /* I Pointer to input signal */ |
michael@0 | 220 | const opus_int win_type, /* I Selects a window type */ |
michael@0 | 221 | const opus_int length /* I Window length, multiple of 4 */ |
michael@0 | 222 | ); |
michael@0 | 223 | |
michael@0 | 224 | /* Compute autocorrelation */ |
michael@0 | 225 | void silk_autocorr( |
michael@0 | 226 | opus_int32 *results, /* O Result (length correlationCount) */ |
michael@0 | 227 | opus_int *scale, /* O Scaling of the correlation vector */ |
michael@0 | 228 | const opus_int16 *inputData, /* I Input data to correlate */ |
michael@0 | 229 | const opus_int inputDataSize, /* I Length of input */ |
michael@0 | 230 | const opus_int correlationCount, /* I Number of correlation taps to compute */ |
michael@0 | 231 | int arch /* I Run-time architecture */ |
michael@0 | 232 | ); |
michael@0 | 233 | |
michael@0 | 234 | void silk_decode_pitch( |
michael@0 | 235 | opus_int16 lagIndex, /* I */ |
michael@0 | 236 | opus_int8 contourIndex, /* I */ |
michael@0 | 237 | opus_int pitch_lags[], /* O 4 pitch values */ |
michael@0 | 238 | const opus_int Fs_kHz, /* I sampling frequency (kHz) */ |
michael@0 | 239 | const opus_int nb_subfr /* I number of sub frames */ |
michael@0 | 240 | ); |
michael@0 | 241 | |
michael@0 | 242 | opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */ |
michael@0 | 243 | const opus_int16 *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */ |
michael@0 | 244 | opus_int *pitch_out, /* O 4 pitch lag values */ |
michael@0 | 245 | opus_int16 *lagIndex, /* O Lag Index */ |
michael@0 | 246 | opus_int8 *contourIndex, /* O Pitch contour Index */ |
michael@0 | 247 | opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */ |
michael@0 | 248 | opus_int prevLag, /* I Last lag of previous frame; set to zero if unvoiced */ |
michael@0 | 249 | const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */ |
michael@0 | 250 | const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */ |
michael@0 | 251 | const opus_int Fs_kHz, /* I Sample frequency (kHz) */ |
michael@0 | 252 | const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */ |
michael@0 | 253 | const opus_int nb_subfr, /* I number of 5 ms subframes */ |
michael@0 | 254 | int arch /* I Run-time architecture */ |
michael@0 | 255 | ); |
michael@0 | 256 | |
michael@0 | 257 | /* Compute Normalized Line Spectral Frequencies (NLSFs) from whitening filter coefficients */ |
michael@0 | 258 | /* If not all roots are found, the a_Q16 coefficients are bandwidth expanded until convergence. */ |
michael@0 | 259 | void silk_A2NLSF( |
michael@0 | 260 | opus_int16 *NLSF, /* O Normalized Line Spectral Frequencies in Q15 (0..2^15-1) [d] */ |
michael@0 | 261 | opus_int32 *a_Q16, /* I/O Monic whitening filter coefficients in Q16 [d] */ |
michael@0 | 262 | const opus_int d /* I Filter order (must be even) */ |
michael@0 | 263 | ); |
michael@0 | 264 | |
michael@0 | 265 | /* compute whitening filter coefficients from normalized line spectral frequencies */ |
michael@0 | 266 | void silk_NLSF2A( |
michael@0 | 267 | opus_int16 *a_Q12, /* O monic whitening filter coefficients in Q12, [ d ] */ |
michael@0 | 268 | const opus_int16 *NLSF, /* I normalized line spectral frequencies in Q15, [ d ] */ |
michael@0 | 269 | const opus_int d /* I filter order (should be even) */ |
michael@0 | 270 | ); |
michael@0 | 271 | |
michael@0 | 272 | void silk_insertion_sort_increasing( |
michael@0 | 273 | opus_int32 *a, /* I/O Unsorted / Sorted vector */ |
michael@0 | 274 | opus_int *idx, /* O Index vector for the sorted elements */ |
michael@0 | 275 | const opus_int L, /* I Vector length */ |
michael@0 | 276 | const opus_int K /* I Number of correctly sorted positions */ |
michael@0 | 277 | ); |
michael@0 | 278 | |
michael@0 | 279 | void silk_insertion_sort_decreasing_int16( |
michael@0 | 280 | opus_int16 *a, /* I/O Unsorted / Sorted vector */ |
michael@0 | 281 | opus_int *idx, /* O Index vector for the sorted elements */ |
michael@0 | 282 | const opus_int L, /* I Vector length */ |
michael@0 | 283 | const opus_int K /* I Number of correctly sorted positions */ |
michael@0 | 284 | ); |
michael@0 | 285 | |
michael@0 | 286 | void silk_insertion_sort_increasing_all_values_int16( |
michael@0 | 287 | opus_int16 *a, /* I/O Unsorted / Sorted vector */ |
michael@0 | 288 | const opus_int L /* I Vector length */ |
michael@0 | 289 | ); |
michael@0 | 290 | |
michael@0 | 291 | /* NLSF stabilizer, for a single input data vector */ |
michael@0 | 292 | void silk_NLSF_stabilize( |
michael@0 | 293 | opus_int16 *NLSF_Q15, /* I/O Unstable/stabilized normalized LSF vector in Q15 [L] */ |
michael@0 | 294 | const opus_int16 *NDeltaMin_Q15, /* I Min distance vector, NDeltaMin_Q15[L] must be >= 1 [L+1] */ |
michael@0 | 295 | const opus_int L /* I Number of NLSF parameters in the input vector */ |
michael@0 | 296 | ); |
michael@0 | 297 | |
michael@0 | 298 | /* Laroia low complexity NLSF weights */ |
michael@0 | 299 | void silk_NLSF_VQ_weights_laroia( |
michael@0 | 300 | opus_int16 *pNLSFW_Q_OUT, /* O Pointer to input vector weights [D] */ |
michael@0 | 301 | const opus_int16 *pNLSF_Q15, /* I Pointer to input vector [D] */ |
michael@0 | 302 | const opus_int D /* I Input vector dimension (even) */ |
michael@0 | 303 | ); |
michael@0 | 304 | |
michael@0 | 305 | /* Compute reflection coefficients from input signal */ |
michael@0 | 306 | void silk_burg_modified( |
michael@0 | 307 | opus_int32 *res_nrg, /* O Residual energy */ |
michael@0 | 308 | opus_int *res_nrg_Q, /* O Residual energy Q value */ |
michael@0 | 309 | opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ |
michael@0 | 310 | const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ |
michael@0 | 311 | const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ |
michael@0 | 312 | const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ |
michael@0 | 313 | const opus_int nb_subfr, /* I Number of subframes stacked in x */ |
michael@0 | 314 | const opus_int D, /* I Order */ |
michael@0 | 315 | int arch /* I Run-time architecture */ |
michael@0 | 316 | ); |
michael@0 | 317 | |
michael@0 | 318 | /* Copy and multiply a vector by a constant */ |
michael@0 | 319 | void silk_scale_copy_vector16( |
michael@0 | 320 | opus_int16 *data_out, /* O Scaled copy of data_in */ |
michael@0 | 321 | const opus_int16 *data_in, /* I Vector to copy and scale */ |
michael@0 | 322 | opus_int32 gain_Q16, /* I Gain in Q16 */ |
michael@0 | 323 | const opus_int dataSize /* I Length */ |
michael@0 | 324 | ); |
michael@0 | 325 | |
michael@0 | 326 | /* Some of the LTP-related functions require Q26 to work. */ |
michael@0 | 327 | void silk_scale_vector32_Q26_lshift_18( |
michael@0 | 328 | opus_int32 *data1, /* I/O Q0/Q18 */ |
michael@0 | 329 | opus_int32 gain_Q26, /* I Q26 */ |
michael@0 | 330 | opus_int dataSize /* I length */ |
michael@0 | 331 | ); |
michael@0 | 332 | |
michael@0 | 333 | /********************************************************************/ |
michael@0 | 334 | /* INLINE ARM MATH */ |
michael@0 | 335 | /********************************************************************/ |
michael@0 | 336 | |
michael@0 | 337 | /* return sum( inVec1[i] * inVec2[i] ) */ |
michael@0 | 338 | opus_int32 silk_inner_prod_aligned( |
michael@0 | 339 | const opus_int16 *const inVec1, /* I input vector 1 */ |
michael@0 | 340 | const opus_int16 *const inVec2, /* I input vector 2 */ |
michael@0 | 341 | const opus_int len /* I vector lengths */ |
michael@0 | 342 | ); |
michael@0 | 343 | |
michael@0 | 344 | opus_int32 silk_inner_prod_aligned_scale( |
michael@0 | 345 | const opus_int16 *const inVec1, /* I input vector 1 */ |
michael@0 | 346 | const opus_int16 *const inVec2, /* I input vector 2 */ |
michael@0 | 347 | const opus_int scale, /* I number of bits to shift */ |
michael@0 | 348 | const opus_int len /* I vector lengths */ |
michael@0 | 349 | ); |
michael@0 | 350 | |
michael@0 | 351 | opus_int64 silk_inner_prod16_aligned_64( |
michael@0 | 352 | const opus_int16 *inVec1, /* I input vector 1 */ |
michael@0 | 353 | const opus_int16 *inVec2, /* I input vector 2 */ |
michael@0 | 354 | const opus_int len /* I vector lengths */ |
michael@0 | 355 | ); |
michael@0 | 356 | |
michael@0 | 357 | /********************************************************************/ |
michael@0 | 358 | /* MACROS */ |
michael@0 | 359 | /********************************************************************/ |
michael@0 | 360 | |
michael@0 | 361 | /* Rotate a32 right by 'rot' bits. Negative rot values result in rotating |
michael@0 | 362 | left. The count is reduced modulo 32, so every rot is valid. Output is 32bit int. |
michael@0 | 363 | Note: contemporary compilers recognize the C expression below and |
michael@0 | 364 | compile it into a 'ror' instruction if available. No need for OPUS_INLINE ASM! */ |
michael@0 | 365 | static OPUS_INLINE opus_int32 silk_ROR32( opus_int32 a32, opus_int rot )
michael@0 | 366 | {
michael@0 | 367 | opus_uint32 x = (opus_uint32) a32;
michael@0 | 368 | /* Converting rot to unsigned is well defined (modulo 2^32); keeping the low */
michael@0 | 369 | /* five bits maps a left rotation by m onto the equal right rotation by 32 - m */
michael@0 | 370 | /* and avoids negating rot, which would overflow for the most negative value. */
michael@0 | 371 | opus_uint32 r = (opus_uint32) rot & 31;
michael@0 | 372 | if( r == 0 ) {
michael@0 | 373 | /* Whole number of turns: also keeps the 32 - r shift below under 32 bits. */
michael@0 | 374 | return a32;
michael@0 | 375 | }
michael@0 | 376 | return (opus_int32) ((x >> r) | (x << (32 - r)));
michael@0 | 377 | }
michael@0 | 378 | |
michael@0 | 379 | /* Allocate opus_int16 aligned to 4-byte memory address */ |
michael@0 | 380 | #if EMBEDDED_ARM |
michael@0 | 381 | #define silk_DWORD_ALIGN __attribute__((aligned(4))) |
michael@0 | 382 | #else |
michael@0 | 383 | #define silk_DWORD_ALIGN |
michael@0 | 384 | #endif |
michael@0 | 385 | |
michael@0 | 386 | /* Useful Macros that can be adjusted to other platforms */ |
michael@0 | 387 | #define silk_memcpy(dest, src, size) memcpy((dest), (src), (size)) |
michael@0 | 388 | #define silk_memset(dest, src, size) memset((dest), (src), (size)) |
michael@0 | 389 | #define silk_memmove(dest, src, size) memmove((dest), (src), (size)) |
michael@0 | 390 | |
michael@0 | 391 | /* Fixed point macros */ |
michael@0 | 392 | |
michael@0 | 393 | /* (a32 * b32) output have to be 32bit int */ |
michael@0 | 394 | #define silk_MUL(a32, b32) ((a32) * (b32)) |
michael@0 | 395 | |
michael@0 | 396 | /* (a32 * b32) output have to be 32bit uint */ |
michael@0 | 397 | #define silk_MUL_uint(a32, b32) silk_MUL(a32, b32) |
michael@0 | 398 | |
michael@0 | 399 | /* a32 + (b32 * c32) output have to be 32bit int */ |
michael@0 | 400 | #define silk_MLA(a32, b32, c32) silk_ADD32((a32),((b32) * (c32))) |
michael@0 | 401 | |
michael@0 | 402 | /* a32 + (b32 * c32) output have to be 32bit uint */ |
michael@0 | 403 | #define silk_MLA_uint(a32, b32, c32) silk_MLA(a32, b32, c32) |
michael@0 | 404 | |
michael@0 | 405 | /* ((a32 >> 16) * (b32 >> 16)) output have to be 32bit int */ |
michael@0 | 406 | #define silk_SMULTT(a32, b32) (((a32) >> 16) * ((b32) >> 16)) |
michael@0 | 407 | |
michael@0 | 408 | /* a32 + ((b32 >> 16) * (c32 >> 16)) output have to be 32bit int */ |
michael@0 | 409 | #define silk_SMLATT(a32, b32, c32) silk_ADD32((a32),((b32) >> 16) * ((c32) >> 16)) |
michael@0 | 410 | |
michael@0 | 411 | #define silk_SMLALBB(a64, b16, c16) silk_ADD64((a64),(opus_int64)((opus_int32)(b16) * (opus_int32)(c16))) |
michael@0 | 412 | |
michael@0 | 413 | /* (a32 * b32) */ |
michael@0 | 414 | #define silk_SMULL(a32, b32) ((opus_int64)(a32) * /*(opus_int64)*/(b32)) |
michael@0 | 415 | |
michael@0 | 416 | /* Adds two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour |
michael@0 | 417 | (just standard two's complement implementation-specific behaviour) */ |
michael@0 | 418 | #define silk_ADD32_ovflw(a, b) ((opus_int32)((opus_uint32)(a) + (opus_uint32)(b))) |
michael@0 | 419 | /* Subtracts two signed 32-bit values in a way that can overflow, while not relying on undefined behaviour |
michael@0 | 420 | (just standard two's complement implementation-specific behaviour) */ |
michael@0 | 421 | #define silk_SUB32_ovflw(a, b) ((opus_int32)((opus_uint32)(a) - (opus_uint32)(b))) |
michael@0 | 422 | |
michael@0 | 423 | /* Multiply-accumulate macros that allow overflow in the addition (ie, no asserts in debug mode) */ |
michael@0 | 424 | #define silk_MLA_ovflw(a32, b32, c32) silk_ADD32_ovflw((a32), (opus_uint32)(b32) * (opus_uint32)(c32)) |
michael@0 | 425 | #define silk_SMLABB_ovflw(a32, b32, c32) (silk_ADD32_ovflw((a32) , ((opus_int32)((opus_int16)(b32))) * (opus_int32)((opus_int16)(c32)))) |
michael@0 | 426 | |
michael@0 | 427 | #define silk_DIV32_16(a32, b16) ((opus_int32)((a32) / (b16))) |
michael@0 | 428 | #define silk_DIV32(a32, b32) ((opus_int32)((a32) / (b32))) |
michael@0 | 429 | |
michael@0 | 430 | /* These macros enable checking for overflow in silk_API_Debug.h */ |
michael@0 | 431 | #define silk_ADD16(a, b) ((a) + (b)) |
michael@0 | 432 | #define silk_ADD32(a, b) ((a) + (b)) |
michael@0 | 433 | #define silk_ADD64(a, b) ((a) + (b)) |
michael@0 | 434 | |
michael@0 | 435 | #define silk_SUB16(a, b) ((a) - (b)) |
michael@0 | 436 | #define silk_SUB32(a, b) ((a) - (b)) |
michael@0 | 437 | #define silk_SUB64(a, b) ((a) - (b)) |
michael@0 | 438 | |
michael@0 | 439 | #define silk_SAT8(a) ((a) > silk_int8_MAX ? silk_int8_MAX : \ |
michael@0 | 440 | ((a) < silk_int8_MIN ? silk_int8_MIN : (a))) |
michael@0 | 441 | #define silk_SAT16(a) ((a) > silk_int16_MAX ? silk_int16_MAX : \ |
michael@0 | 442 | ((a) < silk_int16_MIN ? silk_int16_MIN : (a))) |
michael@0 | 443 | #define silk_SAT32(a) ((a) > silk_int32_MAX ? silk_int32_MAX : \ |
michael@0 | 444 | ((a) < silk_int32_MIN ? silk_int32_MIN : (a))) |
michael@0 | 445 | |
michael@0 | 446 | #define silk_CHECK_FIT8(a) (a) |
michael@0 | 447 | #define silk_CHECK_FIT16(a) (a) |
michael@0 | 448 | #define silk_CHECK_FIT32(a) (a) |
michael@0 | 449 | |
michael@0 | 450 | #define silk_ADD_SAT16(a, b) (opus_int16)silk_SAT16( silk_ADD32( (opus_int32)(a), (b) ) ) |
michael@0 | 451 | #define silk_ADD_SAT64(a, b) ((((a) + (b)) & 0x8000000000000000LL) == 0 ? \ |
michael@0 | 452 | ((((a) & (b)) & 0x8000000000000000LL) != 0 ? silk_int64_MIN : (a)+(b)) : \ |
michael@0 | 453 | ((((a) | (b)) & 0x8000000000000000LL) == 0 ? silk_int64_MAX : (a)+(b)) ) |
michael@0 | 454 | |
michael@0 | 455 | #define silk_SUB_SAT16(a, b) (opus_int16)silk_SAT16( silk_SUB32( (opus_int32)(a), (b) ) ) |
michael@0 | 456 | #define silk_SUB_SAT64(a, b) ((((a)-(b)) & 0x8000000000000000LL) == 0 ? \ |
michael@0 | 457 | (( (a) & ((b)^0x8000000000000000LL) & 0x8000000000000000LL) ? silk_int64_MIN : (a)-(b)) : \ |
michael@0 | 458 | ((((a)^0x8000000000000000LL) & (b) & 0x8000000000000000LL) ? silk_int64_MAX : (a)-(b)) ) |
michael@0 | 459 | |
michael@0 | 460 | /* Saturation for positive input values */ |
michael@0 | 461 | #define silk_POS_SAT32(a) ((a) > silk_int32_MAX ? silk_int32_MAX : (a)) |
michael@0 | 462 | |
michael@0 | 463 | /* Add with saturation for positive input values */ |
michael@0 | 464 | #define silk_ADD_POS_SAT8(a, b) ((((a)+(b)) & 0x80) ? silk_int8_MAX : ((a)+(b))) |
michael@0 | 465 | #define silk_ADD_POS_SAT16(a, b) ((((a)+(b)) & 0x8000) ? silk_int16_MAX : ((a)+(b))) |
michael@0 | 466 | #define silk_ADD_POS_SAT32(a, b) ((((a)+(b)) & 0x80000000) ? silk_int32_MAX : ((a)+(b))) |
michael@0 | 467 | #define silk_ADD_POS_SAT64(a, b) ((((a)+(b)) & 0x8000000000000000LL) ? silk_int64_MAX : ((a)+(b))) |
michael@0 | 468 | |
michael@0 | 469 | #define silk_LSHIFT8(a, shift) ((opus_int8)((opus_uint8)(a)<<(shift))) /* shift >= 0, shift < 8 */ |
michael@0 | 470 | #define silk_LSHIFT16(a, shift) ((opus_int16)((opus_uint16)(a)<<(shift))) /* shift >= 0, shift < 16 */ |
michael@0 | 471 | #define silk_LSHIFT32(a, shift) ((opus_int32)((opus_uint32)(a)<<(shift))) /* shift >= 0, shift < 32 */ |
michael@0 | 472 | #define silk_LSHIFT64(a, shift) ((opus_int64)((opus_uint64)(a)<<(shift))) /* shift >= 0, shift < 64 */ |
michael@0 | 473 | #define silk_LSHIFT(a, shift) silk_LSHIFT32(a, shift) /* shift >= 0, shift < 32 */ |
michael@0 | 474 | |
michael@0 | 475 | #define silk_RSHIFT8(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 8 */ |
michael@0 | 476 | #define silk_RSHIFT16(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 16 */ |
michael@0 | 477 | #define silk_RSHIFT32(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 32 */ |
michael@0 | 478 | #define silk_RSHIFT64(a, shift) ((a)>>(shift)) /* shift >= 0, shift < 64 */ |
michael@0 | 479 | #define silk_RSHIFT(a, shift) silk_RSHIFT32(a, shift) /* shift >= 0, shift < 32 */ |
michael@0 | 480 | |
michael@0 | 481 | /* saturates before shifting */ |
michael@0 | 482 | #define silk_LSHIFT_SAT32(a, shift) (silk_LSHIFT32( silk_LIMIT( (a), silk_RSHIFT32( silk_int32_MIN, (shift) ), \ |
michael@0 | 483 | silk_RSHIFT32( silk_int32_MAX, (shift) ) ), (shift) )) |
michael@0 | 484 | |
michael@0 | 485 | #define silk_LSHIFT_ovflw(a, shift) ((opus_int32)((opus_uint32)(a) << (shift))) /* shift >= 0, allowed to overflow */ |
michael@0 | 486 | #define silk_LSHIFT_uint(a, shift) ((a) << (shift)) /* shift >= 0 */ |
michael@0 | 487 | #define silk_RSHIFT_uint(a, shift) ((a) >> (shift)) /* shift >= 0 */ |
michael@0 | 488 | |
michael@0 | 489 | #define silk_ADD_LSHIFT(a, b, shift) ((a) + silk_LSHIFT((b), (shift))) /* shift >= 0 */ |
michael@0 | 490 | #define silk_ADD_LSHIFT32(a, b, shift) silk_ADD32((a), silk_LSHIFT32((b), (shift))) /* shift >= 0 */ |
michael@0 | 491 | #define silk_ADD_LSHIFT_uint(a, b, shift) ((a) + silk_LSHIFT_uint((b), (shift))) /* shift >= 0 */ |
michael@0 | 492 | #define silk_ADD_RSHIFT(a, b, shift) ((a) + silk_RSHIFT((b), (shift))) /* shift >= 0 */ |
michael@0 | 493 | #define silk_ADD_RSHIFT32(a, b, shift) silk_ADD32((a), silk_RSHIFT32((b), (shift))) /* shift >= 0 */ |
michael@0 | 494 | #define silk_ADD_RSHIFT_uint(a, b, shift) ((a) + silk_RSHIFT_uint((b), (shift))) /* shift >= 0 */ |
michael@0 | 495 | #define silk_SUB_LSHIFT32(a, b, shift) silk_SUB32((a), silk_LSHIFT32((b), (shift))) /* shift >= 0 */ |
michael@0 | 496 | #define silk_SUB_RSHIFT32(a, b, shift) silk_SUB32((a), silk_RSHIFT32((b), (shift))) /* shift >= 0 */ |
michael@0 | 497 | |
michael@0 | 498 | /* Requires that shift > 0 */ |
michael@0 | 499 | #define silk_RSHIFT_ROUND(a, shift) ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1) |
michael@0 | 500 | #define silk_RSHIFT_ROUND64(a, shift) ((shift) == 1 ? ((a) >> 1) + ((a) & 1) : (((a) >> ((shift) - 1)) + 1) >> 1) |
michael@0 | 501 | |
michael@0 | 502 | /* Number of rightshift required to fit the multiplication */ |
michael@0 | 503 | #define silk_NSHIFT_MUL_32_32(a, b) ( -(31- (32-silk_CLZ32(silk_abs(a)) + (32-silk_CLZ32(silk_abs(b))))) ) |
michael@0 | 504 | #define silk_NSHIFT_MUL_16_16(a, b) ( -(15- (16-silk_CLZ16(silk_abs(a)) + (16-silk_CLZ16(silk_abs(b))))) ) |
michael@0 | 505 | |
michael@0 | 506 | |
michael@0 | 507 | #define silk_min(a, b) (((a) < (b)) ? (a) : (b)) |
michael@0 | 508 | #define silk_max(a, b) (((a) > (b)) ? (a) : (b)) |
michael@0 | 509 | |
michael@0 | 510 | /* Macro to convert floating-point constants to fixed-point */ |
michael@0 | 511 | #define SILK_FIX_CONST( C, Q ) ((opus_int32)((C) * ((opus_int64)1 << (Q)) + 0.5)) |
michael@0 | 512 | |
michael@0 | 513 | /* silk_min() versions with typed parameters and return value: arguments are converted at the call and each is evaluated once */ |
michael@0 | 514 | static OPUS_INLINE opus_int silk_min_int(opus_int a, opus_int b) |
michael@0 | 515 | { |
michael@0 | 516 | return (((a) < (b)) ? (a) : (b)); /* b is returned when a == b */ |
michael@0 | 517 | } |
michael@0 | 518 | static OPUS_INLINE opus_int16 silk_min_16(opus_int16 a, opus_int16 b) /* smaller of a and b, as opus_int16 */ |
michael@0 | 519 | { |
michael@0 | 520 | return (((a) < (b)) ? (a) : (b)); /* b is returned when a == b */ |
michael@0 | 521 | } |
michael@0 | 522 | static OPUS_INLINE opus_int32 silk_min_32(opus_int32 a, opus_int32 b) /* smaller of a and b, as opus_int32 */ |
michael@0 | 523 | { |
michael@0 | 524 | return (((a) < (b)) ? (a) : (b)); /* b is returned when a == b */ |
michael@0 | 525 | } |
michael@0 | 526 | static OPUS_INLINE opus_int64 silk_min_64(opus_int64 a, opus_int64 b) /* smaller of a and b, as opus_int64 */ |
michael@0 | 527 | { |
michael@0 | 528 | return (((a) < (b)) ? (a) : (b)); /* b is returned when a == b */ |
michael@0 | 529 | } |
michael@0 | 530 | |
michael@0 | 531 | /* silk_max() versions with typed parameters and return value: arguments are converted at the call and each is evaluated once */ |
michael@0 | 532 | static OPUS_INLINE opus_int silk_max_int(opus_int a, opus_int b) |
michael@0 | 533 | { |
michael@0 | 534 | return (((a) > (b)) ? (a) : (b)); /* b is returned when a == b */ |
michael@0 | 535 | } |
michael@0 | 536 | static OPUS_INLINE opus_int16 silk_max_16(opus_int16 a, opus_int16 b) /* larger of a and b, as opus_int16 */ |
michael@0 | 537 | { |
michael@0 | 538 | return (((a) > (b)) ? (a) : (b)); /* b is returned when a == b */ |
michael@0 | 539 | } |
michael@0 | 540 | static OPUS_INLINE opus_int32 silk_max_32(opus_int32 a, opus_int32 b) /* larger of a and b, as opus_int32 */ |
michael@0 | 541 | { |
michael@0 | 542 | return (((a) > (b)) ? (a) : (b)); /* b is returned when a == b */ |
michael@0 | 543 | } |
michael@0 | 544 | static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b) /* larger of a and b, as opus_int64 */ |
michael@0 | 545 | { |
michael@0 | 546 | return (((a) > (b)) ? (a) : (b)); /* b is returned when a == b */ |
michael@0 | 547 | } |
michael@0 | 548 | |
michael@0 | 549 | #define silk_LIMIT( a, limit1, limit2) ((limit1) > (limit2) ? ((a) > (limit1) ? (limit1) : ((a) < (limit2) ? (limit2) : (a))) \ |
michael@0 | 550 | : ((a) > (limit2) ? (limit2) : ((a) < (limit1) ? (limit1) : (a)))) /* clamps a to the range spanned by limit1 and limit2, which may be given in either order; arguments are evaluated more than once */ |
michael@0 | 551 | |
michael@0 | 552 | #define silk_LIMIT_int silk_LIMIT /* plain alias of silk_LIMIT; no type-specific conversion is added */ |
michael@0 | 553 | #define silk_LIMIT_16 silk_LIMIT /* plain alias of silk_LIMIT; no type-specific conversion is added */ |
michael@0 | 554 | #define silk_LIMIT_32 silk_LIMIT /* plain alias of silk_LIMIT; no type-specific conversion is added */ |
michael@0 | 555 | |
michael@0 | 556 | #define silk_abs(a) (((a) > 0) ? (a) : -(a)) /* Caution: the result is wrong when the input equals silk_intXX_MIN; a is evaluated twice */ |
michael@0 | 557 | #define silk_abs_int(a) (((a) ^ ((a) >> (8 * sizeof(a) - 1))) - ((a) >> (8 * sizeof(a) - 1))) /* branch-free form: XOR with the sign-bit shift, then subtract it; assumes >> on a negative value is an arithmetic shift */ |
michael@0 | 558 | #define silk_abs_int32(a) (((a) ^ ((a) >> 31)) - ((a) >> 31)) /* same branch-free form with a fixed shift of 31; assumes an arithmetic right shift */ |
michael@0 | 559 | #define silk_abs_int64(a) (((a) > 0) ? (a) : -(a)) /* same expression as silk_abs, so the same caveat applies at the minimum value */ |
michael@0 | 560 | |
michael@0 | 561 | #define silk_sign(a) ((a) > 0 ? 1 : ( (a) < 0 ? -1 : 0 )) /* 1 for positive a, -1 for negative a, 0 otherwise; a may be evaluated twice */ |
michael@0 | 562 | |
michael@0 | 563 | /* PSEUDO-RANDOM GENERATOR */ |
michael@0 | 564 | /* Store the result and pass it back in as the seed for the next call (also in between */ |
michael@0 | 565 | /* frames); otherwise the output will not be random at all. When only some of the */ |
michael@0 | 566 | /* bits are needed, take the most significant bits by right-shifting. */ |
michael@0 | 567 | #define silk_RAND(seed) (silk_MLA_ovflw(907633515, (seed), 196314165)) /* NOTE(review): presumably 907633515 + seed * 196314165 with wrap-around on overflow; confirm against silk_MLA_ovflw */ |
michael@0 | 568 | |
michael@0 | 569 | /* Add some multiplication functions that can be easily mapped to ARM. */ |
michael@0 | 570 | |
michael@0 | 571 | /* silk_SMMUL: Signed top word multiply. |
michael@0 | 572 | ARMv6 2 instruction cycles. |
michael@0 | 573 | ARMv3M+ 3 instruction cycles. Use SMULL and ignore the LSB registers (except xM). */ |
michael@0 | 574 | /*#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT(silk_SMLAL(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)), 16)*/ |
michael@0 | 575 | /* The following seems faster on x86; the variant above is kept for reference only. */ |
michael@0 | 576 | #define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32) /* silk_SMULL result passed through silk_RSHIFT64 by 32, then cast to opus_int32 */ |
michael@0 | 577 | |
michael@0 | 578 | #include "Inlines.h" |
michael@0 | 579 | #include "MacroCount.h" |
michael@0 | 580 | #include "MacroDebug.h" |
michael@0 | 581 | |
michael@0 | 582 | #ifdef OPUS_ARM_INLINE_ASM |
michael@0 | 583 | #include "arm/SigProc_FIX_armv4.h" |
michael@0 | 584 | #endif |
michael@0 | 585 | |
michael@0 | 586 | #ifdef OPUS_ARM_INLINE_EDSP |
michael@0 | 587 | #include "arm/SigProc_FIX_armv5e.h" |
michael@0 | 588 | #endif |
michael@0 | 589 | |
michael@0 | 590 | #ifdef __cplusplus |
michael@0 | 591 | } |
michael@0 | 592 | #endif |
michael@0 | 593 | |
michael@0 | 594 | #endif /* SILK_SIGPROC_FIX_H */ |