1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libopus/silk/fixed/solve_LS_FIX.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,249 @@ 1.4 +/*********************************************************************** 1.5 +Copyright (c) 2006-2011, Skype Limited. All rights reserved. 1.6 +Redistribution and use in source and binary forms, with or without 1.7 +modification, are permitted provided that the following conditions 1.8 +are met: 1.9 +- Redistributions of source code must retain the above copyright notice, 1.10 +this list of conditions and the following disclaimer. 1.11 +- Redistributions in binary form must reproduce the above copyright 1.12 +notice, this list of conditions and the following disclaimer in the 1.13 +documentation and/or other materials provided with the distribution. 1.14 +- Neither the name of Internet Society, IETF or IETF Trust, nor the 1.15 +names of specific contributors, may be used to endorse or promote 1.16 +products derived from this software without specific prior written 1.17 +permission. 1.18 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 1.19 +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1.20 +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1.21 +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 1.22 +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 1.23 +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 1.24 +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 1.25 +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 1.26 +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 1.27 +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 1.28 +POSSIBILITY OF SUCH DAMAGE. 1.29 +***********************************************************************/ 1.30 + 1.31 +#ifdef HAVE_CONFIG_H 1.32 +#include "config.h" 1.33 +#endif 1.34 + 1.35 +#include "main_FIX.h" 1.36 +#include "stack_alloc.h" 1.37 +#include "tuning_parameters.h" 1.38 + 1.39 +/*****************************/ 1.40 +/* Internal function headers */ 1.41 +/*****************************/ 1.42 + 1.43 +typedef struct { 1.44 + opus_int32 Q36_part; 1.45 + opus_int32 Q48_part; 1.46 +} inv_D_t; 1.47 + 1.48 +/* Factorize square matrix A into LDL form */ 1.49 +static OPUS_INLINE void silk_LDL_factorize_FIX( 1.50 + opus_int32 *A, /* I/O Pointer to Symetric Square Matrix */ 1.51 + opus_int M, /* I Size of Matrix */ 1.52 + opus_int32 *L_Q16, /* I/O Pointer to Square Upper triangular Matrix */ 1.53 + inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */ 1.54 +); 1.55 + 1.56 +/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */ 1.57 +static OPUS_INLINE void silk_LS_SolveFirst_FIX( 1.58 + const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ 1.59 + opus_int M, /* I Dim of Matrix equation */ 1.60 + const opus_int32 *b, /* I b Vector */ 1.61 + opus_int32 *x_Q16 /* O x Vector */ 1.62 +); 1.63 + 1.64 +/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */ 1.65 +static OPUS_INLINE void silk_LS_SolveLast_FIX( 1.66 + const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ 1.67 + const opus_int M, /* I Dim of Matrix equation */ 1.68 + const opus_int32 *b, /* I b Vector */ 1.69 + opus_int32 *x_Q16 /* O x Vector */ 1.70 +); 1.71 + 1.72 +static OPUS_INLINE void silk_LS_divide_Q16_FIX( 1.73 + opus_int32 T[], /* I/O Numenator vector */ 1.74 + inv_D_t *inv_D, /* I 1 / D vector */ 1.75 + opus_int M /* I dimension */ 1.76 +); 1.77 + 1.78 +/* Solves Ax = b, assuming A is symmetric */ 1.79 +void silk_solve_LDL_FIX( 1.80 + opus_int32 *A, /* I Pointer to symetric square matrix A */ 1.81 + opus_int M, /* I Size of matrix */ 1.82 + const opus_int32 *b, /* I Pointer to b vector */ 1.83 + opus_int32 *x_Q16 /* O Pointer to x solution vector */ 1.84 +) 1.85 +{ 1.86 + VARDECL( opus_int32, L_Q16 ); 1.87 + opus_int32 Y[ MAX_MATRIX_SIZE ]; 1.88 + inv_D_t inv_D[ MAX_MATRIX_SIZE ]; 1.89 + SAVE_STACK; 1.90 + 1.91 + silk_assert( M <= MAX_MATRIX_SIZE ); 1.92 + ALLOC( L_Q16, M * M, opus_int32 ); 1.93 + 1.94 + /*************************************************** 1.95 + Factorize A by LDL such that A = L*D*L', 1.96 + where L is lower triangular with ones on diagonal 1.97 + ****************************************************/ 1.98 + silk_LDL_factorize_FIX( A, M, L_Q16, inv_D ); 1.99 + 1.100 + /**************************************************** 1.101 + * substitute D*L'*x = Y. ie: 1.102 + L*D*L'*x = b => L*Y = b <=> Y = inv(L)*b 1.103 + ******************************************************/ 1.104 + silk_LS_SolveFirst_FIX( L_Q16, M, b, Y ); 1.105 + 1.106 + /**************************************************** 1.107 + D*L'*x = Y <=> L'*x = inv(D)*Y, because D is 1.108 + diagonal just multiply with 1/d_i 1.109 + ****************************************************/ 1.110 + silk_LS_divide_Q16_FIX( Y, inv_D, M ); 1.111 + 1.112 + /**************************************************** 1.113 + x = inv(L') * inv(D) * Y 1.114 + *****************************************************/ 1.115 + silk_LS_SolveLast_FIX( L_Q16, M, Y, x_Q16 ); 1.116 + RESTORE_STACK; 1.117 +} 1.118 + 1.119 +static OPUS_INLINE void silk_LDL_factorize_FIX( 1.120 + opus_int32 *A, /* I/O Pointer to Symetric Square Matrix */ 1.121 + opus_int M, /* I Size of Matrix */ 1.122 + opus_int32 *L_Q16, /* I/O Pointer to Square Upper triangular Matrix */ 1.123 + inv_D_t *inv_D /* I/O Pointer to vector holding inverted diagonal elements of D */ 1.124 +) 1.125 +{ 1.126 + opus_int i, j, k, status, loop_count; 1.127 + const opus_int32 *ptr1, *ptr2; 1.128 + opus_int32 diag_min_value, tmp_32, err; 1.129 + opus_int32 v_Q0[ MAX_MATRIX_SIZE ], D_Q0[ MAX_MATRIX_SIZE ]; 1.130 + opus_int32 one_div_diag_Q36, one_div_diag_Q40, one_div_diag_Q48; 1.131 + 1.132 + silk_assert( M <= MAX_MATRIX_SIZE ); 1.133 + 1.134 + status = 1; 1.135 + diag_min_value = silk_max_32( silk_SMMUL( silk_ADD_SAT32( A[ 0 ], A[ silk_SMULBB( M, M ) - 1 ] ), SILK_FIX_CONST( FIND_LTP_COND_FAC, 31 ) ), 1 << 9 ); 1.136 + for( loop_count = 0; loop_count < M && status == 1; loop_count++ ) { 1.137 + status = 0; 1.138 + for( j = 0; j < M; j++ ) { 1.139 + ptr1 = matrix_adr( L_Q16, j, 0, M ); 1.140 + tmp_32 = 0; 1.141 + for( i = 0; i < j; i++ ) { 1.142 + v_Q0[ i ] = silk_SMULWW( D_Q0[ i ], ptr1[ i ] ); /* Q0 */ 1.143 + tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ i ], ptr1[ i ] ); /* Q0 */ 1.144 + } 1.145 + tmp_32 = silk_SUB32( matrix_ptr( A, j, j, M ), tmp_32 ); 1.146 + 1.147 + if( tmp_32 < diag_min_value ) { 1.148 + tmp_32 = silk_SUB32( silk_SMULBB( loop_count + 1, diag_min_value ), tmp_32 ); 1.149 + /* Matrix not positive semi-definite, or ill conditioned */ 1.150 + for( i = 0; i < M; i++ ) { 1.151 + matrix_ptr( A, i, i, M ) = silk_ADD32( matrix_ptr( A, i, i, M ), tmp_32 ); 1.152 + } 1.153 + status = 1; 1.154 + break; 1.155 + } 1.156 + D_Q0[ j ] = tmp_32; /* always < max(Correlation) */ 1.157 + 1.158 + /* two-step division */ 1.159 + one_div_diag_Q36 = silk_INVERSE32_varQ( tmp_32, 36 ); /* Q36 */ 1.160 + one_div_diag_Q40 = silk_LSHIFT( one_div_diag_Q36, 4 ); /* Q40 */ 1.161 + err = silk_SUB32( (opus_int32)1 << 24, silk_SMULWW( tmp_32, one_div_diag_Q40 ) ); /* Q24 */ 1.162 + one_div_diag_Q48 = silk_SMULWW( err, one_div_diag_Q40 ); /* Q48 */ 1.163 + 1.164 + /* Save 1/Ds */ 1.165 + inv_D[ j ].Q36_part = one_div_diag_Q36; 1.166 + inv_D[ j ].Q48_part = one_div_diag_Q48; 1.167 + 1.168 + matrix_ptr( L_Q16, j, j, M ) = 65536; /* 1.0 in Q16 */ 1.169 + ptr1 = matrix_adr( A, j, 0, M ); 1.170 + ptr2 = matrix_adr( L_Q16, j + 1, 0, M ); 1.171 + for( i = j + 1; i < M; i++ ) { 1.172 + tmp_32 = 0; 1.173 + for( k = 0; k < j; k++ ) { 1.174 + tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ k ], ptr2[ k ] ); /* Q0 */ 1.175 + } 1.176 + tmp_32 = silk_SUB32( ptr1[ i ], tmp_32 ); /* always < max(Correlation) */ 1.177 + 1.178 + /* tmp_32 / D_Q0[j] : Divide to Q16 */ 1.179 + matrix_ptr( L_Q16, i, j, M ) = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), 1.180 + silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) ); 1.181 + 1.182 + /* go to next column */ 1.183 + ptr2 += M; 1.184 + } 1.185 + } 1.186 + } 1.187 + 1.188 + silk_assert( status == 0 ); 1.189 +} 1.190 + 1.191 +static OPUS_INLINE void silk_LS_divide_Q16_FIX( 1.192 + opus_int32 T[], /* I/O Numenator vector */ 1.193 + inv_D_t *inv_D, /* I 1 / D vector */ 1.194 + opus_int M /* I dimension */ 1.195 +) 1.196 +{ 1.197 + opus_int i; 1.198 + opus_int32 tmp_32; 1.199 + opus_int32 one_div_diag_Q36, one_div_diag_Q48; 1.200 + 1.201 + for( i = 0; i < M; i++ ) { 1.202 + one_div_diag_Q36 = inv_D[ i ].Q36_part; 1.203 + one_div_diag_Q48 = inv_D[ i ].Q48_part; 1.204 + 1.205 + tmp_32 = T[ i ]; 1.206 + T[ i ] = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) ); 1.207 + } 1.208 +} 1.209 + 1.210 +/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */ 1.211 +static OPUS_INLINE void silk_LS_SolveFirst_FIX( 1.212 + const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ 1.213 + opus_int M, /* I Dim of Matrix equation */ 1.214 + const opus_int32 *b, /* I b Vector */ 1.215 + opus_int32 *x_Q16 /* O x Vector */ 1.216 +) 1.217 +{ 1.218 + opus_int i, j; 1.219 + const opus_int32 *ptr32; 1.220 + opus_int32 tmp_32; 1.221 + 1.222 + for( i = 0; i < M; i++ ) { 1.223 + ptr32 = matrix_adr( L_Q16, i, 0, M ); 1.224 + tmp_32 = 0; 1.225 + for( j = 0; j < i; j++ ) { 1.226 + tmp_32 = silk_SMLAWW( tmp_32, ptr32[ j ], x_Q16[ j ] ); 1.227 + } 1.228 + x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 ); 1.229 + } 1.230 +} 1.231 + 1.232 +/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */ 1.233 +static OPUS_INLINE void silk_LS_SolveLast_FIX( 1.234 + const opus_int32 *L_Q16, /* I Pointer to Lower Triangular Matrix */ 1.235 + const opus_int M, /* I Dim of Matrix equation */ 1.236 + const opus_int32 *b, /* I b Vector */ 1.237 + opus_int32 *x_Q16 /* O x Vector */ 1.238 +) 1.239 +{ 1.240 + opus_int i, j; 1.241 + const opus_int32 *ptr32; 1.242 + opus_int32 tmp_32; 1.243 + 1.244 + for( i = M - 1; i >= 0; i-- ) { 1.245 + ptr32 = matrix_adr( L_Q16, 0, i, M ); 1.246 + tmp_32 = 0; 1.247 + for( j = M - 1; j > i; j-- ) { 1.248 + tmp_32 = silk_SMLAWW( tmp_32, ptr32[ silk_SMULBB( j, M ) ], x_Q16[ j ] ); 1.249 + } 1.250 + x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 ); 1.251 + } 1.252 +}