michael@0: /***********************************************************************
michael@0: Copyright (c) 2006-2011, Skype Limited. All rights reserved.
michael@0: Redistribution and use in source and binary forms, with or without
michael@0: modification, are permitted provided that the following conditions
michael@0: are met:
michael@0: - Redistributions of source code must retain the above copyright notice,
michael@0: this list of conditions and the following disclaimer.
michael@0: - Redistributions in binary form must reproduce the above copyright
michael@0: notice, this list of conditions and the following disclaimer in the
michael@0: documentation and/or other materials provided with the distribution.
michael@0: - Neither the name of Internet Society, IETF or IETF Trust, nor the
michael@0: names of specific contributors, may be used to endorse or promote
michael@0: products derived from this software without specific prior written
michael@0: permission.
michael@0: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
michael@0: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
michael@0: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
michael@0: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
michael@0: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
michael@0: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
michael@0: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
michael@0: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
michael@0: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
michael@0: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
michael@0: POSSIBILITY OF SUCH DAMAGE.
michael@0: ***********************************************************************/
michael@0: 
michael@0: #ifdef HAVE_CONFIG_H
michael@0: #include "config.h"
michael@0: #endif
michael@0: 
michael@0: #include "main_FIX.h"
michael@0: #include "stack_alloc.h"
michael@0: #include "tuning_parameters.h"
michael@0: 
michael@0: /*****************************/
michael@0: /* Internal function headers */
michael@0: /*****************************/
michael@0: 
michael@0: typedef struct {            /* Reciprocal of one diagonal element of D, kept as two fixed-point parts */
michael@0:     opus_int32 Q36_part;    /* First estimate of 1/D in Q36, as produced by silk_INVERSE32_varQ( D, 36 ) */
michael@0:     opus_int32 Q48_part;    /* Q48 refinement computed from the residual error of the first estimate */
michael@0: } inv_D_t;
michael@0: 
michael@0: /* Factorize the symmetric square matrix A into LDL form, i.e. A = L*D*L' */
michael@0: static OPUS_INLINE void silk_LDL_factorize_FIX(
michael@0:     opus_int32          *A,         /* I/O Pointer to Symmetric Square Matrix; its diagonal may be increased */
michael@0:     opus_int            M,          /* I   Size of Matrix                                               */
michael@0:     opus_int32          *L_Q16,     /* I/O Pointer to Square Matrix receiving the lower triangular L    */
michael@0:     inv_D_t             *inv_D      /* I/O Pointer to vector holding inverted diagonal elements of D    */
michael@0: );
michael@0: 
michael@0: /* Solve Lx = b by forward substitution, when L is lower triangular and has ones on the diagonal */
michael@0: static OPUS_INLINE void silk_LS_SolveFirst_FIX(
michael@0:     const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
michael@0:     opus_int            M,          /* I    Dim of Matrix equation                                      */
michael@0:     const opus_int32    *b,         /* I    b Vector                                                    */
michael@0:     opus_int32          *x_Q16      /* O    x Vector                                                    */
michael@0: );
michael@0: 
michael@0: /* Solve L^t*x = b by back substitution, where L is lower triangular with ones on the diagonal */
michael@0: static OPUS_INLINE void silk_LS_SolveLast_FIX(
michael@0:     const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
michael@0:     const opus_int      M,          /* I    Dim of Matrix equation                                      */
michael@0:     const opus_int32    *b,         /* I    b Vector                                                    */
michael@0:     opus_int32          *x_Q16      /* O    x Vector                                                    */
michael@0: );
michael@0: 
michael@0: static OPUS_INLINE void silk_LS_divide_Q16_FIX(
michael@0:     opus_int32          T[],        /* I/O  Numerator vector; T[ i ] is replaced by T[ i ] times inv_D[ i ] */
michael@0:     inv_D_t             *inv_D,     /* I    1 / D vector                                                */
michael@0:     opus_int            M           /* I    dimension                                                   */
michael@0: );
michael@0: 
michael@0: /* Solve A * x = b for x, where A is symmetric, by means of an LDL' factorization */
michael@0: void silk_solve_LDL_FIX(
michael@0:     opus_int32                      *A,                                     /* I    Pointer to symmetric square matrix A (the factorization may raise its diagonal) */
michael@0:     opus_int                        M,                                      /* I    Size of matrix                                                              */
michael@0:     const opus_int32                *b,                                     /* I    Pointer to b vector                                                         */
michael@0:     opus_int32                      *x_Q16                                  /* O    Pointer to x solution vector                                                */
michael@0: )
michael@0: {
michael@0:     VARDECL( opus_int32, L_fact_Q16 );
michael@0:     inv_D_t    D_recip[ MAX_MATRIX_SIZE ];
michael@0:     opus_int32 work[ MAX_MATRIX_SIZE ];
michael@0:     SAVE_STACK;
michael@0: 
michael@0:     silk_assert( M <= MAX_MATRIX_SIZE );
michael@0:     ALLOC( L_fact_Q16, M * M, opus_int32 );
michael@0: 
michael@0:     /* Step 1: factorize A = L*D*L', with L lower triangular and ones on its */
michael@0:     /* diagonal. D_recip receives the reciprocals of the elements of D.      */
michael@0:     silk_LDL_factorize_FIX( A, M, L_fact_Q16, D_recip );
michael@0: 
michael@0:     /* Step 2: with Y = D*L'*x the system reads L*Y = b, so Y = inv(L)*b.    */
michael@0:     silk_LS_SolveFirst_FIX( L_fact_Q16, M, b, work );
michael@0: 
michael@0:     /* Step 3: D is diagonal, hence L'*x = inv(D)*Y is obtained by scaling   */
michael@0:     /* every element of Y with the matching 1/d_i.                           */
michael@0:     silk_LS_divide_Q16_FIX( work, D_recip, M );
michael@0: 
michael@0:     /* Step 4: x = inv(L') * inv(D) * Y.                                     */
michael@0:     silk_LS_SolveLast_FIX( L_fact_Q16, M, work, x_Q16 );
michael@0:     RESTORE_STACK;
michael@0: }
michael@0: 
michael@0: static OPUS_INLINE void silk_LDL_factorize_FIX(
michael@0:     opus_int32          *A,         /* I/O Pointer to Symmetric Square Matrix; its diagonal is raised when a pivot is too small */
michael@0:     opus_int            M,          /* I   Size of Matrix                                               */
michael@0:     opus_int32          *L_Q16,     /* I/O Pointer to Square Matrix receiving the factor L in Q16       */
michael@0:     inv_D_t             *inv_D      /* I/O Pointer to vector holding inverted diagonal elements of D    */
michael@0: )
michael@0: {   /* Factorize A as L*D*L'. L_Q16 receives L (1.0 in Q16 on the diagonal) and inv_D
michael@0:        receives the two-part reciprocal of every diagonal element of D. When a
michael@0:        diagonal element of D comes out below diag_min_value, a constant is added to
michael@0:        the whole diagonal of A and the factorization is restarted; at most M passes
michael@0:        are made. */
michael@0:     opus_int   i, j, k, status, loop_count;
michael@0:     const opus_int32 *ptr1, *ptr2;
michael@0:     opus_int32 diag_min_value, tmp_32, err;
michael@0:     opus_int32 v_Q0[ MAX_MATRIX_SIZE ], D_Q0[ MAX_MATRIX_SIZE ];
michael@0:     opus_int32 one_div_diag_Q36, one_div_diag_Q40, one_div_diag_Q48;
michael@0: 
michael@0:     silk_assert( M <= MAX_MATRIX_SIZE );
michael@0: 
michael@0:     status = 1; /* 1: a (new) factorization pass is required, 0: the last pass ran to completion */
michael@0:     diag_min_value = silk_max_32( silk_SMMUL( silk_ADD_SAT32( A[ 0 ], A[ silk_SMULBB( M, M ) - 1 ] ), SILK_FIX_CONST( FIND_LTP_COND_FAC, 31 ) ), 1 << 9 ); /* smallest accepted diagonal value: derived from the first and last element of A and FIND_LTP_COND_FAC, never below 1 << 9 */
michael@0:     for( loop_count = 0; loop_count < M && status == 1; loop_count++ ) { /* one iteration per factorization pass */
michael@0:         status = 0;
michael@0:         for( j = 0; j < M; j++ ) {
michael@0:             ptr1 = matrix_adr( L_Q16, j, 0, M ); /* presumably the start of row j of L - confirm against the matrix_adr macro */
michael@0:             tmp_32 = 0;
michael@0:             for( i = 0; i < j; i++ ) { /* v[ i ] = D[ i ] * ptr1[ i ]; tmp_32 accumulates v[ i ] * ptr1[ i ] */
michael@0:                 v_Q0[ i ] = silk_SMULWW(         D_Q0[ i ], ptr1[ i ] ); /* Q0 */
michael@0:                 tmp_32    = silk_SMLAWW( tmp_32, v_Q0[ i ], ptr1[ i ] ); /* Q0 */
michael@0:             }
michael@0:             tmp_32 = silk_SUB32( matrix_ptr( A, j, j, M ), tmp_32 ); /* candidate for D[ j ] */
michael@0: 
michael@0:             if( tmp_32 < diag_min_value ) {
michael@0:                 tmp_32 = silk_SUB32( silk_SMULBB( loop_count + 1, diag_min_value ), tmp_32 ); /* offset for the diagonal of A; the target value depends on the pass number */
michael@0:                 /* Matrix not positive semi-definite, or ill conditioned */
michael@0:                 for( i = 0; i < M; i++ ) {
michael@0:                     matrix_ptr( A, i, i, M ) = silk_ADD32( matrix_ptr( A, i, i, M ), tmp_32 );
michael@0:                 }
michael@0:                 status = 1; /* request another pass on the modified A */
michael@0:                 break;      /* abandon the current pass */
michael@0:             }
michael@0:             D_Q0[ j ] = tmp_32;                         /* always < max(Correlation) */
michael@0: 
michael@0:             /* two-step division: a Q36 estimate of 1/D[ j ], then a Q48 correction computed
michael@0:                from how far D[ j ] times the estimate falls short of 1.0 (Q24) */
michael@0:             one_div_diag_Q36 = silk_INVERSE32_varQ( tmp_32, 36 );                    /* Q36 */
michael@0:             one_div_diag_Q40 = silk_LSHIFT( one_div_diag_Q36, 4 );                   /* Q40 */
michael@0:             err = silk_SUB32( (opus_int32)1 << 24, silk_SMULWW( tmp_32, one_div_diag_Q40 ) );     /* Q24 */
michael@0:             one_div_diag_Q48 = silk_SMULWW( err, one_div_diag_Q40 );                 /* Q48 */
michael@0: 
michael@0:             /* Save 1/Ds */
michael@0:             inv_D[ j ].Q36_part = one_div_diag_Q36;
michael@0:             inv_D[ j ].Q48_part = one_div_diag_Q48;
michael@0: 
michael@0:             matrix_ptr( L_Q16, j, j, M ) = 65536; /* 1.0 in Q16 */
michael@0:             ptr1 = matrix_adr( A, j, 0, M );
michael@0:             ptr2 = matrix_adr( L_Q16, j + 1, 0, M );
michael@0:             for( i = j + 1; i < M; i++ ) { /* compute the entries ( i, j ) of L for all i > j */
michael@0:                 tmp_32 = 0;
michael@0:                 for( k = 0; k < j; k++ ) {
michael@0:                     tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ k ], ptr2[ k ] ); /* Q0 */
michael@0:                 }
michael@0:                 tmp_32 = silk_SUB32( ptr1[ i ], tmp_32 ); /* always < max(Correlation) */
michael@0: 
michael@0:                 /* tmp_32 / D_Q0[j] : Divide to Q16 */
michael@0:                 matrix_ptr( L_Q16, i, j, M ) = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ),
michael@0:                     silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) );
michael@0: 
michael@0:                 /* advance ptr2 by M elements so that it matches the next value of i */
michael@0:                 ptr2 += M;
michael@0:             }
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     silk_assert( status == 0 ); /* the last pass is expected to have completed without a too-small diagonal value */
michael@0: }
michael@0: 
michael@0: /* Scale every element of T, in place, by the two-part reciprocal stored at the same index of inv_D */
michael@0: static OPUS_INLINE void silk_LS_divide_Q16_FIX(
michael@0:     opus_int32          T[],        /* I/O  Numerator vector, overwritten with the scaled values        */
michael@0:     inv_D_t             *inv_D,     /* I    1 / D vector                                                */
michael@0:     opus_int            M           /* I    dimension                                                   */
michael@0: )
michael@0: {
michael@0:     opus_int   idx;
michael@0:     opus_int32 numer, recip_Q36, recip_Q48;
michael@0:     opus_int32 fine_term, coarse_term;
michael@0: 
michael@0:     idx = 0;
michael@0:     while( idx < M ) {
michael@0:         numer     = T[ idx ];
michael@0:         recip_Q36 = inv_D[ idx ].Q36_part;
michael@0:         recip_Q48 = inv_D[ idx ].Q48_part;
michael@0: 
michael@0:         /* Contribution of the Q48 refinement, then of the Q36 estimate */
michael@0:         fine_term   = silk_SMMUL( numer, recip_Q48 );
michael@0:         coarse_term = silk_RSHIFT( silk_SMULWW( numer, recip_Q36 ), 4 );
michael@0: 
michael@0:         T[ idx ] = silk_ADD32( fine_term, coarse_term );
michael@0:         idx++;
michael@0:     }
michael@0: }
michael@0: 
michael@0: /* Forward substitution: solve L*x = b for a lower triangular L whose diagonal is all ones. */
michael@0: /* The diagonal of L_Q16 is never read; only the elements before it in each row are used.   */
michael@0: static OPUS_INLINE void silk_LS_SolveFirst_FIX(
michael@0:     const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
michael@0:     opus_int            M,          /* I    Dim of Matrix equation                                      */
michael@0:     const opus_int32    *b,         /* I    b Vector                                                    */
michael@0:     opus_int32          *x_Q16      /* O    x Vector                                                    */
michael@0: )
michael@0: {
michael@0:     opus_int         row, col;
michael@0:     opus_int32       acc;
michael@0:     const opus_int32 *L_row;
michael@0: 
michael@0:     row = 0;
michael@0:     while( row < M ) {
michael@0:         L_row = matrix_adr( L_Q16, row, 0, M );
michael@0: 
michael@0:         /* Accumulate the contribution of the unknowns that are already solved */
michael@0:         acc = 0;
michael@0:         col = 0;
michael@0:         while( col < row ) {
michael@0:             acc = silk_SMLAWW( acc, L_row[ col ], x_Q16[ col ] );
michael@0:             col++;
michael@0:         }
michael@0: 
michael@0:         x_Q16[ row ] = silk_SUB32( b[ row ], acc );
michael@0:         row++;
michael@0:     }
michael@0: }
michael@0: 
michael@0: /* Back substitution: solve L^t*x = b for a lower triangular L whose diagonal is all ones. */
michael@0: /* L is not transposed explicitly; its elements are read with a stride of M instead.       */
michael@0: static OPUS_INLINE void silk_LS_SolveLast_FIX(
michael@0:     const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
michael@0:     const opus_int      M,          /* I    Dim of Matrix equation                                      */
michael@0:     const opus_int32    *b,         /* I    b Vector                                                    */
michael@0:     opus_int32          *x_Q16      /* O    x Vector                                                    */
michael@0: )
michael@0: {
michael@0:     opus_int         row, k;
michael@0:     opus_int32       acc;
michael@0:     const opus_int32 *L_col;
michael@0: 
michael@0:     row = M - 1;
michael@0:     while( row >= 0 ) {
michael@0:         L_col = matrix_adr( L_Q16, 0, row, M );
michael@0: 
michael@0:         /* Accumulate the contribution of the unknowns solved so far (indices above row) */
michael@0:         acc = 0;
michael@0:         k = M - 1;
michael@0:         while( k > row ) {
michael@0:             acc = silk_SMLAWW( acc, L_col[ silk_SMULBB( k, M ) ], x_Q16[ k ] );
michael@0:             k--;
michael@0:         }
michael@0: 
michael@0:         x_Q16[ row ] = silk_SUB32( b[ row ], acc );
michael@0:         row--;
michael@0:     }
michael@0: }