media/libopus/silk/fixed/solve_LS_FIX.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libopus/silk/fixed/solve_LS_FIX.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,249 @@
     1.4 +/***********************************************************************
     1.5 +Copyright (c) 2006-2011, Skype Limited. All rights reserved.
     1.6 +Redistribution and use in source and binary forms, with or without
     1.7 +modification, are permitted provided that the following conditions
     1.8 +are met:
     1.9 +- Redistributions of source code must retain the above copyright notice,
    1.10 +this list of conditions and the following disclaimer.
    1.11 +- Redistributions in binary form must reproduce the above copyright
    1.12 +notice, this list of conditions and the following disclaimer in the
    1.13 +documentation and/or other materials provided with the distribution.
    1.14 +- Neither the name of Internet Society, IETF or IETF Trust, nor the
    1.15 +names of specific contributors, may be used to endorse or promote
    1.16 +products derived from this software without specific prior written
    1.17 +permission.
    1.18 +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    1.19 +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    1.20 +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    1.21 +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
    1.22 +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    1.23 +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    1.24 +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    1.25 +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    1.26 +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    1.27 +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    1.28 +POSSIBILITY OF SUCH DAMAGE.
    1.29 +***********************************************************************/
    1.30 +
    1.31 +#ifdef HAVE_CONFIG_H
    1.32 +#include "config.h"
    1.33 +#endif
    1.34 +
    1.35 +#include "main_FIX.h"
    1.36 +#include "stack_alloc.h"
    1.37 +#include "tuning_parameters.h"
    1.38 +
    1.39 +/*****************************/
    1.40 +/* Internal function headers */
    1.41 +/*****************************/
    1.42 +
    1.43 +typedef struct {
    1.44 +    opus_int32 Q36_part;
    1.45 +    opus_int32 Q48_part;
    1.46 +} inv_D_t;
    1.47 +
    1.48 +/* Factorize square matrix A into LDL form */
    1.49 +static OPUS_INLINE void silk_LDL_factorize_FIX(
    1.50 +    opus_int32          *A,         /* I/O Pointer to Symetric Square Matrix                            */
    1.51 +    opus_int            M,          /* I   Size of Matrix                                               */
    1.52 +    opus_int32          *L_Q16,     /* I/O Pointer to Square Upper triangular Matrix                    */
    1.53 +    inv_D_t             *inv_D      /* I/O Pointer to vector holding inverted diagonal elements of D    */
    1.54 +);
    1.55 +
    1.56 +/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */
    1.57 +static OPUS_INLINE void silk_LS_SolveFirst_FIX(
    1.58 +    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
    1.59 +    opus_int            M,          /* I    Dim of Matrix equation                                      */
    1.60 +    const opus_int32    *b,         /* I    b Vector                                                    */
    1.61 +    opus_int32          *x_Q16      /* O    x Vector                                                    */
    1.62 +);
    1.63 +
    1.64 +/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */
    1.65 +static OPUS_INLINE void silk_LS_SolveLast_FIX(
    1.66 +    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
    1.67 +    const opus_int      M,          /* I    Dim of Matrix equation                                      */
    1.68 +    const opus_int32    *b,         /* I    b Vector                                                    */
    1.69 +    opus_int32          *x_Q16      /* O    x Vector                                                    */
    1.70 +);
    1.71 +
    1.72 +static OPUS_INLINE void silk_LS_divide_Q16_FIX(
    1.73 +    opus_int32          T[],        /* I/O  Numenator vector                                            */
    1.74 +    inv_D_t             *inv_D,     /* I    1 / D vector                                                */
    1.75 +    opus_int            M           /* I    dimension                                                   */
    1.76 +);
    1.77 +
    1.78 +/* Solves Ax = b, assuming A is symmetric */
    1.79 +void silk_solve_LDL_FIX(
    1.80 +    opus_int32                      *A,                                     /* I    Pointer to symetric square matrix A                                         */
    1.81 +    opus_int                        M,                                      /* I    Size of matrix                                                              */
    1.82 +    const opus_int32                *b,                                     /* I    Pointer to b vector                                                         */
    1.83 +    opus_int32                      *x_Q16                                  /* O    Pointer to x solution vector                                                */
    1.84 +)
    1.85 +{
    1.86 +    VARDECL( opus_int32, L_Q16 );
    1.87 +    opus_int32 Y[      MAX_MATRIX_SIZE ];
    1.88 +    inv_D_t   inv_D[  MAX_MATRIX_SIZE ];
    1.89 +    SAVE_STACK;
    1.90 +
    1.91 +    silk_assert( M <= MAX_MATRIX_SIZE );
    1.92 +    ALLOC( L_Q16, M * M, opus_int32 );
    1.93 +
    1.94 +    /***************************************************
    1.95 +    Factorize A by LDL such that A = L*D*L',
    1.96 +    where L is lower triangular with ones on diagonal
    1.97 +    ****************************************************/
    1.98 +    silk_LDL_factorize_FIX( A, M, L_Q16, inv_D );
    1.99 +
   1.100 +    /****************************************************
   1.101 +    * substitute D*L'*x = Y. ie:
   1.102 +    L*D*L'*x = b => L*Y = b <=> Y = inv(L)*b
   1.103 +    ******************************************************/
   1.104 +    silk_LS_SolveFirst_FIX( L_Q16, M, b, Y );
   1.105 +
   1.106 +    /****************************************************
   1.107 +    D*L'*x = Y <=> L'*x = inv(D)*Y, because D is
   1.108 +    diagonal just multiply with 1/d_i
   1.109 +    ****************************************************/
   1.110 +    silk_LS_divide_Q16_FIX( Y, inv_D, M );
   1.111 +
   1.112 +    /****************************************************
   1.113 +    x = inv(L') * inv(D) * Y
   1.114 +    *****************************************************/
   1.115 +    silk_LS_SolveLast_FIX( L_Q16, M, Y, x_Q16 );
   1.116 +    RESTORE_STACK;
   1.117 +}
   1.118 +
   1.119 +static OPUS_INLINE void silk_LDL_factorize_FIX(
   1.120 +    opus_int32          *A,         /* I/O Pointer to Symetric Square Matrix                            */
   1.121 +    opus_int            M,          /* I   Size of Matrix                                               */
   1.122 +    opus_int32          *L_Q16,     /* I/O Pointer to Square Upper triangular Matrix                    */
   1.123 +    inv_D_t             *inv_D      /* I/O Pointer to vector holding inverted diagonal elements of D    */
   1.124 +)
   1.125 +{
   1.126 +    opus_int   i, j, k, status, loop_count;
   1.127 +    const opus_int32 *ptr1, *ptr2;
   1.128 +    opus_int32 diag_min_value, tmp_32, err;
   1.129 +    opus_int32 v_Q0[ MAX_MATRIX_SIZE ], D_Q0[ MAX_MATRIX_SIZE ];
   1.130 +    opus_int32 one_div_diag_Q36, one_div_diag_Q40, one_div_diag_Q48;
   1.131 +
   1.132 +    silk_assert( M <= MAX_MATRIX_SIZE );
   1.133 +
   1.134 +    status = 1;
   1.135 +    diag_min_value = silk_max_32( silk_SMMUL( silk_ADD_SAT32( A[ 0 ], A[ silk_SMULBB( M, M ) - 1 ] ), SILK_FIX_CONST( FIND_LTP_COND_FAC, 31 ) ), 1 << 9 );
   1.136 +    for( loop_count = 0; loop_count < M && status == 1; loop_count++ ) {
   1.137 +        status = 0;
   1.138 +        for( j = 0; j < M; j++ ) {
   1.139 +            ptr1 = matrix_adr( L_Q16, j, 0, M );
   1.140 +            tmp_32 = 0;
   1.141 +            for( i = 0; i < j; i++ ) {
   1.142 +                v_Q0[ i ] = silk_SMULWW(         D_Q0[ i ], ptr1[ i ] ); /* Q0 */
   1.143 +                tmp_32    = silk_SMLAWW( tmp_32, v_Q0[ i ], ptr1[ i ] ); /* Q0 */
   1.144 +            }
   1.145 +            tmp_32 = silk_SUB32( matrix_ptr( A, j, j, M ), tmp_32 );
   1.146 +
   1.147 +            if( tmp_32 < diag_min_value ) {
   1.148 +                tmp_32 = silk_SUB32( silk_SMULBB( loop_count + 1, diag_min_value ), tmp_32 );
   1.149 +                /* Matrix not positive semi-definite, or ill conditioned */
   1.150 +                for( i = 0; i < M; i++ ) {
   1.151 +                    matrix_ptr( A, i, i, M ) = silk_ADD32( matrix_ptr( A, i, i, M ), tmp_32 );
   1.152 +                }
   1.153 +                status = 1;
   1.154 +                break;
   1.155 +            }
   1.156 +            D_Q0[ j ] = tmp_32;                         /* always < max(Correlation) */
   1.157 +
   1.158 +            /* two-step division */
   1.159 +            one_div_diag_Q36 = silk_INVERSE32_varQ( tmp_32, 36 );                    /* Q36 */
   1.160 +            one_div_diag_Q40 = silk_LSHIFT( one_div_diag_Q36, 4 );                   /* Q40 */
   1.161 +            err = silk_SUB32( (opus_int32)1 << 24, silk_SMULWW( tmp_32, one_div_diag_Q40 ) );     /* Q24 */
   1.162 +            one_div_diag_Q48 = silk_SMULWW( err, one_div_diag_Q40 );                 /* Q48 */
   1.163 +
   1.164 +            /* Save 1/Ds */
   1.165 +            inv_D[ j ].Q36_part = one_div_diag_Q36;
   1.166 +            inv_D[ j ].Q48_part = one_div_diag_Q48;
   1.167 +
   1.168 +            matrix_ptr( L_Q16, j, j, M ) = 65536; /* 1.0 in Q16 */
   1.169 +            ptr1 = matrix_adr( A, j, 0, M );
   1.170 +            ptr2 = matrix_adr( L_Q16, j + 1, 0, M );
   1.171 +            for( i = j + 1; i < M; i++ ) {
   1.172 +                tmp_32 = 0;
   1.173 +                for( k = 0; k < j; k++ ) {
   1.174 +                    tmp_32 = silk_SMLAWW( tmp_32, v_Q0[ k ], ptr2[ k ] ); /* Q0 */
   1.175 +                }
   1.176 +                tmp_32 = silk_SUB32( ptr1[ i ], tmp_32 ); /* always < max(Correlation) */
   1.177 +
   1.178 +                /* tmp_32 / D_Q0[j] : Divide to Q16 */
   1.179 +                matrix_ptr( L_Q16, i, j, M ) = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ),
   1.180 +                    silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) );
   1.181 +
   1.182 +                /* go to next column */
   1.183 +                ptr2 += M;
   1.184 +            }
   1.185 +        }
   1.186 +    }
   1.187 +
   1.188 +    silk_assert( status == 0 );
   1.189 +}
   1.190 +
   1.191 +static OPUS_INLINE void silk_LS_divide_Q16_FIX(
   1.192 +    opus_int32          T[],        /* I/O  Numenator vector                                            */
   1.193 +    inv_D_t             *inv_D,     /* I    1 / D vector                                                */
   1.194 +    opus_int            M           /* I    dimension                                                   */
   1.195 +)
   1.196 +{
   1.197 +    opus_int   i;
   1.198 +    opus_int32 tmp_32;
   1.199 +    opus_int32 one_div_diag_Q36, one_div_diag_Q48;
   1.200 +
   1.201 +    for( i = 0; i < M; i++ ) {
   1.202 +        one_div_diag_Q36 = inv_D[ i ].Q36_part;
   1.203 +        one_div_diag_Q48 = inv_D[ i ].Q48_part;
   1.204 +
   1.205 +        tmp_32 = T[ i ];
   1.206 +        T[ i ] = silk_ADD32( silk_SMMUL( tmp_32, one_div_diag_Q48 ), silk_RSHIFT( silk_SMULWW( tmp_32, one_div_diag_Q36 ), 4 ) );
   1.207 +    }
   1.208 +}
   1.209 +
   1.210 +/* Solve Lx = b, when L is lower triangular and has ones on the diagonal */
   1.211 +static OPUS_INLINE void silk_LS_SolveFirst_FIX(
   1.212 +    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
   1.213 +    opus_int            M,          /* I    Dim of Matrix equation                                      */
   1.214 +    const opus_int32    *b,         /* I    b Vector                                                    */
   1.215 +    opus_int32          *x_Q16      /* O    x Vector                                                    */
   1.216 +)
   1.217 +{
   1.218 +    opus_int i, j;
   1.219 +    const opus_int32 *ptr32;
   1.220 +    opus_int32 tmp_32;
   1.221 +
   1.222 +    for( i = 0; i < M; i++ ) {
   1.223 +        ptr32 = matrix_adr( L_Q16, i, 0, M );
   1.224 +        tmp_32 = 0;
   1.225 +        for( j = 0; j < i; j++ ) {
   1.226 +            tmp_32 = silk_SMLAWW( tmp_32, ptr32[ j ], x_Q16[ j ] );
   1.227 +        }
   1.228 +        x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 );
   1.229 +    }
   1.230 +}
   1.231 +
   1.232 +/* Solve L^t*x = b, where L is lower triangular with ones on the diagonal */
   1.233 +static OPUS_INLINE void silk_LS_SolveLast_FIX(
   1.234 +    const opus_int32    *L_Q16,     /* I    Pointer to Lower Triangular Matrix                          */
   1.235 +    const opus_int      M,          /* I    Dim of Matrix equation                                      */
   1.236 +    const opus_int32    *b,         /* I    b Vector                                                    */
   1.237 +    opus_int32          *x_Q16      /* O    x Vector                                                    */
   1.238 +)
   1.239 +{
   1.240 +    opus_int i, j;
   1.241 +    const opus_int32 *ptr32;
   1.242 +    opus_int32 tmp_32;
   1.243 +
   1.244 +    for( i = M - 1; i >= 0; i-- ) {
   1.245 +        ptr32 = matrix_adr( L_Q16, 0, i, M );
   1.246 +        tmp_32 = 0;
   1.247 +        for( j = M - 1; j > i; j-- ) {
   1.248 +            tmp_32 = silk_SMLAWW( tmp_32, ptr32[ silk_SMULBB( j, M ) ], x_Q16[ j ] );
   1.249 +        }
   1.250 +        x_Q16[ i ] = silk_SUB32( b[ i ], tmp_32 );
   1.251 +    }
   1.252 +}

mercurial