--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/media/libtremor/lib/asm_arm.h	Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,329 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE.   *
+ *                                                                  *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002    *
+ * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+ function: arm7 and later wide math functions
+
+ ********************************************************************/
+
+#ifdef _ARM_ASSEM_
+
+#if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
+#define _V_WIDE_MATH
+
+/* (x*y)>>32: return the high word of the 64-bit signed product. */
+static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
+  int lo,hi;
+  asm volatile("smull\t%0, %1, %2, %3"
+               : "=&r"(lo),"=&r"(hi)
+               : "%r"(x),"r"(y)
+               : "cc");
+  return(hi);
+}
+
+/* (x*y)>>31: full-precision product of two Q31 fixed-point values. */
+static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
+  return MULT32(x,y)<<1;
+}
+
+/* (x*y)>>15, rounding on the last bit shifted out. */
+static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
+  int lo,hi;
+  asm volatile("smull %0, %1, %2, %3\n\t"
+               "movs %0, %0, lsr #15\n\t"
+               "adc %1, %0, %1, lsl #17\n\t"
+               : "=&r"(lo),"=&r"(hi)
+               : "%r"(x),"r"(y)
+               : "cc");
+  return(hi);
+}
+
+/* Compiler memory barrier: keeps the paired output stores below in order. */
+#define MB() asm volatile ("" : : : "memory")
+
+/* *x = (a*t + b*v) >> 32, *y = (b*t - a*v) >> 32 */
+static inline void XPROD32(ogg_int32_t a, ogg_int32_t b,
+                           ogg_int32_t t, ogg_int32_t v,
+                           ogg_int32_t *x, ogg_int32_t *y)
+{
+  int x1, y1, l;
+  asm( "smull %0, %1, %4, %6\n\t"
+       "smlal %0, %1, %5, %7\n\t"
+       "rsb %3, %4, #0\n\t"
+       "smull %0, %2, %5, %6\n\t"
+       "smlal %0, %2, %3, %7"
+       : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
+       : "3" (a), "r" (b), "r" (t), "r" (v)
+       : "cc" );
+  *x = x1;
+  MB();
+  *y = y1;
+}
+
+/* *x = (a*t + b*v) >> 31, *y = (b*t - a*v) >> 31 */
+static inline void XPROD31(ogg_int32_t a, ogg_int32_t b,
+                           ogg_int32_t t, ogg_int32_t v,
+                           ogg_int32_t *x, ogg_int32_t *y)
+{
+  int x1, y1, l;
+  asm( "smull %0, %1, %4, %6\n\t"
+       "smlal %0, %1, %5, %7\n\t"
+       "rsb %3, %4, #0\n\t"
+       "smull %0, %2, %5, %6\n\t"
+       "smlal %0, %2, %3, %7"
+       : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
+       : "3" (a), "r" (b), "r" (t), "r" (v)
+       : "cc" );
+  *x = x1 << 1;
+  MB();
+  *y = y1 << 1;
+}
+
+/* *x = (a*t - b*v) >> 31, *y = (b*t + a*v) >> 31 */
+static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
+                            ogg_int32_t t, ogg_int32_t v,
+                            ogg_int32_t *x, ogg_int32_t *y)
+{
+  int x1, y1, l;
+  asm( "rsb %2, %4, #0\n\t"
+       "smull %0, %1, %3, %5\n\t"
+       "smlal %0, %1, %2, %6\n\t"
+       "smull %0, %2, %4, %5\n\t"
+       "smlal %0, %2, %3, %6"
+       : "=&r" (l), "=&r" (x1), "=&r" (y1)
+       : "r" (a), "r" (b), "r" (t), "r" (v)
+       : "cc" );
+  *x = x1 << 1;
+  MB();
+  *y = y1 << 1;
+}
+
+#endif
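+
+/* Editor's note: a plain-C sketch of what the wide-math assembly above
+   computes, kept under #if 0 purely for reference; it is not part of the
+   upstream Tremor source. It assumes the 64-bit ogg_int64_t from
+   os_types.h, and matches the asm up to low-order rounding differences. */
+#if 0
+static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
+  return (ogg_int32_t)(((ogg_int64_t)x * y) >> 32);  /* high word */
+}
+static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
+  return (ogg_int32_t)(((ogg_int64_t)x * y) >> 31);  /* Q31 product */
+}
+static inline void XPROD31(ogg_int32_t a, ogg_int32_t b,
+                           ogg_int32_t t, ogg_int32_t v,
+                           ogg_int32_t *x, ogg_int32_t *y) {
+  *x = MULT31(a, t) + MULT31(b, v);
+  *y = MULT31(b, t) - MULT31(a, v);
+}
+static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
+                            ogg_int32_t t, ogg_int32_t v,
+                            ogg_int32_t *x, ogg_int32_t *y) {
+  *x = MULT31(a, t) - MULT31(b, v);
+  *y = MULT31(b, t) + MULT31(a, v);
+}
+#endif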
"orrpl %0, %0, #0xff\n" 1.116 + "adds %1, %0, #32768\n\t" 1.117 + "movmi %0, #0x8000" 1.118 + : "+r"(x),"=r"(tmp) 1.119 + : 1.120 + : "cc"); 1.121 + return(x); 1.122 +} 1.123 + 1.124 +#endif 1.125 + 1.126 +#ifndef _V_LSP_MATH_ASM 1.127 +#define _V_LSP_MATH_ASM 1.128 + 1.129 +static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip, 1.130 + ogg_int32_t *qexpp, 1.131 + ogg_int32_t *ilsp,ogg_int32_t wi, 1.132 + ogg_int32_t m){ 1.133 + 1.134 + ogg_uint32_t qi=*qip,pi=*pip; 1.135 + ogg_int32_t qexp=*qexpp; 1.136 + 1.137 + asm("mov r0,%3;" 1.138 + "movs r1,%5,asr#1;" 1.139 + "add r0,r0,r1,lsl#3;" 1.140 + "beq 2f;\n" 1.141 + "1:" 1.142 + 1.143 + "ldmdb r0!,{r1,r3};" 1.144 + "subs r1,r1,%4;" //ilsp[j]-wi 1.145 + "rsbmi r1,r1,#0;" //labs(ilsp[j]-wi) 1.146 + "umull %0,r2,r1,%0;" //qi*=labs(ilsp[j]-wi) 1.147 + 1.148 + "subs r1,r3,%4;" //ilsp[j+1]-wi 1.149 + "rsbmi r1,r1,#0;" //labs(ilsp[j+1]-wi) 1.150 + "umull %1,r3,r1,%1;" //pi*=labs(ilsp[j+1]-wi) 1.151 + 1.152 + "cmn r2,r3;" // shift down 16? 1.153 + "beq 0f;" 1.154 + "add %2,%2,#16;" 1.155 + "mov %0,%0,lsr #16;" 1.156 + "orr %0,%0,r2,lsl #16;" 1.157 + "mov %1,%1,lsr #16;" 1.158 + "orr %1,%1,r3,lsl #16;" 1.159 + "0:" 1.160 + "cmp r0,%3;\n" 1.161 + "bhi 1b;\n" 1.162 + 1.163 + "2:" 1.164 + // odd filter assymetry 1.165 + "ands r0,%5,#1;\n" 1.166 + "beq 3f;\n" 1.167 + "add r0,%3,%5,lsl#2;\n" 1.168 + 1.169 + "ldr r1,[r0,#-4];\n" 1.170 + "mov r0,#0x4000;\n" 1.171 + 1.172 + "subs r1,r1,%4;\n" //ilsp[j]-wi 1.173 + "rsbmi r1,r1,#0;\n" //labs(ilsp[j]-wi) 1.174 + "umull %0,r2,r1,%0;\n" //qi*=labs(ilsp[j]-wi) 1.175 + "umull %1,r3,r0,%1;\n" //pi*=labs(ilsp[j+1]-wi) 1.176 + 1.177 + "cmn r2,r3;\n" // shift down 16? 1.178 + "beq 3f;\n" 1.179 + "add %2,%2,#16;\n" 1.180 + "mov %0,%0,lsr #16;\n" 1.181 + "orr %0,%0,r2,lsl #16;\n" 1.182 + "mov %1,%1,lsr #16;\n" 1.183 + "orr %1,%1,r3,lsl #16;\n" 1.184 + 1.185 + //qi=(pi>>shift)*labs(ilsp[j]-wi); 1.186 + //pi=(qi>>shift)*labs(ilsp[j+1]-wi); 1.187 + //qexp+=shift; 1.188 + 1.189 + //} 1.190 + 1.191 + /* normalize to max 16 sig figs */ 1.192 + "3:" 1.193 + "mov r2,#0;" 1.194 + "orr r1,%0,%1;" 1.195 + "tst r1,#0xff000000;" 1.196 + "addne r2,r2,#8;" 1.197 + "movne r1,r1,lsr #8;" 1.198 + "tst r1,#0x00f00000;" 1.199 + "addne r2,r2,#4;" 1.200 + "movne r1,r1,lsr #4;" 1.201 + "tst r1,#0x000c0000;" 1.202 + "addne r2,r2,#2;" 1.203 + "movne r1,r1,lsr #2;" 1.204 + "tst r1,#0x00020000;" 1.205 + "addne r2,r2,#1;" 1.206 + "movne r1,r1,lsr #1;" 1.207 + "tst r1,#0x00010000;" 1.208 + "addne r2,r2,#1;" 1.209 + "mov %0,%0,lsr r2;" 1.210 + "mov %1,%1,lsr r2;" 1.211 + "add %2,%2,r2;" 1.212 + 1.213 + : "+r"(qi),"+r"(pi),"+r"(qexp) 1.214 + : "r"(ilsp),"r"(wi),"r"(m) 1.215 + : "r0","r1","r2","r3","cc"); 1.216 + 1.217 + *qip=qi; 1.218 + *pip=pi; 1.219 + *qexpp=qexp; 1.220 +} 1.221 + 1.222 +static inline void lsp_norm_asm(ogg_uint32_t *qip,ogg_int32_t *qexpp){ 1.223 + 1.224 + ogg_uint32_t qi=*qip; 1.225 + ogg_int32_t qexp=*qexpp; 1.226 + 1.227 + asm("tst %0,#0x0000ff00;" 1.228 + "moveq %0,%0,lsl #8;" 1.229 + "subeq %1,%1,#8;" 1.230 + "tst %0,#0x0000f000;" 1.231 + "moveq %0,%0,lsl #4;" 1.232 + "subeq %1,%1,#4;" 1.233 + "tst %0,#0x0000c000;" 1.234 + "moveq %0,%0,lsl #2;" 1.235 + "subeq %1,%1,#2;" 1.236 + "tst %0,#0x00008000;" 1.237 + "moveq %0,%0,lsl #1;" 1.238 + "subeq %1,%1,#1;" 1.239 + : "+r"(qi),"+r"(qexp) 1.240 + : 1.241 + : "cc"); 1.242 + *qip=qi; 1.243 + *qexpp=qexp; 1.244 +} 1.245 + 1.246 +#endif 1.247 +#endif 1.248 +