media/libtremor/lib/asm_arm.h

author       Michael Schloh von Bennewitz <michael@schloh.com>
date         Thu, 22 Jan 2015 13:21:57 +0100
branch       TOR_BUG_9701
changeset    15:b8a032363ba2
permissions  -rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE.   *
 *                                                                  *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002    *
 * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
 *                                                                  *
 ********************************************************************

 function: arm7 and later wide math functions

 ********************************************************************/

#ifdef _ARM_ASSEM_

#if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
#define _V_WIDE_MATH

static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
  int lo,hi;
  asm volatile("smull\t%0, %1, %2, %3"
               : "=&r"(lo),"=&r"(hi)
               : "%r"(x),"r"(y)
               : "cc");
  return(hi);
}
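
/* For reference, a portable C sketch of the same computation: SMULL forms
 * the full 64-bit signed product and the high word is returned, i.e.
 *
 *   return (ogg_int32_t)(((ogg_int64_t)x * y) >> 32);
 */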

static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
  return MULT32(x,y)<<1;
}
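
/* MULT31 treats both operands as Q31 fixed point: shifting the high word
 * left by one gives roughly (x*y)>>31, with the lowest result bit zeroed. */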

static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
  int lo,hi;
  asm volatile("smull	%0, %1, %2, %3\n\t"
               "movs	%0, %0, lsr #15\n\t"
               "adc	%1, %0, %1, lsl #17\n\t"
               : "=&r"(lo),"=&r"(hi)
               : "%r"(x),"r"(y)
               : "cc");
  return(hi);
}
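
/* The MOVS/ADC pair above assembles bits [46:15] of the 64-bit product,
 * i.e. roughly
 *
 *   return (ogg_int32_t)(((ogg_int64_t)x * y) >> 15);
 *
 * with the carry out of the discarded bit 14 rounding the result. */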

#define MB() asm volatile ("" : : : "memory")
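
/* A note on MB(): this is a compiler-only memory barrier. The empty asm
 * with a "memory" clobber keeps the compiler from reordering or merging
 * memory accesses across it; no hardware barrier instruction is emitted. */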

static inline void XPROD32(ogg_int32_t  a, ogg_int32_t  b,
			   ogg_int32_t  t, ogg_int32_t  v,
			   ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm(	"smull	%0, %1, %4, %6\n\t"
	"smlal	%0, %1, %5, %7\n\t"
	"rsb	%3, %4, #0\n\t"
	"smull	%0, %2, %5, %6\n\t"
	"smlal	%0, %2, %3, %7"
	: "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
	: "3" (a), "r" (b), "r" (t), "r" (v)
	: "cc" );
  *x = x1;
  MB();
  *y = y1;
}
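
/* In effect, a portable C sketch of the asm above:
 *
 *   *x = (ogg_int32_t)(((ogg_int64_t)a*t + (ogg_int64_t)b*v) >> 32);
 *   *y = (ogg_int32_t)(((ogg_int64_t)b*t - (ogg_int64_t)a*v) >> 32);
 */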

static inline void XPROD31(ogg_int32_t  a, ogg_int32_t  b,
			   ogg_int32_t  t, ogg_int32_t  v,
			   ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm(	"smull	%0, %1, %4, %6\n\t"
	"smlal	%0, %1, %5, %7\n\t"
	"rsb	%3, %4, #0\n\t"
	"smull	%0, %2, %5, %6\n\t"
	"smlal	%0, %2, %3, %7"
	: "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
	: "3" (a), "r" (b), "r" (t), "r" (v)
	: "cc" );
  *x = x1 << 1;
  MB();
  *y = y1 << 1;
}
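
/* As XPROD32, but for Q31 operands; a portable sketch:
 *
 *   *x = (ogg_int32_t)(((ogg_int64_t)a*t + (ogg_int64_t)b*v) >> 31);
 *   *y = (ogg_int32_t)(((ogg_int64_t)b*t - (ogg_int64_t)a*v) >> 31);
 *
 * (the asm takes the sums >> 32 and shifts left by one, so the lowest
 * result bit may differ from an exact >> 31). */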

static inline void XNPROD31(ogg_int32_t  a, ogg_int32_t  b,
			    ogg_int32_t  t, ogg_int32_t  v,
			    ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm(	"rsb	%2, %4, #0\n\t"
	"smull	%0, %1, %3, %5\n\t"
	"smlal	%0, %1, %2, %6\n\t"
	"smull	%0, %2, %4, %5\n\t"
	"smlal	%0, %2, %3, %6"
	: "=&r" (l), "=&r" (x1), "=&r" (y1)
	: "r" (a), "r" (b), "r" (t), "r" (v)
	: "cc" );
  *x = x1 << 1;
  MB();
  *y = y1 << 1;
}
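
/* The negated counterpart of XPROD31; a portable sketch:
 *
 *   *x = (ogg_int32_t)(((ogg_int64_t)a*t - (ogg_int64_t)b*v) >> 31);
 *   *y = (ogg_int32_t)(((ogg_int64_t)b*t + (ogg_int64_t)a*v) >> 31);
 */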

#endif

#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH

static inline ogg_int32_t CLIP_TO_15(ogg_int32_t x) {
  int tmp;
  asm volatile("subs	%1, %0, #32768\n\t"
	       "movpl	%0, #0x7f00\n\t"
	       "orrpl	%0, %0, #0xff\n"
	       "adds	%1, %0, #32768\n\t"
	       "movmi	%0, #0x8000"
	       : "+r"(x),"=r"(tmp)
	       :
	       : "cc");
  return(x);
}
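
/* Clamps x to the signed 16-bit range; a portable sketch of the intent:
 *
 *   if (x >  32767) x =  32767;
 *   if (x < -32768) x = -32768;
 *   return x;
 *
 * (the asm loads #0x8000 for the low clamp, which equals -32768 once the
 * sample is truncated to 16 bits). */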

#endif

#ifndef _V_LSP_MATH_ASM
#define _V_LSP_MATH_ASM

static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip,
				ogg_int32_t *qexpp,
				ogg_int32_t *ilsp,ogg_int32_t wi,
				ogg_int32_t m){

  ogg_uint32_t qi=*qip,pi=*pip;
  ogg_int32_t qexp=*qexpp;
   134   asm("mov     r0,%3;"
   135       "movs    r1,%5,asr#1;"
   136       "add     r0,r0,r1,lsl#3;"
   137       "beq 2f;\n"
   138       "1:"
   140       "ldmdb   r0!,{r1,r3};"
   141       "subs    r1,r1,%4;"          //ilsp[j]-wi
   142       "rsbmi   r1,r1,#0;"          //labs(ilsp[j]-wi)
   143       "umull   %0,r2,r1,%0;"       //qi*=labs(ilsp[j]-wi)
   145       "subs    r1,r3,%4;"          //ilsp[j+1]-wi
   146       "rsbmi   r1,r1,#0;"          //labs(ilsp[j+1]-wi)
   147       "umull   %1,r3,r1,%1;"       //pi*=labs(ilsp[j+1]-wi)
   149       "cmn     r2,r3;"             // shift down 16?
   150       "beq     0f;"
   151       "add     %2,%2,#16;"
   152       "mov     %0,%0,lsr #16;"
   153       "orr     %0,%0,r2,lsl #16;"
   154       "mov     %1,%1,lsr #16;"
   155       "orr     %1,%1,r3,lsl #16;"
   156       "0:"
   157       "cmp     r0,%3;\n"
   158       "bhi     1b;\n"
   160       "2:"
   161       // odd filter assymetry
   162       "ands    r0,%5,#1;\n"
   163       "beq     3f;\n"
   164       "add     r0,%3,%5,lsl#2;\n"
   166       "ldr     r1,[r0,#-4];\n"
   167       "mov     r0,#0x4000;\n"
   169       "subs    r1,r1,%4;\n"          //ilsp[j]-wi
   170       "rsbmi   r1,r1,#0;\n"          //labs(ilsp[j]-wi)
   171       "umull   %0,r2,r1,%0;\n"       //qi*=labs(ilsp[j]-wi)
   172       "umull   %1,r3,r0,%1;\n"       //pi*=labs(ilsp[j+1]-wi)
   174       "cmn     r2,r3;\n"             // shift down 16?
   175       "beq     3f;\n"
   176       "add     %2,%2,#16;\n"
   177       "mov     %0,%0,lsr #16;\n"
   178       "orr     %0,%0,r2,lsl #16;\n"
   179       "mov     %1,%1,lsr #16;\n"
   180       "orr     %1,%1,r3,lsl #16;\n"
   182       //qi=(pi>>shift)*labs(ilsp[j]-wi);
   183       //pi=(qi>>shift)*labs(ilsp[j+1]-wi);
   184       //qexp+=shift;
   186       //}
   188       /* normalize to max 16 sig figs */
   189       "3:"
   190       "mov     r2,#0;"
   191       "orr     r1,%0,%1;"
   192       "tst     r1,#0xff000000;"
   193       "addne   r2,r2,#8;"
   194       "movne   r1,r1,lsr #8;"
   195       "tst     r1,#0x00f00000;"
   196       "addne   r2,r2,#4;"
   197       "movne   r1,r1,lsr #4;"
   198       "tst     r1,#0x000c0000;"
   199       "addne   r2,r2,#2;"
   200       "movne   r1,r1,lsr #2;"
   201       "tst     r1,#0x00020000;"
   202       "addne   r2,r2,#1;"
   203       "movne   r1,r1,lsr #1;"
   204       "tst     r1,#0x00010000;"
   205       "addne   r2,r2,#1;"
   206       "mov     %0,%0,lsr r2;"
   207       "mov     %1,%1,lsr r2;"
   208       "add     %2,%2,r2;"
   210       : "+r"(qi),"+r"(pi),"+r"(qexp)
   211       : "r"(ilsp),"r"(wi),"r"(m)
   212       : "r0","r1","r2","r3","cc");
   214   *qip=qi;
   215   *pip=pi;
   216   *qexpp=qexp;
   217 }
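
/* Roughly, the main loop above computes the following (a sketch; j and
 * the 64-bit temporaries q and p are illustrative names only):
 *
 *   for (j = 0; j + 1 < m; j += 2) {
 *     ogg_uint64_t q = (ogg_uint64_t)qi * labs(ilsp[j]   - wi);
 *     ogg_uint64_t p = (ogg_uint64_t)pi * labs(ilsp[j+1] - wi);
 *     if ((q >> 32) | (p >> 32)) {       // either product exceeded 32 bits
 *       q >>= 16; p >>= 16; qexp += 16;  // drop 16 bits, track in exponent
 *     }
 *     qi = (ogg_uint32_t)q; pi = (ogg_uint32_t)p;
 *   }
 *
 * followed by the odd-m tap (pi scaled by 0x4000 instead) and a final
 * shift of qi and pi down to at most 16 significant bits, with the shift
 * count accumulated into qexp. */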

static inline void lsp_norm_asm(ogg_uint32_t *qip,ogg_int32_t *qexpp){

  ogg_uint32_t qi=*qip;
  ogg_int32_t qexp=*qexpp;

  asm("tst     %0,#0x0000ff00;"
      "moveq   %0,%0,lsl #8;"
      "subeq   %1,%1,#8;"
      "tst     %0,#0x0000f000;"
      "moveq   %0,%0,lsl #4;"
      "subeq   %1,%1,#4;"
      "tst     %0,#0x0000c000;"
      "moveq   %0,%0,lsl #2;"
      "subeq   %1,%1,#2;"
      "tst     %0,#0x00008000;"
      "moveq   %0,%0,lsl #1;"
      "subeq   %1,%1,#1;"
      : "+r"(qi),"+r"(qexp)
      :
      : "cc");
  *qip=qi;
  *qexpp=qexp;
}
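
/* Equivalent, for a nonzero qi already below 1<<16 (a sketch):
 *
 *   while (!(qi & 0x8000)) { qi <<= 1; qexp--; }
 */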

#endif
#endif
