media/libvpx/vp8/common/arm/neon/dequant_idct_neon.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI;
this solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 ;
     2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3 ;
     4 ;  Use of this source code is governed by a BSD-style license
     5 ;  that can be found in the LICENSE file in the root of the source
     6 ;  tree. An additional intellectual property rights grant can be found
     7 ;  in the file PATENTS.  All contributing project authors may
     8 ;  be found in the AUTHORS file in the root of the source tree.
     9 ;
    12     EXPORT  |vp8_dequant_idct_add_neon|
    13     ARM
    14     REQUIRE8
    15     PRESERVE8
    17     AREA ||.text||, CODE, READONLY, ALIGN=2
    18 ;void vp8_dequant_idct_add_neon(short *input, short *dq,
    19 ;                           unsigned char *dest, int stride)
    20 ; r0    short *input          16 coefficients; the 32 bytes are zeroed here
    21 ; r1    short *dq             16 factors, multiplied element-wise with input
    22 ; r2    unsigned char *dest   4 rows of 4 bytes: read, residual added, stored
    23 ; r3    int stride            byte distance between consecutive dest rows
    24 ; NEON: only q0-q3, q8-q12, q14-q15 are written; callee-saved d8-d15 untouched
    25 |vp8_dequant_idct_add_neon| PROC
    26     vld1.16         {q8, q9}, [r0]          ; 16 input coefficients
    27     vld1.16         {q10, q11}, [r1]        ; 16 dequant factors
    29     add             r1, r2, r3              ; r1 = dest + stride
    30     lsl             r3, #1                  ; 2x stride
    31 ; r2 walks rows 0,2 and r1 walks rows 1,3; q12 (d24,d25) keeps the dest pixels
    32     vld1.32         {d24[0]}, [r2], r3
    33     vld1.32         {d24[1]}, [r1], r3
    34     vld1.32         {d25[0]}, [r2]
    35     vld1.32         {d25[1]}, [r1]
    37     adr             r12, cospi8sqrt2minus1  ; pointer to the first constant
    39     vmul.i16        q1, q8, q10             ;input for short_idct4x4llm_neon
    40     vmul.i16        q2, q9, q11
    41 ; q8-q10 are free from here on and are reused as IDCT temporaries
    42 ;|short_idct4x4llm_neon| PROC
    43     vld1.16         {d0}, [r12]             ; 8 bytes: both constants (adjacent words)
    44     vswp            d3, d4                  ;q2(vp[4] vp[12])
    45 ; first pass: vqdmulh, >>1, then + x forms x + ((x * k) >> 16) per constant k
    46     vqdmulh.s16     q3, q2, d0[2]
    47     vqdmulh.s16     q8, q2, d0[0]
    49     vqadd.s16       d20, d2, d3             ;a1
    50     vqsub.s16       d21, d2, d3             ;b1
    52     vshr.s16        q3, q3, #1
    53     vshr.s16        q8, q8, #1
    55     vqadd.s16       q3, q3, q2
    56     vqadd.s16       q8, q8, q2
    58     vqsub.s16       d18, d6, d17            ;c1
    59     vqadd.s16       d19, d7, d16            ;d1
    61     vqadd.s16       d2, d20, d19            ; a1 + d1
    62     vqadd.s16       d3, d21, d18            ; b1 + c1
    63     vqsub.s16       d4, d21, d18            ; b1 - c1
    64     vqsub.s16       d5, d20, d19            ; a1 - d1
    65 ; transpose the 4x4 block of 16-bit values held in d2..d5
    66     vtrn.32         d2, d4
    67     vtrn.32         d3, d5
    68     vtrn.16         d2, d3
    69     vtrn.16         d4, d5
    71 ; memset(input, 0, 32) -- 32bytes
    72     vmov.i16        q14, #0
    73 ; second pass: same butterfly on the transposed data
    74     vswp            d3, d4
    75     vqdmulh.s16     q3, q2, d0[2]
    76     vqdmulh.s16     q8, q2, d0[0]
    78     vqadd.s16       d20, d2, d3             ;a1
    79     vqsub.s16       d21, d2, d3             ;b1
    81     vmov            q15, q14                ; second zero register for the 32-byte store
    83     vshr.s16        q3, q3, #1
    84     vshr.s16        q8, q8, #1
    86     vqadd.s16       q3, q3, q2
    87     vqadd.s16       q8, q8, q2
    89     vqsub.s16       d18, d6, d17            ;c1
    90     vqadd.s16       d19, d7, d16            ;d1
    92     vqadd.s16       d2, d20, d19
    93     vqadd.s16       d3, d21, d18
    94     vqsub.s16       d4, d21, d18
    95     vqsub.s16       d5, d20, d19
    97     vst1.16         {q14, q15}, [r0]        ; clear the coefficient buffer
    98 ; rounding shift right by 3 on every output value
    99     vrshr.s16       d2, d2, #3
   100     vrshr.s16       d3, d3, #3
   101     vrshr.s16       d4, d4, #3
   102     vrshr.s16       d5, d5, #3
   104     vtrn.32         d2, d4
   105     vtrn.32         d3, d5
   106     vtrn.16         d2, d3
   107     vtrn.16         d4, d5
   108 ; widen the saved dest bytes to 16 bits and add them to the IDCT output
   109     vaddw.u8        q1, q1, d24
   110     vaddw.u8        q2, q2, d25
   112     sub             r2, r2, r3              ; back to dest row 0
   113     sub             r1, r1, r3              ; back to dest row 1
   115     vqmovun.s16     d0, q1                  ; saturate to unsigned 8-bit
   116     vqmovun.s16     d1, q2
   118     vst1.32         {d0[0]}, [r2], r3
   119     vst1.32         {d0[1]}, [r1], r3
   120     vst1.32         {d1[0]}, [r2]
   121     vst1.32         {d1[1]}, [r1]
   123     bx             lr
   125     ENDP           ; |vp8_dequant_idct_add_neon|
   127 ; Constant Pool -- the two words are read with a single 8-byte vld1.16 {d0}
   128 cospi8sqrt2minus1 DCD 0x4e7b4e7b ; 0x4e7b in both halfwords -> d0[0], d0[1]
   129 sinpi8sqrt2       DCD 0x8a8c8a8c ; 0x8a8c in both halfwords -> d0[2], d0[3]; must stay directly after cospi8sqrt2minus1
   131     END

mercurial