media/libvpx/vp8/encoder/arm/neon/subtract_neon.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 ;
     2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3 ;
     4 ;  Use of this source code is governed by a BSD-style license
     5 ;  that can be found in the LICENSE file in the root of the source
     6 ;  tree. An additional intellectual property rights grant can be found
     7 ;  in the file PATENTS.  All contributing project authors may
     8 ;  be found in the AUTHORS file in the root of the source tree.
     9 ;
    11     EXPORT |vp8_subtract_b_neon|             ; the three procedures defined below are exported
    12     EXPORT |vp8_subtract_mby_neon|
    13     EXPORT |vp8_subtract_mbuv_neon|
    15     INCLUDE vp8_asm_enc_offsets.asm          ; presumably supplies the vp8_block_* / vp8_blockd_* offsets used by vp8_subtract_b_neon - confirm
    17     ARM                                      ; assemble what follows as ARM (not Thumb) instructions
    18     REQUIRE8                                 ; declares that this code requires an 8-byte aligned stack
    19     PRESERVE8                                ; declares that this code keeps the stack 8-byte aligned
    21     AREA ||.text||, CODE, READONLY, ALIGN=2  ; read-only code area, aligned to 2^2 = 4 bytes
    ;----------------------------------------------------------------------
    ; void vp8_subtract_b_neon(BLOCK *be, BLOCKD *bd, int pitch)
    ;
    ; Computes diff = src - pred for one 4x4 block. The unsigned bytes are
    ; widened before the subtract, so every difference is stored as a
    ; 16-bit value.
    ;
    ;   r0 = be     source pointer, source stride and diff pointer are
    ;               read from this structure
    ;   r1 = bd     predictor pointer is read from this structure
    ;   r2 = pitch  predictor row stride in bytes; also the diff row
    ;               stride counted in 16-bit elements
    ;
    ; Each row load fetches 8 bytes, but only the low 4 lanes of every
    ; widened result are written back.
    ;----------------------------------------------------------------------
|vp8_subtract_b_neon| PROC
    push            {r4-r7}

    ; Gather the pointers and the source stride from the two structures.
    ldr             r7, [r1, #vp8_blockd_predictor]     ; r7 = predictor rows
    ldr             r3, [r0, #vp8_block_base_src]
    ldr             r6, [r0, #vp8_block_src_stride]     ; r6 = source stride
    ldr             r4, [r0, #vp8_block_src]
    ldr             r5, [r0, #vp8_block_src_diff]       ; r5 = diff output
    ldr             r3, [r3]
    add             r3, r3, r4                          ; r3 = *base_src + src

    ; Four source rows go to the even d registers.
    vld1.8          {d0}, [r3], r6
    vld1.8          {d2}, [r3], r6
    vld1.8          {d4}, [r3], r6
    vld1.8          {d6}, [r3], r6

    ; Four predictor rows go to the odd d registers.
    vld1.8          {d1}, [r7], r2
    vld1.8          {d3}, [r7], r2
    vld1.8          {d5}, [r7], r2
    vld1.8          {d7}, [r7], r2

    ; From here on r2 is the diff stride in bytes (pitch * 2, 16-bit entries).
    lsl             r2, r2, #1

    ; Widen, subtract and store one row at a time; d20/d22/d24/d26 are the
    ; low halves of q10..q13, i.e. the first four differences of each row.
    vsubl.u8        q10, d0, d1
    vst1.16         {d20}, [r5], r2
    vsubl.u8        q11, d2, d3
    vst1.16         {d22}, [r5], r2
    vsubl.u8        q12, d4, d5
    vst1.16         {d24}, [r5], r2
    vsubl.u8        q13, d6, d7
    vst1.16         {d26}, [r5], r2

    pop             {r4-r7}
    bx              lr

    ENDP
    ;======================================================================
    ; void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride,
    ;                            unsigned char *pred, int pred_stride)
    ;
    ; Computes diff = src - pred for a 16x16 block. The unsigned bytes are
    ; widened before the subtract and each difference is stored as a 16-bit
    ; value; diff rows are 16 entries (32 bytes) apart.
    ;
    ;   r0      = diff          (advanced as rows are written)
    ;   r1      = src           (advanced by src_stride per row)
    ;   r2      = src_stride
    ;   r3      = pred          (advanced by pred_stride per row)
    ;   [sp]    = pred_stride   (fifth argument, passed on the stack)
    ;
    ; The loop body handles four rows and runs four times.
    ;
    ; q4-q7 (d8-d15) are callee-saved under the ARM procedure call standard
    ; and are used as load targets below, so they are saved on entry and
    ; restored before returning.
    ;======================================================================
|vp8_subtract_mby_neon| PROC
    push            {r4-r7}
    ldr             r4, [sp, #16]           ; pred_stride: first stack argument, just above
                                            ; the four words pushed above
    vpush           {d8-d15}                ; preserve callee-saved NEON registers; done after
                                            ; the stack argument is read so its offset is unchanged
    mov             r12, #4                 ; loop counter: 4 iterations x 4 rows
    mov             r6, #32                 ; "diff" stride x2 (bytes per 16-entry diff row)
    add             r5, r0, #16             ; second diff pointer: right half of each diff row

subtract_mby_loop
    vld1.8          {q0}, [r1], r2          ;load src
    vld1.8          {q1}, [r3], r4          ;load pred
    vld1.8          {q2}, [r1], r2
    vld1.8          {q3}, [r3], r4
    vld1.8          {q4}, [r1], r2
    vld1.8          {q5}, [r3], r4
    vld1.8          {q6}, [r1], r2
    vld1.8          {q7}, [r3], r4

    ; Each 16-pixel row yields two 8-lane results: low half, then high half.
    vsubl.u8        q8, d0, d2
    vsubl.u8        q9, d1, d3
    vsubl.u8        q10, d4, d6
    vsubl.u8        q11, d5, d7
    vsubl.u8        q12, d8, d10
    vsubl.u8        q13, d9, d11
    vsubl.u8        q14, d12, d14
    vsubl.u8        q15, d13, d15

    ; r0 writes the left 8 differences of a row, r5 the right 8.
    vst1.16         {q8}, [r0], r6          ;store diff
    vst1.16         {q9}, [r5], r6
    vst1.16         {q10}, [r0], r6
    vst1.16         {q11}, [r5], r6
    vst1.16         {q12}, [r0], r6
    vst1.16         {q13}, [r5], r6
    vst1.16         {q14}, [r0], r6
    vst1.16         {q15}, [r5], r6

    subs            r12, r12, #1
    bne             subtract_mby_loop

    vpop            {d8-d15}                ; restore callee-saved NEON registers
    pop             {r4-r7}
    bx              lr
    ENDP
   ;======================================================================
   ; void vp8_subtract_mbuv_neon(short *diff, unsigned char *usrc, unsigned char *vsrc,
   ;                             int src_stride, unsigned char *upred,
   ;                             unsigned char *vpred, int pred_stride)
   ;
   ; Computes diff = src - pred for two 8x8 blocks (U, then V). The unsigned
   ; bytes are widened before the subtract and each difference is stored as
   ; a 16-bit value. U output starts at diff + 256 entries; V output follows
   ; directly after the 64 U entries.
   ;
   ;   r0       = diff
   ;   r1       = usrc          (advanced by src_stride per row)
   ;   r2       = vsrc          (advanced by src_stride per row)
   ;   r3       = src_stride
   ;   [sp]     = upred
   ;   [sp, #4] = vpred
   ;   [sp, #8] = pred_stride
   ;
   ; d8-d15 are callee-saved under the ARM procedure call standard and are
   ; used as load targets below, so they are saved on entry and restored
   ; before returning.
   ;======================================================================
|vp8_subtract_mbuv_neon| PROC
    push            {r4-r7}
    ; Stack arguments sit just above the four words pushed above. They are
    ; read before vpush so their offsets do not change.
    ldr             r4, [sp, #16]       ; upred
    ldr             r5, [sp, #20]       ; vpred
    ldr             r6, [sp, #24]       ; pred_stride
    vpush           {d8-d15}            ; preserve callee-saved NEON registers

    add             r0, r0, #512        ; short *udiff = diff + 256;
    mov             r12, #32            ; "diff" stride x2 (two 8-entry diff rows, in bytes)
    add             r7, r0, #16         ; second diff pointer: one diff row further on

;u
    vld1.8          {d0}, [r1], r3      ;load usrc
    vld1.8          {d1}, [r4], r6      ;load upred
    vld1.8          {d2}, [r1], r3
    vld1.8          {d3}, [r4], r6
    vld1.8          {d4}, [r1], r3
    vld1.8          {d5}, [r4], r6
    vld1.8          {d6}, [r1], r3
    vld1.8          {d7}, [r4], r6
    vld1.8          {d8}, [r1], r3
    vld1.8          {d9}, [r4], r6
    vld1.8          {d10}, [r1], r3
    vld1.8          {d11}, [r4], r6
    vld1.8          {d12}, [r1], r3
    vld1.8          {d13}, [r4], r6
    vld1.8          {d14}, [r1], r3
    vld1.8          {d15}, [r4], r6

    vsubl.u8        q8, d0, d1
    vsubl.u8        q9, d2, d3
    vsubl.u8        q10, d4, d5
    vsubl.u8        q11, d6, d7
    vsubl.u8        q12, d8, d9
    vsubl.u8        q13, d10, d11
    vsubl.u8        q14, d12, d13
    vsubl.u8        q15, d14, d15

    ; r0 writes rows 0,2,4,6 and r7 rows 1,3,5,7; each steps two rows at a time.
    vst1.16         {q8}, [r0], r12     ;store diff
    vst1.16         {q9}, [r7], r12
    vst1.16         {q10}, [r0], r12
    vst1.16         {q11}, [r7], r12
    vst1.16         {q12}, [r0], r12
    vst1.16         {q13}, [r7], r12
    vst1.16         {q14}, [r0], r12
    vst1.16         {q15}, [r7], r12

;v
    ; r0/r7 have each advanced 4 x 32 = 128 bytes, so they now address the
    ; 64 entries that follow the U differences.
    vld1.8          {d0}, [r2], r3      ;load vsrc
    vld1.8          {d1}, [r5], r6      ;load vpred
    vld1.8          {d2}, [r2], r3
    vld1.8          {d3}, [r5], r6
    vld1.8          {d4}, [r2], r3
    vld1.8          {d5}, [r5], r6
    vld1.8          {d6}, [r2], r3
    vld1.8          {d7}, [r5], r6
    vld1.8          {d8}, [r2], r3
    vld1.8          {d9}, [r5], r6
    vld1.8          {d10}, [r2], r3
    vld1.8          {d11}, [r5], r6
    vld1.8          {d12}, [r2], r3
    vld1.8          {d13}, [r5], r6
    vld1.8          {d14}, [r2], r3
    vld1.8          {d15}, [r5], r6

    vsubl.u8        q8, d0, d1
    vsubl.u8        q9, d2, d3
    vsubl.u8        q10, d4, d5
    vsubl.u8        q11, d6, d7
    vsubl.u8        q12, d8, d9
    vsubl.u8        q13, d10, d11
    vsubl.u8        q14, d12, d13
    vsubl.u8        q15, d14, d15

    vst1.16         {q8}, [r0], r12     ;store diff
    vst1.16         {q9}, [r7], r12
    vst1.16         {q10}, [r0], r12
    vst1.16         {q11}, [r7], r12
    vst1.16         {q12}, [r0], r12
    vst1.16         {q13}, [r7], r12
    vst1.16         {q14}, [r0], r12
    vst1.16         {q15}, [r7], r12

    vpop            {d8-d15}            ; restore callee-saved NEON registers
    pop             {r4-r7}
    bx              lr

    ENDP
   199     END

mercurial