media/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 ;
     2 ;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
     3 ;
     4 ;  Use of this source code is governed by a BSD-style license
     5 ;  that can be found in the LICENSE file in the root of the source
     6 ;  tree. An additional intellectual property rights grant can be found
     7 ;  in the file PATENTS.  All contributing project authors may
     8 ;  be found in the AUTHORS file in the root of the source tree.
     9 ;
    EXPORT  |vp8_mse16x16_armv6|

    ARM

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; vp8_mse16x16_armv6
;
; Sum of squared differences (SSE) between a 16x16 block of source
; pixels and a 16x16 block of reference pixels.
;
; In:    r0    unsigned char *src_ptr
;        r1    int source_stride   (added to src_ptr after each row)
;        r2    unsigned char *ref_ptr
;        r3    int  recon_stride   (added to ref_ptr after each row)
;        stack unsigned int *sse   (first stacked argument)
; Out:   r0    sse; the same value is also stored to *sse
; Saved: r4-r9, lr (pushed on entry, restored on exit)
; Clobb: r0-r2, r12, condition flags, GE flags
;
; Register roles inside the loop:
;        r4    running sse
;        r12   rows remaining
;        lr    constant zero (second operand of sel)
;        r5/r6 current src/ref word (4 pixels each)
;        r7-r9 temporaries
;
; note: Based on vp8_variance16x16_armv6. The pixel sum is never used
;       by this function, so only the squared differences are
;       accumulated; no sum-of-differences is computed.
; note: Each row is read with four word loads from src_ptr/ref_ptr at
;       offsets 0, 4, 8 and 12; alignment is not checked here.
;-----------------------------------------------------------------------
|vp8_mse16x16_armv6| PROC

    push    {r4-r9, lr}

    pld     [r0, r1, lsl #0]
    pld     [r2, r3, lsl #0]

    mov     r12, #16            ; set loop counter to 16 (=block height)
    mov     r4, #0              ; initialize sse = 0
    mov     lr, #0              ; constant zero, loop-invariant

loop
    ; 1st 4 pixels
    ldr     r5, [r0, #0x0]      ; load 4 src pixels
    ldr     r6, [r2, #0x0]      ; load 4 ref pixels

    usub8   r8, r5, r6          ; per-byte src - ref (sets GE flags)
    pld     [r0, r1, lsl #1]    ; preload src two rows ahead
    sel     r7, r8, lr          ; keep bytes with non-negative difference
    usub8   r9, r6, r5          ; per-byte ref - src (sets GE flags)
    pld     [r2, r3, lsl #1]    ; preload ref two rows ahead
    sel     r8, r9, lr          ; keep bytes where ref - src is non-negative
    orr     r8, r8, r7          ; absolute differences of all 4 pixels

    ldr     r5, [r0, #0x4]      ; load next 4 src pixels

    ; calculate sse
    uxtb16  r6, r8              ; bytes 0 and 2 to halfwords
    uxtb16  r7, r8, ror #8      ; bytes 1 and 3 to halfwords
    smlad   r4, r6, r6, r4      ; sse += d0*d0 + d2*d2

    ; 2nd 4 pixels
    ldr     r6, [r2, #0x4]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; sse += d1*d1 + d3*d3

    usub8   r8, r5, r6          ; per-byte src - ref (sets GE flags)
    sel     r7, r8, lr          ; keep bytes with non-negative difference
    usub8   r9, r6, r5          ; per-byte ref - src (sets GE flags)
    sel     r8, r9, lr          ; keep bytes where ref - src is non-negative
    orr     r8, r8, r7          ; absolute differences of all 4 pixels

    ldr     r5, [r0, #0x8]      ; load next 4 src pixels

    ; calculate sse
    uxtb16  r6, r8              ; bytes 0 and 2 to halfwords
    uxtb16  r7, r8, ror #8      ; bytes 1 and 3 to halfwords
    smlad   r4, r6, r6, r4      ; sse += d0*d0 + d2*d2

    ; 3rd 4 pixels
    ldr     r6, [r2, #0x8]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; sse += d1*d1 + d3*d3

    usub8   r8, r5, r6          ; per-byte src - ref (sets GE flags)
    sel     r7, r8, lr          ; keep bytes with non-negative difference
    usub8   r9, r6, r5          ; per-byte ref - src (sets GE flags)
    sel     r8, r9, lr          ; keep bytes where ref - src is non-negative
    orr     r8, r8, r7          ; absolute differences of all 4 pixels

    ldr     r5, [r0, #0xc]      ; load next 4 src pixels

    ; calculate sse
    uxtb16  r6, r8              ; bytes 0 and 2 to halfwords
    uxtb16  r7, r8, ror #8      ; bytes 1 and 3 to halfwords
    smlad   r4, r6, r6, r4      ; sse += d0*d0 + d2*d2

    ; 4th 4 pixels
    ldr     r6, [r2, #0xc]      ; load 4 ref pixels
    smlad   r4, r7, r7, r4      ; sse += d1*d1 + d3*d3

    usub8   r8, r5, r6          ; per-byte src - ref (sets GE flags)
    add     r0, r0, r1          ; set src_ptr to next row
    sel     r7, r8, lr          ; keep bytes with non-negative difference
    usub8   r9, r6, r5          ; per-byte ref - src (sets GE flags)
    add     r2, r2, r3          ; set ref_ptr to next row
    sel     r8, r9, lr          ; keep bytes where ref - src is non-negative
    orr     r8, r8, r7          ; absolute differences of all 4 pixels

    subs    r12, r12, #1        ; next row; Z flag consumed by bne below

    ; calculate sse
    uxtb16  r6, r8              ; bytes 0 and 2 to halfwords
    uxtb16  r7, r8, ror #8      ; bytes 1 and 3 to halfwords
    smlad   r4, r6, r6, r4      ; sse += d0*d0 + d2*d2
    smlad   r4, r7, r7, r4      ; sse += d1*d1 + d3*d3

    bne     loop

    ; return stuff
    ldr     r1, [sp, #28]       ; sse pointer: above the 7 pushed words
    mov     r0, r4              ; return sse
    str     r4, [r1]            ; store sse

    pop     {r4-r9, pc}

    ENDP

    END

mercurial