media/libvpx/vp8/common/x86/iwalsh_sse2.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701 and complies with the disk-avoidance requirements
documented at https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 ;
     2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3 ;
     4 ;  Use of this source code is governed by a BSD-style license
     5 ;  that can be found in the LICENSE file in the root of the source
     6 ;  tree. An additional intellectual property rights grant can be found
     7 ;  in the file PATENTS.  All contributing project authors may
     8 ;  be found in the AUTHORS file in the root of the source tree.
     9 ;
    12 %include "vpx_ports/x86_abi_support.asm"
    14 ;void vp8_short_inv_walsh4x4_sse2(short *input, short *output)
       ;
       ;  4x4 inverse Walsh-Hadamard transform on 16-bit values, done as two
       ;  butterfly passes with a 4x4 word transpose in between. Each final
       ;  value gets +3 added and is arithmetically shifted right by 3.
       ;
       ;  In:    arg(0) = input  - 16 shorts, read with two movdqa loads, so
       ;                           the buffer has to be 16-byte aligned.
       ;         arg(1) = output - result element k (k = 0..15, row-major) is
       ;                           stored as one 16-bit word at byte offset
       ;                           32*k; nothing else is written.
       ;  Out:   void per the prototype above.
       ;  Clobb: rax, rcx, rdx, xmm0-xmm5, flags.
       ;
       ;  Lane comments list the high half of a register first, low half last.
       ;
       ;  NOTE(review): sym(), arg(), PRIVATE, SHADOW_ARGS_TO_STACK and
       ;  UNSHADOW_ARGS are not defined in this file; presumably they come
       ;  from the included x86_abi_support.asm - confirm there.
    15 global sym(vp8_short_inv_walsh4x4_sse2) PRIVATE
    16 sym(vp8_short_inv_walsh4x4_sse2):
    17     push        rbp
    18     mov         rbp, rsp
    19     SHADOW_ARGS_TO_STACK 2
    20     ; end prolog
    22     mov         rcx, arg(0)         ;rcx = input
    23     mov         rdx, arg(1)         ;rdx = output
    24     mov         rax, 30003h         ;0003h in both low words; broadcast later as the rounding bias
           ; ---- first pass: combine the four input rows lane by lane ----
    26     movdqa      xmm0, [rcx + 0]     ;ip[4] ip[0]
    27     movdqa      xmm1, [rcx + 16]    ;ip[12] ip[8]
    30     pshufd      xmm2, xmm1, 4eh     ;ip[8] ip[12] (qwords swapped)
    31     movdqa      xmm3, xmm0          ;ip[4] ip[0]
    33     paddw       xmm0, xmm2          ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
    34     psubw       xmm3, xmm2          ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
    36     movdqa      xmm4, xmm0          ;b1 a1
    37     punpcklqdq  xmm0, xmm3          ;d1 a1
    38     punpckhqdq  xmm4, xmm3          ;c1 b1
    40     movdqa      xmm1, xmm4          ;c1 b1
    41     paddw       xmm4, xmm0          ;d1+c1 a1+b1 aka op[4] op[0]
    42     psubw       xmm0, xmm1          ;d1-c1 a1-b1 aka op[12] op[8]
    44     ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           ; 4x4 transpose of the 16-bit intermediate values.
           ; "rc" below means row r, column c; xmm4 holds rows 1 and 0,
           ; xmm0 holds rows 3 and 2.
    45     ; 13 12 11 10 03 02 01 00
    46     ;
    47     ; 33 32 31 30 23 22 21 20
    48     ;
    49     movdqa      xmm3, xmm4          ; 13 12 11 10 03 02 01 00
    50     punpcklwd   xmm4, xmm0          ; 23 03 22 02 21 01 20 00
    51     punpckhwd   xmm3, xmm0          ; 33 13 32 12 31 11 30 10
    52     movdqa      xmm1, xmm4          ; 23 03 22 02 21 01 20 00
    53     punpcklwd   xmm4, xmm3          ; 31 21 11 01 30 20 10 00
    54     punpckhwd   xmm1, xmm3          ; 33 23 13 03 32 22 12 02
    55     ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           ; ---- second pass: same butterfly on the transposed data ----
           ; xmm4 = columns 1 and 0, xmm1 = columns 3 and 2 of the first-pass result.
    56     movd        xmm0, eax           ;low dword of xmm0 = 00030003h
    57     pshufd      xmm2, xmm1, 4eh     ;col2 col3 (qwords swapped)
    58     movdqa      xmm3, xmm4          ;col1 col0
    60     pshufd      xmm0, xmm0, 0       ;03 03 03 03 03 03 03 03 (3 in every word)
    62     paddw       xmm4, xmm2          ;col1+col2 col0+col3 aka b1 a1
    63     psubw       xmm3, xmm2          ;col1-col2 col0-col3 aka c1 d1
    65     movdqa      xmm5, xmm4          ;b1 a1
    66     punpcklqdq  xmm4, xmm3          ;d1 a1
    67     punpckhqdq  xmm5, xmm3          ;c1 b1
    69     movdqa      xmm1, xmm5          ;c1 b1
    70     paddw       xmm5, xmm4          ;d1+c1 a1+b1 aka result columns 1 and 0
    71     psubw       xmm4, xmm1          ;d1-c1 a1-b1 aka result columns 3 and 2
    73     paddw       xmm5, xmm0          ;+3 in every word before the shift
    74     paddw       xmm4, xmm0
    75     psraw       xmm5, 3             ;arithmetic >> 3 on every word
    76     psraw       xmm4, 3
           ; ---- scatter the 16 results ----
           ; The results are still in transposed order: word j of each qword is
           ; row j of one result column. Two words at a time are pulled into
           ; eax/ecx, the low word is stored, then shr 16 exposes the high word.
           ; Element k (row-major) lands at [rdx + 32*k].
           ; rcx (input pointer) is no longer needed and is reused as scratch.
    78     movd        eax, xmm5           ;column 0, rows 1 and 0
    79     movd        ecx, xmm4           ;column 2, rows 1 and 0
    80     psrldq      xmm5, 4             ;bring the next two words down to the low dword
    81     psrldq      xmm4, 4
    82     mov         word ptr[rdx+32*0], ax      ;row 0, column 0
    83     mov         word ptr[rdx+32*2], cx      ;row 0, column 2
    84     shr         eax, 16
    85     shr         ecx, 16
    86     mov         word ptr[rdx+32*4], ax      ;row 1, column 0
    87     mov         word ptr[rdx+32*6], cx      ;row 1, column 2
    88     movd        eax, xmm5           ;column 0, rows 3 and 2
    89     movd        ecx, xmm4           ;column 2, rows 3 and 2
    90     psrldq      xmm5, 4
    91     psrldq      xmm4, 4
    92     mov         word ptr[rdx+32*8], ax      ;row 2, column 0
    93     mov         word ptr[rdx+32*10], cx     ;row 2, column 2
    94     shr         eax, 16
    95     shr         ecx, 16
    96     mov         word ptr[rdx+32*12], ax     ;row 3, column 0
    97     mov         word ptr[rdx+32*14], cx     ;row 3, column 2
    99     movd        eax, xmm5           ;column 1, rows 1 and 0
   100     movd        ecx, xmm4           ;column 3, rows 1 and 0
   101     psrldq      xmm5, 4
   102     psrldq      xmm4, 4
   103     mov         word ptr[rdx+32*1], ax      ;row 0, column 1
   104     mov         word ptr[rdx+32*3], cx      ;row 0, column 3
   105     shr         eax, 16
   106     shr         ecx, 16
   107     mov         word ptr[rdx+32*5], ax      ;row 1, column 1
   108     mov         word ptr[rdx+32*7], cx      ;row 1, column 3
   109     movd        eax, xmm5           ;column 1, rows 3 and 2
   110     movd        ecx, xmm4           ;column 3, rows 3 and 2
   111     mov         word ptr[rdx+32*9], ax      ;row 2, column 1
   112     mov         word ptr[rdx+32*11], cx     ;row 2, column 3
   113     shr         eax, 16
   114     shr         ecx, 16
   115     mov         word ptr[rdx+32*13], ax     ;row 3, column 1
   116     mov         word ptr[rdx+32*15], cx     ;row 3, column 3
   118     ; begin epilog
   119     UNSHADOW_ARGS
   120     pop         rbp
   121     ret

mercurial