media/libvpx/vp8/common/x86/sad_sse4.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 ;
     2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3 ;
     4 ;  Use of this source code is governed by a BSD-style license
     5 ;  that can be found in the LICENSE file in the root of the source
     6 ;  tree. An additional intellectual property rights grant can be found
     7 ;  in the file PATENTS.  All contributing project authors may
     8 ;  be found in the AUTHORS file in the root of the source tree.
     9 ;
    12 %include "vpx_ports/x86_abi_support.asm"
    14 %macro PROCESS_16X2X8 1
       ; PROCESS_16X2X8 <first>
       ; Processes two 16-byte-wide rows. For each row it forms eight SADs:
       ; the 16 source bytes at rsi against the 16-byte reference windows that
       ; start at rdi+0 .. rdi+7. The eight results are kept as 16-bit words.
       ;   argument != 0 : the first row's result is produced directly in xmm1,
       ;                   i.e. it starts a fresh accumulator.
       ;   argument == 0 : the first row's result is built in xmm5 and added
       ;                   to the running totals in xmm1.
       ; In:   rsi = source row, rax = source stride
       ;             (rows are read with movdqa, so rsi and rsi+rax must be
       ;              16-byte aligned)
       ;       rdi = reference row, rdx = reference stride
       ;             (24 bytes are read from each reference row)
       ; Out:  xmm1 = eight running word totals
       ;       rsi, rdi advanced by two rows
       ; Scratch: xmm0, xmm2, xmm3, xmm4, xmm5
       ; mpsadbw immediates used here:
       ;   0x0 = source bytes 0-3 against windows starting at dest byte 0
       ;   0x5 = source bytes 4-7 against windows starting at dest byte 4
    15 %if %1
    16         movdqa          xmm0,       XMMWORD PTR [rsi]   ; src[0..15], aligned load
    17         movq            xmm1,       MMWORD PTR [rdi]
    18         movq            xmm3,       MMWORD PTR [rdi+8]
    19         movq            xmm2,       MMWORD PTR [rdi+16]
    20         punpcklqdq      xmm1,       xmm3   ; xmm1 = ref[0..15]
    21         punpcklqdq      xmm3,       xmm2   ; xmm3 = ref[8..23]
    23         movdqa          xmm2,       xmm1
    24         mpsadbw         xmm1,       xmm0,  0x0   ; src[0..3]   vs ref[i..i+3],     i = 0..7
    25         mpsadbw         xmm2,       xmm0,  0x5   ; src[4..7]   vs ref[4+i..7+i]
    27         psrldq          xmm0,       8   ; move src[8..15] into the low half
    29         movdqa          xmm4,       xmm3
    30         mpsadbw         xmm3,       xmm0,  0x0   ; src[8..11]  vs ref[8+i..11+i]
    31         mpsadbw         xmm4,       xmm0,  0x5   ; src[12..15] vs ref[12+i..15+i]
    33         paddw           xmm1,       xmm2   ; xmm1 = sum of the four partial SADs
    34         paddw           xmm1,       xmm3
    35         paddw           xmm1,       xmm4
    36 %else
    37         movdqa          xmm0,       XMMWORD PTR [rsi]   ; src[0..15], aligned load
    38         movq            xmm5,       MMWORD PTR [rdi]
    39         movq            xmm3,       MMWORD PTR [rdi+8]
    40         movq            xmm2,       MMWORD PTR [rdi+16]
    41         punpcklqdq      xmm5,       xmm3   ; xmm5 = ref[0..15]
    42         punpcklqdq      xmm3,       xmm2   ; xmm3 = ref[8..23]
    44         movdqa          xmm2,       xmm5
    45         mpsadbw         xmm5,       xmm0,  0x0
    46         mpsadbw         xmm2,       xmm0,  0x5
    48         psrldq          xmm0,       8   ; move src[8..15] into the low half
    50         movdqa          xmm4,       xmm3
    51         mpsadbw         xmm3,       xmm0,  0x0
    52         mpsadbw         xmm4,       xmm0,  0x5
    54         paddw           xmm5,       xmm2   ; xmm5 = this row's eight SADs
    55         paddw           xmm5,       xmm3
    56         paddw           xmm5,       xmm4
    58         paddw           xmm1,       xmm5   ; fold the row into the running totals
    59 %endif
       ; Second row (always accumulated through xmm5).
    60         movdqa          xmm0,       XMMWORD PTR [rsi + rax]   ; next source row, aligned load
    61         movq            xmm5,       MMWORD PTR [rdi+ rdx]
    62         movq            xmm3,       MMWORD PTR [rdi+ rdx+8]
    63         movq            xmm2,       MMWORD PTR [rdi+ rdx+16]
    64         punpcklqdq      xmm5,       xmm3   ; xmm5 = ref[0..15] of the next row
    65         punpcklqdq      xmm3,       xmm2   ; xmm3 = ref[8..23] of the next row
    67         lea             rsi,        [rsi+rax*2]   ; step both pointers past the two rows
    68         lea             rdi,        [rdi+rdx*2]
    70         movdqa          xmm2,       xmm5
    71         mpsadbw         xmm5,       xmm0,  0x0
    72         mpsadbw         xmm2,       xmm0,  0x5
    74         psrldq          xmm0,       8   ; move src[8..15] into the low half
    75         movdqa          xmm4,       xmm3
    76         mpsadbw         xmm3,       xmm0,  0x0
    77         mpsadbw         xmm4,       xmm0,  0x5
    79         paddw           xmm5,       xmm2   ; xmm5 = this row's eight SADs
    80         paddw           xmm5,       xmm3
    81         paddw           xmm5,       xmm4
    83         paddw           xmm1,       xmm5   ; fold the row into the running totals
    84 %endmacro
    86 %macro PROCESS_8X2X8 1
       ; PROCESS_8X2X8 <first>
       ; Processes two 8-byte-wide rows. For each row it forms eight SADs:
       ; the 8 source bytes at rsi against the 8-byte reference windows that
       ; start at rdi+0 .. rdi+7. The eight results are kept as 16-bit words.
       ;   argument != 0 : the first row's result is produced directly in xmm1,
       ;                   i.e. it starts a fresh accumulator.
       ;   argument == 0 : the first row's result is built in xmm5 and added
       ;                   to the running totals in xmm1.
       ; In:   rsi = source row, rax = source stride (8 bytes read per row)
       ;       rdi = reference row, rdx = reference stride (16 bytes read per row)
       ; Out:  xmm1 = eight running word totals
       ;       rsi, rdi advanced by two rows
       ; Scratch: xmm0, xmm2, xmm3, xmm5
       ; mpsadbw immediates used here:
       ;   0x0 = source bytes 0-3 against windows starting at dest byte 0
       ;   0x5 = source bytes 4-7 against windows starting at dest byte 4
    87 %if %1
    88         movq            xmm0,       MMWORD PTR [rsi]   ; src[0..7]
    89         movq            xmm1,       MMWORD PTR [rdi]
    90         movq            xmm3,       MMWORD PTR [rdi+8]
    91         punpcklqdq      xmm1,       xmm3   ; xmm1 = ref[0..15]
    93         movdqa          xmm2,       xmm1
    94         mpsadbw         xmm1,       xmm0,  0x0   ; src[0..3] vs ref[i..i+3],   i = 0..7
    95         mpsadbw         xmm2,       xmm0,  0x5   ; src[4..7] vs ref[4+i..7+i]
    96         paddw           xmm1,       xmm2   ; xmm1 = this row's eight SADs
    97 %else
    98         movq            xmm0,       MMWORD PTR [rsi]   ; src[0..7]
    99         movq            xmm5,       MMWORD PTR [rdi]
   100         movq            xmm3,       MMWORD PTR [rdi+8]
   101         punpcklqdq      xmm5,       xmm3   ; xmm5 = ref[0..15]
   103         movdqa          xmm2,       xmm5
   104         mpsadbw         xmm5,       xmm0,  0x0
   105         mpsadbw         xmm2,       xmm0,  0x5
   106         paddw           xmm5,       xmm2   ; xmm5 = this row's eight SADs
   108         paddw           xmm1,       xmm5   ; fold the row into the running totals
   109 %endif
       ; Second row (always accumulated through xmm5).
   110         movq            xmm0,       MMWORD PTR [rsi + rax]   ; next source row
   111         movq            xmm5,       MMWORD PTR [rdi+ rdx]
   112         movq            xmm3,       MMWORD PTR [rdi+ rdx+8]
   113         punpcklqdq      xmm5,       xmm3   ; xmm5 = ref[0..15] of the next row
   115         lea             rsi,        [rsi+rax*2]   ; step both pointers past the two rows
   116         lea             rdi,        [rdi+rdx*2]
   118         movdqa          xmm2,       xmm5
   119         mpsadbw         xmm5,       xmm0,  0x0
   120         mpsadbw         xmm2,       xmm0,  0x5
   121         paddw           xmm5,       xmm2   ; xmm5 = this row's eight SADs
   123         paddw           xmm1,       xmm5   ; fold the row into the running totals
   124 %endmacro
   126 %macro PROCESS_4X2X8 1
       ; PROCESS_4X2X8 <first>
       ; Processes two 4-byte-wide rows. For each row it forms eight SADs:
       ; the 4 source bytes at rsi against the 4-byte reference windows that
       ; start at rdi+0 .. rdi+7. The eight results are kept as 16-bit words.
       ;   argument != 0 : the first row's result is produced directly in xmm1,
       ;                   i.e. it starts a fresh accumulator.
       ;   argument == 0 : the first row's result is built in xmm5 and added
       ;                   to the running totals in xmm1.
       ; In:   rsi = source row, rax = source stride (4 bytes read per row)
       ;       rdi = reference row, rdx = reference stride
       ;             (16 bytes are loaded per row; only the first 11 are used
       ;              by the single mpsadbw)
       ; Out:  xmm1 = eight running word totals
       ;       rsi, rdi advanced by two rows
       ; Scratch: xmm0, xmm3, xmm5
   127 %if %1
   128         movd            xmm0,       [rsi]   ; src[0..3]
   129         movq            xmm1,       MMWORD PTR [rdi]
   130         movq            xmm3,       MMWORD PTR [rdi+8]
   131         punpcklqdq      xmm1,       xmm3   ; xmm1 = ref[0..15]
   133         mpsadbw         xmm1,       xmm0,  0x0   ; src[0..3] vs ref[i..i+3], i = 0..7
   134 %else
   135         movd            xmm0,       [rsi]   ; src[0..3]
   136         movq            xmm5,       MMWORD PTR [rdi]
   137         movq            xmm3,       MMWORD PTR [rdi+8]
   138         punpcklqdq      xmm5,       xmm3   ; xmm5 = ref[0..15]
   140         mpsadbw         xmm5,       xmm0,  0x0   ; src[0..3] vs ref[i..i+3], i = 0..7
   142         paddw           xmm1,       xmm5   ; fold the row into the running totals
   143 %endif
       ; Second row (always accumulated through xmm5).
   144         movd            xmm0,       [rsi + rax]   ; next source row
   145         movq            xmm5,       MMWORD PTR [rdi+ rdx]
   146         movq            xmm3,       MMWORD PTR [rdi+ rdx+8]
   147         punpcklqdq      xmm5,       xmm3   ; xmm5 = ref[0..15] of the next row
   149         lea             rsi,        [rsi+rax*2]   ; step both pointers past the two rows
   150         lea             rdi,        [rdi+rdx*2]
   152         mpsadbw         xmm5,       xmm0,  0x0
   154         paddw           xmm1,       xmm5   ; fold the row into the running totals
   155 %endmacro
   158 ;void vp8_sad16x16x8_sse4(
   159 ;    const unsigned char *src_ptr,
   160 ;    int  src_stride,
   161 ;    const unsigned char *ref_ptr,
   162 ;    int  ref_stride,
   163 ;    unsigned short *sad_array);
       ;
       ; Computes eight sums of absolute differences for one 16x16 source
       ; block: sad_array[i] is the SAD of the block at src_ptr against the
       ; 16x16 reference block at ref_ptr + i, for i = 0..7.
       ; Requirements visible in the code:
       ;   - source rows are read with movdqa, so src_ptr and every
       ;     src_ptr + k*src_stride must be 16-byte aligned;
       ;   - sad_array is written with movdqa, so it must be 16-byte aligned;
       ;   - 24 bytes are read from each reference row.
       ; Uses xmm0-xmm5. sym(), arg(), SHADOW_ARGS_TO_STACK, UNSHADOW_ARGS and
       ; PRIVATE are not defined in this file; presumably they come from
       ; vpx_ports/x86_abi_support.asm.
   164 global sym(vp8_sad16x16x8_sse4) PRIVATE
   165 sym(vp8_sad16x16x8_sse4):
   166     push        rbp
   167     mov         rbp, rsp
   168     SHADOW_ARGS_TO_STACK 5
   169     push        rsi   ; rsi/rdi are overwritten below and restored in the epilog
   170     push        rdi
   171     ; end prolog
   173         mov             rsi,        arg(0)           ;src_ptr
   174         mov             rdi,        arg(2)           ;ref_ptr
   176         movsxd          rax,        dword ptr arg(1) ;src_stride
   177         movsxd          rdx,        dword ptr arg(3) ;ref_stride
       ; Each invocation consumes two rows; 8 invocations cover the 16 rows.
       ; The first one (argument 1) initialises the xmm1 accumulator.
   179         PROCESS_16X2X8 1
   180         PROCESS_16X2X8 0
   181         PROCESS_16X2X8 0
   182         PROCESS_16X2X8 0
   183         PROCESS_16X2X8 0
   184         PROCESS_16X2X8 0
   185         PROCESS_16X2X8 0
   186         PROCESS_16X2X8 0
   188         mov             rdi,        arg(4)           ;Results
   189         movdqa          XMMWORD PTR [rdi],    xmm1   ; aligned store of the eight word totals
   191     ; begin epilog
   192     pop         rdi
   193     pop         rsi
   194     UNSHADOW_ARGS
   195     pop         rbp
   196     ret
   199 ;void vp8_sad16x8x8_sse4(
   200 ;    const unsigned char *src_ptr,
   201 ;    int  src_stride,
   202 ;    const unsigned char *ref_ptr,
   203 ;    int  ref_stride,
   204 ;    unsigned short *sad_array
   205 ;);
       ;
       ; Computes eight sums of absolute differences for one 16-wide, 8-row
       ; source block: sad_array[i] is the SAD of the block at src_ptr against
       ; the same-sized reference block at ref_ptr + i, for i = 0..7.
       ; Requirements visible in the code:
       ;   - source rows are read with movdqa, so src_ptr and every
       ;     src_ptr + k*src_stride must be 16-byte aligned;
       ;   - sad_array is written with movdqa, so it must be 16-byte aligned;
       ;   - 24 bytes are read from each reference row.
       ; Uses xmm0-xmm5. sym(), arg(), SHADOW_ARGS_TO_STACK, UNSHADOW_ARGS and
       ; PRIVATE are not defined in this file; presumably they come from
       ; vpx_ports/x86_abi_support.asm.
   206 global sym(vp8_sad16x8x8_sse4) PRIVATE
   207 sym(vp8_sad16x8x8_sse4):
   208     push        rbp
   209     mov         rbp, rsp
   210     SHADOW_ARGS_TO_STACK 5
   211     push        rsi   ; rsi/rdi are overwritten below and restored in the epilog
   212     push        rdi
   213     ; end prolog
   215         mov             rsi,        arg(0)           ;src_ptr
   216         mov             rdi,        arg(2)           ;ref_ptr
   218         movsxd          rax,        dword ptr arg(1) ;src_stride
   219         movsxd          rdx,        dword ptr arg(3) ;ref_stride
       ; Each invocation consumes two rows; 4 invocations cover the 8 rows.
       ; The first one (argument 1) initialises the xmm1 accumulator.
   221         PROCESS_16X2X8 1
   222         PROCESS_16X2X8 0
   223         PROCESS_16X2X8 0
   224         PROCESS_16X2X8 0
   226         mov             rdi,        arg(4)           ;Results
   227         movdqa          XMMWORD PTR [rdi],    xmm1   ; aligned store of the eight word totals
   229     ; begin epilog
   230     pop         rdi
   231     pop         rsi
   232     UNSHADOW_ARGS
   233     pop         rbp
   234     ret
   237 ;void vp8_sad8x8x8_sse4(
   238 ;    const unsigned char *src_ptr,
   239 ;    int  src_stride,
   240 ;    const unsigned char *ref_ptr,
   241 ;    int  ref_stride,
   242 ;    unsigned short *sad_array
   243 ;);
       ;
       ; Computes eight sums of absolute differences for one 8x8 source
       ; block: sad_array[i] is the SAD of the block at src_ptr against the
       ; 8x8 reference block at ref_ptr + i, for i = 0..7.
       ; Requirements visible in the code:
       ;   - sad_array is written with movdqa, so it must be 16-byte aligned;
       ;   - 16 bytes are read from each reference row.
       ; Source rows are read with movq, which has no alignment requirement.
       ; Uses xmm0, xmm1, xmm2, xmm3 and xmm5. sym(), arg(),
       ; SHADOW_ARGS_TO_STACK, UNSHADOW_ARGS and PRIVATE are not defined in
       ; this file; presumably they come from vpx_ports/x86_abi_support.asm.
   244 global sym(vp8_sad8x8x8_sse4) PRIVATE
   245 sym(vp8_sad8x8x8_sse4):
   246     push        rbp
   247     mov         rbp, rsp
   248     SHADOW_ARGS_TO_STACK 5
   249     push        rsi   ; rsi/rdi are overwritten below and restored in the epilog
   250     push        rdi
   251     ; end prolog
   253         mov             rsi,        arg(0)           ;src_ptr
   254         mov             rdi,        arg(2)           ;ref_ptr
   256         movsxd          rax,        dword ptr arg(1) ;src_stride
   257         movsxd          rdx,        dword ptr arg(3) ;ref_stride
       ; Each invocation consumes two rows; 4 invocations cover the 8 rows.
       ; The first one (argument 1) initialises the xmm1 accumulator.
   259         PROCESS_8X2X8 1
   260         PROCESS_8X2X8 0
   261         PROCESS_8X2X8 0
   262         PROCESS_8X2X8 0
   264         mov             rdi,        arg(4)           ;Results
   265         movdqa          XMMWORD PTR [rdi],    xmm1   ; aligned store of the eight word totals
   267     ; begin epilog
   268     pop         rdi
   269     pop         rsi
   270     UNSHADOW_ARGS
   271     pop         rbp
   272     ret
   275 ;void vp8_sad8x16x8_sse4(
   276 ;    const unsigned char *src_ptr,
   277 ;    int  src_stride,
   278 ;    const unsigned char *ref_ptr,
   279 ;    int  ref_stride,
   280 ;    unsigned short *sad_array
   281 ;);
       ;
       ; Computes eight sums of absolute differences for one 8-wide, 16-row
       ; source block: sad_array[i] is the SAD of the block at src_ptr against
       ; the same-sized reference block at ref_ptr + i, for i = 0..7.
       ; Requirements visible in the code:
       ;   - sad_array is written with movdqa, so it must be 16-byte aligned;
       ;   - 16 bytes are read from each reference row.
       ; Source rows are read with movq, which has no alignment requirement.
       ; Uses xmm0, xmm1, xmm2, xmm3 and xmm5. sym(), arg(),
       ; SHADOW_ARGS_TO_STACK, UNSHADOW_ARGS and PRIVATE are not defined in
       ; this file; presumably they come from vpx_ports/x86_abi_support.asm.
   282 global sym(vp8_sad8x16x8_sse4) PRIVATE
   283 sym(vp8_sad8x16x8_sse4):
   284     push        rbp
   285     mov         rbp, rsp
   286     SHADOW_ARGS_TO_STACK 5
   287     push        rsi   ; rsi/rdi are overwritten below and restored in the epilog
   288     push        rdi
   289     ; end prolog
   291         mov             rsi,        arg(0)           ;src_ptr
   292         mov             rdi,        arg(2)           ;ref_ptr
   294         movsxd          rax,        dword ptr arg(1) ;src_stride
   295         movsxd          rdx,        dword ptr arg(3) ;ref_stride
       ; Each invocation consumes two rows; 8 invocations cover the 16 rows.
       ; The first one (argument 1) initialises the xmm1 accumulator.
   297         PROCESS_8X2X8 1
   298         PROCESS_8X2X8 0
   299         PROCESS_8X2X8 0
   300         PROCESS_8X2X8 0
   301         PROCESS_8X2X8 0
   302         PROCESS_8X2X8 0
   303         PROCESS_8X2X8 0
   304         PROCESS_8X2X8 0
   305         mov             rdi,        arg(4)           ;Results
   306         movdqa          XMMWORD PTR [rdi],    xmm1   ; aligned store of the eight word totals
   308     ; begin epilog
   309     pop         rdi
   310     pop         rsi
   311     UNSHADOW_ARGS
   312     pop         rbp
   313     ret
   316 ;void vp8_sad4x4x8_sse4(
   317 ;    const unsigned char *src_ptr,
   318 ;    int  src_stride,
   319 ;    const unsigned char *ref_ptr,
   320 ;    int  ref_stride,
   321 ;    unsigned short *sad_array
   322 ;);
       ;
       ; Computes eight sums of absolute differences for one 4x4 source
       ; block: sad_array[i] is the SAD of the block at src_ptr against the
       ; 4x4 reference block at ref_ptr + i, for i = 0..7.
       ; Requirements visible in the code:
       ;   - sad_array is written with movdqa, so it must be 16-byte aligned;
       ;   - 16 bytes are loaded from each reference row, although only the
       ;     first 11 contribute to the result.
       ; Source rows are read with movd, which has no alignment requirement.
       ; Uses xmm0, xmm1, xmm3 and xmm5. sym(), arg(), SHADOW_ARGS_TO_STACK,
       ; UNSHADOW_ARGS and PRIVATE are not defined in this file; presumably
       ; they come from vpx_ports/x86_abi_support.asm.
   323 global sym(vp8_sad4x4x8_sse4) PRIVATE
   324 sym(vp8_sad4x4x8_sse4):
   325     push        rbp
   326     mov         rbp, rsp
   327     SHADOW_ARGS_TO_STACK 5
   328     push        rsi   ; rsi/rdi are overwritten below and restored in the epilog
   329     push        rdi
   330     ; end prolog
   332         mov             rsi,        arg(0)           ;src_ptr
   333         mov             rdi,        arg(2)           ;ref_ptr
   335         movsxd          rax,        dword ptr arg(1) ;src_stride
   336         movsxd          rdx,        dword ptr arg(3) ;ref_stride
       ; Each invocation consumes two rows; 2 invocations cover the 4 rows.
       ; The first one (argument 1) initialises the xmm1 accumulator.
   338         PROCESS_4X2X8 1
   339         PROCESS_4X2X8 0
   341         mov             rdi,        arg(4)           ;Results
   342         movdqa          XMMWORD PTR [rdi],    xmm1   ; aligned store of the eight word totals
   344     ; begin epilog
   345     pop         rdi
   346     pop         rsi
   347     UNSHADOW_ARGS
   348     pop         rbp
   349     ret

mercurial