media/libvpx/vp9/encoder/x86/vp9_sad_sse4.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701 and complies with the disk-avoidance requirements documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 ;
     2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3 ;
     4 ;  Use of this source code is governed by a BSD-style license
     5 ;  that can be found in the LICENSE file in the root of the source
     6 ;  tree. An additional intellectual property rights grant can be found
     7 ;  in the file PATENTS.  All contributing project authors may
     8 ;  be found in the AUTHORS file in the root of the source tree.
     9 ;
    12 %include "vpx_ports/x86_abi_support.asm"
    14 %macro PROCESS_16X2X8 1
       ; PROCESS_16X2X8 <first>
       ;
       ; Processes two 16-byte source rows (at rsi and rsi+rax) against the
       ; reference rows at rdi and rdi+rdx.  For each of the 8 byte offsets
       ; j = 0..7 into the reference row, the 16-byte sum of absolute
       ; differences is accumulated into 16-bit lane j of xmm1.
       ;
       ;   %1 != 0 : first row's result is written directly into xmm1
       ;             (initialises the accumulator).
       ;   %1 == 0 : first row's result is built in xmm5 and added to xmm1.
       ;
       ; In:       rsi = source row, rax = source stride,
       ;           rdi = reference row, rdx = reference stride
       ; Out:      xmm1 updated; rsi += 2*rax; rdi += 2*rdx
       ; Clobbers: xmm0, xmm2, xmm3, xmm4, xmm5
       ;
       ; Source rows are loaded with movdqa, so each must be 16-byte aligned.
       ; 24 bytes are read from each reference row (offsets 0..23).
       ;
       ; mpsadbw immediates used below:
       ;   0x0 : source bytes 0..3 vs. the 8 sliding 4-byte windows that start
       ;         at destination bytes 0..7
       ;   0x5 : source bytes 4..7 vs. the 8 sliding windows that start at
       ;         destination bytes 4..11
       ; Adding the two results gives an 8-byte SAD at each of the 8 offsets.
    15 %if %1
    16         movdqa          xmm0,       XMMWORD PTR [rsi]      ; src[0..15] (aligned load)
    17         movq            xmm1,       MMWORD PTR [rdi]       ; ref[0..7]
    18         movq            xmm3,       MMWORD PTR [rdi+8]     ; ref[8..15]
    19         movq            xmm2,       MMWORD PTR [rdi+16]    ; ref[16..23]
    20         punpcklqdq      xmm1,       xmm3                   ; xmm1 = ref[0..15]
    21         punpcklqdq      xmm3,       xmm2                   ; xmm3 = ref[8..23]
    23         movdqa          xmm2,       xmm1                   ; copy: mpsadbw overwrites its destination
    24         mpsadbw         xmm1,       xmm0,  0x0             ; src[0..3]  vs ref windows
    25         mpsadbw         xmm2,       xmm0,  0x5             ; src[4..7]  vs ref windows
    27         psrldq          xmm0,       8                      ; move src[8..15] into the low 8 bytes
    29         movdqa          xmm4,       xmm3
    30         mpsadbw         xmm3,       xmm0,  0x0             ; src[8..11]  vs ref windows
    31         mpsadbw         xmm4,       xmm0,  0x5             ; src[12..15] vs ref windows
    33         paddw           xmm1,       xmm2                   ; xmm1 = full 16-byte SADs for this row
    34         paddw           xmm1,       xmm3
    35         paddw           xmm1,       xmm4
    36 %else
               ; Same computation as above, but built in xmm5 so the running
               ; totals already held in xmm1 are preserved.
    37         movdqa          xmm0,       XMMWORD PTR [rsi]
    38         movq            xmm5,       MMWORD PTR [rdi]
    39         movq            xmm3,       MMWORD PTR [rdi+8]
    40         movq            xmm2,       MMWORD PTR [rdi+16]
    41         punpcklqdq      xmm5,       xmm3
    42         punpcklqdq      xmm3,       xmm2
    44         movdqa          xmm2,       xmm5
    45         mpsadbw         xmm5,       xmm0,  0x0
    46         mpsadbw         xmm2,       xmm0,  0x5
    48         psrldq          xmm0,       8
    50         movdqa          xmm4,       xmm3
    51         mpsadbw         xmm3,       xmm0,  0x0
    52         mpsadbw         xmm4,       xmm0,  0x5
    54         paddw           xmm5,       xmm2
    55         paddw           xmm5,       xmm3
    56         paddw           xmm5,       xmm4
    58         paddw           xmm1,       xmm5                   ; fold this row into the accumulator
    59 %endif
               ; Second row of the pair: always computed in xmm5 and added to xmm1.
    60         movdqa          xmm0,       XMMWORD PTR [rsi + rax]
    61         movq            xmm5,       MMWORD PTR [rdi+ rdx]
    62         movq            xmm3,       MMWORD PTR [rdi+ rdx+8]
    63         movq            xmm2,       MMWORD PTR [rdi+ rdx+16]
    64         punpcklqdq      xmm5,       xmm3
    65         punpcklqdq      xmm3,       xmm2
               ; All loads for this row pair are done; step both cursors down two rows.
    67         lea             rsi,        [rsi+rax*2]
    68         lea             rdi,        [rdi+rdx*2]
    70         movdqa          xmm2,       xmm5
    71         mpsadbw         xmm5,       xmm0,  0x0
    72         mpsadbw         xmm2,       xmm0,  0x5
    74         psrldq          xmm0,       8
    75         movdqa          xmm4,       xmm3
    76         mpsadbw         xmm3,       xmm0,  0x0
    77         mpsadbw         xmm4,       xmm0,  0x5
    79         paddw           xmm5,       xmm2
    80         paddw           xmm5,       xmm3
    81         paddw           xmm5,       xmm4
    83         paddw           xmm1,       xmm5
    84 %endmacro
    86 %macro PROCESS_8X2X8 1
       ; PROCESS_8X2X8 <first>
       ;
       ; Processes two 8-byte source rows (at rsi and rsi+rax) against the
       ; reference rows at rdi and rdi+rdx.  For each of the 8 byte offsets
       ; j = 0..7 into the reference row, the 8-byte sum of absolute
       ; differences is accumulated into 16-bit lane j of xmm1.
       ;
       ;   %1 != 0 : first row's result is written directly into xmm1
       ;             (initialises the accumulator).
       ;   %1 == 0 : first row's result is built in xmm5 and added to xmm1.
       ;
       ; In:       rsi = source row, rax = source stride,
       ;           rdi = reference row, rdx = reference stride
       ; Out:      xmm1 updated; rsi += 2*rax; rdi += 2*rdx
       ; Clobbers: xmm0, xmm2, xmm3, xmm5
       ;
       ; Source rows are loaded with movq (no alignment requirement).
       ; 16 bytes are read from each reference row (offsets 0..15).
    87 %if %1
    88         movq            xmm0,       MMWORD PTR [rsi]       ; src[0..7]
    89         movq            xmm1,       MMWORD PTR [rdi]       ; ref[0..7]
    90         movq            xmm3,       MMWORD PTR [rdi+8]     ; ref[8..15]
    91         punpcklqdq      xmm1,       xmm3                   ; xmm1 = ref[0..15]
    93         movdqa          xmm2,       xmm1                   ; copy: mpsadbw overwrites its destination
    94         mpsadbw         xmm1,       xmm0,  0x0             ; src[0..3] vs windows starting at ref[0..7]
    95         mpsadbw         xmm2,       xmm0,  0x5             ; src[4..7] vs windows starting at ref[4..11]
    96         paddw           xmm1,       xmm2                   ; 8-byte SAD at each of the 8 offsets
    97 %else
               ; Same computation, built in xmm5 to preserve the totals in xmm1.
    98         movq            xmm0,       MMWORD PTR [rsi]
    99         movq            xmm5,       MMWORD PTR [rdi]
   100         movq            xmm3,       MMWORD PTR [rdi+8]
   101         punpcklqdq      xmm5,       xmm3
   103         movdqa          xmm2,       xmm5
   104         mpsadbw         xmm5,       xmm0,  0x0
   105         mpsadbw         xmm2,       xmm0,  0x5
   106         paddw           xmm5,       xmm2
   108         paddw           xmm1,       xmm5                   ; fold this row into the accumulator
   109 %endif
               ; Second row of the pair: always computed in xmm5 and added to xmm1.
   110         movq            xmm0,       MMWORD PTR [rsi + rax]
   111         movq            xmm5,       MMWORD PTR [rdi+ rdx]
   112         movq            xmm3,       MMWORD PTR [rdi+ rdx+8]
   113         punpcklqdq      xmm5,       xmm3
               ; All loads for this row pair are done; step both cursors down two rows.
   115         lea             rsi,        [rsi+rax*2]
   116         lea             rdi,        [rdi+rdx*2]
   118         movdqa          xmm2,       xmm5
   119         mpsadbw         xmm5,       xmm0,  0x0
   120         mpsadbw         xmm2,       xmm0,  0x5
   121         paddw           xmm5,       xmm2
   123         paddw           xmm1,       xmm5
   124 %endmacro
   126 %macro PROCESS_4X2X8 1
       ; PROCESS_4X2X8 <first>
       ;
       ; Processes two 4-byte source rows (at rsi and rsi+rax) against the
       ; reference rows at rdi and rdi+rdx.  For each of the 8 byte offsets
       ; j = 0..7 into the reference row, the 4-byte sum of absolute
       ; differences is accumulated into 16-bit lane j of xmm1.  A single
       ; mpsadbw (imm 0x0: source bytes 0..3, windows starting at destination
       ; bytes 0..7) covers the whole row.
       ;
       ;   %1 != 0 : first row's result is written directly into xmm1
       ;             (initialises the accumulator).
       ;   %1 == 0 : first row's result is built in xmm5 and added to xmm1.
       ;
       ; In:       rsi = source row, rax = source stride,
       ;           rdi = reference row, rdx = reference stride
       ; Out:      xmm1 updated; rsi += 2*rax; rdi += 2*rdx
       ; Clobbers: xmm0, xmm3, xmm5
       ;
       ; 16 bytes are read from each reference row, although only bytes 0..10
       ; contribute to the result.
   127 %if %1
   128         movd            xmm0,       [rsi]                  ; src[0..3]
   129         movq            xmm1,       MMWORD PTR [rdi]       ; ref[0..7]
   130         movq            xmm3,       MMWORD PTR [rdi+8]     ; ref[8..15]
   131         punpcklqdq      xmm1,       xmm3                   ; xmm1 = ref[0..15]
   133         mpsadbw         xmm1,       xmm0,  0x0             ; 4-byte SAD at each of the 8 offsets
   134 %else
               ; Same computation, built in xmm5 to preserve the totals in xmm1.
   135         movd            xmm0,       [rsi]
   136         movq            xmm5,       MMWORD PTR [rdi]
   137         movq            xmm3,       MMWORD PTR [rdi+8]
   138         punpcklqdq      xmm5,       xmm3
   140         mpsadbw         xmm5,       xmm0,  0x0
   142         paddw           xmm1,       xmm5                   ; fold this row into the accumulator
   143 %endif
               ; Second row of the pair: always computed in xmm5 and added to xmm1.
   144         movd            xmm0,       [rsi + rax]
   145         movq            xmm5,       MMWORD PTR [rdi+ rdx]
   146         movq            xmm3,       MMWORD PTR [rdi+ rdx+8]
   147         punpcklqdq      xmm5,       xmm3
               ; All loads for this row pair are done; step both cursors down two rows.
   149         lea             rsi,        [rsi+rax*2]
   150         lea             rdi,        [rdi+rdx*2]
   152         mpsadbw         xmm5,       xmm0,  0x0
   154         paddw           xmm1,       xmm5
   155 %endmacro
   157 %macro WRITE_AS_INTS 0
       ; WRITE_AS_INTS
       ;
       ; Zero-extends the eight 16-bit totals in xmm1 to 32 bits each and
       ; stores them as 32 contiguous bytes at the address held in arg(4).
       ;
       ; In:       xmm1 = eight 16-bit values
       ; Clobbers: rdi, xmm0, xmm1, xmm2
       ;
       ; Both stores use movdqa, so the destination must be 16-byte aligned.
       ; NOTE(review): alignment of the caller's buffer is not visible here -
       ; confirm every caller passes a 16-byte-aligned array.
   158     mov             rdi,        arg(4)           ;Results
   159     pxor            xmm0, xmm0                   ; zero source for the unpack
   160     movdqa          xmm2, xmm1                   ; keep a copy for the high half
   161     punpcklwd       xmm1, xmm0                   ; words 0..3 -> dwords 0..3
   162     punpckhwd       xmm2, xmm0                   ; words 4..7 -> dwords 4..7
   164     movdqa          [rdi],    xmm1
   165     movdqa          [rdi + 16],    xmm2
   166 %endmacro
   168 ;void vp9_sad16x16x8_sse4(
   169 ;    const unsigned char *src_ptr,
   170 ;    int  src_stride,
   171 ;    const unsigned char *ref_ptr,
   172 ;    int  ref_stride,
   173 ;    unsigned int *sad_array);
       ;
       ; Computes the sum of absolute differences between the 16x16 block at
       ; src_ptr and the eight 16x16 reference blocks starting at
       ; ref_ptr + 0 .. ref_ptr + 7, and writes the eight results to
       ; sad_array[0..7] as 32-bit values.
       ;
       ; Requirements visible in the code:
       ;   - every source row is read with movdqa, so src_ptr and src_stride
       ;     must keep each row 16-byte aligned;
       ;   - sad_array is written with movdqa (32 bytes), so it must be
       ;     16-byte aligned;
       ;   - 24 bytes are read from each of the 16 reference rows.
       ; Totals are accumulated in 16-bit lanes; the maximum for this block
       ; size is 16*16*255 = 65280, which still fits.
       ;
       ; NOTE(review): sad_array was previously documented as unsigned short*,
       ; but WRITE_AS_INTS stores eight 32-bit elements - confirm against the
       ; C declaration.
   174 global sym(vp9_sad16x16x8_sse4) PRIVATE
   175 sym(vp9_sad16x16x8_sse4):
   176     push        rbp
   177     mov         rbp, rsp
   178     SHADOW_ARGS_TO_STACK 5                       ; presumably makes the 5 arguments reachable via arg(n); defined in the included ABI helper
   179     push        rsi
   180     push        rdi
   181     ; end prolog
   183     mov             rsi,        arg(0)           ;src_ptr
   184     mov             rdi,        arg(2)           ;ref_ptr
   186     movsxd          rax,        dword ptr arg(1) ;src_stride
   187     movsxd          rdx,        dword ptr arg(3) ;ref_stride
           ; 8 invocations x 2 rows each = 16 rows; only the first one
           ; initialises the accumulator in xmm1.
   189     PROCESS_16X2X8 1
   190     PROCESS_16X2X8 0
   191     PROCESS_16X2X8 0
   192     PROCESS_16X2X8 0
   193     PROCESS_16X2X8 0
   194     PROCESS_16X2X8 0
   195     PROCESS_16X2X8 0
   196     PROCESS_16X2X8 0
   198     WRITE_AS_INTS                                ; reloads rdi from arg(4) and stores the 8 results
   200     ; begin epilog
   201     pop         rdi
   202     pop         rsi
   203     UNSHADOW_ARGS
   204     pop         rbp
   205     ret
   208 ;void vp9_sad16x8x8_sse4(
   209 ;    const unsigned char *src_ptr,
   210 ;    int  src_stride,
   211 ;    const unsigned char *ref_ptr,
   212 ;    int  ref_stride,
   213 ;    unsigned int *sad_array
   214 ;);
       ;
       ; Computes the sum of absolute differences between the 16-wide, 8-high
       ; block at src_ptr and the eight reference blocks of the same size
       ; starting at ref_ptr + 0 .. ref_ptr + 7, and writes the eight results
       ; to sad_array[0..7] as 32-bit values.
       ;
       ; Requirements visible in the code:
       ;   - every source row is read with movdqa, so src_ptr and src_stride
       ;     must keep each row 16-byte aligned;
       ;   - sad_array is written with movdqa (32 bytes), so it must be
       ;     16-byte aligned;
       ;   - 24 bytes are read from each of the 8 reference rows.
       ;
       ; NOTE(review): sad_array was previously documented as unsigned short*,
       ; but WRITE_AS_INTS stores eight 32-bit elements - confirm against the
       ; C declaration.
   215 global sym(vp9_sad16x8x8_sse4) PRIVATE
   216 sym(vp9_sad16x8x8_sse4):
   217     push        rbp
   218     mov         rbp, rsp
   219     SHADOW_ARGS_TO_STACK 5                       ; presumably makes the 5 arguments reachable via arg(n); defined in the included ABI helper
   220     push        rsi
   221     push        rdi
   222     ; end prolog
   224     mov             rsi,        arg(0)           ;src_ptr
   225     mov             rdi,        arg(2)           ;ref_ptr
   227     movsxd          rax,        dword ptr arg(1) ;src_stride
   228     movsxd          rdx,        dword ptr arg(3) ;ref_stride
           ; 4 invocations x 2 rows each = 8 rows; only the first one
           ; initialises the accumulator in xmm1.
   230     PROCESS_16X2X8 1
   231     PROCESS_16X2X8 0
   232     PROCESS_16X2X8 0
   233     PROCESS_16X2X8 0
   235     WRITE_AS_INTS                                ; reloads rdi from arg(4) and stores the 8 results
   237     ; begin epilog
   238     pop         rdi
   239     pop         rsi
   240     UNSHADOW_ARGS
   241     pop         rbp
   242     ret
   245 ;void vp9_sad8x8x8_sse4(
   246 ;    const unsigned char *src_ptr,
   247 ;    int  src_stride,
   248 ;    const unsigned char *ref_ptr,
   249 ;    int  ref_stride,
   250 ;    unsigned int *sad_array
   251 ;);
       ;
       ; Computes the sum of absolute differences between the 8x8 block at
       ; src_ptr and the eight 8x8 reference blocks starting at
       ; ref_ptr + 0 .. ref_ptr + 7, and writes the eight results to
       ; sad_array[0..7] as 32-bit values.
       ;
       ; Requirements visible in the code:
       ;   - sad_array is written with movdqa (32 bytes), so it must be
       ;     16-byte aligned;
       ;   - 16 bytes are read from each of the 8 reference rows.
       ; Source rows are read with movq and need no particular alignment.
       ;
       ; NOTE(review): sad_array was previously documented as unsigned short*,
       ; but WRITE_AS_INTS stores eight 32-bit elements - confirm against the
       ; C declaration.
   252 global sym(vp9_sad8x8x8_sse4) PRIVATE
   253 sym(vp9_sad8x8x8_sse4):
   254     push        rbp
   255     mov         rbp, rsp
   256     SHADOW_ARGS_TO_STACK 5                       ; presumably makes the 5 arguments reachable via arg(n); defined in the included ABI helper
   257     push        rsi
   258     push        rdi
   259     ; end prolog
   261     mov             rsi,        arg(0)           ;src_ptr
   262     mov             rdi,        arg(2)           ;ref_ptr
   264     movsxd          rax,        dword ptr arg(1) ;src_stride
   265     movsxd          rdx,        dword ptr arg(3) ;ref_stride
           ; 4 invocations x 2 rows each = 8 rows; only the first one
           ; initialises the accumulator in xmm1.
   267     PROCESS_8X2X8 1
   268     PROCESS_8X2X8 0
   269     PROCESS_8X2X8 0
   270     PROCESS_8X2X8 0
   272     WRITE_AS_INTS                                ; reloads rdi from arg(4) and stores the 8 results
   274     ; begin epilog
   275     pop         rdi
   276     pop         rsi
   277     UNSHADOW_ARGS
   278     pop         rbp
   279     ret
   282 ;void vp9_sad8x16x8_sse4(
   283 ;    const unsigned char *src_ptr,
   284 ;    int  src_stride,
   285 ;    const unsigned char *ref_ptr,
   286 ;    int  ref_stride,
   287 ;    unsigned int *sad_array
   288 ;);
       ;
       ; Computes the sum of absolute differences between the 8-wide, 16-high
       ; block at src_ptr and the eight reference blocks of the same size
       ; starting at ref_ptr + 0 .. ref_ptr + 7, and writes the eight results
       ; to sad_array[0..7] as 32-bit values.
       ;
       ; Requirements visible in the code:
       ;   - sad_array is written with movdqa (32 bytes), so it must be
       ;     16-byte aligned;
       ;   - 16 bytes are read from each of the 16 reference rows.
       ; Source rows are read with movq and need no particular alignment.
       ;
       ; NOTE(review): sad_array was previously documented as unsigned short*,
       ; but WRITE_AS_INTS stores eight 32-bit elements - confirm against the
       ; C declaration.
   289 global sym(vp9_sad8x16x8_sse4) PRIVATE
   290 sym(vp9_sad8x16x8_sse4):
   291     push        rbp
   292     mov         rbp, rsp
   293     SHADOW_ARGS_TO_STACK 5                       ; presumably makes the 5 arguments reachable via arg(n); defined in the included ABI helper
   294     push        rsi
   295     push        rdi
   296     ; end prolog
   298     mov             rsi,        arg(0)           ;src_ptr
   299     mov             rdi,        arg(2)           ;ref_ptr
   301     movsxd          rax,        dword ptr arg(1) ;src_stride
   302     movsxd          rdx,        dword ptr arg(3) ;ref_stride
           ; 8 invocations x 2 rows each = 16 rows; only the first one
           ; initialises the accumulator in xmm1.
   304     PROCESS_8X2X8 1
   305     PROCESS_8X2X8 0
   306     PROCESS_8X2X8 0
   307     PROCESS_8X2X8 0
   308     PROCESS_8X2X8 0
   309     PROCESS_8X2X8 0
   310     PROCESS_8X2X8 0
   311     PROCESS_8X2X8 0
   313     WRITE_AS_INTS                                ; reloads rdi from arg(4) and stores the 8 results
   315     ; begin epilog
   316     pop         rdi
   317     pop         rsi
   318     UNSHADOW_ARGS
   319     pop         rbp
   320     ret
   323 ;void vp9_sad4x4x8_sse4(
   324 ;    const unsigned char *src_ptr,
   325 ;    int  src_stride,
   326 ;    const unsigned char *ref_ptr,
   327 ;    int  ref_stride,
   328 ;    unsigned int *sad_array
   329 ;);
       ;
       ; Computes the sum of absolute differences between the 4x4 block at
       ; src_ptr and the eight 4x4 reference blocks starting at
       ; ref_ptr + 0 .. ref_ptr + 7, and writes the eight results to
       ; sad_array[0..7] as 32-bit values.
       ;
       ; Requirements visible in the code:
       ;   - sad_array is written with movdqa (32 bytes), so it must be
       ;     16-byte aligned;
       ;   - 16 bytes are read from each of the 4 reference rows.
       ; Source rows are read with movd and need no particular alignment.
       ;
       ; NOTE(review): the prototype comment previously named this function
       ; vp9_sad4x4x8_c and declared sad_array as unsigned short*; the label
       ; below is vp9_sad4x4x8_sse4 and WRITE_AS_INTS stores eight 32-bit
       ; elements - confirm the type against the C declaration.
   330 global sym(vp9_sad4x4x8_sse4) PRIVATE
   331 sym(vp9_sad4x4x8_sse4):
   332     push        rbp
   333     mov         rbp, rsp
   334     SHADOW_ARGS_TO_STACK 5                       ; presumably makes the 5 arguments reachable via arg(n); defined in the included ABI helper
   335     push        rsi
   336     push        rdi
   337     ; end prolog
   339     mov             rsi,        arg(0)           ;src_ptr
   340     mov             rdi,        arg(2)           ;ref_ptr
   342     movsxd          rax,        dword ptr arg(1) ;src_stride
   343     movsxd          rdx,        dword ptr arg(3) ;ref_stride
           ; 2 invocations x 2 rows each = 4 rows; only the first one
           ; initialises the accumulator in xmm1.
   345     PROCESS_4X2X8 1
   346     PROCESS_4X2X8 0
   348     WRITE_AS_INTS                                ; reloads rdi from arg(4) and stores the 8 results
   350     ; begin epilog
   351     pop         rdi
   352     pop         rsi
   353     UNSHADOW_ARGS
   354     pop         rbp
   355     ret

mercurial