media/libvpx/vp8/common/x86/sad_sse2.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 ;
     2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3 ;
     4 ;  Use of this source code is governed by a BSD-style license
     5 ;  that can be found in the LICENSE file in the root of the source
     6 ;  tree. An additional intellectual property rights grant can be found
     7 ;  in the file PATENTS.  All contributing project authors may
     8 ;  be found in the AUTHORS file in the root of the source tree.
     9 ;
    12 %include "vpx_ports/x86_abi_support.asm"
    14 ;unsigned int vp8_sad16x16_wmt(
    15 ;    unsigned char *src_ptr,
    16 ;    int  src_stride,
    17 ;    unsigned char *ref_ptr,
    18 ;    int  ref_stride)
       ;
       ; Sum of absolute differences (SAD) between the 16x16 byte block at
       ; src_ptr and the 16x16 byte block at ref_ptr. The total is returned in rax.
       ; Two rows are handled per loop iteration; xmm6 accumulates the result.
       ;
       ; Register use inside the body:
       ;   rsi = current src row      rax = src_stride (sign-extended)
       ;   rdi = current ref row      rdx = ref_stride (sign-extended)
       ;   rcx = src_ptr + 16 * src_stride (loop end marker)
       ;
       ; SHADOW_ARGS_TO_STACK / arg() / SAVE_XMM are macros that are not defined
       ; in this file (presumably they come from x86_abi_support.asm and hide
       ; the per-ABI argument access and xmm6 preservation - confirm there).
    19 global sym(vp8_sad16x16_wmt) PRIVATE
    20 sym(vp8_sad16x16_wmt):
    21     push        rbp
    22     mov         rbp, rsp
    23     SHADOW_ARGS_TO_STACK 4
    24     SAVE_XMM 6
    25     push        rsi
    26     push        rdi
    27     ; end prolog
    29         mov             rsi,        arg(0) ;src_ptr
    30         mov             rdi,        arg(2) ;ref_ptr
    32         movsxd          rax,        dword ptr arg(1) ;src_stride
    33         movsxd          rdx,        dword ptr arg(3) ;ref_stride
               ; rcx = rsi + 16 * src_stride, built as two steps of 8 * stride
               ; because the addressing mode scales by at most 8.
    35         lea             rcx,        [rsi+rax*8]
    37         lea             rcx,        [rcx+rax*8]
    38         pxor            xmm6,       xmm6 ; running SAD = 0
    40 .x16x16sad_wmt_loop:
               ; Row n: load the left and right 8 bytes of src and ref separately.
    42         movq            xmm0,       QWORD PTR [rsi]
    43         movq            xmm2,       QWORD PTR [rsi+8]
    45         movq            xmm1,       QWORD PTR [rdi]
    46         movq            xmm3,       QWORD PTR [rdi+8]
               ; Row n+1: left 8 bytes (the right halves are loaded below).
    48         movq            xmm4,       QWORD PTR [rsi+rax]
    49         movq            xmm5,       QWORD PTR [rdi+rdx]
               ; Interleave the two halves into one 16-byte vector. src and ref
               ; get the same byte permutation, so the SAD is not affected.
    52         punpcklbw       xmm0,       xmm2
    53         punpcklbw       xmm1,       xmm3
    55         psadbw          xmm0,       xmm1 ; row n: one partial sum per 64-bit half
    56         movq            xmm2,       QWORD PTR [rsi+rax+8]
    58         movq            xmm3,       QWORD PTR [rdi+rdx+8]
               ; Advance both pointers by two rows.
    59         lea             rsi,        [rsi+rax*2]
    61         lea             rdi,        [rdi+rdx*2]
    62         punpcklbw       xmm4,       xmm2
    64         punpcklbw       xmm5,       xmm3
    65         psadbw          xmm4,       xmm5 ; row n+1 partial sums
    67         paddw           xmm6,       xmm0
    68         paddw           xmm6,       xmm4
    70         cmp             rsi,        rcx
    71         jne             .x16x16sad_wmt_loop
               ; Fold the two 64-bit halves of the accumulator together:
               ; xmm0 = low half, xmm6 >> 64 bits = high half, then add.
    73         movq            xmm0,       xmm6
    74         psrldq          xmm6,       8
    76         paddw           xmm0,       xmm6
    77         movq            rax,        xmm0 ; return value
    79     ; begin epilog
    80     pop rdi
    81     pop rsi
    82     RESTORE_XMM
    83     UNSHADOW_ARGS
    84     pop         rbp
    85     ret
    87 ;unsigned int vp8_sad8x16_wmt(
    88 ;    unsigned char *src_ptr,
    89 ;    int  src_stride,
    90 ;    unsigned char *ref_ptr,
    91 ;    int  ref_stride,
    92 ;    int  max_sad)
       ;
       ; Sum of absolute differences (SAD) between an 8-wide, 16-row byte block
       ; at src_ptr and the matching block at ref_ptr, returned in rax.
       ; Two rows are handled per loop iteration; mm7 accumulates the result.
       ;
       ; Early exit: at the top of every iteration the running sum is compared
       ; (unsigned, via `ja`) with max_sad. Once it is above max_sad the function
       ; returns the partial sum reached so far instead of finishing the block.
       ;
       ; Register use inside the body:
       ;   rsi = current src row      rbx = src_stride (sign-extended)
       ;   rdi = current ref row      rdx = ref_stride (sign-extended)
       ;   rcx = src_ptr + 16 * src_stride (loop end marker)
       ;   rax = copy of the running sum (also the return value)
       ;
       ; NOTE(review): MMX registers are used and no emms is issued before ret;
       ; presumably the caller resets the x87/MMX state - confirm.
    93 global sym(vp8_sad8x16_wmt) PRIVATE
    94 sym(vp8_sad8x16_wmt):
    95     push        rbp
    96     mov         rbp, rsp
    97     SHADOW_ARGS_TO_STACK 5
    98     push        rbx
    99     push        rsi
   100     push        rdi
   101     ; end prolog
   103         mov             rsi,        arg(0) ;src_ptr
   104         mov             rdi,        arg(2) ;ref_ptr
   106         movsxd          rbx,        dword ptr arg(1) ;src_stride
   107         movsxd          rdx,        dword ptr arg(3) ;ref_stride
               ; rcx = rsi + 16 * src_stride, built as two steps of 8 * stride.
   109         lea             rcx,        [rsi+rbx*8]
   111         lea             rcx,        [rcx+rbx*8]
   112         pxor            mm7,        mm7 ; running SAD = 0
   114 .x8x16sad_wmt_loop:
               ; Publish the running sum in rax first, so the early-exit label
               ; below can be reached with the return value already in place.
   116         movq            rax,        mm7
   117         cmp             eax,        arg(4) ; max_sad
   118         ja              .x8x16sad_wmt_early_exit
               ; Rows n and n+1, 8 bytes each.
   120         movq            mm0,        QWORD PTR [rsi]
   121         movq            mm1,        QWORD PTR [rdi]
   123         movq            mm2,        QWORD PTR [rsi+rbx]
   124         movq            mm3,        QWORD PTR [rdi+rdx]
   126         psadbw          mm0,        mm1
   127         psadbw          mm2,        mm3
               ; Advance both pointers by two rows.
   129         lea             rsi,        [rsi+rbx*2]
   130         lea             rdi,        [rdi+rdx*2]
   132         paddw           mm7,        mm0
   133         paddw           mm7,        mm2
   135         cmp             rsi,        rcx
   136         jne             .x8x16sad_wmt_loop
   138         movq            rax,        mm7 ; full-block SAD
   140 .x8x16sad_wmt_early_exit:
   142     ; begin epilog
   143     pop         rdi
   144     pop         rsi
   145     pop         rbx
   146     UNSHADOW_ARGS
   147     pop         rbp
   148     ret
   151 ;unsigned int vp8_sad8x8_wmt(
   152 ;    unsigned char *src_ptr,
   153 ;    int  src_stride,
   154 ;    unsigned char *ref_ptr,
   155 ;    int  ref_stride,
       ;    int  max_sad)
       ;
       ; Sum of absolute differences (SAD) between an 8x8 byte block at src_ptr
       ; and the 8x8 byte block at ref_ptr, returned in rax.
       ; One row is handled per loop iteration; mm7 accumulates the result.
       ;
       ; The body reads a fifth argument, arg(4), as an early-exit threshold:
       ; at the top of every iteration the running sum is compared (unsigned,
       ; via `ja`) with it, and once the sum is above the threshold the partial
       ; sum reached so far is returned.
       ;
       ; Register use inside the body:
       ;   rsi = current src row      rbx = src_stride (sign-extended)
       ;   rdi = current ref row      rdx = ref_stride (sign-extended)
       ;   rcx = src_ptr + 8 * src_stride (loop end marker)
       ;   rax = copy of the running sum (also the return value)
       ;
       ; NOTE(review): MMX registers are used and no emms is issued before ret;
       ; presumably the caller resets the x87/MMX state - confirm.
   156 global sym(vp8_sad8x8_wmt) PRIVATE
   157 sym(vp8_sad8x8_wmt):
   158     push        rbp
   159     mov         rbp, rsp
   160     SHADOW_ARGS_TO_STACK 5
   161     push        rbx
   162     push        rsi
   163     push        rdi
   164     ; end prolog
   166         mov             rsi,        arg(0) ;src_ptr
   167         mov             rdi,        arg(2) ;ref_ptr
   169         movsxd          rbx,        dword ptr arg(1) ;src_stride
   170         movsxd          rdx,        dword ptr arg(3) ;ref_stride
   172         lea             rcx,        [rsi+rbx*8] ; end marker: 8 rows past src_ptr
   173         pxor            mm7,        mm7 ; running SAD = 0
   175 .x8x8sad_wmt_loop:
               ; Publish the running sum in rax first, so the early-exit label
               ; below can be reached with the return value already in place.
   177         movq            rax,        mm7
   178         cmp             eax,        arg(4) ; max_sad
   179         ja              .x8x8sad_wmt_early_exit
   181         movq            mm0,        QWORD PTR [rsi]
   182         movq            mm1,        QWORD PTR [rdi]
   184         psadbw          mm0,        mm1
               ; Advance both pointers by one row.
   185         lea             rsi,        [rsi+rbx]
   187         add             rdi,        rdx
   188         paddw           mm7,        mm0
   190         cmp             rsi,        rcx
   191         jne             .x8x8sad_wmt_loop
   193         movq            rax,        mm7 ; full-block SAD
   194 .x8x8sad_wmt_early_exit:
   196     ; begin epilog
   197     pop         rdi
   198     pop         rsi
   199     pop         rbx
   200     UNSHADOW_ARGS
   201     pop         rbp
   202     ret
   204 ;unsigned int vp8_sad4x4_wmt(
   205 ;    unsigned char *src_ptr,
   206 ;    int  src_stride,
   207 ;    unsigned char *ref_ptr,
   208 ;    int  ref_stride)
       ;
       ; Sum of absolute differences (SAD) between a 4x4 byte block at src_ptr
       ; and the 4x4 byte block at ref_ptr, returned in rax.
       ; Straight-line code: rows 0-1 and rows 2-3 are each packed into one
       ; 8-byte MMX register so that a single psadbw covers two rows.
       ;
       ; Register use inside the body:
       ;   rsi = current src row      rax = src_stride (sign-extended)
       ;   rdi = current ref row      rdx = ref_stride (sign-extended)
       ;
       ; NOTE(review): MMX registers are used and no emms is issued before ret;
       ; presumably the caller resets the x87/MMX state - confirm.
   209 global sym(vp8_sad4x4_wmt) PRIVATE
   210 sym(vp8_sad4x4_wmt):
   211     push        rbp
   212     mov         rbp, rsp
   213     SHADOW_ARGS_TO_STACK 4
   214     push        rsi
   215     push        rdi
   216     ; end prolog
   218         mov             rsi,        arg(0) ;src_ptr
   219         mov             rdi,        arg(2) ;ref_ptr
   221         movsxd          rax,        dword ptr arg(1) ;src_stride
   222         movsxd          rdx,        dword ptr arg(3) ;ref_stride
               ; Rows 0 and 1, 4 bytes each.
   224         movd            mm0,        DWORD PTR [rsi]
   225         movd            mm1,        DWORD PTR [rdi]
   227         movd            mm2,        DWORD PTR [rsi+rax]
   228         movd            mm3,        DWORD PTR [rdi+rdx]
               ; Interleave the two rows into 8 bytes. src and ref get the same
               ; byte permutation, so the SAD is not affected.
   230         punpcklbw       mm0,        mm2
   231         punpcklbw       mm1,        mm3
   233         psadbw          mm0,        mm1 ; SAD of rows 0-1
               ; Advance both pointers by two rows.
   234         lea             rsi,        [rsi+rax*2]
   236         lea             rdi,        [rdi+rdx*2]
               ; Rows 2 and 3, same scheme.
   237         movd            mm4,        DWORD PTR [rsi]
   239         movd            mm5,        DWORD PTR [rdi]
   240         movd            mm6,        DWORD PTR [rsi+rax]
   242         movd            mm7,        DWORD PTR [rdi+rdx]
   243         punpcklbw       mm4,        mm6
   245         punpcklbw       mm5,        mm7
   246         psadbw          mm4,        mm5 ; SAD of rows 2-3
   248         paddw           mm0,        mm4
   249         movq            rax,        mm0 ; return value (rax no longer holds src_stride)
   251     ; begin epilog
   252     pop rdi
   253     pop rsi
   254     UNSHADOW_ARGS
   255     pop         rbp
   256     ret
   259 ;unsigned int vp8_sad16x8_wmt(
   260 ;    unsigned char *src_ptr,
   261 ;    int  src_stride,
   262 ;    unsigned char *ref_ptr,
   263 ;    int  ref_stride,
       ;    int  max_sad)
       ;
       ; Sum of absolute differences (SAD) between a 16-wide, 8-row byte block
       ; at src_ptr and the matching block at ref_ptr, returned in rax.
       ; Two rows are handled per loop iteration, each row as two 8-byte MMX
       ; halves; mm7 accumulates the result.
       ;
       ; The body reads a fifth argument, arg(4), as an early-exit threshold:
       ; at the top of every iteration the running sum is compared (unsigned,
       ; via `ja`) with it, and once the sum is above the threshold the partial
       ; sum reached so far is returned.
       ;
       ; Register use inside the body:
       ;   rsi = current src row      rbx = src_stride (sign-extended)
       ;   rdi = current ref row      rdx = ref_stride (sign-extended)
       ;   rcx = src_ptr + 8 * src_stride (loop end marker)
       ;   rax = copy of the running sum (also the return value)
       ;
       ; NOTE(review): MMX registers are used and no emms is issued before ret;
       ; presumably the caller resets the x87/MMX state - confirm.
   264 global sym(vp8_sad16x8_wmt) PRIVATE
   265 sym(vp8_sad16x8_wmt):
   266     push        rbp
   267     mov         rbp, rsp
   268     SHADOW_ARGS_TO_STACK 5
   269     push        rbx
   270     push        rsi
   271     push        rdi
   272     ; end prolog
   275         mov             rsi,        arg(0) ;src_ptr
   276         mov             rdi,        arg(2) ;ref_ptr
   278         movsxd          rbx,        dword ptr arg(1) ;src_stride
   279         movsxd          rdx,        dword ptr arg(3) ;ref_stride
   281         lea             rcx,        [rsi+rbx*8] ; end marker: 8 rows past src_ptr
   282         pxor            mm7,        mm7 ; running SAD = 0
   284 .x16x8sad_wmt_loop:
               ; Publish the running sum in rax first, so the early-exit label
               ; below can be reached with the return value already in place.
   286         movq            rax,        mm7
   287         cmp             eax,        arg(4) ; max_sad
   288         ja              .x16x8sad_wmt_early_exit
               ; Row n: left and right 8 bytes of src (mm0, mm2) and ref (mm1, mm3).
   290         movq            mm0,        QWORD PTR [rsi]
   291         movq            mm2,        QWORD PTR [rsi+8]
   293         movq            mm1,        QWORD PTR [rdi]
   294         movq            mm3,        QWORD PTR [rdi+8]
               ; Row n+1: left 8 bytes.
   296         movq            mm4,        QWORD PTR [rsi+rbx]
   297         movq            mm5,        QWORD PTR [rdi+rdx]
   299         psadbw          mm0,        mm1
   300         psadbw          mm2,        mm3
               ; Row n+1: right 8 bytes; mm1 and mm3 are free again after the
               ; psadbw instructions above.
   302         movq            mm1,        QWORD PTR [rsi+rbx+8]
   303         movq            mm3,        QWORD PTR [rdi+rdx+8]
   305         psadbw          mm4,        mm5
   306         psadbw          mm1,        mm3
               ; Advance both pointers by two rows.
   308         lea             rsi,        [rsi+rbx*2]
   309         lea             rdi,        [rdi+rdx*2]
               ; Combine the four partial sums into the accumulator.
   311         paddw           mm0,        mm2
   312         paddw           mm4,        mm1
   314         paddw           mm7,        mm0
   315         paddw           mm7,        mm4
   317         cmp             rsi,        rcx
   318         jne             .x16x8sad_wmt_loop
   320         movq            rax,        mm7 ; full-block SAD
   322 .x16x8sad_wmt_early_exit:
   324     ; begin epilog
   325     pop         rdi
   326     pop         rsi
   327     pop         rbx
   328     UNSHADOW_ARGS
   329     pop         rbp
   330     ret
   332 ;void vp8_copy32xn_sse2(
   333 ;    unsigned char *src_ptr,
   334 ;    int  src_stride,
   335 ;    unsigned char *dst_ptr,
   336 ;    int  dst_stride,
   337 ;    int height);
       ;
       ; Copies `height` rows of 32 bytes each from src_ptr to dst_ptr.
       ; Source rows are read with unaligned loads (movdqu). Destination rows
       ; are written with aligned stores (movdqa), so dst_ptr and dst_stride
       ; must keep every destination row 16-byte aligned.
       ;
       ; Rows are copied four at a time while at least four remain, then one
       ; at a time. A height below 4 bypasses the four-row loop entirely, and a
       ; height that is zero or negative copies nothing.
       ;
       ; Register use inside the body:
       ;   rsi = current src row      rax = src_stride (sign-extended)
       ;   rdi = current dst row      rdx = dst_stride (sign-extended)
       ;   rcx = rows still to copy
       ;
       ; SHADOW_ARGS_TO_STACK / arg() / SAVE_XMM are macros that are not defined
       ; in this file (presumably they come from x86_abi_support.asm and hide
       ; the per-ABI argument access and xmm6-xmm7 preservation - confirm there).
   338 global sym(vp8_copy32xn_sse2) PRIVATE
   339 sym(vp8_copy32xn_sse2):
   340     push        rbp
   341     mov         rbp, rsp
   342     SHADOW_ARGS_TO_STACK 5
   343     SAVE_XMM 7
   344     push        rsi
   345     push        rdi
   346     ; end prolog
   348         mov             rsi,        arg(0) ;src_ptr
   349         mov             rdi,        arg(2) ;dst_ptr
   351         movsxd          rax,        dword ptr arg(1) ;src_stride
   352         movsxd          rdx,        dword ptr arg(3) ;dst_stride
   353         movsxd          rcx,        dword ptr arg(4) ;height
               ; The unrolled loop always moves four rows, so it must not be
               ; entered with fewer than four rows left (signed compare:
               ; height is an int and may be zero or negative).
               cmp             rcx,     4
               jl              .block_copy_sse2_tail
   355 .block_copy_sse2_loopx4:
               ; Load rows n and n+1 (2 x 16 bytes each).
   356         movdqu          xmm0,       XMMWORD PTR [rsi]
   357         movdqu          xmm1,       XMMWORD PTR [rsi + 16]
   358         movdqu          xmm2,       XMMWORD PTR [rsi + rax]
   359         movdqu          xmm3,       XMMWORD PTR [rsi + rax + 16]
   361         lea             rsi,        [rsi+rax*2]
               ; Load rows n+2 and n+3.
   363         movdqu          xmm4,       XMMWORD PTR [rsi]
   364         movdqu          xmm5,       XMMWORD PTR [rsi + 16]
   365         movdqu          xmm6,       XMMWORD PTR [rsi + rax]
   366         movdqu          xmm7,       XMMWORD PTR [rsi + rax + 16]
   368         lea             rsi,    [rsi+rax*2]
               ; Store rows n and n+1.
   370         movdqa          XMMWORD PTR [rdi], xmm0
   371         movdqa          XMMWORD PTR [rdi + 16], xmm1
   372         movdqa          XMMWORD PTR [rdi + rdx], xmm2
   373         movdqa          XMMWORD PTR [rdi + rdx + 16], xmm3
   375         lea             rdi,    [rdi+rdx*2]
               ; Store rows n+2 and n+3.
   377         movdqa          XMMWORD PTR [rdi], xmm4
   378         movdqa          XMMWORD PTR [rdi + 16], xmm5
   379         movdqa          XMMWORD PTR [rdi + rdx], xmm6
   380         movdqa          XMMWORD PTR [rdi + rdx + 16], xmm7
   382         lea             rdi,    [rdi+rdx*2]
   384         sub             rcx,     4
   385         cmp             rcx,     4
   386         jge             .block_copy_sse2_loopx4
       .block_copy_sse2_tail:
               ; Zero to three rows are left here when height was non-negative.
               ; jle (not je) also stops on a negative count, so the tail loop
               ; below can never start with a counter that does not reach zero.
   388         cmp             rcx, 0
   389         jle             .copy_is_done
   391 .block_copy_sse2_loop:
   392         movdqu          xmm0,       XMMWORD PTR [rsi]
   393         movdqu          xmm1,       XMMWORD PTR [rsi + 16]
   394         lea             rsi,    [rsi+rax]
   396         movdqa          XMMWORD PTR [rdi], xmm0
   397         movdqa          XMMWORD PTR [rdi + 16], xmm1
   398         lea             rdi,    [rdi+rdx]
   400         sub             rcx,     1
   401         jne             .block_copy_sse2_loop
   403 .copy_is_done:
   404     ; begin epilog
   405     pop rdi
   406     pop rsi
   407     RESTORE_XMM
   408     UNSHADOW_ARGS
   409     pop         rbp
   410     ret

mercurial