media/libvpx/vp9/encoder/x86/vp9_sad_mmx.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701 and complies with the disk-avoidance requirements documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 ;
     2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3 ;
     4 ;  Use of this source code is governed by a BSD-style license
     5 ;  that can be found in the LICENSE file in the root of the source
     6 ;  tree. An additional intellectual property rights grant can be found
     7 ;  in the file PATENTS.  All contributing project authors may
     8 ;  be found in the AUTHORS file in the root of the source tree.
     9 ;
    12 %include "vpx_ports/x86_abi_support.asm"
    14 global sym(vp9_sad16x16_mmx) PRIVATE
    15 global sym(vp9_sad8x16_mmx) PRIVATE
    16 global sym(vp9_sad8x8_mmx) PRIVATE
    17 global sym(vp9_sad4x4_mmx) PRIVATE
    18 global sym(vp9_sad16x8_mmx) PRIVATE
    20 ;unsigned int vp9_sad16x16_mmx(
    21 ;    unsigned char *src_ptr,
    22 ;    int  src_stride,
    23 ;    unsigned char *ref_ptr,
    24 ;    int  ref_stride)
    25 sym(vp9_sad16x16_mmx):
    26     push        rbp
    27     mov         rbp, rsp
    28     SHADOW_ARGS_TO_STACK 4
    29     push rsi
    30     push rdi
    31     ; end prolog
    33         mov             rsi,        arg(0) ;src_ptr
    34         mov             rdi,        arg(2) ;ref_ptr
    36         movsxd          rax,        dword ptr arg(1) ;src_stride
    37         movsxd          rdx,        dword ptr arg(3) ;ref_stride
    39         lea             rcx,        [rsi+rax*8]
    41         lea             rcx,        [rcx+rax*8]
    42         pxor            mm7,        mm7
    44         pxor            mm6,        mm6
    46 .x16x16sad_mmx_loop:
    48         movq            mm0,        QWORD PTR [rsi]
    49         movq            mm2,        QWORD PTR [rsi+8]
    51         movq            mm1,        QWORD PTR [rdi]
    52         movq            mm3,        QWORD PTR [rdi+8]
    54         movq            mm4,        mm0
    55         movq            mm5,        mm2
    57         psubusb         mm0,        mm1
    58         psubusb         mm1,        mm4
    60         psubusb         mm2,        mm3
    61         psubusb         mm3,        mm5
    63         por             mm0,        mm1
    64         por             mm2,        mm3
    66         movq            mm1,        mm0
    67         movq            mm3,        mm2
    69         punpcklbw       mm0,        mm6
    70         punpcklbw       mm2,        mm6
    72         punpckhbw       mm1,        mm6
    73         punpckhbw       mm3,        mm6
    75         paddw           mm0,        mm2
    76         paddw           mm1,        mm3
    79         lea             rsi,        [rsi+rax]
    80         add             rdi,        rdx
    82         paddw           mm7,        mm0
    83         paddw           mm7,        mm1
    85         cmp             rsi,        rcx
    86         jne             .x16x16sad_mmx_loop
    89         movq            mm0,        mm7
    91         punpcklwd       mm0,        mm6
    92         punpckhwd       mm7,        mm6
    94         paddw           mm0,        mm7
    95         movq            mm7,        mm0
    98         psrlq           mm0,        32
    99         paddw           mm7,        mm0
   101         movq            rax,        mm7
   103     pop rdi
   104     pop rsi
   105     mov rsp, rbp
   106     ; begin epilog
   107     UNSHADOW_ARGS
   108     pop         rbp
   109     ret
   112 ;unsigned int vp9_sad8x16_mmx(
   113 ;    unsigned char *src_ptr,
   114 ;    int  src_stride,
   115 ;    unsigned char *ref_ptr,
   116 ;    int  ref_stride)
   117 sym(vp9_sad8x16_mmx):
   118     push        rbp
   119     mov         rbp, rsp
   120     SHADOW_ARGS_TO_STACK 4
   121     push rsi
   122     push rdi
   123     ; end prolog
   125         mov             rsi,        arg(0) ;src_ptr
   126         mov             rdi,        arg(2) ;ref_ptr
   128         movsxd          rax,        dword ptr arg(1) ;src_stride
   129         movsxd          rdx,        dword ptr arg(3) ;ref_stride
   131         lea             rcx,        [rsi+rax*8]
   133         lea             rcx,        [rcx+rax*8]
   134         pxor            mm7,        mm7
   136         pxor            mm6,        mm6
   138 .x8x16sad_mmx_loop:
   140         movq            mm0,        QWORD PTR [rsi]
   141         movq            mm1,        QWORD PTR [rdi]
   143         movq            mm2,        mm0
   144         psubusb         mm0,        mm1
   146         psubusb         mm1,        mm2
   147         por             mm0,        mm1
   149         movq            mm2,        mm0
   150         punpcklbw       mm0,        mm6
   152         punpckhbw       mm2,        mm6
   153         lea             rsi,        [rsi+rax]
   155         add             rdi,        rdx
   156         paddw           mm7,        mm0
   158         paddw           mm7,        mm2
   159         cmp             rsi,        rcx
   161         jne             .x8x16sad_mmx_loop
   163         movq            mm0,        mm7
   164         punpcklwd       mm0,        mm6
   166         punpckhwd       mm7,        mm6
   167         paddw           mm0,        mm7
   169         movq            mm7,        mm0
   170         psrlq           mm0,        32
   172         paddw           mm7,        mm0
   173         movq            rax,        mm7
   175     pop rdi
   176     pop rsi
   177     mov rsp, rbp
   178     ; begin epilog
   179     UNSHADOW_ARGS
   180     pop         rbp
   181     ret
   184 ;unsigned int vp9_sad8x8_mmx(
   185 ;    unsigned char *src_ptr,
   186 ;    int  src_stride,
   187 ;    unsigned char *ref_ptr,
   188 ;    int  ref_stride)
   189 sym(vp9_sad8x8_mmx):
   190     push        rbp
   191     mov         rbp, rsp
   192     SHADOW_ARGS_TO_STACK 4
   193     push rsi
   194     push rdi
   195     ; end prolog
   197         mov             rsi,        arg(0) ;src_ptr
   198         mov             rdi,        arg(2) ;ref_ptr
   200         movsxd          rax,        dword ptr arg(1) ;src_stride
   201         movsxd          rdx,        dword ptr arg(3) ;ref_stride
   203         lea             rcx,        [rsi+rax*8]
   204         pxor            mm7,        mm7
   206         pxor            mm6,        mm6
   208 .x8x8sad_mmx_loop:
   210         movq            mm0,        QWORD PTR [rsi]
   211         movq            mm1,        QWORD PTR [rdi]
   213         movq            mm2,        mm0
   214         psubusb         mm0,        mm1
   216         psubusb         mm1,        mm2
   217         por             mm0,        mm1
   219         movq            mm2,        mm0
   220         punpcklbw       mm0,        mm6
   222         punpckhbw       mm2,        mm6
   223         paddw           mm0,        mm2
   225         lea             rsi,       [rsi+rax]
   226         add             rdi,        rdx
   228         paddw           mm7,       mm0
   229         cmp             rsi,        rcx
   231         jne             .x8x8sad_mmx_loop
   233         movq            mm0,        mm7
   234         punpcklwd       mm0,        mm6
   236         punpckhwd       mm7,        mm6
   237         paddw           mm0,        mm7
   239         movq            mm7,        mm0
   240         psrlq           mm0,        32
   242         paddw           mm7,        mm0
   243         movq            rax,        mm7
   245     pop rdi
   246     pop rsi
   247     mov rsp, rbp
   248     ; begin epilog
   249     UNSHADOW_ARGS
   250     pop         rbp
   251     ret
   254 ;unsigned int vp9_sad4x4_mmx(
   255 ;    unsigned char *src_ptr,
   256 ;    int  src_stride,
   257 ;    unsigned char *ref_ptr,
   258 ;    int  ref_stride)
   259 sym(vp9_sad4x4_mmx):
   260     push        rbp
   261     mov         rbp, rsp
   262     SHADOW_ARGS_TO_STACK 4
   263     push rsi
   264     push rdi
   265     ; end prolog
   267         mov             rsi,        arg(0) ;src_ptr
   268         mov             rdi,        arg(2) ;ref_ptr
   270         movsxd          rax,        dword ptr arg(1) ;src_stride
   271         movsxd          rdx,        dword ptr arg(3) ;ref_stride
   273         movd            mm0,        DWORD PTR [rsi]
   274         movd            mm1,        DWORD PTR [rdi]
   276         movd            mm2,        DWORD PTR [rsi+rax]
   277         movd            mm3,        DWORD PTR [rdi+rdx]
   279         punpcklbw       mm0,        mm2
   280         punpcklbw       mm1,        mm3
   282         movq            mm2,        mm0
   283         psubusb         mm0,        mm1
   285         psubusb         mm1,        mm2
   286         por             mm0,        mm1
   288         movq            mm2,        mm0
   289         pxor            mm3,        mm3
   291         punpcklbw       mm0,        mm3
   292         punpckhbw       mm2,        mm3
   294         paddw           mm0,        mm2
   296         lea             rsi,        [rsi+rax*2]
   297         lea             rdi,        [rdi+rdx*2]
   299         movd            mm4,        DWORD PTR [rsi]
   300         movd            mm5,        DWORD PTR [rdi]
   302         movd            mm6,        DWORD PTR [rsi+rax]
   303         movd            mm7,        DWORD PTR [rdi+rdx]
   305         punpcklbw       mm4,        mm6
   306         punpcklbw       mm5,        mm7
   308         movq            mm6,        mm4
   309         psubusb         mm4,        mm5
   311         psubusb         mm5,        mm6
   312         por             mm4,        mm5
   314         movq            mm5,        mm4
   315         punpcklbw       mm4,        mm3
   317         punpckhbw       mm5,        mm3
   318         paddw           mm4,        mm5
   320         paddw           mm0,        mm4
   321         movq            mm1,        mm0
   323         punpcklwd       mm0,        mm3
   324         punpckhwd       mm1,        mm3
   326         paddw           mm0,        mm1
   327         movq            mm1,        mm0
   329         psrlq           mm0,        32
   330         paddw           mm0,        mm1
   332         movq            rax,        mm0
   334     pop rdi
   335     pop rsi
   336     mov rsp, rbp
   337     ; begin epilog
   338     UNSHADOW_ARGS
   339     pop         rbp
   340     ret
   343 ;unsigned int vp9_sad16x8_mmx(
   344 ;    unsigned char *src_ptr,
   345 ;    int  src_stride,
   346 ;    unsigned char *ref_ptr,
   347 ;    int  ref_stride)
   348 sym(vp9_sad16x8_mmx):
   349     push        rbp
   350     mov         rbp, rsp
   351     SHADOW_ARGS_TO_STACK 4
   352     push rsi
   353     push rdi
   354     ; end prolog
   356         mov             rsi,        arg(0) ;src_ptr
   357         mov             rdi,        arg(2) ;ref_ptr
   359         movsxd          rax,        dword ptr arg(1) ;src_stride
   360         movsxd          rdx,        dword ptr arg(3) ;ref_stride
   362         lea             rcx,        [rsi+rax*8]
   363         pxor            mm7,        mm7
   365         pxor            mm6,        mm6
   367 .x16x8sad_mmx_loop:
   369         movq            mm0,       [rsi]
   370         movq            mm1,       [rdi]
   372         movq            mm2,        [rsi+8]
   373         movq            mm3,        [rdi+8]
   375         movq            mm4,        mm0
   376         movq            mm5,        mm2
   378         psubusb         mm0,        mm1
   379         psubusb         mm1,        mm4
   381         psubusb         mm2,        mm3
   382         psubusb         mm3,        mm5
   384         por             mm0,        mm1
   385         por             mm2,        mm3
   387         movq            mm1,        mm0
   388         movq            mm3,        mm2
   390         punpcklbw       mm0,        mm6
   391         punpckhbw       mm1,        mm6
   393         punpcklbw       mm2,        mm6
   394         punpckhbw       mm3,        mm6
   397         paddw           mm0,        mm2
   398         paddw           mm1,        mm3
   400         paddw           mm0,        mm1
   401         lea             rsi,        [rsi+rax]
   403         add             rdi,        rdx
   404         paddw           mm7,        mm0
   406         cmp             rsi,        rcx
   407         jne             .x16x8sad_mmx_loop
   409         movq            mm0,        mm7
   410         punpcklwd       mm0,        mm6
   412         punpckhwd       mm7,        mm6
   413         paddw           mm0,        mm7
   415         movq            mm7,        mm0
   416         psrlq           mm0,        32
   418         paddw           mm7,        mm0
   419         movq            rax,        mm7
   421     pop rdi
   422     pop rsi
   423     mov rsp, rbp
   424     ; begin epilog
   425     UNSHADOW_ARGS
   426     pop         rbp
   427     ret

mercurial