media/libvpx/vp8/encoder/x86/subtract_sse2.asm

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

     1 ;
     2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3 ;
     4 ;  Use of this source code is governed by a BSD-style license
     5 ;  that can be found in the LICENSE file in the root of the source
     6 ;  tree. An additional intellectual property rights grant can be found
     7 ;  in the file PATENTS.  All contributing project authors may
     8 ;  be found in the AUTHORS file in the root of the source tree.
     9 ;
    12 %include "vpx_ports/x86_abi_support.asm"
    14 ;void vp8_subtract_b_sse2_impl(unsigned char *z,  int src_stride,
    15 ;                            short *diff, unsigned char *Predictor,
    16 ;                            int pitch);
       ;
       ;  Writes a 4x4 block of 16-bit differences, diff = z - Predictor.
       ;  Four rows are processed; each row loads 4 source bytes and 4
       ;  predictor bytes, zero-extends them to words and subtracts them.
       ;    z          source pixels, rows are src_stride bytes apart
       ;    Predictor  prediction pixels, rows are pitch bytes apart
       ;    diff       output, rows are pitch shorts (pitch*2 bytes) apart
       ;
       ;  NOTE(review): the body uses MMX registers (mm0, mm1, mm7) and no
       ;  emms is executed in this routine; presumably the caller resets the
       ;  MMX/x87 state before any floating-point code runs - confirm.
       ;  NOTE(review): no GLOBAL() data is referenced in this routine, so the
       ;  GET_GOT/RESTORE_GOT pair looks unnecessary here - confirm against
       ;  x86_abi_support.asm before removing it.
    17 global sym(vp8_subtract_b_sse2_impl) PRIVATE
    18 sym(vp8_subtract_b_sse2_impl):
    19     push        rbp
    20     mov         rbp, rsp
           ; 5 = number of arguments read through arg(0)..arg(4) below
    21     SHADOW_ARGS_TO_STACK 5
    22     GET_GOT     rbx
    23     push rsi
    24     push rdi
    25     ; end prolog
    27         mov     rdi,        arg(2) ;diff
    28         mov     rax,        arg(3) ;Predictor
    29         mov     rsi,        arg(0) ;z
    30         movsxd  rdx,        dword ptr arg(1);src_stride (sign-extended int)
    31         movsxd  rcx,        dword ptr arg(4);pitch (sign-extended int)
               ; mm7 = 0; interleaving with it zero-extends bytes to words
    32         pxor    mm7,        mm7
               ; row 0: z[0], Predictor[0] -> diff[0]
    34         movd    mm0,        [rsi]
    35         movd    mm1,        [rax]
    36         punpcklbw   mm0,    mm7
    37         punpcklbw   mm1,    mm7
    38         psubw   mm0,        mm1
    39         movq    MMWORD PTR [rdi],      mm0
               ; row 1: z + src_stride, Predictor + pitch -> diff + pitch shorts
    41         movd    mm0,        [rsi+rdx]
    42         movd    mm1,        [rax+rcx]
    43         punpcklbw   mm0,    mm7
    44         punpcklbw   mm1,    mm7
    45         psubw   mm0,        mm1
    46         movq    MMWORD PTR [rdi+rcx*2], mm0
               ; row 2: z + 2*src_stride, Predictor + 2*pitch -> diff + 2*pitch shorts
    48         movd    mm0,        [rsi+rdx*2]
    49         movd    mm1,        [rax+rcx*2]
    50         punpcklbw   mm0,    mm7
    51         punpcklbw   mm1,    mm7
    52         psubw   mm0,        mm1
    53         movq    MMWORD PTR [rdi+rcx*4], mm0
               ; a scale factor of 3 cannot be encoded in an address, so advance
               ; rsi by two rows and turn rcx into 3*pitch for the last row
    55         lea     rsi,        [rsi+rdx*2]
    56         lea     rcx,        [rcx+rcx*2]
               ; row 3: z + 3*src_stride, Predictor + 3*pitch -> diff + 3*pitch shorts
    58         movd    mm0,        [rsi+rdx]
    59         movd    mm1,        [rax+rcx]
    60         punpcklbw   mm0,    mm7
    61         punpcklbw   mm1,    mm7
    62         psubw   mm0,        mm1
    63         movq    MMWORD PTR [rdi+rcx*2], mm0
    65     ; begin epilog
           ; pops mirror the prolog pushes in reverse order
    66     pop rdi
    67     pop rsi
    68     RESTORE_GOT
    69     UNSHADOW_ARGS
    70     pop         rbp
    71     ret
    74 ;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride,
    75 ;unsigned char *pred, int pred_stride)
       ;
       ;  Writes a 16x16 block of 16-bit differences, diff = src - pred.
       ;    diff   output; 16 rows of 16 shorts stored back to back
       ;           (32 bytes per row, 64 bytes per loop iteration)
       ;    src    source pixels, rows are src_stride bytes apart
       ;    pred   prediction pixels, rows are pred_stride bytes apart
       ;
       ;  Every load and store is a movdqa, so each row address of src, pred
       ;  and diff must be 16-byte aligned.
       ;
       ;  Widening trick: psubb yields the low byte of (src - pred); an unsigned
       ;  "pred > src" byte compare yields 0xff exactly where the true result
       ;  is negative. Interleaving the two gives sign-extended 16-bit values.
    76 global sym(vp8_subtract_mby_sse2) PRIVATE
    77 sym(vp8_subtract_mby_sse2):
    78     push        rbp
    79     mov         rbp, rsp
           ; 5 = number of arguments read through arg(0)..arg(4) below
    80     SHADOW_ARGS_TO_STACK 5
    81     GET_GOT     rbx
    82     push rsi
    83     push rdi
    84     ; end prolog
    86     mov         rdi,        arg(0)          ;diff
    87     mov         rsi,        arg(1)          ;src
    88     movsxd      rdx,        dword ptr arg(2);src_stride
    89     mov         rax,        arg(3)          ;pred
           ; xmm4 = 0x80 in every byte. It is loaded before rbx is reused below.
           ; NOTE(review): GLOBAL() may address through the rbx set up by
           ; GET_GOT in PIC builds - confirm in x86_abi_support.asm.
    90     movdqa      xmm4,       [GLOBAL(t80)]
           ; rbx is saved here and restored before the epilog because it is
           ; overwritten with pred_stride
    91     push        rbx
    92     mov         rcx,        8               ; 8 iterations x 2 rows = 16 rows
    93     movsxd      rbx,        dword ptr arg(4);pred_stride
    95 .submby_loop:
           ; ---- first row of the pair ----
    96     movdqa      xmm0,       [rsi]           ; src
    97     movdqa      xmm1,       [rax]           ; pred
    99     movdqa      xmm2,       xmm0            ; keep src for the compare
   100     psubb       xmm0,       xmm1            ; low byte of src - pred
   102     pxor        xmm1,       xmm4            ; flip top bits so the signed
   103     pxor        xmm2,       xmm4            ; compare below acts as unsigned
   104     pcmpgtb     xmm1,       xmm2            ; 0xff where pred > src (negative result)
   106     movdqa      xmm2,       xmm0
   107     punpcklbw   xmm0,       xmm1            ; columns 0-7  as signed words
   108     punpckhbw   xmm2,       xmm1            ; columns 8-15 as signed words
           ; ---- second row of the pair: load before the pointers advance ----
   110     movdqa      xmm3,       [rsi + rdx]
   111     movdqa      xmm5,       [rax + rbx]
   113     lea         rsi,        [rsi+rdx*2]     ; src  += 2 rows
   114     lea         rax,        [rax+rbx*2]     ; pred += 2 rows
   116     movdqa      [rdi],      xmm0            ; first row, columns 0-7
   117     movdqa      [rdi +16],  xmm2            ; first row, columns 8-15
   119     movdqa      xmm1,       xmm3            ; keep src for the compare
   120     psubb       xmm3,       xmm5            ; low byte of src - pred
   122     pxor        xmm5,       xmm4            ; flip top bits so the signed
   123     pxor        xmm1,       xmm4            ; compare below acts as unsigned
   124     pcmpgtb     xmm5,       xmm1            ; 0xff where pred > src (negative result)
   126     movdqa      xmm1,       xmm3
   127     punpcklbw   xmm3,       xmm5            ; columns 0-7  as signed words
   128     punpckhbw   xmm1,       xmm5            ; columns 8-15 as signed words
   130     movdqa      [rdi +32],  xmm3            ; second row, columns 0-7
   131     movdqa      [rdi +48],  xmm1            ; second row, columns 8-15
   133     add         rdi,        64              ; two rows of 16 shorts
   134     dec         rcx
   135     jnz         .submby_loop
           ; pops mirror the pushes above in reverse order
   137     pop rbx
   138     pop rdi
   139     pop rsi
   140     ; begin epilog
   141     RESTORE_GOT
   142     UNSHADOW_ARGS
   143     pop         rbp
   144     ret
   146 ;vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc,
   147 ;                         int src_stride, unsigned char *upred,
   148 ;                         unsigned char *vpred, int pred_stride)
       ;
       ;  Writes the 16-bit differences for the 8x8 U block and the 8x8 V block.
       ;    diff         output base; the U differences (8 rows of 8 shorts)
       ;                 start at diff + 256 shorts and the V differences
       ;                 follow immediately after them
       ;    usrc, vsrc   source pixels, rows are src_stride bytes apart
       ;    upred, vpred prediction pixels, rows are pred_stride bytes apart
       ;
       ;  Stores use movdqa, so diff must be 16-byte aligned; the 8-byte
       ;  source and prediction loads (movq) have no alignment requirement.
       ;
       ;  Widening trick: psubb yields the low byte of (src - pred); an unsigned
       ;  "pred > src" byte compare yields 0xff exactly where the true result
       ;  is negative. Interleaving the two gives sign-extended 16-bit values.
   149 global sym(vp8_subtract_mbuv_sse2) PRIVATE
   150 sym(vp8_subtract_mbuv_sse2):
   151     push        rbp
   152     mov         rbp, rsp
           ; 7 = number of arguments in the signature (arg(0)..arg(6))
   153     SHADOW_ARGS_TO_STACK 7
   154     GET_GOT     rbx
   155     push rsi
   156     push rdi
   157     ; end prolog
           ; xmm4 = 0x80 in every byte. It is loaded before rbx is reused below.
           ; NOTE(review): GLOBAL() may address through the rbx set up by
           ; GET_GOT in PIC builds - confirm in x86_abi_support.asm.
   159     movdqa      xmm4,       [GLOBAL(t80)]
   160     mov         rdi,        arg(0)          ;diff
   161     mov         rsi,        arg(1)          ;usrc
   162     movsxd      rdx,        dword ptr arg(3);src_stride (shared by U and V)
   163     mov         rax,        arg(4)          ;upred
   164     add         rdi,        256*2           ;diff = diff + 256 (shorts)
   165     mov         rcx,        4               ; 4 iterations x 2 rows = 8 rows
           ; rbx is saved here and restored before the epilog because it is
           ; overwritten with pred_stride
   166     push        rbx
   167     movsxd      rbx,        dword ptr arg(6);pred_stride (shared by U and V)
   169     ;u
   170 .submbu_loop:
   171     movq        xmm0,       [rsi]           ; src
   172     movq        xmm2,       [rsi+rdx]       ; src -- next line
   173     movq        xmm1,       [rax]           ; pred
   174     movq        xmm3,       [rax+rbx]       ; pred -- next line
   175     lea         rsi,        [rsi + rdx*2]
   176     lea         rax,        [rax + rbx*2]
   178     punpcklqdq  xmm0,       xmm2            ; two src rows in one register
   179     punpcklqdq  xmm1,       xmm3            ; two pred rows in one register
   181     movdqa      xmm2,       xmm0            ; keep src for the compare
   182     psubb       xmm0,       xmm1            ; subtraction with sign missed
   184     pxor        xmm1,       xmm4            ; flip top bits so the signed
   185     pxor        xmm2,       xmm4            ; compare below acts as unsigned
   186     pcmpgtb     xmm1,       xmm2            ; 0xff where pred > src (negative result)
   188     movdqa      xmm2,       xmm0
           ; punpcklbw only writes xmm0, so the mask in xmm1 feeds both unpacks
   190     punpcklbw   xmm0,       xmm1            ; first row as signed words
   191     punpckhbw   xmm2,       xmm1            ; second row as signed words
   193     movdqa      [rdi],      xmm0            ; store difference
   194     movdqa      [rdi +16],  xmm2            ; store difference
   195     add         rdi,        32              ; two rows of 8 shorts
   196     sub         rcx, 1
   197     jnz         .submbu_loop
           ; rdi is not reset: the V differences continue right after U
   199     mov         rsi,        arg(2)          ;vsrc
   200     mov         rax,        arg(5)          ;vpred
   201     mov         rcx,        4               ; 4 iterations x 2 rows = 8 rows
   203     ;v
   204 .submbv_loop:
   205     movq        xmm0,       [rsi]           ; src
   206     movq        xmm2,       [rsi+rdx]       ; src -- next line
   207     movq        xmm1,       [rax]           ; pred
   208     movq        xmm3,       [rax+rbx]       ; pred -- next line
   209     lea         rsi,        [rsi + rdx*2]
   210     lea         rax,        [rax + rbx*2]
   212     punpcklqdq  xmm0,       xmm2            ; two src rows in one register
   213     punpcklqdq  xmm1,       xmm3            ; two pred rows in one register
   215     movdqa      xmm2,       xmm0            ; keep src for the compare
   216     psubb       xmm0,       xmm1            ; subtraction with sign missed
   218     pxor        xmm1,       xmm4            ; flip top bits so the signed
   219     pxor        xmm2,       xmm4            ; compare below acts as unsigned
   220     pcmpgtb     xmm1,       xmm2            ; 0xff where pred > src (negative result)
   222     movdqa      xmm2,       xmm0
           ; punpcklbw only writes xmm0, so the mask in xmm1 feeds both unpacks
   224     punpcklbw   xmm0,       xmm1            ; first row as signed words
   225     punpckhbw   xmm2,       xmm1            ; second row as signed words
   227     movdqa      [rdi],      xmm0            ; store difference
   228     movdqa      [rdi +16],  xmm2            ; store difference
   229     add         rdi,        32              ; two rows of 8 shorts
   230     sub         rcx, 1
   231     jnz         .submbv_loop
   233     pop         rbx
   234     ; begin epilog
           ; pops mirror the prolog pushes in reverse order
   235     pop rdi
   236     pop rsi
   237     RESTORE_GOT
   238     UNSHADOW_ARGS
   239     pop         rbp
   240     ret
   242 SECTION_RODATA
       ; t80: 0x80 in each of 16 bytes. The routines in this file load it with
       ; movdqa (hence the 16-byte alignment) and XOR it into both operands
       ; before pcmpgtb, so that the signed byte compare orders the original
       ; values as unsigned.
   243 align 16
   244 t80:
   245     times 16 db 0x80

mercurial