media/libvpx/vp8/encoder/x86/subtract_mmx.asm

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 ;
     2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3 ;
     4 ;  Use of this source code is governed by a BSD-style license
     5 ;  that can be found in the LICENSE file in the root of the source
     6 ;  tree. An additional intellectual property rights grant can be found
     7 ;  in the file PATENTS.  All contributing project authors may
     8 ;  be found in the AUTHORS file in the root of the source tree.
     9 ;
    12 %include "vpx_ports/x86_abi_support.asm"
    14 ;void vp8_subtract_b_mmx_impl(unsigned char *z,  int src_stride,
    15 ;                            short *diff, unsigned char *Predictor,
    16 ;                            int pitch);
    17 global sym(vp8_subtract_b_mmx_impl) PRIVATE
    18 sym(vp8_subtract_b_mmx_impl):
    19     push        rbp
    20     mov         rbp, rsp
    21     SHADOW_ARGS_TO_STACK 5
    22     push rsi
    23     push rdi
    24     ; end prolog
    27         mov     rdi,        arg(2) ;diff
    28         mov     rax,        arg(3) ;Predictor
    29         mov     rsi,        arg(0) ;z
    30         movsxd  rdx,        dword ptr arg(1);src_stride;
    31         movsxd  rcx,        dword ptr arg(4);pitch
    32         pxor    mm7,        mm7
    34         movd    mm0,        [rsi]
    35         movd    mm1,        [rax]
    36         punpcklbw   mm0,    mm7
    37         punpcklbw   mm1,    mm7
    38         psubw   mm0,        mm1
    39         movq    [rdi],      mm0
    42         movd    mm0,        [rsi+rdx]
    43         movd    mm1,        [rax+rcx]
    44         punpcklbw   mm0,    mm7
    45         punpcklbw   mm1,    mm7
    46         psubw   mm0,        mm1
    47         movq    [rdi+rcx*2],mm0
    50         movd    mm0,        [rsi+rdx*2]
    51         movd    mm1,        [rax+rcx*2]
    52         punpcklbw   mm0,    mm7
    53         punpcklbw   mm1,    mm7
    54         psubw   mm0,        mm1
    55         movq    [rdi+rcx*4],        mm0
    57         lea     rsi,        [rsi+rdx*2]
    58         lea     rcx,        [rcx+rcx*2]
    62         movd    mm0,        [rsi+rdx]
    63         movd    mm1,        [rax+rcx]
    64         punpcklbw   mm0,    mm7
    65         punpcklbw   mm1,    mm7
    66         psubw   mm0,        mm1
    67         movq    [rdi+rcx*2],        mm0
    69     ; begin epilog
    70     pop rdi
    71     pop rsi
    72     UNSHADOW_ARGS
    73     pop         rbp
    74     ret
    76 ;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride,
    77 ;unsigned char *pred, int pred_stride)
    78 global sym(vp8_subtract_mby_mmx) PRIVATE
    79 sym(vp8_subtract_mby_mmx):
    80     push        rbp
    81     mov         rbp, rsp
    82     SHADOW_ARGS_TO_STACK 5
    83     push rsi
    84     push rdi
    85     ; end prolog
    87     mov         rdi,        arg(0)          ;diff
    88     mov         rsi,        arg(1)          ;src
    89     movsxd      rdx,        dword ptr arg(2);src_stride
    90     mov         rax,        arg(3)          ;pred
    91     push        rbx
    92     movsxd      rbx,        dword ptr arg(4);pred_stride
    94     pxor        mm0,        mm0
    95     mov         rcx,        16
    98 .submby_loop:
    99     movq        mm1,        [rsi]
   100     movq        mm3,        [rax]
   102     movq        mm2,        mm1
   103     movq        mm4,        mm3
   105     punpcklbw   mm1,        mm0
   106     punpcklbw   mm3,        mm0
   108     punpckhbw   mm2,        mm0
   109     punpckhbw   mm4,        mm0
   111     psubw       mm1,        mm3
   112     psubw       mm2,        mm4
   114     movq        [rdi],      mm1
   115     movq        [rdi+8],    mm2
   117     movq        mm1,        [rsi+8]
   118     movq        mm3,        [rax+8]
   120     movq        mm2,        mm1
   121     movq        mm4,        mm3
   123     punpcklbw   mm1,        mm0
   124     punpcklbw   mm3,        mm0
   126     punpckhbw   mm2,        mm0
   127     punpckhbw   mm4,        mm0
   129     psubw       mm1,        mm3
   130     psubw       mm2,        mm4
   132     movq        [rdi+16],   mm1
   133     movq        [rdi+24],   mm2
   134     add         rdi,        32
   135     lea         rax,        [rax+rbx]
   136     lea         rsi,        [rsi+rdx]
   137     dec         rcx
   138     jnz         .submby_loop
   140     pop rbx
   141     pop rdi
   142     pop rsi
   143     ; begin epilog
   144     UNSHADOW_ARGS
   145     pop         rbp
   146     ret
   149 ;vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc,
   150 ;                         int src_stride, unsigned char *upred,
   151 ;                         unsigned char *vpred, int pred_stride)
   153 global sym(vp8_subtract_mbuv_mmx) PRIVATE
   154 sym(vp8_subtract_mbuv_mmx):
   155     push        rbp
   156     mov         rbp, rsp
   157     SHADOW_ARGS_TO_STACK 7
   158     push rsi
   159     push rdi
   160     ; end prolog
   162     mov         rdi,        arg(0)          ;diff
   163     mov         rsi,        arg(1)          ;usrc
   164     movsxd      rdx,        dword ptr arg(3);src_stride;
   165     mov         rax,        arg(4)          ;upred
   166     add         rdi,        256*2           ;diff = diff + 256 (shorts)
   167     mov         rcx,        8
   168     push        rbx
   169     movsxd      rbx,        dword ptr arg(6);pred_stride
   171     pxor        mm7,        mm7
   173 .submbu_loop:
   174     movq        mm0,        [rsi]
   175     movq        mm1,        [rax]
   176     movq        mm3,        mm0
   177     movq        mm4,        mm1
   178     punpcklbw   mm0,        mm7
   179     punpcklbw   mm1,        mm7
   180     punpckhbw   mm3,        mm7
   181     punpckhbw   mm4,        mm7
   182     psubw       mm0,        mm1
   183     psubw       mm3,        mm4
   184     movq        [rdi],      mm0
   185     movq        [rdi+8],    mm3
   186     add         rdi, 16
   187     add         rsi, rdx
   188     add         rax, rbx
   190     dec         rcx
   191     jnz         .submbu_loop
   193     mov         rsi,        arg(2)          ;vsrc
   194     mov         rax,        arg(5)          ;vpred
   195     mov         rcx,        8
   197 .submbv_loop:
   198     movq        mm0,        [rsi]
   199     movq        mm1,        [rax]
   200     movq        mm3,        mm0
   201     movq        mm4,        mm1
   202     punpcklbw   mm0,        mm7
   203     punpcklbw   mm1,        mm7
   204     punpckhbw   mm3,        mm7
   205     punpckhbw   mm4,        mm7
   206     psubw       mm0,        mm1
   207     psubw       mm3,        mm4
   208     movq        [rdi],      mm0
   209     movq        [rdi+8],    mm3
   210     add         rdi, 16
   211     add         rsi, rdx
   212     add         rax, rbx
   214     dec         rcx
   215     jnz         .submbv_loop
   217     pop         rbx
   218     ; begin epilog
   219     pop rdi
   220     pop rsi
   221     UNSHADOW_ARGS
   222     pop         rbp
   223     ret

mercurial