media/libvpx/vp8/encoder/x86/encodeopt.asm

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 ;
     2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3 ;
     4 ;  Use of this source code is governed by a BSD-style license
     5 ;  that can be found in the LICENSE file in the root of the source
     6 ;  tree. An additional intellectual property rights grant can be found
     7 ;  in the file PATENTS.  All contributing project authors may
     8 ;  be found in the AUTHORS file in the root of the source tree.
     9 ;
    12 %include "vpx_ports/x86_abi_support.asm"
    ;int vp8_block_error_xmm(short *coeff_ptr,  short *dcoef_ptr)
    ;
    ; Sum of squared differences between two runs of 16 signed 16-bit
    ; values (32 bytes are read from each pointer).
    ;
    ;   arg(0)  coeff_ptr  - first input; loaded with movdqa, so it must be
    ;                        16-byte aligned
    ;   arg(1)  dcoef_ptr  - second input; same size and alignment rule
    ;   return  rax        - the total, accumulated in 32-bit lanes (wraps
    ;                        modulo 2^32); bits 63:32 of rax end up zero
    ;
    ; Scratch: xmm0-xmm3, xmm5.  rsi and rdi are saved and restored.
    ; sym(), arg(), PRIVATE, SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS are not
    ; defined in this file; presumably they come from the included
    ; x86_abi_support.asm.
global sym(vp8_block_error_xmm) PRIVATE
sym(vp8_block_error_xmm):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)          ; coeff_ptr
    mov         rdi,        arg(1)          ; dcoef_ptr
    pxor        xmm5,       xmm5            ; zero, used to widen dwords below

    ; values 0..7: difference, then square and add adjacent pairs
    movdqa      xmm0,       [rsi]
    movdqa      xmm1,       [rdi]
    psubw       xmm0,       xmm1
    pmaddwd     xmm0,       xmm0

    ; values 8..15: same treatment
    movdqa      xmm2,       [rsi+16]
    movdqa      xmm3,       [rdi+16]
    psubw       xmm2,       xmm3
    pmaddwd     xmm2,       xmm2

    paddd       xmm0,       xmm2            ; four dword partial sums

    ; horizontal add: interleave the dwords with zero, then fold twice
    movdqa      xmm1,       xmm0
    punpckldq   xmm0,       xmm5            ; { s0, 0, s1, 0 }
    punpckhdq   xmm1,       xmm5            ; { s2, 0, s3, 0 }
    paddd       xmm0,       xmm1            ; { s0+s2, 0, s1+s3, 0 }

    movdqa      xmm1,       xmm0
    psrldq      xmm0,       8               ; bring s1+s3 down to lane 0
    paddd       xmm0,       xmm1            ; lane 0 = s0+s1+s2+s3

    movq        rax,        xmm0            ; low qword = { total, 0 }

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
    ;int vp8_block_error_mmx(short *coeff_ptr,  short *dcoef_ptr)
    ;
    ; Sum of squared differences between two runs of 16 signed 16-bit
    ; values (32 bytes are read from each pointer), using MMX registers.
    ;
    ;   arg(0)  coeff_ptr  - first input
    ;   arg(1)  dcoef_ptr  - second input
    ;   return  eax        - the total, accumulated in 32-bit lanes (wraps
    ;                        modulo 2^32).  With a literal 64-bit
    ;                        "movq rax, mm0" the upper half of rax carries a
    ;                        leftover partial sum, so only the low 32 bits
    ;                        are meaningful (the prototype returns int).
    ;
    ; Scratch: mm0, mm1, mm3-mm6.  rsi and rdi are saved and restored.
    ; No emms is issued here; NOTE(review): presumably the caller is
    ; responsible for clearing MMX state - confirm.
    ; sym(), arg(), PRIVATE, SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS are not
    ; defined in this file; presumably they come from the included
    ; x86_abi_support.asm.
global sym(vp8_block_error_mmx) PRIVATE
sym(vp8_block_error_mmx):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)          ; coeff_ptr
    mov         rdi,        arg(1)          ; dcoef_ptr

    ; values 0..3 (every value counts here: there is no dc argument)
    movq        mm1,        [rsi]
    movq        mm4,        [rdi]
    psubw       mm1,        mm4
    pmaddwd     mm1,        mm1             ; mm1 = running pair of dword sums

    ; values 4..7
    movq        mm5,        [rsi+8]
    movq        mm6,        [rdi+8]
    psubw       mm5,        mm6
    pmaddwd     mm5,        mm5
    paddd       mm1,        mm5

    ; values 8..11
    movq        mm3,        [rsi+16]
    movq        mm4,        [rdi+16]
    psubw       mm3,        mm4
    pmaddwd     mm3,        mm3
    paddd       mm1,        mm3

    ; values 12..15
    movq        mm5,        [rsi+24]
    movq        mm6,        [rdi+24]
    psubw       mm5,        mm6
    pmaddwd     mm5,        mm5
    paddd       mm1,        mm5

    ; fold the two dword lanes into lane 0
    movq        mm0,        mm1
    psrlq       mm1,        32              ; high lane -> low lane
    paddd       mm0,        mm1             ; lane 0 = low + high

    movq        rax,        mm0

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
   ;int vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
   ;
   ; Sum of squared differences over 16 consecutive groups of 16 signed
   ; 16-bit values (16 iterations x 32 bytes from each pointer), using MMX.
   ; The first four values of every group are filtered through a mask
   ; derived from dc before being squared.
   ;
   ;   arg(0)  coeff_ptr  - first input
   ;   arg(1)  dcoef_ptr  - second input
   ;   arg(2)  dc         - mask selector: each 16-bit word of the
   ;                        zero-extended dc that is non-zero removes the
   ;                        value at the same position from every group.
   ;                        dc == 0 keeps everything; dc == 1 drops the
   ;                        first value of each group.
   ;                        NOTE(review): callers presumably pass only 0
   ;                        or 1 - confirm.
   ;   return  eax        - the total, accumulated in 32-bit lanes (wraps
   ;                        modulo 2^32).  With a literal 64-bit
   ;                        "movq rax, mm0" the upper half of rax carries a
   ;                        leftover partial sum.
   ;
   ; Scratch: rcx, mm0-mm7.  rsi and rdi are saved and restored.
   ; sym(), arg(), PRIVATE, SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS are not
   ; defined in this file; presumably they come from the included
   ; x86_abi_support.asm.
global sym(vp8_mbblock_error_mmx_impl) PRIVATE
sym(vp8_mbblock_error_mmx_impl):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 3
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)          ; coeff_ptr
    mov         rdi,        arg(1)          ; dcoef_ptr

    pxor        mm7,        mm7             ; all-zero comparand
    pxor        mm2,        mm2             ; mm2 = accumulator (two dwords)

    ; mm1 = keep-mask: 0xFFFF in each word where dc's word is zero
    movd        mm1,        dword ptr arg(2) ;dc
    por         mm1,        mm2             ; mm2 is zero here, mm1 unchanged
    pcmpeqw     mm1,        mm7

    mov         rcx,        16              ; groups remaining

.mmx_block_loop:
    ; values 0..3 of the group: masked before squaring
    movq        mm3,        [rsi]
    movq        mm4,        [rdi]
    psubw       mm3,        mm4
    pand        mm3,        mm1
    pmaddwd     mm3,        mm3
    paddd       mm2,        mm3

    ; values 4..7
    movq        mm5,        [rsi+8]
    movq        mm6,        [rdi+8]
    psubw       mm5,        mm6
    pmaddwd     mm5,        mm5
    paddd       mm2,        mm5

    ; values 8..11
    movq        mm3,        [rsi+16]
    movq        mm4,        [rdi+16]
    psubw       mm3,        mm4
    pmaddwd     mm3,        mm3
    paddd       mm2,        mm3

    ; values 12..15
    movq        mm5,        [rsi+24]
    movq        mm6,        [rdi+24]
    psubw       mm5,        mm6
    pmaddwd     mm5,        mm5
    paddd       mm2,        mm5

    add         rsi,        32              ; next group in both inputs
    add         rdi,        32
    sub         rcx,        1
    jnz         .mmx_block_loop

    ; fold the two dword lanes into lane 0
    movq        mm0,        mm2
    psrlq       mm2,        32
    paddd       mm0,        mm2
    movq        rax,        mm0

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
   ;int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
   ;
   ; Sum of squared differences over 16 consecutive groups of 16 signed
   ; 16-bit values (16 iterations x 32 bytes from each pointer), using SSE2.
   ; The first eight values of every group are filtered through a mask
   ; derived from dc before being squared.
   ;
   ;   arg(0)  coeff_ptr  - first input; loaded with movdqa, so it must be
   ;                        16-byte aligned
   ;   arg(1)  dcoef_ptr  - second input; same alignment rule
   ;   arg(2)  dc         - mask selector: each 16-bit word of the
   ;                        zero-extended dc that is non-zero removes the
   ;                        value at the same position from every group.
   ;                        dc == 0 keeps everything; dc == 1 drops the
   ;                        first value of each group.
   ;                        NOTE(review): callers presumably pass only 0
   ;                        or 1 - confirm.
   ;   return  rax        - the total, accumulated in 32-bit lanes (wraps
   ;                        modulo 2^32); bits 63:32 of rax end up zero
   ;
   ; Scratch: rcx, xmm0-xmm5; xmm6 is used between SAVE_XMM 6 and
   ; RESTORE_XMM.  rsi and rdi are saved and restored.
   ; sym(), arg(), PRIVATE, SHADOW_ARGS_TO_STACK, UNSHADOW_ARGS, SAVE_XMM
   ; and RESTORE_XMM are not defined in this file; presumably they come
   ; from the included x86_abi_support.asm.
global sym(vp8_mbblock_error_xmm_impl) PRIVATE
sym(vp8_mbblock_error_xmm_impl):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 3
    SAVE_XMM 6
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)          ; coeff_ptr
    mov         rdi,        arg(1)          ; dcoef_ptr

    pxor        xmm6,       xmm6            ; all-zero comparand / widener
    pxor        xmm4,       xmm4            ; xmm4 = accumulator (four dwords)

    ; xmm5 = keep-mask: 0xFFFF in each word where dc's word is zero
    movd        xmm5,       dword ptr arg(2) ;dc
    por         xmm5,       xmm4            ; xmm4 is zero here, xmm5 unchanged
    pcmpeqw     xmm5,       xmm6

    mov         rcx,        16              ; groups remaining

.xmm_block_loop:
    ; values 0..7 of the group: masked before squaring
    movdqa      xmm0,       [rsi]
    movdqa      xmm1,       [rdi]
    psubw       xmm0,       xmm1
    pand        xmm0,       xmm5
    pmaddwd     xmm0,       xmm0

    ; values 8..15
    movdqa      xmm2,       [rsi+16]
    movdqa      xmm3,       [rdi+16]
    psubw       xmm2,       xmm3
    pmaddwd     xmm2,       xmm2

    paddd       xmm4,       xmm0
    paddd       xmm4,       xmm2

    add         rsi,        32              ; next group in both inputs
    add         rdi,        32
    sub         rcx,        1
    jnz         .xmm_block_loop

    ; horizontal add: interleave the dwords with zero, then fold twice
    movdqa      xmm0,       xmm4
    punpckldq   xmm0,       xmm6            ; { s0, 0, s1, 0 }
    punpckhdq   xmm4,       xmm6            ; { s2, 0, s3, 0 }
    paddd       xmm0,       xmm4            ; { s0+s2, 0, s1+s3, 0 }

    movdqa      xmm1,       xmm0
    psrldq      xmm0,       8               ; bring s1+s3 down to lane 0
    paddd       xmm0,       xmm1            ; lane 0 = s0+s1+s2+s3

    movq        rax,        xmm0            ; low qword = { total, 0 }

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
   ;int vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
   ;
   ; Sum of squared differences between two runs of 128 signed 16-bit
   ; values (16 iterations x 16 bytes from each pointer), using MMX.
   ;
   ;   arg(0)  s_ptr   - first input
   ;   arg(1)  d_ptr   - second input
   ;   return  eax     - the total, accumulated in 32-bit lanes (wraps
   ;                     modulo 2^32).  With a literal 64-bit
   ;                     "movq rax, mm0" the upper half of rax carries a
   ;                     leftover partial sum.
   ;
   ; Scratch: rcx, mm0-mm4, mm7.  rsi and rdi are saved and restored.
   ; sym(), arg(), PRIVATE, SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS are not
   ; defined in this file; presumably they come from the included
   ; x86_abi_support.asm.
global sym(vp8_mbuverror_mmx_impl) PRIVATE
sym(vp8_mbuverror_mmx_impl):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)          ; s_ptr
    mov         rdi,        arg(1)          ; d_ptr

    pxor        mm7,        mm7             ; mm7 = accumulator (two dwords)
    mov         rcx,        16              ; 16-byte steps remaining

.mmx_uv_loop:
    ; values 0..3 of this step
    movq        mm1,        [rsi]
    movq        mm2,        [rdi]
    psubw       mm1,        mm2
    pmaddwd     mm1,        mm1
    paddd       mm7,        mm1

    ; values 4..7 of this step
    movq        mm3,        [rsi+8]
    movq        mm4,        [rdi+8]
    psubw       mm3,        mm4
    pmaddwd     mm3,        mm3
    paddd       mm7,        mm3

    add         rsi,        16
    add         rdi,        16
    dec         rcx
    jnz         .mmx_uv_loop

    ; fold the two dword lanes into lane 0
    movq        mm0,        mm7
    psrlq       mm7,        32
    paddd       mm0,        mm7
    movq        rax,        mm0

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
   ;int vp8_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
   ;
   ; Sum of squared differences between two runs of 128 signed 16-bit
   ; values (16 iterations x 16 bytes from each pointer), using SSE2.
   ;
   ;   arg(0)  s_ptr   - first input; loaded with movdqa, so it must be
   ;                     16-byte aligned
   ;   arg(1)  d_ptr   - second input; same alignment rule
   ;   return  rax     - the total, accumulated in 32-bit lanes (wraps
   ;                     modulo 2^32); bits 63:32 of rax end up zero
   ;
   ; Scratch: rcx, xmm0-xmm3.  rsi and rdi are saved and restored.
   ; sym(), arg(), PRIVATE, SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS are not
   ; defined in this file; presumably they come from the included
   ; x86_abi_support.asm.
global sym(vp8_mbuverror_xmm_impl) PRIVATE
sym(vp8_mbuverror_xmm_impl):
    push        rbp
    mov         rbp,        rsp
    SHADOW_ARGS_TO_STACK 2
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0)          ; s_ptr
    mov         rdi,        arg(1)          ; d_ptr

    pxor        xmm3,       xmm3            ; xmm3 = accumulator (four dwords)
    mov         rcx,        16              ; 16-byte steps remaining

.xmm_uv_loop:
    movdqa      xmm1,       [rsi]
    movdqa      xmm2,       [rdi]
    psubw       xmm1,       xmm2
    pmaddwd     xmm1,       xmm1            ; squares, adjacent pairs summed
    paddd       xmm3,       xmm1

    add         rsi,        16
    add         rdi,        16
    dec         rcx
    jnz         .xmm_uv_loop

    ; horizontal add: interleave the dwords with zero, then fold twice
    pxor        xmm0,       xmm0
    movdqa      xmm1,       xmm3
    punpckldq   xmm1,       xmm0            ; { s0, 0, s1, 0 }
    punpckhdq   xmm3,       xmm0            ; { s2, 0, s3, 0 }
    paddd       xmm1,       xmm3            ; { s0+s2, 0, s1+s3, 0 }

    movdqa      xmm2,       xmm1
    psrldq      xmm1,       8               ; bring s1+s3 down to lane 0
    paddd       xmm1,       xmm2            ; lane 0 = s0+s1+s2+s3

    movq        rax,        xmm1            ; low qword = { total, 0 }

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret

mercurial