media/libvpx/vp9/common/x86/vp9_intrapred_sse2.asm

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

     1 ;
     2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3 ;
     4 ;  Use of this source code is governed by a BSD-style license
     5 ;  that can be found in the LICENSE file in the root of the source
     6 ;  tree. An additional intellectual property rights grant can be found
     7 ;  in the file PATENTS.  All contributing project authors may
     8 ;  be found in the AUTHORS file in the root of the source tree.
     9 ;
    11 %include "third_party/x86inc/x86inc.asm"
    13 SECTION_RODATA
       ; Rounding constants, each stored as eight 16-bit words. The
       ; dc_predictor_* routines add one of them before an arithmetic right
       ; shift so the division rounds to nearest:
       ;   pw_4 with >>3, pw_8 with >>4, pw_16 with >>5, pw_32 with >>6.
    14 pw_4:  times 8 dw 4
    15 pw_8:  times 8 dw 8
    16 pw_16: times 8 dw 16
    17 pw_32: times 8 dw 32
    19 SECTION .text
    21 INIT_MMX sse
       ; dc_predictor_4x4(dst, stride, above, left)
       ;
       ; Fills a 4x4 block of bytes at dst (rows `stride` bytes apart) with the
       ; single value (sum(above[0..3]) + sum(left[0..3]) + 4) >> 3.
       ;
       ; goffset names a fifth scratch register that is handed to GET_GOT.
       ; GET_GOT / GLOBAL / RESTORE_GOT are not defined in this file; presumably
       ; they come from the included x86inc.asm and provide position-independent
       ; addressing of pw_4 - confirm there.
    22 cglobal dc_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
    23   GET_GOT     goffsetq
    25   pxor                  m1, m1                 ; m1 = 0
    26   movd                  m0, [aboveq]           ; low dword  = above[0..3]
    27   punpckldq             m0, [leftq]            ; high dword = left[0..3]
    28   psadbw                m0, m1                 ; word 0 = sum of the 8 bytes
    29   paddw                 m0, [GLOBAL(pw_4)]     ; + 4 so the shift rounds to nearest
    30   psraw                 m0, 3                  ; / 8
    31   pshufw                m0, m0, 0x0            ; broadcast word 0 to all four words
    32   packuswb              m0, m0                 ; words -> bytes, every byte = DC value
    33   movd      [dstq        ], m0                 ; row 0
    34   movd      [dstq+strideq], m0                 ; row 1
    35   lea                 dstq, [dstq+strideq*2]   ; advance two rows
    36   movd      [dstq        ], m0                 ; row 2
    37   movd      [dstq+strideq], m0                 ; row 3
    39   RESTORE_GOT
    40   RET
    42 INIT_MMX sse
       ; dc_predictor_8x8(dst, stride, above, left)
       ;
       ; Fills an 8x8 block of bytes at dst (rows `stride` bytes apart) with the
       ; single value (sum(above[0..7]) + sum(left[0..7]) + 8) >> 4.
       ;
       ; goffset names a fifth scratch register that is handed to GET_GOT.
       ; GET_GOT / GLOBAL / RESTORE_GOT are not defined in this file; presumably
       ; they come from the included x86inc.asm and provide position-independent
       ; addressing of pw_8 - confirm there.
    43 cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
    44   GET_GOT     goffsetq
    46   pxor                  m1, m1                 ; m1 = 0
    47   movq                  m0, [aboveq]           ; above[0..7]
    48   movq                  m2, [leftq]            ; left[0..7]
       ; Both inputs are now in registers, so the third register slot (formerly
       ; `above`) is re-labelled stride3.
    49   DEFINE_ARGS dst, stride, stride3
    50   lea             stride3q, [strideq*3]        ; stride3 = 3 * stride
    51   psadbw                m0, m1                 ; word 0 = sum(above[0..7])
    52   psadbw                m2, m1                 ; word 0 = sum(left[0..7])
    53   paddw                 m0, m2                 ; total of the 16 bytes
    54   paddw                 m0, [GLOBAL(pw_8)]     ; + 8 so the shift rounds to nearest
    55   psraw                 m0, 4                  ; / 16
    56   pshufw                m0, m0, 0x0            ; broadcast word 0 to all four words
    57   packuswb              m0, m0                 ; words -> bytes, every byte = DC value
    58   movq    [dstq          ], m0                 ; row 0
    59   movq    [dstq+strideq  ], m0                 ; row 1
    60   movq    [dstq+strideq*2], m0                 ; row 2
    61   movq    [dstq+stride3q ], m0                 ; row 3
    62   lea                 dstq, [dstq+strideq*4]   ; advance four rows
    63   movq    [dstq          ], m0                 ; row 4
    64   movq    [dstq+strideq  ], m0                 ; row 5
    65   movq    [dstq+strideq*2], m0                 ; row 6
    66   movq    [dstq+stride3q ], m0                 ; row 7
    68   RESTORE_GOT
    69   RET
    71 INIT_XMM sse2
       ; dc_predictor_16x16(dst, stride, above, left)
       ;
       ; Fills a 16x16 block of bytes at dst (rows `stride` bytes apart) with the
       ; single value (sum(above[0..15]) + sum(left[0..15]) + 16) >> 5.
       ;
       ; NOTE(review): all loads and stores use mova, x86inc's aligned move, so
       ; above, left and every destination row are presumably expected to be
       ; 16-byte aligned - confirm against callers.
       ; GET_GOT / GLOBAL / RESTORE_GOT are not defined in this file; presumably
       ; they come from the included x86inc.asm and provide position-independent
       ; addressing of pw_16 - confirm there.
    72 cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
    73   GET_GOT     goffsetq
    75   pxor                  m1, m1                 ; m1 = 0
    76   mova                  m0, [aboveq]           ; above[0..15]
    77   mova                  m2, [leftq]            ; left[0..15]
       ; Inputs are loaded; re-label the third and fourth registers as
       ; stride3 and the loop counter lines4.
    78   DEFINE_ARGS dst, stride, stride3, lines4
    79   lea             stride3q, [strideq*3]        ; stride3 = 3 * stride
    80   mov              lines4d, 4                  ; 4 passes x 4 rows = 16 rows
    81   psadbw                m0, m1                 ; one byte-sum per 64-bit half
    82   psadbw                m2, m1
    83   paddw                 m0, m2                 ; above + left, still per half
    84   movhlps               m2, m0                 ; bring the high-half sum down
    85   paddw                 m0, m2                 ; word 0 = total of all 32 bytes
    86   paddw                 m0, [GLOBAL(pw_16)]    ; + 16 so the shift rounds to nearest
    87   psraw                 m0, 5                  ; / 32
    88   pshuflw               m0, m0, 0x0            ; broadcast word 0 across the low 4 words
    89   punpcklqdq            m0, m0                 ; copy low qword into high qword
    90   packuswb              m0, m0                 ; words -> bytes, all 16 bytes = DC value
    91 .loop:
    92   mova    [dstq          ], m0
    93   mova    [dstq+strideq  ], m0
    94   mova    [dstq+strideq*2], m0
    95   mova    [dstq+stride3q ], m0
    96   lea                 dstq, [dstq+strideq*4]   ; advance four rows
    97   dec              lines4d
    98   jnz .loop
   100   RESTORE_GOT
   101   REP_RET
   103 INIT_XMM sse2
       ; dc_predictor_32x32(dst, stride, above, left)
       ;
       ; Fills a 32x32 block of bytes at dst (rows `stride` bytes apart) with the
       ; single value (sum(above[0..31]) + sum(left[0..31]) + 32) >> 6.
       ;
       ; NOTE(review): all loads and stores use mova, x86inc's aligned move, so
       ; above, left and every destination row are presumably expected to be
       ; 16-byte aligned - confirm against callers.
       ; GET_GOT / GLOBAL / RESTORE_GOT are not defined in this file; presumably
       ; they come from the included x86inc.asm and provide position-independent
       ; addressing of pw_32 - confirm there.
   104 cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
   105   GET_GOT     goffsetq
   107   pxor                  m1, m1                 ; m1 = 0
   108   mova                  m0, [aboveq]           ; above[0..15]
   109   mova                  m2, [aboveq+16]        ; above[16..31]
   110   mova                  m3, [leftq]            ; left[0..15]
   111   mova                  m4, [leftq+16]         ; left[16..31]
       ; Inputs are loaded; re-label the third and fourth registers as
       ; stride3 and the loop counter lines4.
   112   DEFINE_ARGS dst, stride, stride3, lines4
   113   lea             stride3q, [strideq*3]        ; stride3 = 3 * stride
   114   mov              lines4d, 8                  ; 8 passes x 4 rows = 32 rows
   115   psadbw                m0, m1                 ; one byte-sum per 64-bit half
   116   psadbw                m2, m1
   117   psadbw                m3, m1
   118   psadbw                m4, m1
   119   paddw                 m0, m2                 ; accumulate the four partial sums
   120   paddw                 m0, m3
   121   paddw                 m0, m4
   122   movhlps               m2, m0                 ; bring the high-half sum down
   123   paddw                 m0, m2                 ; word 0 = total of all 64 bytes
   124   paddw                 m0, [GLOBAL(pw_32)]    ; + 32 so the shift rounds to nearest
   125   psraw                 m0, 6                  ; / 64
   126   pshuflw               m0, m0, 0x0            ; broadcast word 0 across the low 4 words
   127   punpcklqdq            m0, m0                 ; copy low qword into high qword
   128   packuswb              m0, m0                 ; words -> bytes, all 16 bytes = DC value
   129 .loop:
       ; Each row is 32 bytes wide: two 16-byte stores per row, four rows per pass.
   130   mova [dstq             ], m0
   131   mova [dstq          +16], m0
   132   mova [dstq+strideq     ], m0
   133   mova [dstq+strideq  +16], m0
   134   mova [dstq+strideq*2   ], m0
   135   mova [dstq+strideq*2+16], m0
   136   mova [dstq+stride3q    ], m0
   137   mova [dstq+stride3q +16], m0
   138   lea                 dstq, [dstq+strideq*4]   ; advance four rows
   139   dec              lines4d
   140   jnz .loop
   142   RESTORE_GOT
   143   REP_RET
   145 INIT_MMX sse
       ; v_predictor_4x4(dst, stride, above)
       ;
       ; Copies the 4 bytes above[0..3] into each of the 4 rows of the block at
       ; dst (rows `stride` bytes apart).
   146 cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above
   147   movd                  m0, [aboveq]           ; m0 = above[0..3]
   148   movd      [dstq        ], m0                 ; row 0
   149   movd      [dstq+strideq], m0                 ; row 1
   150   lea                 dstq, [dstq+strideq*2]   ; advance two rows
   151   movd      [dstq        ], m0                 ; row 2
   152   movd      [dstq+strideq], m0                 ; row 3
   153   RET
   155 INIT_MMX sse
       ; v_predictor_8x8(dst, stride, above)
       ;
       ; Copies the 8 bytes above[0..7] into each of the 8 rows of the block at
       ; dst (rows `stride` bytes apart).
   156 cglobal v_predictor_8x8, 3, 3, 1, dst, stride, above
   157   movq                  m0, [aboveq]           ; m0 = above[0..7]
       ; `above` is no longer needed; its register is re-labelled stride3.
   158   DEFINE_ARGS dst, stride, stride3
   159   lea             stride3q, [strideq*3]        ; stride3 = 3 * stride
   160   movq    [dstq          ], m0                 ; row 0
   161   movq    [dstq+strideq  ], m0                 ; row 1
   162   movq    [dstq+strideq*2], m0                 ; row 2
   163   movq    [dstq+stride3q ], m0                 ; row 3
   164   lea                 dstq, [dstq+strideq*4]   ; advance four rows
   165   movq    [dstq          ], m0                 ; row 4
   166   movq    [dstq+strideq  ], m0                 ; row 5
   167   movq    [dstq+strideq*2], m0                 ; row 6
   168   movq    [dstq+stride3q ], m0                 ; row 7
   169   RET
   171 INIT_XMM sse2
       ; v_predictor_16x16(dst, stride, above)
       ;
       ; Copies the 16 bytes above[0..15] into each of the 16 rows of the block
       ; at dst (rows `stride` bytes apart).
       ;
       ; NOTE(review): the load and stores use mova, x86inc's aligned move, so
       ; above and every destination row are presumably expected to be 16-byte
       ; aligned - confirm against callers.
   172 cglobal v_predictor_16x16, 3, 4, 1, dst, stride, above
   173   mova                  m0, [aboveq]           ; m0 = above[0..15]
       ; `above` is no longer needed; its register becomes stride3 and the
       ; fourth (scratch) register becomes the loop counter nlines4.
   174   DEFINE_ARGS dst, stride, stride3, nlines4
   175   lea             stride3q, [strideq*3]        ; stride3 = 3 * stride
   176   mov              nlines4d, 4                 ; 4 passes x 4 rows = 16 rows
   177 .loop:
   178   mova    [dstq          ], m0
   179   mova    [dstq+strideq  ], m0
   180   mova    [dstq+strideq*2], m0
   181   mova    [dstq+stride3q ], m0
   182   lea                 dstq, [dstq+strideq*4]   ; advance four rows
   183   dec             nlines4d
   184   jnz .loop
   185   REP_RET
   187 INIT_XMM sse2
       ; v_predictor_32x32(dst, stride, above)
       ;
       ; Copies the 32 bytes above[0..31] into each of the 32 rows of the block
       ; at dst (rows `stride` bytes apart).
       ;
       ; NOTE(review): the loads and stores use mova, x86inc's aligned move, so
       ; above and every destination row are presumably expected to be 16-byte
       ; aligned - confirm against callers.
   188 cglobal v_predictor_32x32, 3, 4, 2, dst, stride, above
   189   mova                  m0, [aboveq]           ; m0 = above[0..15]
   190   mova                  m1, [aboveq+16]        ; m1 = above[16..31]
       ; `above` is no longer needed; its register becomes stride3 and the
       ; fourth (scratch) register becomes the loop counter nlines4.
   191   DEFINE_ARGS dst, stride, stride3, nlines4
   192   lea             stride3q, [strideq*3]        ; stride3 = 3 * stride
   193   mov              nlines4d, 8                 ; 8 passes x 4 rows = 32 rows
   194 .loop:
       ; Left half of each row comes from m0, right half from m1.
   195   mova [dstq             ], m0
   196   mova [dstq          +16], m1
   197   mova [dstq+strideq     ], m0
   198   mova [dstq+strideq  +16], m1
   199   mova [dstq+strideq*2   ], m0
   200   mova [dstq+strideq*2+16], m1
   201   mova [dstq+stride3q    ], m0
   202   mova [dstq+stride3q +16], m1
   203   lea                 dstq, [dstq+strideq*4]   ; advance four rows
   204   dec             nlines4d
   205   jnz .loop
   206   REP_RET
   208 INIT_MMX sse
       ; tm_predictor_4x4(dst, stride, above, left)
       ;
       ; For every row r and column c of the 4x4 byte block at dst (rows
       ; `stride` bytes apart) stores
       ;   saturate_u8(left[r] + above[c] - above[-1]).
       ; The loop runs twice and produces two rows per pass.
       ;
       ; NOTE(review): the movd loads fetch 4 bytes although only the lowest
       ; byte is used. [aboveq-1] therefore reads above[-1..2], and the last
       ; pass reads up to 3 bytes beyond left[3] - confirm the callers' buffers
       ; make that safe.
   209 cglobal tm_predictor_4x4, 4, 4, 4, dst, stride, above, left
   210   pxor                  m1, m1                 ; m1 = 0, used to widen bytes to words
   211   movd                  m2, [aboveq-1]         ; byte 0 = above[-1]
   212   movd                  m0, [aboveq]           ; above[0..3]
   213   punpcklbw             m2, m1                 ; bytes -> words
   214   punpcklbw             m0, m1
   215   pshufw                m2, m2, 0x0            ; broadcast above[-1] to all words
       ; `above` is consumed; its register is re-labelled as the loop counter.
   216   DEFINE_ARGS dst, stride, line, left
   217   mov                lineq, -2                 ; counts -2, -1 -> two passes
   218   add                leftq, 4                  ; point past left[3]; negative offsets index back
   219   psubw                 m0, m2                 ; m0[c] = above[c] - above[-1]
   220 .loop:
   221   movd                  m2, [leftq+lineq*2]    ; byte 0 = left[r]
   222   movd                  m3, [leftq+lineq*2+1]  ; byte 0 = left[r+1]
   223   punpcklbw             m2, m1                 ; bytes -> words
   224   punpcklbw             m3, m1
   225   pshufw                m2, m2, 0x0            ; broadcast left[r]
   226   pshufw                m3, m3, 0x0            ; broadcast left[r+1]
   227   paddw                 m2, m0                 ; left[r]   + above[c] - above[-1]
   228   paddw                 m3, m0                 ; left[r+1] + above[c] - above[-1]
   229   packuswb              m2, m2                 ; words -> bytes with unsigned saturation
   230   packuswb              m3, m3
   231   movd      [dstq        ], m2                 ; row r
   232   movd      [dstq+strideq], m3                 ; row r+1
   233   lea                 dstq, [dstq+strideq*2]   ; advance two rows
   234   inc                lineq
   235   jnz .loop
   236   REP_RET
   238 INIT_XMM sse2
       ; tm_predictor_8x8(dst, stride, above, left)
       ;
       ; For every row r and column c of the 8x8 byte block at dst (rows
       ; `stride` bytes apart) stores
       ;   saturate_u8(left[r] + above[c] - above[-1]).
       ; The loop runs four times and produces two rows per pass.
       ;
       ; NOTE(review): the movd loads fetch 4 bytes although only the lowest
       ; byte is used. [aboveq-1] therefore reads above[-1..2], and the last
       ; pass reads up to 3 bytes beyond left[7] - confirm the callers' buffers
       ; make that safe.
   239 cglobal tm_predictor_8x8, 4, 4, 4, dst, stride, above, left
   240   pxor                  m1, m1                 ; m1 = 0, used to widen bytes to words
   241   movd                  m2, [aboveq-1]         ; byte 0 = above[-1]
   242   movq                  m0, [aboveq]           ; above[0..7]
   243   punpcklbw             m2, m1                 ; bytes -> words
   244   punpcklbw             m0, m1
   245   pshuflw               m2, m2, 0x0            ; broadcast above[-1] across the low 4 words
       ; `above` is consumed; its register is re-labelled as the loop counter.
   246   DEFINE_ARGS dst, stride, line, left
   247   mov                lineq, -4                 ; counts -4..-1 -> four passes
   248   punpcklqdq            m2, m2                 ; ...and across the high 4 words
   249   add                leftq, 8                  ; point past left[7]; negative offsets index back
   250   psubw                 m0, m2                 ; m0[c] = above[c] - above[-1]
   251 .loop:
   252   movd                  m2, [leftq+lineq*2]    ; byte 0 = left[r]
   253   movd                  m3, [leftq+lineq*2+1]  ; byte 0 = left[r+1]
   254   punpcklbw             m2, m1                 ; bytes -> words
   255   punpcklbw             m3, m1
   256   pshuflw               m2, m2, 0x0            ; broadcast into the low 4 words
   257   pshuflw               m3, m3, 0x0
   258   punpcklqdq            m2, m2                 ; broadcast into all 8 words
   259   punpcklqdq            m3, m3
   260   paddw                 m2, m0                 ; left[r]   + above[c] - above[-1]
   261   paddw                 m3, m0                 ; left[r+1] + above[c] - above[-1]
   262   packuswb              m2, m3                 ; saturate: low 8 bytes = row r, high 8 = row r+1
   263   movq      [dstq        ], m2                 ; row r   (low half)
   264   movhps    [dstq+strideq], m2                 ; row r+1 (high half)
   265   lea                 dstq, [dstq+strideq*2]   ; advance two rows
   266   inc                lineq
   267   jnz .loop
   268   REP_RET
   270 INIT_XMM sse2
       ; tm_predictor_16x16(dst, stride, above, left)
       ;
       ; For every row r and column c of the 16x16 byte block at dst (rows
       ; `stride` bytes apart) stores
       ;   saturate_u8(left[r] + above[c] - above[-1]).
       ; The loop runs eight times and produces two rows per pass.
       ;
       ; NOTE(review): the movd loads fetch 4 bytes although only the lowest
       ; byte is used. [aboveq-1] therefore reads above[-1..2], and the last
       ; pass reads up to 3 bytes beyond left[15] - confirm the callers' buffers
       ; make that safe. The mova load/stores are x86inc's aligned moves, so
       ; above and every destination row are presumably 16-byte aligned -
       ; confirm against callers.
   271 cglobal tm_predictor_16x16, 4, 4, 7, dst, stride, above, left
   272   pxor                  m1, m1                 ; m1 = 0, used to widen bytes to words
   273   movd                  m2, [aboveq-1]         ; byte 0 = above[-1]
   274   mova                  m0, [aboveq]           ; above[0..15]
   275   punpcklbw             m2, m1                 ; bytes -> words
   276   punpckhbw             m4, m0, m1             ; m4 = above[8..15] as words
   277   punpcklbw             m0, m1                 ; m0 = above[0..7]  as words
   278   pshuflw               m2, m2, 0x0            ; broadcast above[-1] across the low 4 words
       ; `above` is consumed; its register is re-labelled as the loop counter.
   279   DEFINE_ARGS dst, stride, line, left
   280   mov                lineq, -8                 ; counts -8..-1 -> eight passes
   281   punpcklqdq            m2, m2                 ; ...and across the high 4 words
   282   add                leftq, 16                 ; point past left[15]; negative offsets index back
   283   psubw                 m0, m2                 ; above[0..7]  - above[-1]
   284   psubw                 m4, m2                 ; above[8..15] - above[-1]
   285 .loop:
   286   movd                  m2, [leftq+lineq*2]    ; byte 0 = left[r]
   287   movd                  m3, [leftq+lineq*2+1]  ; byte 0 = left[r+1]
   288   punpcklbw             m2, m1                 ; bytes -> words
   289   punpcklbw             m3, m1
   290   pshuflw               m2, m2, 0x0            ; broadcast into the low 4 words
   291   pshuflw               m3, m3, 0x0
   292   punpcklqdq            m2, m2                 ; broadcast into all 8 words
   293   punpcklqdq            m3, m3
   294   paddw                 m5, m2, m0             ; row r,   columns 0..7
   295   paddw                 m6, m3, m0             ; row r+1, columns 0..7
   296   paddw                 m2, m4                 ; row r,   columns 8..15
   297   paddw                 m3, m4                 ; row r+1, columns 8..15
   298   packuswb              m5, m2                 ; saturate to bytes: full row r
   299   packuswb              m6, m3                 ; saturate to bytes: full row r+1
   300   mova      [dstq        ], m5
   301   mova      [dstq+strideq], m6
   302   lea                 dstq, [dstq+strideq*2]   ; advance two rows
   303   inc                lineq
   304   jnz .loop
   305   REP_RET
   307 %if ARCH_X86_64
       ; Built only for x86-64: the routine uses m8 and m9, and SIMD registers
       ; above number 7 are available only in 64-bit mode.
   308 INIT_XMM sse2
       ; tm_predictor_32x32(dst, stride, above, left)
       ;
       ; For every row r and column c of the 32x32 byte block at dst (rows
       ; `stride` bytes apart) stores
       ;   saturate_u8(left[r] + above[c] - above[-1]).
       ; The loop runs sixteen times and produces two rows per pass; the stores
       ; for the first row are interleaved with the arithmetic for the second.
       ;
       ; NOTE(review): the movd loads fetch 4 bytes although only the lowest
       ; byte is used. [aboveq-1] therefore reads above[-1..2], and the last
       ; pass reads up to 3 bytes beyond left[31] - confirm the callers' buffers
       ; make that safe. The mova load/stores are x86inc's aligned moves, so
       ; above and every destination row are presumably 16-byte aligned -
       ; confirm against callers.
   309 cglobal tm_predictor_32x32, 4, 4, 10, dst, stride, above, left
   310   pxor                  m1, m1                 ; m1 = 0, used to widen bytes to words
   311   movd                  m2, [aboveq-1]         ; byte 0 = above[-1]
   312   mova                  m0, [aboveq]           ; above[0..15]
   313   mova                  m4, [aboveq+16]        ; above[16..31]
   314   punpcklbw             m2, m1                 ; bytes -> words
   315   punpckhbw             m3, m0, m1             ; m3 = above[8..15]  as words
   316   punpckhbw             m5, m4, m1             ; m5 = above[24..31] as words
   317   punpcklbw             m0, m1                 ; m0 = above[0..7]   as words
   318   punpcklbw             m4, m1                 ; m4 = above[16..23] as words
   319   pshuflw               m2, m2, 0x0            ; broadcast above[-1] across the low 4 words
       ; `above` is consumed; its register is re-labelled as the loop counter.
   320   DEFINE_ARGS dst, stride, line, left
   321   mov                lineq, -16                ; counts -16..-1 -> sixteen passes
   322   punpcklqdq            m2, m2                 ; ...and across the high 4 words
   323   add                leftq, 32                 ; point past left[31]; negative offsets index back
   324   psubw                 m0, m2                 ; above[0..7]   - above[-1]
   325   psubw                 m3, m2                 ; above[8..15]  - above[-1]
   326   psubw                 m4, m2                 ; above[16..23] - above[-1]
   327   psubw                 m5, m2                 ; above[24..31] - above[-1]
   328 .loop:
   329   movd                  m2, [leftq+lineq*2]    ; byte 0 = left[r]
   330   movd                  m6, [leftq+lineq*2+1]  ; byte 0 = left[r+1]
   331   punpcklbw             m2, m1                 ; bytes -> words
   332   punpcklbw             m6, m1
   333   pshuflw               m2, m2, 0x0            ; broadcast into the low 4 words
   334   pshuflw               m6, m6, 0x0
   335   punpcklqdq            m2, m2                 ; broadcast into all 8 words
   336   punpcklqdq            m6, m6
   337   paddw                 m7, m2, m0             ; row r, columns 0..7
   338   paddw                 m8, m2, m3             ; row r, columns 8..15
   339   paddw                 m9, m2, m4             ; row r, columns 16..23
   340   paddw                 m2, m5                 ; row r, columns 24..31
   341   packuswb              m7, m8                 ; saturate: row r, columns 0..15
   342   packuswb              m9, m2                 ; saturate: row r, columns 16..31
   343   paddw                 m2, m6, m0             ; row r+1, columns 0..7
   344   paddw                 m8, m6, m3             ; row r+1, columns 8..15
   345   mova   [dstq           ], m7                 ; store row r, left half (frees m7)
   346   paddw                 m7, m6, m4             ; row r+1, columns 16..23
   347   paddw                 m6, m5                 ; row r+1, columns 24..31
   348   mova   [dstq        +16], m9                 ; store row r, right half
   349   packuswb              m2, m8                 ; saturate: row r+1, columns 0..15
   350   packuswb              m7, m6                 ; saturate: row r+1, columns 16..31
   351   mova   [dstq+strideq   ], m2
   352   mova   [dstq+strideq+16], m7
   353   lea                 dstq, [dstq+strideq*2]   ; advance two rows
   354   inc                lineq
   355   jnz .loop
   356   REP_RET
   357 %endif

mercurial