media/libvpx/vp9/common/x86/vp9_copy_sse2.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701 and complies with the disk-avoidance requirements
documented in https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 ;
     2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
     3 ;
     4 ;  Use of this source code is governed by a BSD-style license
     5 ;  that can be found in the LICENSE file in the root of the source
     6 ;  tree. An additional intellectual property rights grant can be found
     7 ;  in the file PATENTS.  All contributing project authors may
     8 ;  be found in the AUTHORS file in the root of the source tree.
     9 ;
    11 %include "third_party/x86inc/x86inc.asm"
    13 SECTION .text
    ;-----------------------------------------------------------------------
    ; convolve_fn <op>                      op = copy | avg
    ;
    ; Emits the function convolve_<op>.  cglobal (from x86inc) supplies the
    ; symbol decoration and the calling-convention prologue for the named
    ; arguments:
    ;     src, src_stride, dst, dst_stride, fx, fxs, fy, fys, w, h
    ; Only the first four are loaded into registers.  w and h are read via
    ; the wm / hm operands; fx, fxs, fy and fys are never referenced.
    ;
    ;   copy : every dst row receives the first w bytes of the src row
    ;   avg  : every dst row receives pavgb(src row, dst row), i.e. the
    ;          per-byte rounded average (a + b + 1) >> 1
    ;
    ; Width dispatch: w == 4, 8, 16 or 32 selects a dedicated path; any
    ; other value falls through to the 64-byte-wide path.
    ;
    ; Register roles inside the body:
    ;   r4d  width while dispatching, afterwards the number of rows left
    ;   r5q  src_stride * 3   (w16 / w8 / w4 paths)
    ;   r6q  dst_stride * 3   (w16 / w8 / w4 paths)
    ;   m0-m3 source rows; m4-m7 destination rows (w4 avg path only)
    ;
    ; Requirements that follow from the code:
    ;   * The 64/32/16 paths store with mova on XMM registers, so every dst
    ;     row address must be 16-byte aligned there.  src is read with movu
    ;     and may be unaligned.
    ;   * Loops exit only when the row counter reaches exactly zero, so h
    ;     must be > 0 and a multiple of the rows handled per iteration:
    ;     1 (w64), 2 (w32), 4 (w16, w8, w4).
    ;   * The w8 and w4 paths use MMX registers and no emms is issued in
    ;     this file.  NOTE(review): callers presumably reset the x87/MMX
    ;     state themselves - confirm.
    ;-----------------------------------------------------------------------
    %macro convolve_fn 1
    INIT_XMM sse2
    cglobal convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \
                                  fx, fxs, fy, fys, w, h
      ; Pick the code path from the block width.
      mov r4d, dword wm
      cmp r4d, 4
      je .w4
      cmp r4d, 8
      je .w8
      cmp r4d, 16
      je .w16
      cmp r4d, 32
      je .w32

      ; ---- 64 bytes wide: one row (4 x 16 bytes) per iteration ----
      mov                    r4d, dword hm
    .loop64:
      movu                    m0, [srcq]
      movu                    m1, [srcq+16]
      movu                    m2, [srcq+32]
      movu                    m3, [srcq+48]
      add                   srcq, src_strideq
    %ifidn %1, avg
      pavgb                   m0, [dstq]
      pavgb                   m1, [dstq+16]
      pavgb                   m2, [dstq+32]
      pavgb                   m3, [dstq+48]
    %endif
      mova             [dstq   ], m0
      mova             [dstq+16], m1
      mova             [dstq+32], m2
      mova             [dstq+48], m3
      add                   dstq, dst_strideq
      dec                    r4d
      jnz .loop64
      RET

      ; ---- 32 bytes wide: two rows per iteration ----
    .w32:
      mov                    r4d, dword hm
    .loop32:
      movu                    m0, [srcq]
      movu                    m1, [srcq+16]
      movu                    m2, [srcq+src_strideq]
      movu                    m3, [srcq+src_strideq+16]
      lea                   srcq, [srcq+src_strideq*2]
    %ifidn %1, avg
      pavgb                   m0, [dstq]
      pavgb                   m1, [dstq            +16]
      pavgb                   m2, [dstq+dst_strideq]
      pavgb                   m3, [dstq+dst_strideq+16]
    %endif
      mova [dstq               ], m0
      mova [dstq            +16], m1
      mova [dstq+dst_strideq   ], m2
      mova [dstq+dst_strideq+16], m3
      lea                   dstq, [dstq+dst_strideq*2]
      sub                    r4d, 2
      jnz .loop32
      RET

      ; ---- 16 bytes wide: four rows per iteration ----
    .w16:
      mov                    r4d, dword hm
      lea                    r5q, [src_strideq*3]   ; offset of the 4th src row
      lea                    r6q, [dst_strideq*3]   ; offset of the 4th dst row
    .loop16:
      movu                    m0, [srcq]
      movu                    m1, [srcq+src_strideq]
      movu                    m2, [srcq+src_strideq*2]
      movu                    m3, [srcq+r5q]
      lea                   srcq, [srcq+src_strideq*4]
    %ifidn %1, avg
      pavgb                   m0, [dstq]
      pavgb                   m1, [dstq+dst_strideq]
      pavgb                   m2, [dstq+dst_strideq*2]
      pavgb                   m3, [dstq+r6q]
    %endif
      mova  [dstq              ], m0
      mova  [dstq+dst_strideq  ], m1
      mova  [dstq+dst_strideq*2], m2
      mova  [dstq+r6q          ], m3
      lea                   dstq, [dstq+dst_strideq*4]
      sub                    r4d, 4
      jnz .loop16
      RET

      ; From here on m0-m7 are the 8-byte MMX registers.
    INIT_MMX sse
      ; ---- 8 bytes wide: four rows per iteration ----
    .w8:
      mov                    r4d, dword hm
      lea                    r5q, [src_strideq*3]
      lea                    r6q, [dst_strideq*3]
    .loop8:
      movu                    m0, [srcq]
      movu                    m1, [srcq+src_strideq]
      movu                    m2, [srcq+src_strideq*2]
      movu                    m3, [srcq+r5q]
      lea                   srcq, [srcq+src_strideq*4]
    %ifidn %1, avg
      ; An 8-byte memory operand is exactly one 8-pixel row here.
      pavgb                   m0, [dstq]
      pavgb                   m1, [dstq+dst_strideq]
      pavgb                   m2, [dstq+dst_strideq*2]
      pavgb                   m3, [dstq+r6q]
    %endif
      mova  [dstq              ], m0
      mova  [dstq+dst_strideq  ], m1
      mova  [dstq+dst_strideq*2], m2
      mova  [dstq+r6q          ], m3
      lea                   dstq, [dstq+dst_strideq*4]
      sub                    r4d, 4
      jnz .loop8
      RET

      ; ---- 4 bytes wide: four rows per iteration ----
    .w4:
      mov                    r4d, dword hm
      lea                    r5q, [src_strideq*3]
      lea                    r6q, [dst_strideq*3]
    .loop4:
      movh                    m0, [srcq]
      movh                    m1, [srcq+src_strideq]
      movh                    m2, [srcq+src_strideq*2]
      movh                    m3, [srcq+r5q]
      lea                   srcq, [srcq+src_strideq*4]
    %ifidn %1, avg
      ; Fetch exactly four destination bytes per row with movh and average
      ; register-to-register.  A memory operand on pavgb would be a full
      ; 8-byte MMX load and would read four bytes past the end of each
      ; 4-pixel destination row.
      movh                    m4, [dstq]
      movh                    m5, [dstq+dst_strideq]
      movh                    m6, [dstq+dst_strideq*2]
      movh                    m7, [dstq+r6q]
      pavgb                   m0, m4
      pavgb                   m1, m5
      pavgb                   m2, m6
      pavgb                   m3, m7
    %endif
      movh  [dstq              ], m0
      movh  [dstq+dst_strideq  ], m1
      movh  [dstq+dst_strideq*2], m2
      movh  [dstq+r6q          ], m3
      lea                   dstq, [dstq+dst_strideq*4]
      sub                    r4d, 4
      jnz .loop4
      RET
    %endmacro

    ; Instantiate both variants: plain block copy and average-with-destination.
    convolve_fn copy
    convolve_fn avg

mercurial