media/libvpx/vp9/common/x86/vp9_copy_sse2.asm

Thu, 15 Jan 2015 15:59:08 +0100

author:      Michael Schloh von Bennewitz <michael@schloh.com>
date:        Thu, 15 Jan 2015 15:59:08 +0100
branch:      TOR_BUG_9701
changeset:   10:ac0c01689b40
permissions: -rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 ;
michael@0 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 ;
michael@0 4 ; Use of this source code is governed by a BSD-style license
michael@0 5 ; that can be found in the LICENSE file in the root of the source
michael@0 6 ; tree. An additional intellectual property rights grant can be found
michael@0 7 ; in the file PATENTS. All contributing project authors may
michael@0 8 ; be found in the AUTHORS file in the root of the source tree.
michael@0 9 ;
michael@0 10
michael@0 11 %include "third_party/x86inc/x86inc.asm"
michael@0 12
michael@0 13 SECTION .text
michael@0 14
;------------------------------------------------------------------------------
; convolve_fn %1 -- emit an SSE2 block-copy convolve function via x86inc.
; %1 is 'copy' (straight copy) or 'avg' (copy averaged into dst with pavgb).
; Arguments (per the cglobal declaration): src, src_stride, dst, dst_stride,
; fx, fxs, fy, fys, w, h. The filter args (fx/fxs/fy/fys) are declared but
; never read; only w and h are loaded from their stack homes (wm/hm).
; Dispatch: width is compared against 4/8/16/32; anything else falls through
; to the 64-wide loop. Sources are read with unaligned loads (movu); dst is
; written with aligned stores (mova/movh).
; NOTE(review): the mova stores assume dst is 16-byte aligned for the
; XMM paths -- presumably guaranteed by the block-size callers; confirm.
; Register use: r4d = remaining row count; r5q/r6q = 3*stride for the
; 4-rows-per-iteration loops. Clobbers: m0-m3, flags.
;------------------------------------------------------------------------------
michael@0 15 %macro convolve_fn 1
michael@0 16 INIT_XMM sse2
michael@0 17 cglobal convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \
michael@0 18 fx, fxs, fy, fys, w, h
michael@0 19 mov r4d, dword wm ; r4d = block width, dispatch key
michael@0 20 cmp r4d, 4
michael@0 21 je .w4
michael@0 22 cmp r4d, 8
michael@0 23 je .w8
michael@0 24 cmp r4d, 16
michael@0 25 je .w16
michael@0 26 cmp r4d, 32
michael@0 27 je .w32
michael@0 28
; -- width 64: one row per iteration, four 16-byte XMM chunks --
michael@0 29 mov r4d, dword hm ; r4d = rows remaining
michael@0 30 .loop64:
michael@0 31 movu m0, [srcq] ; unaligned loads: src alignment not assumed
michael@0 32 movu m1, [srcq+16]
michael@0 33 movu m2, [srcq+32]
michael@0 34 movu m3, [srcq+48]
michael@0 35 add srcq, src_strideq ; advance src one row
michael@0 36 %ifidn %1, avg
; avg variant: byte-wise average with the existing dst row (rounds up)
michael@0 37 pavgb m0, [dstq]
michael@0 38 pavgb m1, [dstq+16]
michael@0 39 pavgb m2, [dstq+32]
michael@0 40 pavgb m3, [dstq+48]
michael@0 41 %endif
michael@0 42 mova [dstq ], m0 ; aligned stores -- dst assumed 16B aligned
michael@0 43 mova [dstq+16], m1
michael@0 44 mova [dstq+32], m2
michael@0 45 mova [dstq+48], m3
michael@0 46 add dstq, dst_strideq ; advance dst one row
michael@0 47 dec r4d
michael@0 48 jnz .loop64
michael@0 49 RET
michael@0 50
; -- width 32: two rows per iteration, two 16-byte chunks each --
michael@0 51 .w32:
michael@0 52 mov r4d, dword hm
michael@0 53 .loop32:
michael@0 54 movu m0, [srcq] ; row 0
michael@0 55 movu m1, [srcq+16]
michael@0 56 movu m2, [srcq+src_strideq] ; row 1
michael@0 57 movu m3, [srcq+src_strideq+16]
michael@0 58 lea srcq, [srcq+src_strideq*2] ; advance src two rows
michael@0 59 %ifidn %1, avg
michael@0 60 pavgb m0, [dstq]
michael@0 61 pavgb m1, [dstq +16]
michael@0 62 pavgb m2, [dstq+dst_strideq]
michael@0 63 pavgb m3, [dstq+dst_strideq+16]
michael@0 64 %endif
michael@0 65 mova [dstq ], m0
michael@0 66 mova [dstq +16], m1
michael@0 67 mova [dstq+dst_strideq ], m2
michael@0 68 mova [dstq+dst_strideq+16], m3
michael@0 69 lea dstq, [dstq+dst_strideq*2]
michael@0 70 sub r4d, 2 ; consumed two rows
michael@0 71 jnz .loop32
michael@0 72 RET
michael@0 73
; -- width 16: four rows per iteration, one XMM register per row --
michael@0 74 .w16:
michael@0 75 mov r4d, dword hm
michael@0 76 lea r5q, [src_strideq*3] ; r5q = 3*src_stride for row 3 addressing
michael@0 77 lea r6q, [dst_strideq*3] ; r6q = 3*dst_stride
michael@0 78 .loop16:
michael@0 79 movu m0, [srcq]
michael@0 80 movu m1, [srcq+src_strideq]
michael@0 81 movu m2, [srcq+src_strideq*2]
michael@0 82 movu m3, [srcq+r5q]
michael@0 83 lea srcq, [srcq+src_strideq*4] ; advance src four rows
michael@0 84 %ifidn %1, avg
michael@0 85 pavgb m0, [dstq]
michael@0 86 pavgb m1, [dstq+dst_strideq]
michael@0 87 pavgb m2, [dstq+dst_strideq*2]
michael@0 88 pavgb m3, [dstq+r6q]
michael@0 89 %endif
michael@0 90 mova [dstq ], m0
michael@0 91 mova [dstq+dst_strideq ], m1
michael@0 92 mova [dstq+dst_strideq*2], m2
michael@0 93 mova [dstq+r6q ], m3
michael@0 94 lea dstq, [dstq+dst_strideq*4]
michael@0 95 sub r4d, 4 ; consumed four rows
michael@0 96 jnz .loop16
michael@0 97 RET
michael@0 98
; -- width 8: same 4-rows-per-iteration shape, but m0-m3 are now 64-bit
; MMX registers (INIT_MMX remaps movu/mova and m0..m3), so each load/store
; moves exactly 8 bytes --
michael@0 99 INIT_MMX sse
michael@0 100 .w8:
michael@0 101 mov r4d, dword hm
michael@0 102 lea r5q, [src_strideq*3]
michael@0 103 lea r6q, [dst_strideq*3]
michael@0 104 .loop8:
michael@0 105 movu m0, [srcq]
michael@0 106 movu m1, [srcq+src_strideq]
michael@0 107 movu m2, [srcq+src_strideq*2]
michael@0 108 movu m3, [srcq+r5q]
michael@0 109 lea srcq, [srcq+src_strideq*4]
michael@0 110 %ifidn %1, avg
michael@0 111 pavgb m0, [dstq]
michael@0 112 pavgb m1, [dstq+dst_strideq]
michael@0 113 pavgb m2, [dstq+dst_strideq*2]
michael@0 114 pavgb m3, [dstq+r6q]
michael@0 115 %endif
michael@0 116 mova [dstq ], m0
michael@0 117 mova [dstq+dst_strideq ], m1
michael@0 118 mova [dstq+dst_strideq*2], m2
michael@0 119 mova [dstq+r6q ], m3
michael@0 120 lea dstq, [dstq+dst_strideq*4]
michael@0 121 sub r4d, 4
michael@0 122 jnz .loop8
michael@0 123 RET
michael@0 124
; -- width 4: still in MMX mode; movh moves the low half of an MMX reg,
; i.e. 4 bytes per row (presumably mapped to movd by x86inc -- confirm) --
michael@0 125 .w4:
michael@0 126 mov r4d, dword hm
michael@0 127 lea r5q, [src_strideq*3]
michael@0 128 lea r6q, [dst_strideq*3]
michael@0 129 .loop4:
michael@0 130 movh m0, [srcq]
michael@0 131 movh m1, [srcq+src_strideq]
michael@0 132 movh m2, [srcq+src_strideq*2]
michael@0 133 movh m3, [srcq+r5q]
michael@0 134 lea srcq, [srcq+src_strideq*4]
michael@0 135 %ifidn %1, avg
michael@0 136 pavgb m0, [dstq] ; avg uses full-reg pavgb; only low 4 bytes stored
michael@0 137 pavgb m1, [dstq+dst_strideq]
michael@0 138 pavgb m2, [dstq+dst_strideq*2]
michael@0 139 pavgb m3, [dstq+r6q]
michael@0 140 %endif
michael@0 141 movh [dstq ], m0
michael@0 142 movh [dstq+dst_strideq ], m1
michael@0 143 movh [dstq+dst_strideq*2], m2
michael@0 144 movh [dstq+r6q ], m3
michael@0 145 lea dstq, [dstq+dst_strideq*4]
michael@0 146 sub r4d, 4
michael@0 147 jnz .loop4
michael@0 148 RET
michael@0 149 %endmacro
michael@0 150
; Instantiate both variants of the macro: a plain block copy, and a variant
; that averages the source into the existing destination (pavgb paths above).
michael@0 151 convolve_fn copy
michael@0 152 convolve_fn avg

mercurial