Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
11 %include "third_party/x86inc/x86inc.asm"
13 SECTION .text
;-----------------------------------------------------------------------
; convolve_fn OP                         (OP is `copy` or `avg`)
;
; Emits the function convolve_OP through x86inc's `cglobal` (which applies
; the project's symbol prefix). The function walks a w x h block of bytes:
;   copy: dst[x] = src[x]
;   avg : dst[x] = (dst[x] + src[x] + 1) >> 1       (pavgb)
;
; Presumed C prototype -- confirm against the C declaration:
;   void convolve_OP(const uint8_t *src, ptrdiff_t src_stride,
;                    uint8_t *dst, ptrdiff_t dst_stride,
;                    const int16_t *fx, int fxs,
;                    const int16_t *fy, int fys, int w, int h);
;
; Arguments:
;   src, src_stride, dst, dst_stride   loaded into registers by cglobal
;   fx, fxs, fy, fys                   declared but never read here
;   w, h                               read from their argument slots (wm, hm)
;
; Requirements that follow from the code:
;   * w selects the path: 4, 8, 16 or 32; ANY other value takes the
;     64-byte-wide path.
;   * The row counter is stepped by 1 (w=64), 2 (w=32) or 4 (w=16/8/4) and
;     the loop only exits on exactly zero, so h must be a positive multiple
;     of that step.
;   * For w >= 16 the destination is accessed with mova and with SSE2
;     memory operands, so every dst row must be 16-byte aligned. Source rows
;     are always loaded unaligned (movu/movh).
;   * The w=8 and w=4 paths use MMX registers and this macro issues no
;     explicit emms; NOTE(review): callers presumably clear MMX state.
;
; Register use:
;   r4d     w during dispatch, then rows remaining
;   r5, r6  3*src_stride, 3*dst_stride (w=16/8/4 paths)
;   m0-m3   four rows (or four 16-byte pieces) of source data
;   m4-m7   destination rows, w=4 `avg` path only (mm4-mm7)
;-----------------------------------------------------------------------
%macro convolve_fn 1
INIT_XMM sse2
cglobal convolve_%1, 4, 7, 4, src, src_stride, dst, dst_stride, \
                              fx, fxs, fy, fys, w, h
  mov     r4d, dword wm
  cmp     r4d, 4
  je      .w4
  cmp     r4d, 8
  je      .w8
  cmp     r4d, 16
  je      .w16
  cmp     r4d, 32
  je      .w32

  ; Fall through: 64 bytes wide, one row per iteration.
  mov     r4d, dword hm
.loop64:
  movu    m0, [srcq]
  movu    m1, [srcq+16]
  movu    m2, [srcq+32]
  movu    m3, [srcq+48]
  add     srcq, src_strideq
%ifidn %1, avg
  pavgb   m0, [dstq]
  pavgb   m1, [dstq+16]
  pavgb   m2, [dstq+32]
  pavgb   m3, [dstq+48]
%endif
  mova    [dstq   ], m0
  mova    [dstq+16], m1
  mova    [dstq+32], m2
  mova    [dstq+48], m3
  add     dstq, dst_strideq
  dec     r4d
  jnz     .loop64
  RET

  ; 32 bytes wide, two rows per iteration.
.w32:
  mov     r4d, dword hm
.loop32:
  movu    m0, [srcq]
  movu    m1, [srcq+16]
  movu    m2, [srcq+src_strideq]
  movu    m3, [srcq+src_strideq+16]
  lea     srcq, [srcq+src_strideq*2]
%ifidn %1, avg
  pavgb   m0, [dstq]
  pavgb   m1, [dstq+16]
  pavgb   m2, [dstq+dst_strideq]
  pavgb   m3, [dstq+dst_strideq+16]
%endif
  mova    [dstq                ], m0
  mova    [dstq             +16], m1
  mova    [dstq+dst_strideq    ], m2
  mova    [dstq+dst_strideq+16 ], m3
  lea     dstq, [dstq+dst_strideq*2]
  sub     r4d, 2
  jnz     .loop32
  RET

  ; 16 bytes wide, four rows per iteration.
.w16:
  mov     r4d, dword hm
  lea     r5q, [src_strideq*3]
  lea     r6q, [dst_strideq*3]
.loop16:
  movu    m0, [srcq]
  movu    m1, [srcq+src_strideq]
  movu    m2, [srcq+src_strideq*2]
  movu    m3, [srcq+r5q]
  lea     srcq, [srcq+src_strideq*4]
%ifidn %1, avg
  pavgb   m0, [dstq]
  pavgb   m1, [dstq+dst_strideq]
  pavgb   m2, [dstq+dst_strideq*2]
  pavgb   m3, [dstq+r6q]
%endif
  mova    [dstq              ], m0
  mova    [dstq+dst_strideq  ], m1
  mova    [dstq+dst_strideq*2], m2
  mova    [dstq+r6q          ], m3
  lea     dstq, [dstq+dst_strideq*4]
  sub     r4d, 4
  jnz     .loop16
  RET

  ; From here on mN names 64-bit MMX registers, so mova/movu move 8 bytes
  ; and movh moves 4 bytes.
INIT_MMX sse
  ; 8 bytes wide, four rows per iteration.
.w8:
  mov     r4d, dword hm
  lea     r5q, [src_strideq*3]
  lea     r6q, [dst_strideq*3]
.loop8:
  movu    m0, [srcq]
  movu    m1, [srcq+src_strideq]
  movu    m2, [srcq+src_strideq*2]
  movu    m3, [srcq+r5q]
  lea     srcq, [srcq+src_strideq*4]
%ifidn %1, avg
  pavgb   m0, [dstq]
  pavgb   m1, [dstq+dst_strideq]
  pavgb   m2, [dstq+dst_strideq*2]
  pavgb   m3, [dstq+r6q]
%endif
  mova    [dstq              ], m0
  mova    [dstq+dst_strideq  ], m1
  mova    [dstq+dst_strideq*2], m2
  mova    [dstq+r6q          ], m3
  lea     dstq, [dstq+dst_strideq*4]
  sub     r4d, 4
  jnz     .loop8
  RET

  ; 4 bytes wide, four rows per iteration.
.w4:
  mov     r4d, dword hm
  lea     r5q, [src_strideq*3]
  lea     r6q, [dst_strideq*3]
.loop4:
  movh    m0, [srcq]
  movh    m1, [srcq+src_strideq]
  movh    m2, [srcq+src_strideq*2]
  movh    m3, [srcq+r5q]
  lea     srcq, [srcq+src_strideq*4]
%ifidn %1, avg
  ; pavgb with a memory operand would read 8 bytes per row here, i.e. 4
  ; bytes beyond the 4-pixel row. Load exactly 4 destination bytes with
  ; movh and average register-to-register instead.
  movh    m4, [dstq]
  movh    m5, [dstq+dst_strideq]
  movh    m6, [dstq+dst_strideq*2]
  movh    m7, [dstq+r6q]
  pavgb   m0, m4
  pavgb   m1, m5
  pavgb   m2, m6
  pavgb   m3, m7
%endif
  movh    [dstq              ], m0
  movh    [dstq+dst_strideq  ], m1
  movh    [dstq+dst_strideq*2], m2
  movh    [dstq+r6q          ], m3
  lea     dstq, [dstq+dst_strideq*4]
  sub     r4d, 4
  jnz     .loop4
  RET
%endmacro
151 convolve_fn copy ; expand with %1 = copy: defines convolve_copy; every `%ifidn %1, avg` block is omitted, leaving plain loads and stores
152 convolve_fn avg ; expand with %1 = avg: defines convolve_avg; the pavgb steps are assembled, so source is averaged into dst