Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
12 %include "vpx_ports/x86_abi_support.asm"
;-----------------------------------------------------------------------
; PROCESS_16X2X8 first
;   Handles two rows of a 16-byte-wide source block. For each row it
;   forms the sum of absolute differences (SAD) of the 16 source bytes
;   against the 8 reference windows starting at ref+0 .. ref+7, and keeps
;   the eight 16-bit sums in the word lanes of xmm1.
;   first != 0 : row 0 overwrites xmm1 (starts a new block)
;   first == 0 : row 0 is added to the sums already in xmm1
;   Row 1 is always added to xmm1.
; In:    rsi = source row,    rax = source stride
;              (rows are loaded with movdqa, so rsi and rsi+rax must be
;              16-byte aligned)
;        rdi = reference row, rdx = reference stride
;              (24 bytes are read per row with movq; no alignment needed)
; Out:   xmm1 = running sums; rsi and rdi advanced by two rows
; Clobb: xmm0, xmm2, xmm3, xmm4, xmm5
;-----------------------------------------------------------------------
14 %macro PROCESS_16X2X8 1
15 %if %1
; row 0, first call: results go straight into xmm1
16 movdqa xmm0, XMMWORD PTR [rsi] ; xmm0 = source bytes 0-15
17 movq xmm1, MMWORD PTR [rdi] ; ref bytes 0-7
18 movq xmm3, MMWORD PTR [rdi+8] ; ref bytes 8-15
19 movq xmm2, MMWORD PTR [rdi+16] ; ref bytes 16-23
20 punpcklqdq xmm1, xmm3 ; xmm1 = ref bytes 0-15
21 punpcklqdq xmm3, xmm2 ; xmm3 = ref bytes 8-23
23 movdqa xmm2, xmm1 ; mpsadbw overwrites its destination, keep a copy
24 mpsadbw xmm1, xmm0, 0x0 ; source bytes 0-3 vs ref windows at 0..7
25 mpsadbw xmm2, xmm0, 0x5 ; source bytes 4-7 vs ref windows at 4..11
27 psrldq xmm0, 8 ; move source bytes 8-15 into the low half
29 movdqa xmm4, xmm3
30 mpsadbw xmm3, xmm0, 0x0 ; source bytes 8-11 vs ref windows at 8..15
31 mpsadbw xmm4, xmm0, 0x5 ; source bytes 12-15 vs ref windows at 12..19
33 paddw xmm1, xmm2 ; xmm1 = full 16-byte SAD for each of the 8 offsets
34 paddw xmm1, xmm3
35 paddw xmm1, xmm4
36 %else
; row 0, later calls: same computation in xmm5, then accumulate into xmm1
37 movdqa xmm0, XMMWORD PTR [rsi] ; xmm0 = source bytes 0-15
38 movq xmm5, MMWORD PTR [rdi]
39 movq xmm3, MMWORD PTR [rdi+8]
40 movq xmm2, MMWORD PTR [rdi+16]
41 punpcklqdq xmm5, xmm3 ; xmm5 = ref bytes 0-15
42 punpcklqdq xmm3, xmm2 ; xmm3 = ref bytes 8-23
44 movdqa xmm2, xmm5
45 mpsadbw xmm5, xmm0, 0x0
46 mpsadbw xmm2, xmm0, 0x5
48 psrldq xmm0, 8
50 movdqa xmm4, xmm3
51 mpsadbw xmm3, xmm0, 0x0
52 mpsadbw xmm4, xmm0, 0x5
54 paddw xmm5, xmm2
55 paddw xmm5, xmm3
56 paddw xmm5, xmm4
58 paddw xmm1, xmm5 ; add this row to the running sums
59 %endif
; row 1 (one stride further down), always accumulated into xmm1
60 movdqa xmm0, XMMWORD PTR [rsi + rax]
61 movq xmm5, MMWORD PTR [rdi+ rdx]
62 movq xmm3, MMWORD PTR [rdi+ rdx+8]
63 movq xmm2, MMWORD PTR [rdi+ rdx+16]
64 punpcklqdq xmm5, xmm3
65 punpcklqdq xmm3, xmm2
67 lea rsi, [rsi+rax*2] ; all loads are done: step both pointers two rows
68 lea rdi, [rdi+rdx*2]
70 movdqa xmm2, xmm5
71 mpsadbw xmm5, xmm0, 0x0
72 mpsadbw xmm2, xmm0, 0x5
74 psrldq xmm0, 8
75 movdqa xmm4, xmm3
76 mpsadbw xmm3, xmm0, 0x0
77 mpsadbw xmm4, xmm0, 0x5
79 paddw xmm5, xmm2
80 paddw xmm5, xmm3
81 paddw xmm5, xmm4
83 paddw xmm1, xmm5
84 %endmacro
;-----------------------------------------------------------------------
; PROCESS_8X2X8 first
;   Handles two rows of an 8-byte-wide source block. For each row it
;   forms the SAD of the 8 source bytes against the 8 reference windows
;   starting at ref+0 .. ref+7; the eight 16-bit sums live in the word
;   lanes of xmm1.
;   first != 0 : row 0 overwrites xmm1 (starts a new block)
;   first == 0 : row 0 is added to the sums already in xmm1
;   Row 1 is always added to xmm1.
; In:    rsi = source row,    rax = source stride (8 bytes read per row)
;        rdi = reference row, rdx = reference stride (16 bytes read per row)
;        All loads use movq, so no alignment is required.
; Out:   xmm1 = running sums; rsi and rdi advanced by two rows
; Clobb: xmm0, xmm2, xmm3, xmm5
;-----------------------------------------------------------------------
86 %macro PROCESS_8X2X8 1
87 %if %1
; row 0, first call: results go straight into xmm1
88 movq xmm0, MMWORD PTR [rsi] ; xmm0 = source bytes 0-7
89 movq xmm1, MMWORD PTR [rdi]
90 movq xmm3, MMWORD PTR [rdi+8]
91 punpcklqdq xmm1, xmm3 ; xmm1 = ref bytes 0-15
93 movdqa xmm2, xmm1 ; mpsadbw overwrites its destination, keep a copy
94 mpsadbw xmm1, xmm0, 0x0 ; source bytes 0-3 vs ref windows at 0..7
95 mpsadbw xmm2, xmm0, 0x5 ; source bytes 4-7 vs ref windows at 4..11
96 paddw xmm1, xmm2
97 %else
; row 0, later calls: compute in xmm5, then accumulate into xmm1
98 movq xmm0, MMWORD PTR [rsi]
99 movq xmm5, MMWORD PTR [rdi]
100 movq xmm3, MMWORD PTR [rdi+8]
101 punpcklqdq xmm5, xmm3 ; xmm5 = ref bytes 0-15
103 movdqa xmm2, xmm5
104 mpsadbw xmm5, xmm0, 0x0
105 mpsadbw xmm2, xmm0, 0x5
106 paddw xmm5, xmm2
108 paddw xmm1, xmm5 ; add this row to the running sums
109 %endif
; row 1 (one stride further down), always accumulated into xmm1
110 movq xmm0, MMWORD PTR [rsi + rax]
111 movq xmm5, MMWORD PTR [rdi+ rdx]
112 movq xmm3, MMWORD PTR [rdi+ rdx+8]
113 punpcklqdq xmm5, xmm3
115 lea rsi, [rsi+rax*2] ; all loads are done: step both pointers two rows
116 lea rdi, [rdi+rdx*2]
118 movdqa xmm2, xmm5
119 mpsadbw xmm5, xmm0, 0x0
120 mpsadbw xmm2, xmm0, 0x5
121 paddw xmm5, xmm2
123 paddw xmm1, xmm5
124 %endmacro
;-----------------------------------------------------------------------
; PROCESS_4X2X8 first
;   Handles two rows of a 4-byte-wide source block. For each row a single
;   mpsadbw forms the SAD of the 4 source bytes against the 8 reference
;   windows starting at ref+0 .. ref+7; the eight 16-bit sums live in the
;   word lanes of xmm1.
;   first != 0 : row 0 overwrites xmm1 (starts a new block)
;   first == 0 : row 0 is added to the sums already in xmm1
;   Row 1 is always added to xmm1.
; In:    rsi = source row,    rax = source stride (4 bytes read per row)
;        rdi = reference row, rdx = reference stride (16 bytes loaded per
;              row with two movq)
;        No load here requires alignment.
; Out:   xmm1 = running sums; rsi and rdi advanced by two rows
; Clobb: xmm0, xmm3, xmm5
;-----------------------------------------------------------------------
126 %macro PROCESS_4X2X8 1
127 %if %1
; row 0, first call: result goes straight into xmm1
128 movd xmm0, [rsi] ; xmm0 = source bytes 0-3
129 movq xmm1, MMWORD PTR [rdi]
130 movq xmm3, MMWORD PTR [rdi+8]
131 punpcklqdq xmm1, xmm3 ; xmm1 = ref bytes 0-15
133 mpsadbw xmm1, xmm0, 0x0 ; source bytes 0-3 vs ref windows at 0..7
134 %else
; row 0, later calls: compute in xmm5, then accumulate into xmm1
135 movd xmm0, [rsi]
136 movq xmm5, MMWORD PTR [rdi]
137 movq xmm3, MMWORD PTR [rdi+8]
138 punpcklqdq xmm5, xmm3
140 mpsadbw xmm5, xmm0, 0x0
142 paddw xmm1, xmm5 ; add this row to the running sums
143 %endif
; row 1 (one stride further down), always accumulated into xmm1
144 movd xmm0, [rsi + rax]
145 movq xmm5, MMWORD PTR [rdi+ rdx]
146 movq xmm3, MMWORD PTR [rdi+ rdx+8]
147 punpcklqdq xmm5, xmm3
149 lea rsi, [rsi+rax*2] ; all loads are done: step both pointers two rows
150 lea rdi, [rdi+rdx*2]
152 mpsadbw xmm5, xmm0, 0x0
154 paddw xmm1, xmm5
155 %endmacro
;-----------------------------------------------------------------------
; WRITE_AS_INTS
;   Widens the eight 16-bit sums in xmm1 to eight 32-bit values (zero
;   extension) and stores them, lowest lane first, to the buffer passed
;   as argument 4.
; In:    xmm1 = eight 16-bit sums
; Out:   32 bytes written at arg(4); both stores use movdqa, so the
;        destination must be 16-byte aligned
; Clobb: rdi, xmm0, xmm1, xmm2
;-----------------------------------------------------------------------
157 %macro WRITE_AS_INTS 0
158 mov rdi, arg(4) ;Results (rdi = destination buffer)
159 pxor xmm0, xmm0 ; zero register used as the high half of each dword
160 movdqa xmm2, xmm1
161 punpcklwd xmm1, xmm0 ; sums 0-3 as 32-bit values
162 punpckhwd xmm2, xmm0 ; sums 4-7 as 32-bit values
164 movdqa [rdi], xmm1
165 movdqa [rdi + 16], xmm2
166 %endmacro
168 ;void vp9_sad16x16x8_sse4(
169 ; const unsigned char *src_ptr,
170 ; int src_stride,
171 ; const unsigned char *ref_ptr,
172 ; int ref_stride,
173 ; unsigned int *sad_array);
;
; Computes the SAD of a 16x16 source block against the 8 reference blocks
; starting at ref_ptr+0 .. ref_ptr+7 and stores the 8 results to sad_array.
; NOTE(review): the sad_array element type is inferred from WRITE_AS_INTS,
; which stores eight 32-bit values (32 bytes); confirm against the C
; declaration.
; Requirements visible in this file:
;   - every source row (src_ptr + n*src_stride) must be 16-byte aligned
;     (movdqa loads in PROCESS_16X2X8)
;   - sad_array must be 16-byte aligned (movdqa stores in WRITE_AS_INTS)
;   - 24 bytes are read from each reference row
; The sums are accumulated in 16-bit lanes; the worst case here,
; 16*16*255 = 65280, still fits.
; rsi and rdi are saved and restored; rax, rdx and xmm0-xmm5 are clobbered.
; arg(), sym(), SHADOW_ARGS_TO_STACK and UNSHADOW_ARGS come from
; vpx_ports/x86_abi_support.asm, which is not visible here.
174 global sym(vp9_sad16x16x8_sse4) PRIVATE
175 sym(vp9_sad16x16x8_sse4):
176 push rbp
177 mov rbp, rsp
178 SHADOW_ARGS_TO_STACK 5
179 push rsi
180 push rdi
181 ; end prolog
183 mov rsi, arg(0) ;src_ptr
184 mov rdi, arg(2) ;ref_ptr
186 movsxd rax, dword ptr arg(1) ;src_stride (sign-extended to 64 bits)
187 movsxd rdx, dword ptr arg(3) ;ref_stride (sign-extended to 64 bits)
; 8 invocations x 2 rows = 16 rows; only the first one initialises xmm1
189 PROCESS_16X2X8 1
190 PROCESS_16X2X8 0
191 PROCESS_16X2X8 0
192 PROCESS_16X2X8 0
193 PROCESS_16X2X8 0
194 PROCESS_16X2X8 0
195 PROCESS_16X2X8 0
196 PROCESS_16X2X8 0
; rdi is reused as the output pointer from here on
198 WRITE_AS_INTS
200 ; begin epilog
201 pop rdi
202 pop rsi
203 UNSHADOW_ARGS
204 pop rbp
205 ret
208 ;void vp9_sad16x8x8_sse4(
209 ; const unsigned char *src_ptr,
210 ; int src_stride,
211 ; const unsigned char *ref_ptr,
212 ; int ref_stride,
213 ; unsigned int *sad_array
214 ;);
;
; Computes the SAD of a 16-wide, 8-row source block against the 8
; reference blocks starting at ref_ptr+0 .. ref_ptr+7 and stores the 8
; results to sad_array.
; NOTE(review): the sad_array element type is inferred from WRITE_AS_INTS,
; which stores eight 32-bit values (32 bytes); confirm against the C
; declaration.
; Requirements visible in this file:
;   - every source row (src_ptr + n*src_stride) must be 16-byte aligned
;     (movdqa loads in PROCESS_16X2X8)
;   - sad_array must be 16-byte aligned (movdqa stores in WRITE_AS_INTS)
;   - 24 bytes are read from each reference row
; rsi and rdi are saved and restored; rax, rdx and xmm0-xmm5 are clobbered.
215 global sym(vp9_sad16x8x8_sse4) PRIVATE
216 sym(vp9_sad16x8x8_sse4):
217 push rbp
218 mov rbp, rsp
219 SHADOW_ARGS_TO_STACK 5
220 push rsi
221 push rdi
222 ; end prolog
224 mov rsi, arg(0) ;src_ptr
225 mov rdi, arg(2) ;ref_ptr
227 movsxd rax, dword ptr arg(1) ;src_stride (sign-extended to 64 bits)
228 movsxd rdx, dword ptr arg(3) ;ref_stride (sign-extended to 64 bits)
; 4 invocations x 2 rows = 8 rows; only the first one initialises xmm1
230 PROCESS_16X2X8 1
231 PROCESS_16X2X8 0
232 PROCESS_16X2X8 0
233 PROCESS_16X2X8 0
; rdi is reused as the output pointer from here on
235 WRITE_AS_INTS
237 ; begin epilog
238 pop rdi
239 pop rsi
240 UNSHADOW_ARGS
241 pop rbp
242 ret
245 ;void vp9_sad8x8x8_sse4(
246 ; const unsigned char *src_ptr,
247 ; int src_stride,
248 ; const unsigned char *ref_ptr,
249 ; int ref_stride,
250 ; unsigned int *sad_array
251 ;);
;
; Computes the SAD of an 8x8 source block against the 8 reference blocks
; starting at ref_ptr+0 .. ref_ptr+7 and stores the 8 results to sad_array.
; NOTE(review): the sad_array element type is inferred from WRITE_AS_INTS,
; which stores eight 32-bit values (32 bytes); confirm against the C
; declaration.
; Requirements visible in this file:
;   - sad_array must be 16-byte aligned (movdqa stores in WRITE_AS_INTS)
;   - 16 bytes are read from each reference row
;   - source and reference rows need no particular alignment (movq loads)
; rsi and rdi are saved and restored; rax, rdx, xmm0-xmm3 and xmm5 are
; clobbered.
252 global sym(vp9_sad8x8x8_sse4) PRIVATE
253 sym(vp9_sad8x8x8_sse4):
254 push rbp
255 mov rbp, rsp
256 SHADOW_ARGS_TO_STACK 5
257 push rsi
258 push rdi
259 ; end prolog
261 mov rsi, arg(0) ;src_ptr
262 mov rdi, arg(2) ;ref_ptr
264 movsxd rax, dword ptr arg(1) ;src_stride (sign-extended to 64 bits)
265 movsxd rdx, dword ptr arg(3) ;ref_stride (sign-extended to 64 bits)
; 4 invocations x 2 rows = 8 rows; only the first one initialises xmm1
267 PROCESS_8X2X8 1
268 PROCESS_8X2X8 0
269 PROCESS_8X2X8 0
270 PROCESS_8X2X8 0
; rdi is reused as the output pointer from here on
272 WRITE_AS_INTS
274 ; begin epilog
275 pop rdi
276 pop rsi
277 UNSHADOW_ARGS
278 pop rbp
279 ret
282 ;void vp9_sad8x16x8_sse4(
283 ; const unsigned char *src_ptr,
284 ; int src_stride,
285 ; const unsigned char *ref_ptr,
286 ; int ref_stride,
287 ; unsigned int *sad_array
288 ;);
;
; Computes the SAD of an 8-wide, 16-row source block against the 8
; reference blocks starting at ref_ptr+0 .. ref_ptr+7 and stores the 8
; results to sad_array.
; NOTE(review): the sad_array element type is inferred from WRITE_AS_INTS,
; which stores eight 32-bit values (32 bytes); confirm against the C
; declaration.
; Requirements visible in this file:
;   - sad_array must be 16-byte aligned (movdqa stores in WRITE_AS_INTS)
;   - 16 bytes are read from each reference row
;   - source and reference rows need no particular alignment (movq loads)
; rsi and rdi are saved and restored; rax, rdx, xmm0-xmm3 and xmm5 are
; clobbered.
289 global sym(vp9_sad8x16x8_sse4) PRIVATE
290 sym(vp9_sad8x16x8_sse4):
291 push rbp
292 mov rbp, rsp
293 SHADOW_ARGS_TO_STACK 5
294 push rsi
295 push rdi
296 ; end prolog
298 mov rsi, arg(0) ;src_ptr
299 mov rdi, arg(2) ;ref_ptr
301 movsxd rax, dword ptr arg(1) ;src_stride (sign-extended to 64 bits)
302 movsxd rdx, dword ptr arg(3) ;ref_stride (sign-extended to 64 bits)
; 8 invocations x 2 rows = 16 rows; only the first one initialises xmm1
304 PROCESS_8X2X8 1
305 PROCESS_8X2X8 0
306 PROCESS_8X2X8 0
307 PROCESS_8X2X8 0
308 PROCESS_8X2X8 0
309 PROCESS_8X2X8 0
310 PROCESS_8X2X8 0
311 PROCESS_8X2X8 0
; rdi is reused as the output pointer from here on
313 WRITE_AS_INTS
315 ; begin epilog
316 pop rdi
317 pop rsi
318 UNSHADOW_ARGS
319 pop rbp
320 ret
323 ;void vp9_sad4x4x8_sse4(
324 ; const unsigned char *src_ptr,
325 ; int src_stride,
326 ; const unsigned char *ref_ptr,
327 ; int ref_stride,
328 ; unsigned int *sad_array
329 ;);
;
; Computes the SAD of a 4x4 source block against the 8 reference blocks
; starting at ref_ptr+0 .. ref_ptr+7 and stores the 8 results to sad_array.
; NOTE(review): the sad_array element type is inferred from WRITE_AS_INTS,
; which stores eight 32-bit values (32 bytes); confirm against the C
; declaration.
; Requirements visible in this file:
;   - sad_array must be 16-byte aligned (movdqa stores in WRITE_AS_INTS)
;   - 16 bytes are loaded from each reference row
;   - source and reference rows need no particular alignment
; rsi and rdi are saved and restored; rax, rdx, xmm0-xmm3 and xmm5 are
; clobbered.
330 global sym(vp9_sad4x4x8_sse4) PRIVATE
331 sym(vp9_sad4x4x8_sse4):
332 push rbp
333 mov rbp, rsp
334 SHADOW_ARGS_TO_STACK 5
335 push rsi
336 push rdi
337 ; end prolog
339 mov rsi, arg(0) ;src_ptr
340 mov rdi, arg(2) ;ref_ptr
342 movsxd rax, dword ptr arg(1) ;src_stride (sign-extended to 64 bits)
343 movsxd rdx, dword ptr arg(3) ;ref_stride (sign-extended to 64 bits)
; 2 invocations x 2 rows = 4 rows; only the first one initialises xmm1
345 PROCESS_4X2X8 1
346 PROCESS_4X2X8 0
; rdi is reused as the output pointer from here on
348 WRITE_AS_INTS
350 ; begin epilog
351 pop rdi
352 pop rsi
353 UNSHADOW_ARGS
354 pop rbp
355 ret