Thu, 15 Jan 2015 15:59:08 +0100
Implement a real Private Browsing Mode condition by changing the API/ABI;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
12 %include "vpx_ports/x86_abi_support.asm"
;unsigned int vp9_get_mb_ss_sse2
;(
;    short *src_ptr
;)
; Returns the sum of squares of the 256 signed 16-bit values starting at
; src_ptr (8 iterations x 64 bytes). The data is read with movdqa, so
; src_ptr must be 16-byte aligned.
global sym(vp9_get_mb_ss_sse2) PRIVATE
sym(vp9_get_mb_ss_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 1
    GET_GOT     rbx
    push        rsi
    push        rdi
    sub         rsp, 16
    ; end prolog

    mov         rax, arg(0)             ;[src_ptr]
    mov         rcx, 8                  ; 8 chunks of 32 words each
    pxor        xmm4, xmm4              ; xmm4 = four 32-bit partial sums

.NEXTROW:
    movdqa      xmm0, [rax]
    movdqa      xmm1, [rax+16]
    movdqa      xmm2, [rax+32]
    movdqa      xmm3, [rax+48]
    pmaddwd     xmm0, xmm0              ; square each word, add adjacent pairs
    pmaddwd     xmm1, xmm1
    pmaddwd     xmm2, xmm2
    pmaddwd     xmm3, xmm3

    paddd       xmm0, xmm1
    paddd       xmm2, xmm3
    paddd       xmm4, xmm0
    paddd       xmm4, xmm2

    add         rax, 0x40               ; advance 64 bytes
    dec         rcx
    ; dec updates ZF but never CF, so branch on ZF alone instead of on the
    ; carry left behind by the pointer add above.
    jnz         .NEXTROW

    ; fold the four dword partial sums into the low dword
    movdqa      xmm3, xmm4
    psrldq      xmm4, 8
    paddd       xmm4, xmm3
    movdqa      xmm3, xmm4
    psrldq      xmm4, 4
    paddd       xmm4, xmm3
    ; the result is an unsigned int: return only the low 32 bits
    movd        eax, xmm4

    ; begin epilog
    add         rsp, 16
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
;unsigned int vp9_get16x16var_sse2
;(
;    unsigned char   *  src_ptr,
;    int             source_stride,
;    unsigned char   *  ref_ptr,
;    int             recon_stride,
;    unsigned int    *  SSE,
;    int             *  Sum
;)
; Walks 16 rows of 16 bytes from src_ptr and ref_ptr and stores
;   *Sum = sum of (src - ref)
;   *SSE = sum of (src - ref)^2
; Results are delivered through the two pointers; rax is left holding the
; Sum pointer on return.
global sym(vp9_get16x16var_sse2) PRIVATE
sym(vp9_get16x16var_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    push        rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                 ;[src_ptr]
    movsxd      rax, DWORD PTR arg(1)       ;[source_stride]
    mov         rdi, arg(2)                 ;[ref_ptr]
    movsxd      rdx, DWORD PTR arg(3)       ;[recon_stride]

    ; prefetch rows 0..7 of the source block
    lea         rcx, [rax+rax*2]            ; rcx = 3 * source_stride
    lea         rbx, [rsi+rax*4]            ; rbx = row 4 of source
    prefetcht0  [rsi]
    prefetcht0  [rsi+rax]
    prefetcht0  [rsi+rax*2]
    prefetcht0  [rsi+rcx]
    prefetcht0  [rbx]
    prefetcht0  [rbx+rax]
    prefetcht0  [rbx+rax*2]
    prefetcht0  [rbx+rcx]

    ; prefetch rows 0..7 of the reference block
    lea         rcx, [rdx+rdx*2]            ; rcx = 3 * recon_stride
    lea         rbx, [rdi+rdx*4]            ; rbx = row 4 of reference
    prefetcht0  [rdi]
    prefetcht0  [rdi+rdx]
    prefetcht0  [rdi+rdx*2]
    prefetcht0  [rdi+rcx]
    prefetcht0  [rbx]
    prefetcht0  [rbx+rdx]
    prefetcht0  [rbx+rdx*2]
    prefetcht0  [rbx+rcx]

    pxor        xmm0, xmm0                  ; zero, used to widen bytes
    pxor        xmm6, xmm6                  ; four dword SSE partials
    pxor        xmm7, xmm7                  ; eight word difference partials
    mov         rcx, 16                     ; rows remaining

.next_row:
    movdqu      xmm1, [rsi]                 ; 16 source bytes
    movdqu      xmm2, [rdi]                 ; 16 reference bytes

    prefetcht0  [rsi+rax*8]                 ; stay eight rows ahead
    prefetcht0  [rdi+rdx*8]

    movdqa      xmm3, xmm1
    movdqa      xmm4, xmm2

    ; low eight pixels
    punpcklbw   xmm1, xmm0
    punpcklbw   xmm2, xmm0
    psubw       xmm1, xmm2

    ; high eight pixels
    punpckhbw   xmm3, xmm0
    punpckhbw   xmm4, xmm0
    psubw       xmm3, xmm4

    paddw       xmm7, xmm1
    paddw       xmm7, xmm3

    pmaddwd     xmm1, xmm1
    pmaddwd     xmm3, xmm3
    paddd       xmm6, xmm1
    paddd       xmm6, xmm3

    add         rsi, rax
    add         rdi, rdx

    dec         rcx
    jnz         .next_row

    ; Sum: widen the eight signed words to dwords, then add horizontally
    movdqa      xmm5, xmm7
    pslld       xmm5, 16
    psrad       xmm5, 16                    ; even words, sign-extended
    psrad       xmm7, 16                    ; odd words, sign-extended
    paddd       xmm7, xmm5
    movdqa      xmm5, xmm7
    psrldq      xmm5, 8
    paddd       xmm7, xmm5
    movdqa      xmm5, xmm7
    psrldq      xmm5, 4
    paddd       xmm7, xmm5                  ; low dword = Sum

    ; SSE: add the four dword partials horizontally
    movdqa      xmm1, xmm6
    psrldq      xmm1, 8
    paddd       xmm6, xmm1
    movdqa      xmm1, xmm6
    psrldq      xmm1, 4
    paddd       xmm6, xmm1                  ; low dword = SSE

    mov         rax, arg(5)                 ;[Sum]
    mov         rdi, arg(4)                 ;[SSE]

    movd        DWORD PTR [rax], xmm7
    movd        DWORD PTR [rdi], xmm6

    ; begin epilog
    pop         rdi
    pop         rsi
    pop         rbx
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
;unsigned int vp9_get8x8var_sse2
;(
;    unsigned char   *  src_ptr,
;    int             source_stride,
;    unsigned char   *  ref_ptr,
;    int             recon_stride,
;    unsigned int    *  SSE,
;    int             *  Sum
;)
; Walks 8 rows of 8 bytes from src_ptr and ref_ptr and stores
;   *Sum = sum of (src - ref), accumulated in 16 bits and sign-extended
;   *SSE = sum of (src - ref)^2
; Results are delivered through the two pointers; rax is left holding the
; Sum pointer on return.
global sym(vp9_get8x8var_sse2) PRIVATE
sym(vp9_get8x8var_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    sub         rsp, 16
    ; end prolog

    mov         rsi, arg(0)                 ;[src_ptr]
    movsxd      rax, DWORD PTR arg(1)       ;[source_stride]
    mov         rdi, arg(2)                 ;[ref_ptr]
    movsxd      rdx, DWORD PTR arg(3)       ;[recon_stride]

    pxor        xmm0, xmm0                  ; zero, used to widen bytes
    pxor        xmm1, xmm1                  ; four dword SSE partials
    pxor        xmm7, xmm7                  ; eight word difference partials
    mov         rcx, 8                      ; rows remaining

.next_row:
    movq        xmm2, QWORD PTR [rsi]       ; 8 source bytes
    movq        xmm3, QWORD PTR [rdi]       ; 8 reference bytes

    punpcklbw   xmm2, xmm0
    punpcklbw   xmm3, xmm0

    psubsw      xmm2, xmm3                  ; per-pixel difference
    paddw       xmm7, xmm2

    pmaddwd     xmm2, xmm2
    paddd       xmm1, xmm2

    add         rsi, rax
    add         rdi, rdx

    dec         rcx
    jnz         .next_row

    ; Sum: add the eight words horizontally with 16-bit arithmetic
    movdqa      xmm2, xmm7
    psrldq      xmm2, 8
    paddw       xmm7, xmm2
    movdqa      xmm2, xmm7
    psrldq      xmm2, 4
    paddw       xmm7, xmm2
    movdqa      xmm2, xmm7
    psrldq      xmm2, 2
    paddw       xmm7, xmm2                  ; low word = Sum (16 bits)

    ; SSE: add the four dword partials horizontally
    movdqa      xmm2, xmm1
    psrldq      xmm2, 8
    paddd       xmm1, xmm2
    movdqa      xmm2, xmm1
    psrldq      xmm2, 4
    paddd       xmm1, xmm2                  ; low dword = SSE

    mov         rax, arg(5)                 ;[Sum]
    mov         rdi, arg(4)                 ;[SSE]

    movd        edx, xmm7
    movsx       ecx, dx                     ; sign-extend the 16-bit Sum

    mov         DWORD PTR [rax], ecx
    movd        DWORD PTR [rdi], xmm1

    ; begin epilog
    add         rsp, 16
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
;void vp9_half_horiz_vert_variance8x_h_sse2
;(
;    unsigned char *ref_ptr,
;    int ref_pixels_per_line,
;    unsigned char *src_ptr,
;    int src_pixels_per_line,
;    unsigned int Height,
;    int *sum,
;    unsigned int *sumsquared
;)
; 8-pixel-wide variance helper. Each reference pixel is the rounded average
; (pavgb) of a pixel and its right neighbour, averaged again with the same
; value from the row below. The function reads Height+1 reference rows of
; 9 bytes and Height source rows of 8 bytes, then stores
;   *sum        = sum of (filtered_ref - src)
;   *sumsquared = sum of (filtered_ref - src)^2
; Height must be at least 1: the loop body runs before the counter is tested.
; The whole routine stays in XMM registers, so no MMX state is left behind
; and no emms is needed by the caller.
global sym(vp9_half_horiz_vert_variance8x_h_sse2) PRIVATE
sym(vp9_half_horiz_vert_variance8x_h_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                 ;ref_ptr
    mov         rdi, arg(2)                 ;src_ptr
    movsxd      rax, dword ptr arg(1)       ;ref_pixels_per_line
    movsxd      rdx, dword ptr arg(3)       ;src_pixels_per_line
    movsxd      rcx, dword ptr arg(4)       ;Height

    pxor        xmm0, xmm0                  ; zero, used to widen bytes
    pxor        xmm6, xmm6                  ; error accumulator (8 words)
    pxor        xmm7, xmm7                  ; sse accumulator (4 dwords)

    ; horizontal average of the first reference row
    movq        xmm5, QWORD PTR [rsi]       ; xmm5 = s0..s7
    movq        xmm3, QWORD PTR [rsi+1]     ; xmm3 = s1..s8
    pavgb       xmm5, xmm3                  ; xmm5 = horizontal avg, row 0
    add         rsi, rax                    ; next reference row

.half_horiz_vert_variance8x_h_1:
    movq        xmm1, QWORD PTR [rsi]
    movq        xmm2, QWORD PTR [rsi+1]
    pavgb       xmm1, xmm2                  ; xmm1 = horizontal avg, row i+1

    pavgb       xmm5, xmm1                  ; vertical avg of rows i and i+1
    punpcklbw   xmm5, xmm0                  ; widen to words

    movq        xmm3, QWORD PTR [rdi]       ; xmm3 = d0..d7
    punpcklbw   xmm3, xmm0                  ; widen to words

    psubw       xmm5, xmm3                  ; per-pixel difference
    paddw       xmm6, xmm5                  ; accumulate differences
    pmaddwd     xmm5, xmm5                  ; square and pair-add
    paddd       xmm7, xmm5                  ; accumulate squared differences

    movdqa      xmm5, xmm1                  ; keep row i+1 for the next pass

    add         rsi, rax                    ; next reference row
    add         rdi, rdx                    ; next source row

    sub         rcx, 1
    jnz         .half_horiz_vert_variance8x_h_1

    ; sum: fold the eight words with 16-bit adds, then sign-extend by
    ; adding them in the upper half of each dword and shifting back down
    movdqa      xmm1, xmm6
    psrldq      xmm1, 8
    paddw       xmm6, xmm1                  ; words 0..3 = w[i] + w[i+4]
    pxor        xmm1, xmm1
    punpcklwd   xmm1, xmm6                  ; dword i = word i << 16
    movdqa      xmm2, xmm1
    psrldq      xmm2, 8
    paddd       xmm1, xmm2
    movdqa      xmm2, xmm1
    psrldq      xmm2, 4
    paddd       xmm1, xmm2                  ; low dword = total << 16
    psrad       xmm1, 16                    ; low dword = signed total

    ; sumsquared: add the four dword partials horizontally
    movdqa      xmm2, xmm7
    psrldq      xmm2, 8
    paddd       xmm7, xmm2
    movdqa      xmm2, xmm7
    psrldq      xmm2, 4
    paddd       xmm7, xmm2                  ; low dword = sum of squares

    mov         rsi, arg(5)                 ; sum
    mov         rdi, arg(6)                 ; sumsquared

    movd        DWORD PTR [rsi], xmm1
    movd        DWORD PTR [rdi], xmm7

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
;void vp9_half_vert_variance8x_h_sse2
;(
;    unsigned char *ref_ptr,
;    int ref_pixels_per_line,
;    unsigned char *src_ptr,
;    int src_pixels_per_line,
;    unsigned int Height,
;    int *sum,
;    unsigned int *sumsquared
;)
; 8-pixel-wide variance helper. Each reference pixel is the rounded average
; (pavgb) of a pixel and the pixel directly below it. The function reads
; Height+1 reference rows and Height source rows of 8 bytes, then stores
;   *sum        = sum of (filtered_ref - src)
;   *sumsquared = sum of (filtered_ref - src)^2
; Height must be at least 1: the loop body runs before the counter is tested.
; The whole routine stays in XMM registers, so no MMX state is left behind
; and no emms is needed by the caller.
global sym(vp9_half_vert_variance8x_h_sse2) PRIVATE
sym(vp9_half_vert_variance8x_h_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                 ;ref_ptr
    mov         rdi, arg(2)                 ;src_ptr
    movsxd      rax, dword ptr arg(1)       ;ref_pixels_per_line
    movsxd      rdx, dword ptr arg(3)       ;src_pixels_per_line
    movsxd      rcx, dword ptr arg(4)       ;Height

    pxor        xmm0, xmm0                  ; zero, used to widen bytes
    pxor        xmm6, xmm6                  ; error accumulator (8 words)
    pxor        xmm7, xmm7                  ; sse accumulator (4 dwords)

.half_vert_variance8x_h_1:
    movq        xmm5, QWORD PTR [rsi]       ; xmm5 = row i,   pixels 0..7
    movq        xmm3, QWORD PTR [rsi+rax]   ; xmm3 = row i+1, pixels 0..7

    pavgb       xmm5, xmm3                  ; vertical average
    punpcklbw   xmm5, xmm0                  ; widen to words

    movq        xmm3, QWORD PTR [rdi]       ; xmm3 = d0..d7
    punpcklbw   xmm3, xmm0                  ; widen to words

    psubw       xmm5, xmm3                  ; per-pixel difference
    paddw       xmm6, xmm5                  ; accumulate differences
    pmaddwd     xmm5, xmm5                  ; square and pair-add
    paddd       xmm7, xmm5                  ; accumulate squared differences

    add         rsi, rax                    ; next reference row
    add         rdi, rdx                    ; next source row

    sub         rcx, 1
    jnz         .half_vert_variance8x_h_1

    ; sum: fold the eight words with 16-bit adds, then sign-extend by
    ; adding them in the upper half of each dword and shifting back down
    movdqa      xmm1, xmm6
    psrldq      xmm1, 8
    paddw       xmm6, xmm1                  ; words 0..3 = w[i] + w[i+4]
    pxor        xmm1, xmm1
    punpcklwd   xmm1, xmm6                  ; dword i = word i << 16
    movdqa      xmm2, xmm1
    psrldq      xmm2, 8
    paddd       xmm1, xmm2
    movdqa      xmm2, xmm1
    psrldq      xmm2, 4
    paddd       xmm1, xmm2                  ; low dword = total << 16
    psrad       xmm1, 16                    ; low dword = signed total

    ; sumsquared: add the four dword partials horizontally
    movdqa      xmm2, xmm7
    psrldq      xmm2, 8
    paddd       xmm7, xmm2
    movdqa      xmm2, xmm7
    psrldq      xmm2, 4
    paddd       xmm7, xmm2                  ; low dword = sum of squares

    mov         rsi, arg(5)                 ; sum
    mov         rdi, arg(6)                 ; sumsquared

    movd        DWORD PTR [rsi], xmm1
    movd        DWORD PTR [rdi], xmm7

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
;void vp9_half_horiz_variance8x_h_sse2
;(
;    unsigned char *ref_ptr,
;    int ref_pixels_per_line,
;    unsigned char *src_ptr,
;    int src_pixels_per_line,
;    unsigned int Height,
;    int *sum,
;    unsigned int *sumsquared
;)
; 8-pixel-wide variance helper. Each reference pixel is the rounded average
; (pavgb) of a pixel and its right neighbour. The function reads Height
; reference rows of 9 bytes and Height source rows of 8 bytes, then stores
;   *sum        = sum of (filtered_ref - src)
;   *sumsquared = sum of (filtered_ref - src)^2
; Height must be at least 1: the loop body runs before the counter is tested.
; The whole routine stays in XMM registers, so no MMX state is left behind
; and no emms is needed by the caller.
global sym(vp9_half_horiz_variance8x_h_sse2) PRIVATE
sym(vp9_half_horiz_variance8x_h_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                 ;ref_ptr
    mov         rdi, arg(2)                 ;src_ptr
    movsxd      rax, dword ptr arg(1)       ;ref_pixels_per_line
    movsxd      rdx, dword ptr arg(3)       ;src_pixels_per_line
    movsxd      rcx, dword ptr arg(4)       ;Height

    pxor        xmm0, xmm0                  ; zero, used to widen bytes
    pxor        xmm6, xmm6                  ; error accumulator (8 words)
    pxor        xmm7, xmm7                  ; sse accumulator (4 dwords)

.half_horiz_variance8x_h_1:
    movq        xmm5, QWORD PTR [rsi]       ; xmm5 = s0..s7
    movq        xmm3, QWORD PTR [rsi+1]     ; xmm3 = s1..s8

    pavgb       xmm5, xmm3                  ; horizontal average
    punpcklbw   xmm5, xmm0                  ; widen to words

    movq        xmm3, QWORD PTR [rdi]       ; xmm3 = d0..d7
    punpcklbw   xmm3, xmm0                  ; widen to words

    psubw       xmm5, xmm3                  ; per-pixel difference
    paddw       xmm6, xmm5                  ; accumulate differences
    pmaddwd     xmm5, xmm5                  ; square and pair-add
    paddd       xmm7, xmm5                  ; accumulate squared differences

    add         rsi, rax                    ; next reference row
    add         rdi, rdx                    ; next source row

    sub         rcx, 1
    jnz         .half_horiz_variance8x_h_1

    ; sum: fold the eight words with 16-bit adds, then sign-extend by
    ; adding them in the upper half of each dword and shifting back down
    movdqa      xmm1, xmm6
    psrldq      xmm1, 8
    paddw       xmm6, xmm1                  ; words 0..3 = w[i] + w[i+4]
    pxor        xmm1, xmm1
    punpcklwd   xmm1, xmm6                  ; dword i = word i << 16
    movdqa      xmm2, xmm1
    psrldq      xmm2, 8
    paddd       xmm1, xmm2
    movdqa      xmm2, xmm1
    psrldq      xmm2, 4
    paddd       xmm1, xmm2                  ; low dword = total << 16
    psrad       xmm1, 16                    ; low dword = signed total

    ; sumsquared: add the four dword partials horizontally
    movdqa      xmm2, xmm7
    psrldq      xmm2, 8
    paddd       xmm7, xmm2
    movdqa      xmm2, xmm7
    psrldq      xmm2, 4
    paddd       xmm7, xmm2                  ; low dword = sum of squares

    mov         rsi, arg(5)                 ; sum
    mov         rdi, arg(6)                 ; sumsquared

    movd        DWORD PTR [rsi], xmm1
    movd        DWORD PTR [rdi], xmm7

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret