Thu, 15 Jan 2015 15:59:08 +0100
Implement a real Private Browsing Mode condition by changing the API/ABI;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
12 %include "vpx_ports/x86_abi_support.asm"
14 %define VP8_FILTER_WEIGHT 128
15 %define VP8_FILTER_SHIFT 7
;-----------------------------------------------------------------------------
; void vp8_mbpost_proc_down_mmx(unsigned char *dst,
;                               int pitch, int rows, int cols, int flimit)
;
; Vertical post-processing filter.  The plane is walked in strips of four
; pixel columns; within a strip a 15-row sliding window (centred on the
; current row) keeps, per column, the running sum and sum of squares.
; Where  15*sumsq - sum*sum < flimit  the pixel is replaced by
; (pixel + sum + vp8_rv[...]) >> 4, otherwise it is left unchanged.
;
; Filtered rows are written back 8 rows late through a 16-entry ring buffer
; on the stack so that the window always reads unfiltered pixels.
;
; Side effects visible in this code:
;   * the 8 rows above dst and the 8 rows below the last row are overwritten
;     (border replication, 8 bytes wide per strip), so the caller must own
;     that memory;
;   * the shadowed copies of dst, rows and cols are modified in place.
;
; Register roles inside a strip:
;   rax = pitch            rsi = row leaving the window / row being stored
;   rdx = row counter      rdi = row entering the window
;   rcx = scratch index    r8  = &vp8_rv (64-bit builds only)
;   mm0 = 0                mm5 = 4 x 16-bit column sums
;   mm6 = sumsq cols 0,1   mm7 = sumsq cols 2,3   (32-bit each)
;
; Stack frame after ALIGN_STACK (136 bytes):
;   [rsp + 0   .. 127]  ring buffer d; only 16 dword slots are indexed
;   [rsp + 128 .. 135]  flimit2 = flimit replicated into two dwords
;
; sym/arg/SHADOW_ARGS_TO_STACK/GET_GOT/ALIGN_STACK/GLOBAL/PRIVATE are
; presumably supplied by vpx_ports/x86_abi_support.asm -- verify there.
; NOTE(review): no emms is issued before returning; presumably the caller
; clears the MMX state -- confirm.
;-----------------------------------------------------------------------------
extern sym(vp8_rv)
global sym(vp8_mbpost_proc_down_mmx) PRIVATE
sym(vp8_mbpost_proc_down_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ; The matching "pop rsp" in the epilogue relies on ALIGN_STACK having
    ; left the pre-alignment rsp on top of the stack.
    ALIGN_STACK 16, rax
    sub         rsp, 136

    ; unsigned char d[16][8] at [rsp]
    ; create flimit2 at [rsp+128]
    mov         eax, dword ptr arg(4)       ; flimit
    mov         [rsp+128], eax
    mov         [rsp+128+4], eax
%define flimit2 [rsp+128]

%if ABI_IS_32BIT=0
    lea         r8, [GLOBAL(sym(vp8_rv))]   ; table base, kept for the whole call
%endif

    ; rows += 8;  the row loop runs 8 extra iterations to flush the ring buffer
    add         dword ptr arg(2), 8

    ; for (c = 0; c < cols; c += 4)
.loop_col:
    mov         rsi, arg(0)                 ; s = top of the current strip
    pxor        mm0, mm0                    ; mm0 = 0, used for unpacking

    movsxd      rax, dword ptr arg(1)       ; rax = pitch

    ; this copies the last row down into the border 8 rows
    mov         rdi, rsi
    ; rows is a 32-bit int: load it as a dword (as is done for pitch) instead
    ; of reading the whole argument slot, whose upper half is not guaranteed
    ; to be defined on 64-bit builds.
    movsxd      rdx, dword ptr arg(2)       ; rdx = rows + 8
    sub         rdx, 9                      ; index of the last real row
    imul        rdx, rax
    lea         rdi, [rdi+rdx]
    movq        mm1, QWORD ptr[rdi]         ; last row
    mov         rcx, 8
.init_borderd:                              ; initialize bottom border
    lea         rdi, [rdi + rax]
    movq        [rdi], mm1

    dec         rcx
    jne         .init_borderd

    neg         rax                         ; rax = -pitch

    ; this copies the first row up into the border 8 rows
    mov         rdi, rsi
    movq        mm1, QWORD ptr[rdi]         ; first row
    mov         rcx, 8
.init_border:                               ; initialize top border
    lea         rdi, [rdi + rax]
    movq        [rdi], mm1

    dec         rcx
    jne         .init_border

    lea         rsi, [rsi + rax*8]          ; rsi = &s[-pitch*8]
    neg         rax                         ; rax = +pitch again

    pxor        mm5, mm5                    ; column sums
    pxor        mm6, mm6                    ; sum of squares, columns 0-1

    pxor        mm7, mm7                    ; sum of squares, columns 2-3
    mov         rdi, rsi

    mov         rcx, 15                     ; prime the window with 15 rows

.loop_initvar:
    movd        mm1, DWORD PTR [rdi]
    punpcklbw   mm1, mm0                    ; 4 pixels -> 4 words

    paddw       mm5, mm1                    ; sum += p
    pmullw      mm1, mm1                    ; p*p (fits in 16 bits unsigned)

    movq        mm2, mm1
    punpcklwd   mm1, mm0                    ; zero-extend squares to dwords

    punpckhwd   mm2, mm0
    paddd       mm6, mm1                    ; sumsq += p*p

    paddd       mm7, mm2
    lea         rdi, [rdi+rax]

    dec         rcx
    jne         .loop_initvar
    ; here rdi = s + pitch*7; the sums cover rows -8 .. 6

    ; save the var and sum
    xor         rdx, rdx                    ; row counter r = 0
.loop_row:
    movd        mm1, DWORD PTR [rsi]        ; [s-pitch*8]  row leaving the window
    movd        mm2, DWORD PTR [rdi]        ; [s+pitch*7]  row entering the window

    punpcklbw   mm1, mm0
    punpcklbw   mm2, mm0

    paddw       mm5, mm2                    ; sum += entering - leaving
    psubw       mm5, mm1

    pmullw      mm2, mm2
    movq        mm4, mm2

    punpcklwd   mm2, mm0
    punpckhwd   mm4, mm0

    paddd       mm6, mm2                    ; sumsq += entering^2
    paddd       mm7, mm4

    pmullw      mm1, mm1
    movq        mm2, mm1

    punpcklwd   mm1, mm0
    psubd       mm6, mm1                    ; sumsq -= leaving^2

    punpckhwd   mm2, mm0
    psubd       mm7, mm2


    movq        mm3, mm6
    pslld       mm3, 4

    psubd       mm3, mm6                    ; mm3 = 15 * sumsq (columns 0-1)
    movq        mm1, mm5

    movq        mm4, mm5
    pmullw      mm1, mm1                    ; low  16 bits of sum*sum

    pmulhw      mm4, mm4                    ; high 16 bits of sum*sum
    movq        mm2, mm1

    punpcklwd   mm1, mm4                    ; sum*sum as dwords, columns 0-1
    punpckhwd   mm2, mm4                    ; sum*sum as dwords, columns 2-3

    movq        mm4, mm7
    pslld       mm4, 4

    psubd       mm4, mm7                    ; mm4 = 15 * sumsq (columns 2-3)

    psubd       mm3, mm1
    psubd       mm4, mm2

    psubd       mm3, flimit2
    psubd       mm4, flimit2

    psrad       mm3, 31                     ; all-ones where the value is negative
    psrad       mm4, 31

    packssdw    mm3, mm4                    ; 4 dword masks -> 4 word masks
    packsswb    mm3, mm0                    ; -> 4 byte masks in the low dword

    movd        mm1, DWORD PTR [rsi+rax*8]  ; centre row of the window

    movq        mm2, mm1                    ; keep the unfiltered pixels
    punpcklbw   mm1, mm0

    paddw       mm1, mm5                    ; pixel + sum
    mov         rcx, rdx

    and         rcx, 127                    ; table index = r & 127
%if ABI_IS_32BIT=1 && CONFIG_PIC=1
    push        rax
    lea         rax, [GLOBAL(sym(vp8_rv))]
    movq        mm4, [rax + rcx*2]          ; four words at vp8_rv byte offset rcx*2
    pop         rax
%elif ABI_IS_32BIT=0
    movq        mm4, [r8 + rcx*2]           ; four words at vp8_rv byte offset rcx*2
%else
    movq        mm4, [sym(vp8_rv) + rcx*2]
%endif
    paddw       mm1, mm4
    psraw       mm1, 4                      ; (pixel + sum + rv) >> 4

    packuswb    mm1, mm0
    pand        mm1, mm3                    ; filtered value where the mask is set

    pandn       mm3, mm2                    ; original pixel elsewhere
    por         mm1, mm3

    and         rcx, 15
    movd        DWORD PTR [rsp+rcx*4], mm1  ; d[r & 15] = result for row r

    mov         rcx, rdx
    sub         rcx, 8

    and         rcx, 15
    movd        mm1, DWORD PTR [rsp+rcx*4]  ; d[(r - 8) & 15]

    movd        [rsi], mm1                  ; store row r-8, which has left the window
    lea         rsi, [rsi+rax]

    lea         rdi, [rdi+rax]
    add         rdx, 1

    cmp         edx, dword arg(2)           ; rows (already includes the +8)
    jl          .loop_row


    ; s += 4.  The slot holds a pointer, so update it at full register width
    ; (the previous dword add dropped any carry out of the low 32 bits on
    ; 64-bit builds).  rax is dead here: it is reloaded at .loop_col.
    mov         rax, arg(0)
    add         rax, 4
    mov         arg(0), rax
    sub         dword arg(3), 4             ; cols -= 4
    cmp         dword arg(3), 0
    jg          .loop_col

    add         rsp, 136
    pop         rsp                         ; undo ALIGN_STACK

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
%undef flimit2
;-----------------------------------------------------------------------------
; void vp8_plane_add_noise_mmx(unsigned char *Start, unsigned char *noise,
;                              unsigned char blackclamp[16],
;                              unsigned char whiteclamp[16],
;                              unsigned char bothclamp[16],
;                              unsigned int Width, unsigned int Height,
;                              int Pitch)
;
; Adds noise to a plane, one row per outer iteration.  For every row,
; rand() & 0xff selects a byte offset into the noise buffer; the row is then
; processed 8 bytes at a time:
;     p = sat_sub(p, blackclamp); p = sat_add(p, bothclamp);
;     p = sat_sub(p, whiteclamp); p += noise (wrapping byte add)
;
; The inner loop always runs at least once and steps by 8, so each row is
; touched in whole 8-byte groups.  The three clamp vectors are read from
; [blackclamp], [blackclamp+16] and [blackclamp+32]; i.e. they must be stored
; contiguously in black/white/both order.
;
; The shadowed copies of Start and Height are updated in place.
;-----------------------------------------------------------------------------
extern sym(rand)
global sym(vp8_plane_add_noise_mmx) PRIVATE
sym(vp8_plane_add_noise_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 8
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

.next_row:
    call        sym(rand) WRT_PLT
    and         rax, 0xff                   ; random offset 0..255
    mov         rdi, arg(1)                 ; noise
    add         rdi, rax                    ; rdi = noise + offset for this row

    ; Everything below is (re)loaded after the call because rand() is free
    ; to trash the scratch registers.
    mov         rsi, arg(0)                 ; rsi = start of the current row
    mov         rdx, arg(2)                 ; rdx = blackclamp (white/both follow)
    movsxd      rcx, dword arg(5)           ; rcx = Width
    xor         rax, rax                    ; rax = byte offset within the row

.next_qword:
    movq        mm2, [rdi+rax]              ; noise for these 8 pixels
    movq        mm1, [rsi+rax]              ; source pixels

    ; clamp both ends so that adding the noise cannot leave the valid range
    psubusb     mm1, [rdx]                  ; blackclamp
    paddusb     mm1, [rdx+32]               ; bothclamp
    psubusb     mm1, [rdx+16]               ; whiteclamp

    paddb       mm1, mm2                    ; add the noise
    movq        [rsi+rax], mm1              ; write the result back

    add         rax, 8                      ; next group of 8 pixels
    cmp         rax, rcx
    jl          .next_qword

    movsxd      rax, dword arg(7)           ; Pitch
    add         arg(0), rax                 ; Start += Pitch
    sub         dword arg(6), 1             ; Height -= 1
    jg          .next_row

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
SECTION_RODATA
align 16
; Read-only word tables.  Neither Blur nor rd is referenced by the code
; visible in this part of the file.

; Blur: 48 words -- 16 x 16, 8 x 64, 16 x 16, 8 x 0
Blur:
    times 16 dw 0x0010
    times  8 dw 0x0040
    times 16 dw 0x0010
    times  8 dw 0x0000

; rd: four words of 64
rd:
    dw 0x0040, 0x0040, 0x0040, 0x0040