media/libvpx/vp8/encoder/x86/quantize_mmx.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701 and complies with the disk-avoidance requirements documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 ;
michael@0 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 ;
michael@0 4 ; Use of this source code is governed by a BSD-style license
michael@0 5 ; that can be found in the LICENSE file in the root of the source
michael@0 6 ; tree. An additional intellectual property rights grant can be found
michael@0 7 ; in the file PATENTS. All contributing project authors may
michael@0 8 ; be found in the AUTHORS file in the root of the source tree.
michael@0 9 ;
michael@0 10
michael@0 11
michael@0 12 %include "vpx_ports/x86_abi_support.asm"
michael@0 13 ; Quantizes and dequantizes 16 16-bit coefficients with MMX (four groups of 4 words), then derives an end-of-block value in rax from scan_mask.
michael@0 14 ;int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
michael@0 15 ; short *qcoeff_ptr,short *dequant_ptr,
michael@0 16 ; short *scan_mask, short *round_ptr,
michael@0 17 ; short *quant_ptr, short *dqcoeff_ptr);
michael@0 18 global sym(vp8_fast_quantize_b_impl_mmx) PRIVATE
michael@0 19 sym(vp8_fast_quantize_b_impl_mmx):
michael@0 20 push rbp
michael@0 21 mov rbp, rsp ; frame pointer; arg(n) presumably addresses arguments relative to rbp (macro defined in the included file) - confirm
michael@0 22 SHADOW_ARGS_TO_STACK 8 ; macro from x86_abi_support.asm; 8 presumably is the argument count - confirm
michael@0 23 push rsi ; rsi and rdi are used below and restored in the epilog
michael@0 24 push rdi
michael@0 25 ; end prolog
michael@0 26 ; ---- coefficients 0-3: bytes 0..7 of every array ----
michael@0 27
michael@0 28 mov rsi, arg(0) ;coeff_ptr
michael@0 29 movq mm0, [rsi] ; mm0 = four 16-bit coefficients
michael@0 30
michael@0 31 mov rax, arg(1) ;zbin_ptr
michael@0 32 movq mm1, [rax] ; mm1 = four zbin thresholds
michael@0 33
michael@0 34 movq mm3, mm0 ; mm3 = copy of the coefficients
michael@0 35 psraw mm0, 15 ; mm0 = per-word sign mask: 0xFFFF if negative, else 0
michael@0 36
michael@0 37 pxor mm3, mm0
michael@0 38 psubw mm3, mm0 ; abs: (x ^ sign) - sign
michael@0 39
michael@0 40 movq mm2, mm3 ; mm2 = abs values
michael@0 41 pcmpgtw mm1, mm2 ; mm1 = 0xFFFF where zbin > abs (signed word compare)
michael@0 42
michael@0 43 pandn mm1, mm2 ; mm1 = abs where abs >= zbin, 0 elsewhere
michael@0 44 movq mm3, mm1
michael@0 45
michael@0 46 mov rdx, arg(6) ;quant_ptr
michael@0 47 movq mm1, [rdx]
michael@0 48
michael@0 49 mov rcx, arg(5) ;round_ptr
michael@0 50 movq mm2, [rcx]
michael@0 51
michael@0 52 paddw mm3, mm2 ; add the rounding term
michael@0 53 pmulhuw mm3, mm1 ; keep the high 16 bits of the unsigned product (abs + round) * quant
michael@0 54
michael@0 55 pxor mm3, mm0
michael@0 56 psubw mm3, mm0 ;gain the sign back
michael@0 57
michael@0 58 mov rdi, arg(2) ;qcoeff_ptr
michael@0 59 movq mm0, mm3 ; NOTE(review): this copy is not read before mm0 is reloaded in the eob section below
michael@0 60
michael@0 61 movq [rdi], mm3 ; store the quantized words at qcoeff_ptr + 0
michael@0 62
michael@0 63 mov rax, arg(3) ;dequant_ptr
michael@0 64 movq mm2, [rax]
michael@0 65
michael@0 66 pmullw mm3, mm2 ; low 16 bits of qcoeff * dequant
michael@0 67 mov rax, arg(7) ;dqcoeff_ptr
michael@0 68
michael@0 69 movq [rax], mm3 ; store the dequantized words at dqcoeff_ptr + 0
michael@0 70
michael@0 71 ; next 8 bytes: coefficients 4-7, same steps as above using mm4-mm7
michael@0 72 movq mm4, [rsi+8]
michael@0 73
michael@0 74 mov rax, arg(1) ;zbin_ptr
michael@0 75 movq mm5, [rax+8]
michael@0 76
michael@0 77 movq mm7, mm4
michael@0 78 psraw mm4, 15 ; mm4 = per-word sign mask
michael@0 79
michael@0 80 pxor mm7, mm4
michael@0 81 psubw mm7, mm4 ; abs
michael@0 82
michael@0 83 movq mm6, mm7
michael@0 84 pcmpgtw mm5, mm6 ; mm5 = 0xFFFF where zbin > abs
michael@0 85
michael@0 86 pandn mm5, mm6 ; zero the words below the zbin threshold
michael@0 87 movq mm7, mm5
michael@0 88
michael@0 89 movq mm5, [rdx+8] ; rdx still holds quant_ptr
michael@0 90 movq mm6, [rcx+8] ; rcx still holds round_ptr
michael@0 91
michael@0 92 paddw mm7, mm6
michael@0 93 pmulhuw mm7, mm5
michael@0 94
michael@0 95 pxor mm7, mm4
michael@0 96 psubw mm7, mm4;gain the sign back
michael@0 97
michael@0 98 mov rdi, arg(2) ;qcoeff_ptr
michael@0 99
michael@0 100 movq mm1, mm7 ; NOTE(review): mm1 is overwritten before it is read again
michael@0 101 movq [rdi+8], mm7
michael@0 102
michael@0 103 mov rax, arg(3) ;dequant_ptr
michael@0 104 movq mm6, [rax+8]
michael@0 105
michael@0 106 pmullw mm7, mm6
michael@0 107 mov rax, arg(7) ;dqcoeff_ptr
michael@0 108
michael@0 109 movq [rax+8], mm7
michael@0 110
michael@0 111
michael@0 112 ; next 8 bytes: coefficients 8-11
michael@0 113 movq mm4, [rsi+16]
michael@0 114
michael@0 115 mov rax, arg(1) ;zbin_ptr
michael@0 116 movq mm5, [rax+16]
michael@0 117
michael@0 118 movq mm7, mm4
michael@0 119 psraw mm4, 15
michael@0 120
michael@0 121 pxor mm7, mm4
michael@0 122 psubw mm7, mm4 ; abs
michael@0 123
michael@0 124 movq mm6, mm7
michael@0 125 pcmpgtw mm5, mm6
michael@0 126
michael@0 127 pandn mm5, mm6
michael@0 128 movq mm7, mm5
michael@0 129
michael@0 130 movq mm5, [rdx+16]
michael@0 131 movq mm6, [rcx+16]
michael@0 132
michael@0 133 paddw mm7, mm6
michael@0 134 pmulhuw mm7, mm5
michael@0 135
michael@0 136 pxor mm7, mm4
michael@0 137 psubw mm7, mm4;gain the sign back
michael@0 138
michael@0 139 mov rdi, arg(2) ;qcoeff_ptr
michael@0 140
michael@0 141 movq mm1, mm7 ; NOTE(review): mm1 is overwritten before it is read again
michael@0 142 movq [rdi+16], mm7
michael@0 143
michael@0 144 mov rax, arg(3) ;dequant_ptr
michael@0 145 movq mm6, [rax+16]
michael@0 146
michael@0 147 pmullw mm7, mm6
michael@0 148 mov rax, arg(7) ;dqcoeff_ptr
michael@0 149
michael@0 150 movq [rax+16], mm7
michael@0 151
michael@0 152
michael@0 153 ; next 8 bytes: coefficients 12-15
michael@0 154 movq mm4, [rsi+24]
michael@0 155
michael@0 156 mov rax, arg(1) ;zbin_ptr
michael@0 157 movq mm5, [rax+24]
michael@0 158
michael@0 159 movq mm7, mm4
michael@0 160 psraw mm4, 15
michael@0 161
michael@0 162 pxor mm7, mm4
michael@0 163 psubw mm7, mm4 ; abs
michael@0 164
michael@0 165 movq mm6, mm7
michael@0 166 pcmpgtw mm5, mm6
michael@0 167
michael@0 168 pandn mm5, mm6
michael@0 169 movq mm7, mm5
michael@0 170
michael@0 171 movq mm5, [rdx+24]
michael@0 172 movq mm6, [rcx+24]
michael@0 173
michael@0 174 paddw mm7, mm6
michael@0 175 pmulhuw mm7, mm5
michael@0 176
michael@0 177 pxor mm7, mm4
michael@0 178 psubw mm7, mm4;gain the sign back
michael@0 179
michael@0 180 mov rdi, arg(2) ;qcoeff_ptr
michael@0 181
michael@0 182 movq mm1, mm7 ; NOTE(review): mm1 is overwritten before it is read again
michael@0 183 movq [rdi+24], mm7
michael@0 184
michael@0 185 mov rax, arg(3) ;dequant_ptr
michael@0 186 movq mm6, [rax+24]
michael@0 187
michael@0 188 pmullw mm7, mm6
michael@0 189 mov rax, arg(7) ;dqcoeff_ptr
michael@0 190
michael@0 191 movq [rax+24], mm7
michael@0 192
michael@0 193 ; ---- eob: sum scan_mask[i] over every i whose stored qcoeff[i] is nonzero ----
michael@0 194 ; presumably each scan_mask word is a distinct bit encoding the scan position - verify against the caller
michael@0 195 mov rdi, arg(4) ;scan_mask
michael@0 196 mov rsi, arg(2) ;qcoeff_ptr
michael@0 197
michael@0 198 pxor mm5, mm5 ; NOTE(review): mm5 is overwritten by "movq mm5, mm0" below before it is read
michael@0 199 pxor mm7, mm7 ; mm7 = 0, the comparison operand
michael@0 200
michael@0 201 movq mm0, [rsi] ; reload the quantized words just stored (words 0-3)
michael@0 202 movq mm1, [rsi+8] ; words 4-7
michael@0 203
michael@0 204 movq mm2, [rdi] ; scan_mask words 0-3
michael@0 205 movq mm3, [rdi+8]; scan_mask words 4-7
michael@0 206
michael@0 207 pcmpeqw mm0, mm7 ; 0xFFFF where qcoeff == 0
michael@0 208 pcmpeqw mm1, mm7
michael@0 209
michael@0 210 pcmpeqw mm6, mm6 ; mm6 = all ones
michael@0 211 pxor mm0, mm6 ; invert: 0xFFFF where qcoeff != 0
michael@0 212
michael@0 213 pxor mm1, mm6
michael@0 214 psrlw mm0, 15 ; 1 where qcoeff != 0, else 0
michael@0 215
michael@0 216 psrlw mm1, 15
michael@0 217 pmaddwd mm0, mm2 ; multiply flags by scan_mask words, adding adjacent pairs into dwords
michael@0 218
michael@0 219 pmaddwd mm1, mm3
michael@0 220 movq mm5, mm0 ; mm5 = running dword sums
michael@0 221
michael@0 222 paddd mm5, mm1
michael@0 223
michael@0 224 movq mm0, [rsi+16] ; words 8-11
michael@0 225 movq mm1, [rsi+24] ; words 12-15
michael@0 226
michael@0 227 movq mm2, [rdi+16]
michael@0 228 movq mm3, [rdi+24];
michael@0 229
michael@0 230 pcmpeqw mm0, mm7
michael@0 231 pcmpeqw mm1, mm7
michael@0 232
michael@0 233 pcmpeqw mm6, mm6
michael@0 234 pxor mm0, mm6
michael@0 235
michael@0 236 pxor mm1, mm6
michael@0 237 psrlw mm0, 15
michael@0 238
michael@0 239 psrlw mm1, 15
michael@0 240 pmaddwd mm0, mm2
michael@0 241
michael@0 242 pmaddwd mm1, mm3
michael@0 243 paddd mm5, mm0
michael@0 244
michael@0 245 paddd mm5, mm1
michael@0 246 movq mm0, mm5
michael@0 247
michael@0 248 psrlq mm5, 32 ; bring the high dword down
michael@0 249 paddd mm0, mm5 ; low dword of mm0 = total over all 16 words
michael@0 250
michael@0 251 ; eob adjustment begins here: rax = 0 if the sum is zero, else (index of its highest set bit) + 1
michael@0 252 movq rcx, mm0
michael@0 253 and rcx, 0xffff ; keep only the low 16 bits of the sum
michael@0 254
michael@0 255 xor rdx, rdx
michael@0 256 sub rdx, rcx ; rdx=-rcx
michael@0 257
michael@0 258 bsr rax, rcx ; index of the highest set bit; the result is masked off below when rcx == 0
michael@0 259 inc rax
michael@0 260
michael@0 261 sar rdx, 31 ; all ones when rcx != 0, zero when rcx == 0
michael@0 262 and rax, rdx ; force rax to 0 when no bit was set
michael@0 263 ; Substitute the sse assembly for the old mmx mixed assembly/C. The
michael@0 264 ; following is kept as reference
michael@0 265 ; movq rcx, mm0
michael@0 266 ; bsr rax, rcx
michael@0 267 ;
michael@0 268 ; mov eob, rax
michael@0 269 ; mov eee, rcx
michael@0 270 ;
michael@0 271 ;if(eee==0)
michael@0 272 ;{
michael@0 273 ; eob=-1;
michael@0 274 ;}
michael@0 275 ;else if(eee<0)
michael@0 276 ;{
michael@0 277 ; eob=15;
michael@0 278 ;}
michael@0 279 ;d->eob = eob+1;
michael@0 280 ; rax is left as the return value. NOTE(review): no emms is issued before ret; callers presumably reset MMX state - confirm
michael@0 281 ; begin epilog
michael@0 282 pop rdi
michael@0 283 pop rsi
michael@0 284 UNSHADOW_ARGS
michael@0 285 pop rbp
michael@0 286 ret

mercurial