media/libvpx/vp8/common/x86/dequantize_mmx.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI.
This solves Tor bug #9701 and complies with the disk-avoidance requirements documented at
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 ;
michael@0 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 ;
michael@0 4 ; Use of this source code is governed by a BSD-style license
michael@0 5 ; that can be found in the LICENSE file in the root of the source
michael@0 6 ; tree. An additional intellectual property rights grant can be found
michael@0 7 ; in the file PATENTS. All contributing project authors may
michael@0 8 ; be found in the AUTHORS file in the root of the source tree.
michael@0 9 ;
michael@0 10
michael@0 11
michael@0 12 %include "vpx_ports/x86_abi_support.asm"
michael@0 13
michael@0 14
michael@0 15 ;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q) -- dq[i] = low 16 bits of sq[i] * q[i], for i = 0..15
michael@0 16 global sym(vp8_dequantize_b_impl_mmx) PRIVATE
michael@0 17 sym(vp8_dequantize_b_impl_mmx):
michael@0 18 push rbp
michael@0 19 mov rbp, rsp
michael@0 20 SHADOW_ARGS_TO_STACK 3 ; macro from x86_abi_support.asm; presumably makes the 3 arguments reachable through arg(n) -- confirm
michael@0 21 push rsi ; rsi and rdi are overwritten below, so the caller's values are preserved
michael@0 22 push rdi
michael@0 23 ; end prolog
michael@0 24
michael@0 25 mov rsi, arg(0) ;sq (source coefficients)
michael@0 26 mov rdi, arg(1) ;dq (destination, written 4 words at a time)
michael@0 27 mov rax, arg(2) ;q (per-coefficient multipliers)
michael@0 28
michael@0 29 movq mm1, [rsi] ; mm1 = sq[0..3]
michael@0 30 pmullw mm1, [rax+0] ; mm1 = sq[0..3] * q[0..3], low 16 bits of each product
michael@0 31 movq [rdi], mm1 ; dq[0..3] = mm1
michael@0 32
michael@0 33 movq mm1, [rsi+8] ; mm1 = sq[4..7]
michael@0 34 pmullw mm1, [rax+8] ; mm1 = sq[4..7] * q[4..7], low 16 bits of each product
michael@0 35 movq [rdi+8], mm1 ; dq[4..7] = mm1
michael@0 36
michael@0 37 movq mm1, [rsi+16] ; mm1 = sq[8..11]
michael@0 38 pmullw mm1, [rax+16] ; mm1 = sq[8..11] * q[8..11], low 16 bits of each product
michael@0 39 movq [rdi+16], mm1 ; dq[8..11] = mm1
michael@0 40
michael@0 41 movq mm1, [rsi+24] ; mm1 = sq[12..15]
michael@0 42 pmullw mm1, [rax+24] ; mm1 = sq[12..15] * q[12..15], low 16 bits of each product
michael@0 43 movq [rdi+24], mm1 ; dq[12..15] = mm1
michael@0 44
michael@0 45 ; begin epilog
michael@0 46 pop rdi
michael@0 47 pop rsi
michael@0 48 UNSHADOW_ARGS
michael@0 49 pop rbp
michael@0 50 ret ; NOTE(review): no emms is issued here; presumably the caller clears MMX state -- confirm
michael@0 51
michael@0 52
michael@0 53 ;void vp8_dequant_idct_add_mmx(
michael@0 54 ;short *input, 0 (16 coefficients; this routine overwrites them with zero)
michael@0 55 ;short *dq, 1 (16 per-coefficient multipliers)
michael@0 56 ;unsigned char *dest, 2 (four lines of 4 pixels, updated in place)
michael@0 57 ;int stride) 3 (byte distance between consecutive dest lines)
michael@0 58 global sym(vp8_dequant_idct_add_mmx) PRIVATE
michael@0 59 sym(vp8_dequant_idct_add_mmx):
michael@0 60 push rbp
michael@0 61 mov rbp, rsp
michael@0 62 SHADOW_ARGS_TO_STACK 4 ; macro from x86_abi_support.asm; presumably makes the 4 arguments reachable through arg(n) -- confirm
michael@0 63 GET_GOT rbx ; macro from x86_abi_support.asm; presumably prepares GLOBAL() addressing for PIC builds -- confirm
michael@0 64 push rdi ; rdi is overwritten with stride below
michael@0 65 ; end prolog
michael@0 66
michael@0 67 mov rax, arg(0) ;input
michael@0 68 mov rdx, arg(1) ;dq
michael@0 69
michael@0 70
michael@0 71 movq mm0, [rax ] ; mm0 = input[0..3]
michael@0 72 pmullw mm0, [rdx] ; ip0 = input[0..3] * dq[0..3], low 16 bits of each product
michael@0 73
michael@0 74 movq mm1, [rax +8] ; mm1 = input[4..7]
michael@0 75 pmullw mm1, [rdx +8] ; ip1 = input[4..7] * dq[4..7]
michael@0 76
michael@0 77 movq mm2, [rax+16] ; mm2 = input[8..11]
michael@0 78 pmullw mm2, [rdx+16] ; ip2 = input[8..11] * dq[8..11]
michael@0 79
michael@0 80 movq mm3, [rax+24] ; mm3 = input[12..15]
michael@0 81 pmullw mm3, [rdx+24] ; ip3 = input[12..15] * dq[12..15]
michael@0 82
michael@0 83 mov rdx, arg(2) ;dest (rdx is reused; dq is no longer needed)
michael@0 84
michael@0 85 pxor mm7, mm7 ; mm7 = 0
michael@0 86
michael@0 87
michael@0 88 movq [rax], mm7 ; clear all 32 bytes of input now that the coefficients are in registers
michael@0 89 movq [rax+8], mm7
michael@0 90
michael@0 91 movq [rax+16],mm7
michael@0 92 movq [rax+24],mm7
michael@0 93
michael@0 94
michael@0 95 movsxd rdi, dword ptr arg(3) ;stride, sign-extended from 32 bits for use in addressing
michael@0 96
michael@0 97 psubw mm0, mm2 ; first pass: b1 = ip0 - ip2
michael@0 98 paddw mm2, mm2 ; mm2 = 2 * ip2
michael@0 99
michael@0 100 movq mm5, mm1 ; mm5 = ip1
michael@0 101 paddw mm2, mm0 ; a1 = (ip0 - ip2) + 2 * ip2 = ip0 + ip2
michael@0 102
michael@0 103 pmulhw mm5, [GLOBAL(x_s1sqr2)]; 0x8A8C is negative as a signed word, so this yields (ip1 * 35468 >> 16) - ip1
michael@0 104 paddw mm5, mm1 ; add ip1 back: mm5 = ip1 * sin(pi/8) * sqrt(2)
michael@0 105
michael@0 106 movq mm7, mm3 ; mm7 = ip3
michael@0 107 pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; mm7 = ip3 * (cos(pi/8) * sqrt(2) - 1)
michael@0 108
michael@0 109 paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2)
michael@0 110 psubw mm7, mm5 ; c1 term, computed here as ip3 * cos - ip1 * sin
michael@0 111
michael@0 112 movq mm5, mm1 ; mm5 = ip1
michael@0 113 movq mm4, mm3 ; mm4 = ip3 (copy kept because mm3 is multiplied in place below)
michael@0 114
michael@0 115 pmulhw mm5, [GLOBAL(x_c1sqr2less1)] ; mm5 = ip1 * (cos(pi/8) * sqrt(2) - 1)
michael@0 116 paddw mm5, mm1 ; mm5 = ip1 * cos(pi/8) * sqrt(2)
michael@0 117
michael@0 118 pmulhw mm3, [GLOBAL(x_s1sqr2)] ; same negative-constant trick as above
michael@0 119 paddw mm3, mm4 ; mm3 = ip3 * sin(pi/8) * sqrt(2)
michael@0 120
michael@0 121 paddw mm3, mm5 ; d1 = ip1 * cos + ip3 * sin
michael@0 122 movq mm6, mm2 ; a1
michael@0 123
michael@0 124 movq mm4, mm0 ; b1
michael@0 125 paddw mm2, mm3 ; mm2 = a1 + d1 (line 0)
michael@0 126
michael@0 127 paddw mm4, mm7 ; mm4 = b1 + mm7; NOTE(review): the transpose below consumes mm4 as line 2
michael@0 128 psubw mm0, mm7 ; mm0 = b1 - mm7; NOTE(review): the transpose below consumes mm0 as line 1
michael@0 129
michael@0 130 psubw mm6, mm3 ; mm6 = a1 - d1 (line 3)
michael@0 131
michael@0 132 movq mm1, mm2 ; 03 02 01 00 -- start of the 4x4 word transpose
michael@0 133 movq mm3, mm4 ; 23 22 21 20
michael@0 134
michael@0 135 punpcklwd mm1, mm0 ; 11 01 10 00
michael@0 136 punpckhwd mm2, mm0 ; 13 03 12 02
michael@0 137
michael@0 138 punpcklwd mm3, mm6 ; 31 21 30 20
michael@0 139 punpckhwd mm4, mm6 ; 33 23 32 22
michael@0 140
michael@0 141 movq mm0, mm1 ; 11 01 10 00
michael@0 142 movq mm5, mm2 ; 13 03 12 02
michael@0 143
michael@0 144 punpckldq mm0, mm3 ; 30 20 10 00
michael@0 145 punpckhdq mm1, mm3 ; 31 21 11 01
michael@0 146
michael@0 147 punpckldq mm2, mm4 ; 32 22 12 02
michael@0 148 punpckhdq mm5, mm4 ; 33 23 13 03
michael@0 149
michael@0 150 movq mm3, mm5 ; 33 23 13 03 (the second pass reads its fourth operand from mm3)
michael@0 151
michael@0 152 psubw mm0, mm2 ; second pass on the transposed data: b1 = 0 - 2
michael@0 153 paddw mm2, mm2 ; mm2 = 2 * operand 2
michael@0 154
michael@0 155 movq mm5, mm1 ; mm5 = operand 1
michael@0 156 paddw mm2, mm0 ; a1 = 0 + 2
michael@0 157
michael@0 158 pmulhw mm5, [GLOBAL(x_s1sqr2)]; negative-constant trick: result is (x * 35468 >> 16) - x
michael@0 159 paddw mm5, mm1 ; add x back: operand 1 * sin(pi/8) * sqrt(2)
michael@0 160
michael@0 161 movq mm7, mm3 ; mm7 = operand 3
michael@0 162 pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; mm7 = operand 3 * (cos(pi/8) * sqrt(2) - 1)
michael@0 163
michael@0 164 paddw mm7, mm3 ; operand 3 * cos(pi/8) * sqrt(2)
michael@0 165 psubw mm7, mm5 ; c1 term, computed here as operand 3 * cos - operand 1 * sin
michael@0 166
michael@0 167 movq mm5, mm1 ; mm5 = operand 1
michael@0 168 movq mm4, mm3 ; mm4 = operand 3 (copy kept because mm3 is multiplied in place below)
michael@0 169
michael@0 170 pmulhw mm5, [GLOBAL(x_c1sqr2less1)] ; mm5 = operand 1 * (cos(pi/8) * sqrt(2) - 1)
michael@0 171 paddw mm5, mm1 ; mm5 = operand 1 * cos(pi/8) * sqrt(2)
michael@0 172
michael@0 173 pmulhw mm3, [GLOBAL(x_s1sqr2)] ; same negative-constant trick as above
michael@0 174 paddw mm3, mm4 ; mm3 = operand 3 * sin(pi/8) * sqrt(2)
michael@0 175
michael@0 176 paddw mm3, mm5 ; d1
michael@0 177 paddw mm0, [GLOBAL(fours)] ; b1 += 4, rounding bias for the arithmetic shift by 3 below
michael@0 178
michael@0 179 paddw mm2, [GLOBAL(fours)] ; a1 += 4, same rounding bias
michael@0 180 movq mm6, mm2 ; a1
michael@0 181
michael@0 182 movq mm4, mm0 ; b1
michael@0 183 paddw mm2, mm3 ; mm2 = a1 + d1 (line 0)
michael@0 184
michael@0 185 paddw mm4, mm7 ; mm4 = b1 + mm7; NOTE(review): the transpose below consumes mm4 as line 2
michael@0 186 psubw mm0, mm7 ; mm0 = b1 - mm7; NOTE(review): the transpose below consumes mm0 as line 1
michael@0 187
michael@0 188 psubw mm6, mm3 ; mm6 = a1 - d1 (line 3)
michael@0 189 psraw mm2, 3 ; arithmetic shift right by 3 completes (x + 4) >> 3 for each result
michael@0 190
michael@0 191 psraw mm0, 3
michael@0 192 psraw mm4, 3
michael@0 193
michael@0 194 psraw mm6, 3
michael@0 195
michael@0 196 movq mm1, mm2 ; 03 02 01 00 -- second 4x4 word transpose
michael@0 197 movq mm3, mm4 ; 23 22 21 20
michael@0 198
michael@0 199 punpcklwd mm1, mm0 ; 11 01 10 00
michael@0 200 punpckhwd mm2, mm0 ; 13 03 12 02
michael@0 201
michael@0 202 punpcklwd mm3, mm6 ; 31 21 30 20
michael@0 203 punpckhwd mm4, mm6 ; 33 23 32 22
michael@0 204
michael@0 205 movq mm0, mm1 ; 11 01 10 00
michael@0 206 movq mm5, mm2 ; 13 03 12 02
michael@0 207
michael@0 208 punpckldq mm0, mm3 ; 30 20 10 00
michael@0 209 punpckhdq mm1, mm3 ; 31 21 11 01
michael@0 210
michael@0 211 punpckldq mm2, mm4 ; 32 22 12 02
michael@0 212 punpckhdq mm5, mm4 ; 33 23 13 03
michael@0 213
michael@0 214 pxor mm7, mm7 ; mm7 = 0, used for byte-to-word unpacking and as the pack filler
michael@0 215
michael@0 216 movd mm4, [rdx] ; load 4 pixels of dest line 0
michael@0 217 punpcklbw mm4, mm7 ; zero-extend the 4 bytes to 4 words
michael@0 218 paddsw mm0, mm4 ; residual + prediction, signed saturating add
michael@0 219 packuswb mm0, mm7 ; clamp each word to 0..255 and pack to bytes
michael@0 220 movd [rdx], mm0 ; store 4 pixels back to dest line 0
michael@0 221
michael@0 222 movd mm4, [rdx+rdi] ; dest line 1
michael@0 223 punpcklbw mm4, mm7
michael@0 224 paddsw mm1, mm4
michael@0 225 packuswb mm1, mm7
michael@0 226 movd [rdx+rdi], mm1
michael@0 227
michael@0 228 movd mm4, [rdx+2*rdi] ; dest line 2
michael@0 229 punpcklbw mm4, mm7
michael@0 230 paddsw mm2, mm4
michael@0 231 packuswb mm2, mm7
michael@0 232 movd [rdx+rdi*2], mm2
michael@0 233
michael@0 234 add rdx, rdi ; advance one line so that rdx + 2*rdi addresses dest line 3
michael@0 235
michael@0 236 movd mm4, [rdx+2*rdi] ; dest line 3
michael@0 237 punpcklbw mm4, mm7
michael@0 238 paddsw mm5, mm4
michael@0 239 packuswb mm5, mm7
michael@0 240 movd [rdx+rdi*2], mm5
michael@0 241
michael@0 242 ; begin epilog
michael@0 243 pop rdi
michael@0 244 RESTORE_GOT
michael@0 245 UNSHADOW_ARGS
michael@0 246 pop rbp
michael@0 247 ret ; NOTE(review): no emms is issued here; presumably the caller clears MMX state -- confirm
michael@0 248
michael@0 249 SECTION_RODATA ; macro from x86_abi_support.asm; presumably switches to the read-only data section -- confirm
michael@0 250 align 16
michael@0 251 x_s1sqr2:
michael@0 252 times 4 dw 0x8A8C ; 35468 = sin(pi/8) * sqrt(2) in 16.16 fixed point; negative as a signed word, so users add the operand back after pmulhw
michael@0 253 align 16
michael@0 254 x_c1sqr2less1:
michael@0 255 times 4 dw 0x4E7B ; 20091 = (cos(pi/8) * sqrt(2) - 1) in 16.16 fixed point; users add the operand back after pmulhw
michael@0 256 align 16
michael@0 257 fours:
michael@0 258 times 4 dw 0x0004 ; rounding bias added before the arithmetic shift right by 3 in the second IDCT pass

mercurial