media/libvpx/vp8/common/x86/iwalsh_sse2.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 ;
michael@0 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 ;
michael@0 4 ; Use of this source code is governed by a BSD-style license
michael@0 5 ; that can be found in the LICENSE file in the root of the source
michael@0 6 ; tree. An additional intellectual property rights grant can be found
michael@0 7 ; in the file PATENTS. All contributing project authors may
michael@0 8 ; be found in the AUTHORS file in the root of the source tree.
michael@0 9 ;
michael@0 10
michael@0 11
michael@0 12 %include "vpx_ports/x86_abi_support.asm"
michael@0 13 ; Inverse 4x4 Walsh transform on 16-bit words: butterfly pass, transpose, second butterfly pass, then (x + 3) >> 3 and a strided scatter to output.
michael@0 14 ;void vp8_short_inv_walsh4x4_sse2(short *input, short *output)
michael@0 15 global sym(vp8_short_inv_walsh4x4_sse2) PRIVATE
michael@0 16 sym(vp8_short_inv_walsh4x4_sse2):
michael@0 17 push rbp
michael@0 18 mov rbp, rsp
michael@0 19 SHADOW_ARGS_TO_STACK 2
michael@0 20 ; end prolog
michael@0 21 ; register roles: rcx = arg(0), rdx = arg(1) (input and output per the prototype above); rax seeds the rounding constant
michael@0 22 mov rcx, arg(0)
michael@0 23 mov rdx, arg(1)
michael@0 24 mov rax, 30003h ;two 16-bit words of 3; broadcast into xmm0 below and added before the >>3
michael@0 25 ; first pass; movdqa is an aligned load, so input must be 16-byte aligned
michael@0 26 movdqa xmm0, [rcx + 0] ;ip[4] ip[0]
michael@0 27 movdqa xmm1, [rcx + 16] ;ip[12] ip[8]
michael@0 28
michael@0 29
michael@0 30 pshufd xmm2, xmm1, 4eh ;ip[8] ip[12]
michael@0 31 movdqa xmm3, xmm0 ;ip[4] ip[0]
michael@0 32
michael@0 33 paddw xmm0, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
michael@0 34 psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
michael@0 35
michael@0 36 movdqa xmm4, xmm0
michael@0 37 punpcklqdq xmm0, xmm3 ;d1 a1
michael@0 38 punpckhqdq xmm4, xmm3 ;c1 b1
michael@0 39
michael@0 40 movdqa xmm1, xmm4 ;c1 b1
michael@0 41 paddw xmm4, xmm0 ;d1+c1 a1+b1 aka op[4] op[0]
michael@0 42 psubw xmm0, xmm1 ;d1-c1 a1-b1 aka op[12] op[8]
michael@0 43 ; transpose the 4x4 block of words (digits below are row,column; xmm4 holds rows 1|0, xmm0 holds rows 3|2)
michael@0 44 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
michael@0 45 ; 13 12 11 10 03 02 01 00
michael@0 46 ;
michael@0 47 ; 33 32 31 30 23 22 21 20
michael@0 48 ;
michael@0 49 movdqa xmm3, xmm4 ; 13 12 11 10 03 02 01 00
michael@0 50 punpcklwd xmm4, xmm0 ; 23 03 22 02 21 01 20 00
michael@0 51 punpckhwd xmm3, xmm0 ; 33 13 32 12 31 11 30 10
michael@0 52 movdqa xmm1, xmm4 ; 23 03 22 02 21 01 20 00
michael@0 53 punpcklwd xmm4, xmm3 ; 31 21 11 01 30 20 10 00
michael@0 54 punpckhwd xmm1, xmm3 ; 33 23 13 03 32 22 12 02
michael@0 55 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
michael@0 56 movd xmm0, eax ;low dword = 0003 0003h (eax still holds the constant loaded above)
michael@0 57 pshufd xmm2, xmm1, 4eh ;col 2 | col 3 (qwords of xmm1 swapped)
michael@0 58 movdqa xmm3, xmm4 ;col 1 | col 0
michael@0 59
michael@0 60 pshufd xmm0, xmm0, 0 ;03 03 03 03 03 03 03 03
michael@0 61
michael@0 62 paddw xmm4, xmm2 ;col1+col2 col0+col3 aka b1 a1
michael@0 63 psubw xmm3, xmm2 ;col1-col2 col0-col3 aka c1 d1
michael@0 64
michael@0 65 movdqa xmm5, xmm4
michael@0 66 punpcklqdq xmm4, xmm3 ;d1 a1
michael@0 67 punpckhqdq xmm5, xmm3 ;c1 b1
michael@0 68
michael@0 69 movdqa xmm1, xmm5 ;c1 b1
michael@0 70 paddw xmm5, xmm4 ;d1+c1 a1+b1 aka result col 1 | col 0 (word lane i = row i)
michael@0 71 psubw xmm4, xmm1 ;d1-c1 a1-b1 aka result col 3 | col 2 (word lane i = row i)
michael@0 72 ; rounding: add 3 to every word, then arithmetic shift right by 3
michael@0 73 paddw xmm5, xmm0
michael@0 74 paddw xmm4, xmm0
michael@0 75 psraw xmm5, 3
michael@0 76 psraw xmm4, 3
michael@0 77 ; scatter: the word for row r, col c is stored at byte offset 32*(4*r + c) from rdx; low qwords first (cols 0 and 2)
michael@0 78 movd eax, xmm5 ;ax = r0 c0, upper word = r1 c0
michael@0 79 movd ecx, xmm4 ;cx = r0 c2, upper word = r1 c2
michael@0 80 psrldq xmm5, 4 ;shift the next two words into the low dword
michael@0 81 psrldq xmm4, 4
michael@0 82 mov word ptr[rdx+32*0], ax ;r0 c0
michael@0 83 mov word ptr[rdx+32*2], cx ;r0 c2
michael@0 84 shr eax, 16
michael@0 85 shr ecx, 16
michael@0 86 mov word ptr[rdx+32*4], ax ;r1 c0
michael@0 87 mov word ptr[rdx+32*6], cx ;r1 c2
michael@0 88 movd eax, xmm5 ;ax = r2 c0, upper word = r3 c0
michael@0 89 movd ecx, xmm4 ;cx = r2 c2, upper word = r3 c2
michael@0 90 psrldq xmm5, 4 ;original high qword is now in the low half
michael@0 91 psrldq xmm4, 4
michael@0 92 mov word ptr[rdx+32*8], ax ;r2 c0
michael@0 93 mov word ptr[rdx+32*10], cx ;r2 c2
michael@0 94 shr eax, 16
michael@0 95 shr ecx, 16
michael@0 96 mov word ptr[rdx+32*12], ax ;r3 c0
michael@0 97 mov word ptr[rdx+32*14], cx ;r3 c2
michael@0 98 ; former high qwords: col 1 (xmm5) and col 3 (xmm4)
michael@0 99 movd eax, xmm5 ;ax = r0 c1, upper word = r1 c1
michael@0 100 movd ecx, xmm4 ;cx = r0 c3, upper word = r1 c3
michael@0 101 psrldq xmm5, 4
michael@0 102 psrldq xmm4, 4
michael@0 103 mov word ptr[rdx+32*1], ax ;r0 c1
michael@0 104 mov word ptr[rdx+32*3], cx ;r0 c3
michael@0 105 shr eax, 16
michael@0 106 shr ecx, 16
michael@0 107 mov word ptr[rdx+32*5], ax ;r1 c1
michael@0 108 mov word ptr[rdx+32*7], cx ;r1 c3
michael@0 109 movd eax, xmm5 ;ax = r2 c1, upper word = r3 c1
michael@0 110 movd ecx, xmm4 ;cx = r2 c3, upper word = r3 c3
michael@0 111 mov word ptr[rdx+32*9], ax ;r2 c1
michael@0 112 mov word ptr[rdx+32*11], cx ;r2 c3
michael@0 113 shr eax, 16
michael@0 114 shr ecx, 16
michael@0 115 mov word ptr[rdx+32*13], ax ;r3 c1
michael@0 116 mov word ptr[rdx+32*15], cx ;r3 c3
michael@0 117
michael@0 118 ; begin epilog
michael@0 119 UNSHADOW_ARGS
michael@0 120 pop rbp
michael@0 121 ret

mercurial