media/libvpx/vp8/encoder/x86/subtract_mmx.asm

Thu, 15 Jan 2015 15:59:08 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:59:08 +0100
branch
TOR_BUG_9701
changeset 10
ac0c01689b40
permissions
-rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 ;
michael@0 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 ;
michael@0 4 ; Use of this source code is governed by a BSD-style license
michael@0 5 ; that can be found in the LICENSE file in the root of the source
michael@0 6 ; tree. An additional intellectual property rights grant can be found
michael@0 7 ; in the file PATENTS. All contributing project authors may
michael@0 8 ; be found in the AUTHORS file in the root of the source tree.
michael@0 9 ;
michael@0 10
michael@0 11
michael@0 12 %include "vpx_ports/x86_abi_support.asm"
michael@0 13
michael@0 14 ;void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride,
michael@0 15 ; short *diff, unsigned char *Predictor,
michael@0 16 ; int pitch);
michael@0 17 global sym(vp8_subtract_b_mmx_impl) PRIVATE ; 4x4 block subtract: diff[r][c] = z[r][c] - Predictor[r][c], each byte zero-extended to a 16-bit word before subtracting
michael@0 18 sym(vp8_subtract_b_mmx_impl): ; pitch is used as the Predictor row step in bytes and as the diff row step in shorts; NOTE: no emms is issued in this routine
michael@0 19 push rbp
michael@0 20 mov rbp, rsp ; set up frame pointer; arg(n) presumably addresses relative to rbp (macro from x86_abi_support.asm, not shown here - confirm)
michael@0 21 SHADOW_ARGS_TO_STACK 5 ; ABI helper macro (defined in the include); 5 matches the arguments read via arg(0)..arg(4)
michael@0 22 push rsi
michael@0 23 push rdi ; rsi/rdi are saved here and restored in the epilog
michael@0 24 ; end prolog
michael@0 25
michael@0 26
michael@0 27 mov rdi, arg(2) ;diff (output, 16-bit words)
michael@0 28 mov rax, arg(3) ;Predictor (input bytes)
michael@0 29 mov rsi, arg(0) ;z (input source bytes)
michael@0 30 movsxd rdx, dword ptr arg(1);src_stride, sign-extended from int to 64 bits
michael@0 31 movsxd rcx, dword ptr arg(4);pitch, sign-extended from int to 64 bits
michael@0 32 pxor mm7, mm7 ; mm7 = 0, interleaved with data bytes below to zero-extend them
michael@0 33 ; ---- row 0: z, Predictor, diff at offset 0
michael@0 34 movd mm0, [rsi] ; 4 source bytes
michael@0 35 movd mm1, [rax] ; 4 predictor bytes
michael@0 36 punpcklbw mm0, mm7 ; bytes -> four 16-bit words
michael@0 37 punpcklbw mm1, mm7
michael@0 38 psubw mm0, mm1 ; source - predictor, per word
michael@0 39 movq [rdi], mm0 ; store 4 shorts (8 bytes)
michael@0 40
michael@0 41 ; ---- row 1: z + src_stride, Predictor + pitch, diff + pitch shorts
michael@0 42 movd mm0, [rsi+rdx]
michael@0 43 movd mm1, [rax+rcx]
michael@0 44 punpcklbw mm0, mm7
michael@0 45 punpcklbw mm1, mm7
michael@0 46 psubw mm0, mm1
michael@0 47 movq [rdi+rcx*2],mm0 ; rcx*2 bytes = pitch shorts
michael@0 48
michael@0 49 ; ---- row 2: z + 2*src_stride, Predictor + 2*pitch, diff + 2*pitch shorts
michael@0 50 movd mm0, [rsi+rdx*2]
michael@0 51 movd mm1, [rax+rcx*2]
michael@0 52 punpcklbw mm0, mm7
michael@0 53 punpcklbw mm1, mm7
michael@0 54 psubw mm0, mm1
michael@0 55 movq [rdi+rcx*4], mm0 ; rcx*4 bytes = 2*pitch shorts
michael@0 56
michael@0 57 lea rsi, [rsi+rdx*2] ; rsi = z + 2*src_stride
michael@0 58 lea rcx, [rcx+rcx*2] ; rcx = 3*pitch (scale 3 is not encodable in an address, so it is precomputed)
michael@0 59
michael@0 60
michael@0 61 ; ---- row 3: z + 3*src_stride, Predictor + 3*pitch, diff + 3*pitch shorts
michael@0 62 movd mm0, [rsi+rdx] ; rsi already advanced by 2*src_stride, so this is row 3
michael@0 63 movd mm1, [rax+rcx] ; rcx now holds 3*pitch
michael@0 64 punpcklbw mm0, mm7
michael@0 65 punpcklbw mm1, mm7
michael@0 66 psubw mm0, mm1
michael@0 67 movq [rdi+rcx*2], mm0 ; (3*pitch)*2 bytes = 3*pitch shorts
michael@0 68
michael@0 69 ; begin epilog
michael@0 70 pop rdi ; restore in reverse order of the prolog pushes
michael@0 71 pop rsi
michael@0 72 UNSHADOW_ARGS ; counterpart of SHADOW_ARGS_TO_STACK (macro from the include)
michael@0 73 pop rbp
michael@0 74 ret
michael@0 75
michael@0 76 ;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride,
michael@0 77 ;unsigned char *pred, int pred_stride)
michael@0 78 global sym(vp8_subtract_mby_mmx) PRIVATE ; 16 rows x 16 bytes: diff = src - pred as 16-bit words; diff rows are written back to back (32 bytes per row, no stride)
michael@0 79 sym(vp8_subtract_mby_mmx): ; NOTE: no emms is issued in this routine
michael@0 80 push rbp
michael@0 81 mov rbp, rsp
michael@0 82 SHADOW_ARGS_TO_STACK 5 ; ABI helper macro from x86_abi_support.asm (not shown here); 5 matches arg(0)..arg(4)
michael@0 83 push rsi
michael@0 84 push rdi
michael@0 85 ; end prolog (rbx is additionally saved a few lines below)
michael@0 86
michael@0 87 mov rdi, arg(0) ;diff (output, 16-bit words)
michael@0 88 mov rsi, arg(1) ;src (input bytes)
michael@0 89 movsxd rdx, dword ptr arg(2);src_stride, sign-extended from int to 64 bits
michael@0 90 mov rax, arg(3) ;pred (input bytes)
michael@0 91 push rbx ; save rbx before using it for pred_stride; popped first at the end
michael@0 92 movsxd rbx, dword ptr arg(4);pred_stride, sign-extended from int to 64 bits
michael@0 93
michael@0 94 pxor mm0, mm0 ; mm0 = 0, interleaved with data bytes to zero-extend them
michael@0 95 mov rcx, 16 ; row counter
michael@0 96
michael@0 97
michael@0 98 .submby_loop: ; one iteration per row; invariant: rcx = rows remaining (> 0)
michael@0 99 movq mm1, [rsi] ; src bytes 0..7
michael@0 100 movq mm3, [rax] ; pred bytes 0..7
michael@0 101
michael@0 102 movq mm2, mm1 ; keep copies for the high halves
michael@0 103 movq mm4, mm3
michael@0 104
michael@0 105 punpcklbw mm1, mm0 ; bytes 0..3 -> words
michael@0 106 punpcklbw mm3, mm0
michael@0 107
michael@0 108 punpckhbw mm2, mm0 ; bytes 4..7 -> words
michael@0 109 punpckhbw mm4, mm0
michael@0 110
michael@0 111 psubw mm1, mm3 ; src - pred, per word
michael@0 112 psubw mm2, mm4
michael@0 113
michael@0 114 movq [rdi], mm1 ; diff words 0..3
michael@0 115 movq [rdi+8], mm2 ; diff words 4..7
michael@0 116
michael@0 117 movq mm1, [rsi+8] ; src bytes 8..15
michael@0 118 movq mm3, [rax+8] ; pred bytes 8..15
michael@0 119
michael@0 120 movq mm2, mm1
michael@0 121 movq mm4, mm3
michael@0 122
michael@0 123 punpcklbw mm1, mm0 ; bytes 8..11 -> words
michael@0 124 punpcklbw mm3, mm0
michael@0 125
michael@0 126 punpckhbw mm2, mm0 ; bytes 12..15 -> words
michael@0 127 punpckhbw mm4, mm0
michael@0 128
michael@0 129 psubw mm1, mm3
michael@0 130 psubw mm2, mm4
michael@0 131
michael@0 132 movq [rdi+16], mm1 ; diff words 8..11
michael@0 133 movq [rdi+24], mm2 ; diff words 12..15
michael@0 134 add rdi, 32 ; next diff row: 16 shorts = 32 bytes
michael@0 135 lea rax, [rax+rbx] ; pred += pred_stride
michael@0 136 lea rsi, [rsi+rdx] ; src += src_stride
michael@0 137 dec rcx
michael@0 138 jnz .submby_loop ; repeat until all 16 rows are done
michael@0 139
michael@0 140 pop rbx ; restores run in reverse push order: rbx, rdi, rsi
michael@0 141 pop rdi
michael@0 142 pop rsi
michael@0 143 ; begin epilog (the three register restores just above already belong to it)
michael@0 144 UNSHADOW_ARGS ; counterpart of SHADOW_ARGS_TO_STACK (macro from the include)
michael@0 145 pop rbp
michael@0 146 ret
michael@0 147
michael@0 148
michael@0 149 ;vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc,
michael@0 150 ; int src_stride, unsigned char *upred,
michael@0 151 ; unsigned char *vpred, int pred_stride)
michael@0 152 ; Writes 8x8 usrc-upred differences at diff+256 shorts, immediately followed by 8x8 vsrc-vpred differences, all as 16-bit words.
michael@0 153 global sym(vp8_subtract_mbuv_mmx) PRIVATE
michael@0 154 sym(vp8_subtract_mbuv_mmx): ; NOTE: no emms is issued in this routine
michael@0 155 push rbp
michael@0 156 mov rbp, rsp
michael@0 157 SHADOW_ARGS_TO_STACK 7 ; ABI helper macro from x86_abi_support.asm (not shown here); 7 matches the signature's argument count
michael@0 158 push rsi
michael@0 159 push rdi
michael@0 160 ; end prolog (rbx is additionally saved a few lines below)
michael@0 161
michael@0 162 mov rdi, arg(0) ;diff (output, 16-bit words)
michael@0 163 mov rsi, arg(1) ;usrc (input bytes)
michael@0 164 movsxd rdx, dword ptr arg(3);src_stride, sign-extended; shared by the U and V source planes
michael@0 165 mov rax, arg(4) ;upred (input bytes)
michael@0 166 add rdi, 256*2 ;diff = diff + 256 (shorts), i.e. 512 bytes; presumably skips the 16x16 luma differences - confirm with caller
michael@0 167 mov rcx, 8 ; row counter for the U loop
michael@0 168 push rbx ; save rbx before using it for pred_stride; popped first at the end
michael@0 169 movsxd rbx, dword ptr arg(6);pred_stride, sign-extended; shared by the U and V predictors
michael@0 170
michael@0 171 pxor mm7, mm7 ; mm7 = 0, interleaved with data bytes to zero-extend them
michael@0 172
michael@0 173 .submbu_loop: ; one iteration per U row; invariant: rcx = rows remaining (> 0)
michael@0 174 movq mm0, [rsi] ; 8 usrc bytes
michael@0 175 movq mm1, [rax] ; 8 upred bytes
michael@0 176 movq mm3, mm0 ; copies for the high halves
michael@0 177 movq mm4, mm1
michael@0 178 punpcklbw mm0, mm7 ; bytes 0..3 -> words
michael@0 179 punpcklbw mm1, mm7
michael@0 180 punpckhbw mm3, mm7 ; bytes 4..7 -> words
michael@0 181 punpckhbw mm4, mm7
michael@0 182 psubw mm0, mm1 ; src - pred, per word
michael@0 183 psubw mm3, mm4
michael@0 184 movq [rdi], mm0 ; diff words 0..3
michael@0 185 movq [rdi+8], mm3 ; diff words 4..7
michael@0 186 add rdi, 16 ; next diff row: 8 shorts = 16 bytes
michael@0 187 add rsi, rdx ; usrc += src_stride
michael@0 188 add rax, rbx ; upred += pred_stride
michael@0 189
michael@0 190 dec rcx
michael@0 191 jnz .submbu_loop
michael@0 192
michael@0 193 mov rsi, arg(2) ;vsrc
michael@0 194 mov rax, arg(5) ;vpred
michael@0 195 mov rcx, 8 ; row counter for the V loop; rdi is not reset, so V output directly follows U output
michael@0 196
michael@0 197 .submbv_loop: ; same per-row work as .submbu_loop, on the V pointers
michael@0 198 movq mm0, [rsi] ; 8 vsrc bytes
michael@0 199 movq mm1, [rax] ; 8 vpred bytes
michael@0 200 movq mm3, mm0
michael@0 201 movq mm4, mm1
michael@0 202 punpcklbw mm0, mm7
michael@0 203 punpcklbw mm1, mm7
michael@0 204 punpckhbw mm3, mm7
michael@0 205 punpckhbw mm4, mm7
michael@0 206 psubw mm0, mm1
michael@0 207 psubw mm3, mm4
michael@0 208 movq [rdi], mm0
michael@0 209 movq [rdi+8], mm3
michael@0 210 add rdi, 16 ; next diff row: 8 shorts = 16 bytes
michael@0 211 add rsi, rdx ; vsrc += src_stride
michael@0 212 add rax, rbx ; vpred += pred_stride
michael@0 213
michael@0 214 dec rcx
michael@0 215 jnz .submbv_loop
michael@0 216
michael@0 217 pop rbx ; restores run in reverse push order: rbx, rdi, rsi
michael@0 218 ; begin epilog
michael@0 219 pop rdi
michael@0 220 pop rsi
michael@0 221 UNSHADOW_ARGS ; counterpart of SHADOW_ARGS_TO_STACK (macro from the include)
michael@0 222 pop rbp
michael@0 223 ret

mercurial