media/libvpx/vp8/encoder/x86/subtract_sse2.asm

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 ;
michael@0 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
michael@0 3 ;
michael@0 4 ; Use of this source code is governed by a BSD-style license
michael@0 5 ; that can be found in the LICENSE file in the root of the source
michael@0 6 ; tree. An additional intellectual property rights grant can be found
michael@0 7 ; in the file PATENTS. All contributing project authors may
michael@0 8 ; be found in the AUTHORS file in the root of the source tree.
michael@0 9 ;
michael@0 10
michael@0 11
michael@0 12 %include "vpx_ports/x86_abi_support.asm"
michael@0 13
; ---------------------------------------------------------------------
; vp8_subtract_b_sse2_impl
;
; Computes a 4x4 block of 16-bit differences: diff = z - Predictor.
; For each of the 4 rows, 4 source bytes and 4 predictor bytes are
; widened to words (byte-interleaved with the zeroed register mm7) and
; subtracted with psubw; the 4 resulting words are written with one
; 8-byte store.
;
; Arguments (read through arg(n)):
;   arg(0) z          - source pixels; rows are src_stride bytes apart
;   arg(1) src_stride - 32-bit value, sign-extended into rdx
;   arg(2) diff       - output words; rows are written pitch 16-bit
;                       elements (pitch*2 bytes) apart
;   arg(3) Predictor  - predictor pixels; rows are pitch bytes apart
;   arg(4) pitch      - 32-bit value, sign-extended into rcx
;
; Registers written: rax, rcx, rdx, rsi, rdi (rsi/rdi are saved and
; restored), mm0, mm1, mm7.
;
; Only MMX (mm) registers are used here even though the name says sse2,
; and no GLOBAL() data is referenced in this routine.
; NOTE(review): no emms is executed before returning - presumably the
; callers are responsible for clearing the MMX state; confirm.
; ---------------------------------------------------------------------
michael@0 14 ;void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
michael@0 15 ; short *diff, unsigned char *Predictor,
michael@0 16 ; int pitch);
michael@0 17 global sym(vp8_subtract_b_sse2_impl) PRIVATE
michael@0 18 sym(vp8_subtract_b_sse2_impl):
michael@0 19 push rbp
michael@0 20 mov rbp, rsp
michael@0 21 SHADOW_ARGS_TO_STACK 5
michael@0 22 GET_GOT rbx
michael@0 23 push rsi
michael@0 24 push rdi
michael@0 25 ; end prolog
michael@0 26
michael@0 27 mov rdi, arg(2) ;diff
michael@0 28 mov rax, arg(3) ;Predictor
michael@0 29 mov rsi, arg(0) ;z
michael@0 30 movsxd rdx, dword ptr arg(1);src_stride;
michael@0 31 movsxd rcx, dword ptr arg(4);pitch
; mm7 = 0; used as the high byte when widening unsigned bytes to words
michael@0 32 pxor mm7, mm7
michael@0 33
; row 0: z[0..3] - Predictor[0..3] -> diff[0..3]
michael@0 34 movd mm0, [rsi]
michael@0 35 movd mm1, [rax]
michael@0 36 punpcklbw mm0, mm7
michael@0 37 punpcklbw mm1, mm7
michael@0 38 psubw mm0, mm1
michael@0 39 movq MMWORD PTR [rdi], mm0
michael@0 40
; row 1: src at z + src_stride, pred at Predictor + pitch,
;        output at diff + pitch words (byte offset rcx*2)
michael@0 41 movd mm0, [rsi+rdx]
michael@0 42 movd mm1, [rax+rcx]
michael@0 43 punpcklbw mm0, mm7
michael@0 44 punpcklbw mm1, mm7
michael@0 45 psubw mm0, mm1
michael@0 46 movq MMWORD PTR [rdi+rcx*2], mm0
michael@0 47
; row 2: src at z + 2*src_stride, pred at Predictor + 2*pitch,
;        output at diff + 2*pitch words (byte offset rcx*4)
michael@0 48 movd mm0, [rsi+rdx*2]
michael@0 49 movd mm1, [rax+rcx*2]
michael@0 50 punpcklbw mm0, mm7
michael@0 51 punpcklbw mm1, mm7
michael@0 52 psubw mm0, mm1
michael@0 53 movq MMWORD PTR [rdi+rcx*4], mm0
michael@0 54
; Prepare row 3 addressing: rsi = z + 2*src_stride, rcx = 3*pitch.
michael@0 55 lea rsi, [rsi+rdx*2]
michael@0 56 lea rcx, [rcx+rcx*2]
michael@0 57
; row 3: src at z + 3*src_stride, pred at Predictor + 3*pitch,
;        output at diff + 3*pitch words (byte offset rcx*2 with the
;        tripled rcx)
michael@0 58 movd mm0, [rsi+rdx]
michael@0 59 movd mm1, [rax+rcx]
michael@0 60 punpcklbw mm0, mm7
michael@0 61 punpcklbw mm1, mm7
michael@0 62 psubw mm0, mm1
michael@0 63 movq MMWORD PTR [rdi+rcx*2], mm0
michael@0 64
michael@0 65 ; begin epilog
michael@0 66 pop rdi
michael@0 67 pop rsi
michael@0 68 RESTORE_GOT
michael@0 69 UNSHADOW_ARGS
michael@0 70 pop rbp
michael@0 71 ret
michael@0 72
michael@0 73
; ---------------------------------------------------------------------
; vp8_subtract_mby_sse2
;
; Computes 16-bit differences src - pred for 16 rows of 16 pixels.
; The loop runs 8 times and handles two rows per iteration; each row
; produces 16 words (32 bytes) which are stored back-to-back in diff,
; so diff advances by 64 bytes per iteration.
;
; Arguments (read through arg(n)):
;   arg(0) diff        - output words, written contiguously
;   arg(1) src         - source pixels; rows are src_stride bytes apart
;   arg(2) src_stride  - 32-bit value, sign-extended into rdx
;   arg(3) pred        - predictor pixels; rows are pred_stride bytes apart
;   arg(4) pred_stride - 32-bit value, sign-extended into rbx
;
; Technique: psubb yields only the low 8 bits of each difference. The
; high byte of each word is rebuilt from a compare mask: both operands
; are XORed with 0x80 (t80) so that the signed compare pcmpgtb orders
; them as unsigned values; the mask is 0xFF where pred > src (negative
; difference) and 0x00 otherwise. Interleaving low byte and mask with
; punpcklbw/punpckhbw gives the sign-extended 16-bit result.
;
; Alignment: every load and store below uses movdqa, so src, pred and
; diff row addresses must be 16-byte aligned.
;
; Registers written: rax, rcx, rdx, rsi, rdi, rbx (rsi, rdi and rbx are
; saved and restored), xmm0-xmm5.
; ---------------------------------------------------------------------
michael@0 74 ;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride,
michael@0 75 ;unsigned char *pred, int pred_stride)
michael@0 76 global sym(vp8_subtract_mby_sse2) PRIVATE
michael@0 77 sym(vp8_subtract_mby_sse2):
michael@0 78 push rbp
michael@0 79 mov rbp, rsp
michael@0 80 SHADOW_ARGS_TO_STACK 5
michael@0 81 GET_GOT rbx
michael@0 82 push rsi
michael@0 83 push rdi
michael@0 84 ; end prolog
michael@0 85
michael@0 86 mov rdi, arg(0) ;diff
michael@0 87 mov rsi, arg(1) ;src
michael@0 88 movsxd rdx, dword ptr arg(2);src_stride
michael@0 89 mov rax, arg(3) ;pred
; xmm4 = 16 x 0x80. The constant is fetched before rbx is overwritten
; with pred_stride below.
; NOTE(review): presumably GLOBAL() may address through rbx when a GOT
; is in use - confirm in vpx_ports/x86_abi_support.asm.
michael@0 90 movdqa xmm4, [GLOBAL(t80)]
michael@0 91 push rbx
michael@0 92 mov rcx, 8 ; do two lines at one time
michael@0 93 movsxd rbx, dword ptr arg(4);pred_stride
michael@0 94
michael@0 95 .submby_loop:
; ---- first row of the pair ----
michael@0 96 movdqa xmm0, [rsi] ; src
michael@0 97 movdqa xmm1, [rax] ; pred
michael@0 98
; xmm2 keeps an unmodified copy of src for the compare; xmm0 becomes
; the low 8 bits of (src - pred)
michael@0 99 movdqa xmm2, xmm0
michael@0 100 psubb xmm0, xmm1
michael@0 101
michael@0 102 pxor xmm1, xmm4 ;convert to signed values
michael@0 103 pxor xmm2, xmm4
; xmm1 = 0xFF in each byte where pred > src, else 0x00
michael@0 104 pcmpgtb xmm1, xmm2 ; obtain sign information
michael@0 105
; xmm0 = words for pixels 0..7, xmm2 = words for pixels 8..15
michael@0 106 movdqa xmm2, xmm0
michael@0 107 punpcklbw xmm0, xmm1 ; put sign back to subtraction
michael@0 108 punpckhbw xmm2, xmm1 ; put sign back to subtraction
michael@0 109
; ---- second row of the pair: load before advancing the pointers ----
michael@0 110 movdqa xmm3, [rsi + rdx]
michael@0 111 movdqa xmm5, [rax + rbx]
michael@0 112
; advance src and pred by two rows
michael@0 113 lea rsi, [rsi+rdx*2]
michael@0 114 lea rax, [rax+rbx*2]
michael@0 115
; store the first row's 16 words
michael@0 116 movdqa [rdi], xmm0
michael@0 117 movdqa [rdi +16], xmm2
michael@0 118
; same low-byte / sign-mask sequence for the second row
; (xmm3 = src, xmm5 = pred, xmm1 = copy of src)
michael@0 119 movdqa xmm1, xmm3
michael@0 120 psubb xmm3, xmm5
michael@0 121
michael@0 122 pxor xmm5, xmm4 ;convert to signed values
michael@0 123 pxor xmm1, xmm4
michael@0 124 pcmpgtb xmm5, xmm1 ; obtain sign information
michael@0 125
michael@0 126 movdqa xmm1, xmm3
michael@0 127 punpcklbw xmm3, xmm5 ; put sign back to subtraction
michael@0 128 punpckhbw xmm1, xmm5 ; put sign back to subtraction
michael@0 129
; store the second row's 16 words
michael@0 130 movdqa [rdi +32], xmm3
michael@0 131 movdqa [rdi +48], xmm1
michael@0 132
; 2 rows x 16 words x 2 bytes = 64 bytes written per iteration
michael@0 133 add rdi, 64
michael@0 134 dec rcx
michael@0 135 jnz .submby_loop
michael@0 136
; pops mirror the pushes above in reverse order: rbx, rdi, rsi
michael@0 137 pop rbx
michael@0 138 pop rdi
michael@0 139 pop rsi
michael@0 140 ; begin epilog
michael@0 141 RESTORE_GOT
michael@0 142 UNSHADOW_ARGS
michael@0 143 pop rbp
michael@0 144 ret
michael@0 145
; ---------------------------------------------------------------------
; vp8_subtract_mbuv_sse2
;
; Computes 16-bit differences src - pred for two 8x8 pixel planes
; (U, then V). Output starts 256 words (512 bytes) past diff: the U
; loop writes 64 words there and the V loop continues directly after
; them, because rdi is not reset between the two loops.
;
; Each loop runs 4 times and handles two 8-pixel rows per iteration:
; the two rows are packed into one xmm register with punpcklqdq, then
; 16 words (32 bytes) are stored.
;
; Arguments (read through arg(n)):
;   arg(0) diff        - output base; writes begin at diff + 256 words
;   arg(1) usrc        - U source pixels
;   arg(2) vsrc        - V source pixels
;   arg(3) src_stride  - 32-bit value, sign-extended into rdx; used for
;                        both usrc and vsrc
;   arg(4) upred       - U predictor pixels
;   arg(5) vpred       - V predictor pixels
;   arg(6) pred_stride - 32-bit value, sign-extended into rbx; used for
;                        both upred and vpred
;
; Technique: psubb yields the low 8 bits of each difference; the high
; byte is a compare mask (0xFF where pred > src, else 0x00) obtained by
; XORing both operands with 0x80 (t80) and using the signed compare
; pcmpgtb. punpcklbw/punpckhbw interleave low byte and mask into
; sign-extended words.
;
; Alignment: pixel rows are loaded with movq (no alignment requirement);
; the stores use movdqa, so the output address must be 16-byte aligned.
;
; Registers written: rax, rcx, rdx, rsi, rdi, rbx (rsi, rdi and rbx are
; saved and restored), xmm0-xmm4.
; ---------------------------------------------------------------------
michael@0 146 ;vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc,
michael@0 147 ; int src_stride, unsigned char *upred,
michael@0 148 ; unsigned char *vpred, int pred_stride)
michael@0 149 global sym(vp8_subtract_mbuv_sse2) PRIVATE
michael@0 150 sym(vp8_subtract_mbuv_sse2):
michael@0 151 push rbp
michael@0 152 mov rbp, rsp
michael@0 153 SHADOW_ARGS_TO_STACK 7
michael@0 154 GET_GOT rbx
michael@0 155 push rsi
michael@0 156 push rdi
michael@0 157 ; end prolog
michael@0 158
; xmm4 = 16 x 0x80; fetched before rbx is overwritten with pred_stride.
; NOTE(review): presumably GLOBAL() may address through rbx when a GOT
; is in use - confirm in vpx_ports/x86_abi_support.asm.
michael@0 159 movdqa xmm4, [GLOBAL(t80)]
michael@0 160 mov rdi, arg(0) ;diff
michael@0 161 mov rsi, arg(1) ;usrc
michael@0 162 movsxd rdx, dword ptr arg(3);src_stride;
michael@0 163 mov rax, arg(4) ;upred
michael@0 164 add rdi, 256*2 ;diff = diff + 256 (shorts)
; 4 iterations x 2 rows = 8 rows
michael@0 165 mov rcx, 4
michael@0 166 push rbx
michael@0 167 movsxd rbx, dword ptr arg(6);pred_stride
michael@0 168
michael@0 169 ;u
michael@0 170 .submbu_loop:
michael@0 171 movq xmm0, [rsi] ; src
michael@0 172 movq xmm2, [rsi+rdx] ; src -- next line
michael@0 173 movq xmm1, [rax] ; pred
michael@0 174 movq xmm3, [rax+rbx] ; pred -- next line
; advance src and pred by two rows
michael@0 175 lea rsi, [rsi + rdx*2]
michael@0 176 lea rax, [rax + rbx*2]
michael@0 177
; pack two 8-byte rows into one register: low qword = first row,
; high qword = next row
michael@0 178 punpcklqdq xmm0, xmm2
michael@0 179 punpcklqdq xmm1, xmm3
michael@0 180
; xmm2 keeps an unmodified copy of src for the compare
michael@0 181 movdqa xmm2, xmm0
michael@0 182 psubb xmm0, xmm1 ; subtraction with sign missed
michael@0 183
michael@0 184 pxor xmm1, xmm4 ;convert to signed values
michael@0 185 pxor xmm2, xmm4
; xmm1 = 0xFF in each byte where pred > src, else 0x00
michael@0 186 pcmpgtb xmm1, xmm2 ; obtain sign information
michael@0 187
; xmm0 = words of the first row, xmm2 = words of the next row
michael@0 188 movdqa xmm2, xmm0
michael@0 189 movdqa xmm3, xmm1
michael@0 190 punpcklbw xmm0, xmm1 ; put sign back to subtraction
michael@0 191 punpckhbw xmm2, xmm3 ; put sign back to subtraction
michael@0 192
michael@0 193 movdqa [rdi], xmm0 ; store difference
michael@0 194 movdqa [rdi +16], xmm2 ; store difference
; 2 rows x 8 words x 2 bytes = 32 bytes written per iteration
michael@0 195 add rdi, 32
michael@0 196 sub rcx, 1
michael@0 197 jnz .submbu_loop
michael@0 198
; Switch to the V plane. rdx (src_stride), rbx (pred_stride), xmm4 and
; the running output pointer rdi are carried over from the U loop.
michael@0 199 mov rsi, arg(2) ;vsrc
michael@0 200 mov rax, arg(5) ;vpred
michael@0 201 mov rcx, 4
michael@0 202
michael@0 203 ;v
michael@0 204 .submbv_loop:
; identical instruction sequence to .submbu_loop, applied to vsrc/vpred
michael@0 205 movq xmm0, [rsi] ; src
michael@0 206 movq xmm2, [rsi+rdx] ; src -- next line
michael@0 207 movq xmm1, [rax] ; pred
michael@0 208 movq xmm3, [rax+rbx] ; pred -- next line
michael@0 209 lea rsi, [rsi + rdx*2]
michael@0 210 lea rax, [rax + rbx*2]
michael@0 211
michael@0 212 punpcklqdq xmm0, xmm2
michael@0 213 punpcklqdq xmm1, xmm3
michael@0 214
michael@0 215 movdqa xmm2, xmm0
michael@0 216 psubb xmm0, xmm1 ; subtraction with sign missed
michael@0 217
michael@0 218 pxor xmm1, xmm4 ;convert to signed values
michael@0 219 pxor xmm2, xmm4
michael@0 220 pcmpgtb xmm1, xmm2 ; obtain sign information
michael@0 221
michael@0 222 movdqa xmm2, xmm0
michael@0 223 movdqa xmm3, xmm1
michael@0 224 punpcklbw xmm0, xmm1 ; put sign back to subtraction
michael@0 225 punpckhbw xmm2, xmm3 ; put sign back to subtraction
michael@0 226
michael@0 227 movdqa [rdi], xmm0 ; store difference
michael@0 228 movdqa [rdi +16], xmm2 ; store difference
michael@0 229 add rdi, 32
michael@0 230 sub rcx, 1
michael@0 231 jnz .submbv_loop
michael@0 232
; pops mirror the pushes above in reverse order: rbx, rdi, rsi
michael@0 233 pop rbx
michael@0 234 ; begin epilog
michael@0 235 pop rdi
michael@0 236 pop rsi
michael@0 237 RESTORE_GOT
michael@0 238 UNSHADOW_ARGS
michael@0 239 pop rbp
michael@0 240 ret
michael@0 241
; Read-only constant t80: sixteen bytes of 0x80, aligned to 16 bytes so
; it can be loaded with movdqa. The routines in this file XOR it into
; unsigned byte vectors before pcmpgtb, so that the signed byte compare
; orders the original unsigned values correctly.
michael@0 242 SECTION_RODATA
michael@0 243 align 16
michael@0 244 t80:
michael@0 245 times 16 db 0x80

mercurial