media/libvpx/vp8/encoder/x86/subtract_sse2.asm

branch
TOR_BUG_9701
changeset 15
b8a032363ba2
equal deleted inserted replaced
-1:000000000000 0:f8782857773b
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 %include "vpx_ports/x86_abi_support.asm"
13
;-----------------------------------------------------------------------
; void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
;                               short *diff, unsigned char *Predictor,
;                               int pitch);
;
; diff = z - Predictor over 4 rows of 4 pixels. Each row reads 4 bytes
; from both inputs, zero-extends them to 16-bit words and stores the
; 4 word differences.
;
; Row r is read from z + r*src_stride and Predictor + r*pitch and is
; written to diff + r*pitch shorts (r*pitch*2 bytes).
;
; Registers: rsi = z, rax = Predictor, rdi = diff, rdx = src_stride,
;            rcx = pitch (3*pitch for the last row), mm7 = zero,
;            mm0/mm1 = scratch.
;
; NOTE(review): only MMX registers are touched and no emms is issued
; here; presumably the caller resets the MMX/x87 state -- confirm.
;-----------------------------------------------------------------------

; SUBTRACT_B_ROW src_addr, pred_addr, diff_addr
;   One row: four words at diff_addr = four bytes at src_addr minus
;   four bytes at pred_addr. Needs mm7 == 0; clobbers mm0 and mm1.
%macro SUBTRACT_B_ROW 3
    movd        mm0, [%1]                   ; 4 source bytes
    movd        mm1, [%2]                   ; 4 predictor bytes
    punpcklbw   mm0, mm7                    ; bytes -> words (zero-extend)
    punpcklbw   mm1, mm7
    psubw       mm0, mm1                    ; word difference
    movq        MMWORD PTR [%3], mm0
%endmacro

global sym(vp8_subtract_b_sse2_impl) PRIVATE
sym(vp8_subtract_b_sse2_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                 ; z
    movsxd      rdx, dword ptr arg(1)       ; src_stride
    mov         rdi, arg(2)                 ; diff
    mov         rax, arg(3)                 ; Predictor
    movsxd      rcx, dword ptr arg(4)       ; pitch
    pxor        mm7, mm7                    ; zero, used for byte -> word

    SUBTRACT_B_ROW rsi,       rax,       rdi        ; row 0
    SUBTRACT_B_ROW rsi+rdx,   rax+rcx,   rdi+rcx*2  ; row 1
    SUBTRACT_B_ROW rsi+rdx*2, rax+rcx*2, rdi+rcx*4  ; row 2

    ; x86 addressing has no *3 scale, so rebase for the last row:
    ; rsi = z + 2*src_stride, rcx = 3*pitch.
    lea         rsi, [rsi+rdx*2]
    lea         rcx, [rcx+rcx*2]

    SUBTRACT_B_ROW rsi+rdx,   rax+rcx,   rdi+rcx*2  ; row 3

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
72
73
;-----------------------------------------------------------------------
; void vp8_subtract_mby_sse2(short *diff, unsigned char *src,
;                            int src_stride, unsigned char *pred,
;                            int pred_stride)
;
; diff = src - pred over 16 rows of 16 pixels, two rows per loop pass.
; Output rows are stored back to back: 16 shorts (32 bytes) per row.
;
; All 16-byte loads and stores use movdqa, so every src, pred and diff
; row address has to be 16-byte aligned.
;
; Registers: rdi = diff, rsi = src, rax = pred, rdx = src_stride,
;            rbx = pred_stride, rcx = row pairs remaining,
;            xmm4 = t80, xmm0-xmm3 and xmm5 = scratch.
;-----------------------------------------------------------------------

; MBY_WIDEN_DIFF src, pred, hi
;   In:  %1 = 16 source bytes, %2 = 16 predictor bytes,
;        xmm4 = 0x80 in every byte.
;   Out: %1 = word differences for bytes 0-7,
;        %3 = word differences for bytes 8-15.
;   Clobbers %2.
%macro MBY_WIDEN_DIFF 3
    movdqa      %3, %1                      ; keep a copy of the source
    psubb       %1, %2                      ; low 8 bits of src - pred
    pxor        %2, xmm4                    ; bias by 0x80 so the signed
    pxor        %3, xmm4                    ;   compare orders unsigned bytes
    pcmpgtb     %2, %3                      ; 0xFF where pred > src (negative diff)
    movdqa      %3, %1
    punpcklbw   %1, %2                      ; low byte = diff, high byte = sign
    punpckhbw   %3, %2
%endmacro

global sym(vp8_subtract_mby_sse2) PRIVATE
sym(vp8_subtract_mby_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(1)                 ; src
    mov         rax, arg(3)                 ; pred
    mov         rdi, arg(0)                 ; diff
    movsxd      rdx, dword ptr arg(2)       ; src_stride
    movdqa      xmm4, [GLOBAL(t80)]         ; load before rbx is reused below
    push        rbx
    movsxd      rbx, dword ptr arg(4)       ; pred_stride
    mov         rcx, 8                      ; 16 rows, two per pass

.two_rows:
    movdqa      xmm0, [rsi]                 ; first row: src
    movdqa      xmm1, [rax]                 ; first row: pred
    MBY_WIDEN_DIFF xmm0, xmm1, xmm2

    movdqa      xmm3, [rsi+rdx]             ; second row: src
    movdqa      xmm5, [rax+rbx]             ; second row: pred

    lea         rsi, [rsi+rdx*2]            ; step both inputs two rows
    lea         rax, [rax+rbx*2]

    movdqa      [rdi],    xmm0              ; first row, pixels 0-7
    movdqa      [rdi+16], xmm2              ; first row, pixels 8-15

    MBY_WIDEN_DIFF xmm3, xmm5, xmm1

    movdqa      [rdi+32], xmm3              ; second row, pixels 0-7
    movdqa      [rdi+48], xmm1              ; second row, pixels 8-15

    add         rdi, 64                     ; two output rows of 32 bytes
    dec         rcx
    jnz         .two_rows

    pop         rbx
    pop         rdi
    pop         rsi
    ; begin epilog
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
145
;-----------------------------------------------------------------------
; vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc,
;                        unsigned char *vsrc, int src_stride,
;                        unsigned char *upred, unsigned char *vpred,
;                        int pred_stride)
;
; Writes usrc - upred and then vsrc - vpred, each over 8 rows of
; 8 pixels, as 16-bit words. Output begins 256 shorts past diff; the
; U words come first and the V words follow immediately after them.
;
; Source and predictor rows are read with 8-byte movq loads; results
; are stored with movdqa, so diff has to be 16-byte aligned.
;
; Registers: rdi = output cursor, rsi = source plane, rax = predictor
;            plane, rdx = src_stride, rbx = pred_stride,
;            rcx = row pairs remaining, xmm4 = t80,
;            xmm0-xmm3 = scratch.
;-----------------------------------------------------------------------

; MBUV_ROW_PAIR
;   Handles two 8-pixel rows: reads them at rsi / rax, writes 16 word
;   differences at rdi, then advances rsi and rax by two rows and rdi
;   by 32 bytes. Needs xmm4 = 0x80 in every byte; clobbers xmm0-xmm3.
%macro MBUV_ROW_PAIR 0
    movq        xmm0, [rsi]                 ; src, row n
    movq        xmm2, [rsi+rdx]             ; src, row n+1
    movq        xmm1, [rax]                 ; pred, row n
    movq        xmm3, [rax+rbx]             ; pred, row n+1
    lea         rsi, [rsi+rdx*2]
    lea         rax, [rax+rbx*2]

    punpcklqdq  xmm0, xmm2                  ; both src rows in one register
    punpcklqdq  xmm1, xmm3                  ; both pred rows in one register

    movdqa      xmm2, xmm0
    psubb       xmm0, xmm1                  ; low 8 bits of src - pred

    pxor        xmm1, xmm4                  ; bias by 0x80 so the signed
    pxor        xmm2, xmm4                  ;   compare orders unsigned bytes
    pcmpgtb     xmm1, xmm2                  ; 0xFF where pred > src (negative diff)

    movdqa      xmm2, xmm0
    movdqa      xmm3, xmm1
    punpcklbw   xmm0, xmm1                  ; low byte = diff, high byte = sign
    punpckhbw   xmm2, xmm3

    movdqa      [rdi],    xmm0              ; row n
    movdqa      [rdi+16], xmm2              ; row n+1
    add         rdi, 32
%endmacro

global sym(vp8_subtract_mbuv_sse2) PRIVATE
sym(vp8_subtract_mbuv_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    movdqa      xmm4, [GLOBAL(t80)]         ; load before rbx is reused below
    mov         rdi, arg(0)                 ; diff
    add         rdi, 256*2                  ; skip 256 shorts
    mov         rsi, arg(1)                 ; usrc
    mov         rax, arg(4)                 ; upred
    movsxd      rdx, dword ptr arg(3)       ; src_stride
    push        rbx
    movsxd      rbx, dword ptr arg(6)       ; pred_stride

    ; U plane: 8 rows, two per pass
    mov         rcx, 4
.u_rows:
    MBUV_ROW_PAIR
    sub         rcx, 1
    jnz         .u_rows

    ; V plane: same strides, rdi keeps advancing from the end of U
    mov         rsi, arg(2)                 ; vsrc
    mov         rax, arg(5)                 ; vpred
    mov         rcx, 4
.v_rows:
    MBUV_ROW_PAIR
    sub         rcx, 1
    jnz         .v_rows

    pop         rbx
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
241
SECTION_RODATA
; t80: 0x80 in each of 16 bytes. It is fetched with movdqa through
; GLOBAL(t80), which is why it sits on a 16-byte boundary. XORing a
; register with it flips the top bit of every byte.
align 16
t80:    times 16 db 0x80

mercurial