|
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
|
10 |
|
11 |
|
12 %include "vpx_ports/x86_abi_support.asm" |
|
13 |
|
;-----------------------------------------------------------------------------
; void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
;                               short *diff, unsigned char *Predictor,
;                               int pitch);
;
; Writes diff = z - Predictor for a block 4 pixels wide and 4 rows high.
; Each 8-bit sample is zero-extended to 16 bits before the subtraction, so
; every output element is a signed 16-bit difference.
;
;   z          source pixels; consecutive rows are src_stride bytes apart
;   Predictor  prediction pixels; consecutive rows are pitch bytes apart
;   diff       output; consecutive rows are pitch shorts (pitch*2 bytes) apart
;
; Only xmm0-xmm2 are used as vector scratch. No MMX register is touched, so
; the x87/MMX state is left untouched and no emms is required afterwards.
;-----------------------------------------------------------------------------
global sym(vp8_subtract_b_sse2_impl) PRIVATE
sym(vp8_subtract_b_sse2_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rdi, arg(2)                 ; diff
    mov         rax, arg(3)                 ; Predictor
    mov         rsi, arg(0)                 ; z
    movsxd      rdx, dword ptr arg(1)       ; src_stride
    movsxd      rcx, dword ptr arg(4)       ; pitch
    pxor        xmm2, xmm2                  ; all zero: widens bytes to words

    ; row 0
    movd        xmm0, [rsi]                 ; 4 source bytes
    movd        xmm1, [rax]                 ; 4 predictor bytes
    punpcklbw   xmm0, xmm2
    punpcklbw   xmm1, xmm2
    psubw       xmm0, xmm1
    movq        [rdi], xmm0                 ; 4 shorts

    ; row 1
    movd        xmm0, [rsi+rdx]
    movd        xmm1, [rax+rcx]
    punpcklbw   xmm0, xmm2
    punpcklbw   xmm1, xmm2
    psubw       xmm0, xmm1
    movq        [rdi+rcx*2], xmm0           ; diff + 1*pitch shorts

    ; row 2
    movd        xmm0, [rsi+rdx*2]
    movd        xmm1, [rax+rcx*2]
    punpcklbw   xmm0, xmm2
    punpcklbw   xmm1, xmm2
    psubw       xmm0, xmm1
    movq        [rdi+rcx*4], xmm0           ; diff + 2*pitch shorts

    ; Re-base so that row 3 is reachable with the available address scales.
    lea         rsi, [rsi+rdx*2]            ; rsi = z + 2*src_stride
    lea         rcx, [rcx+rcx*2]            ; rcx = 3*pitch

    ; row 3
    movd        xmm0, [rsi+rdx]             ; z + 3*src_stride
    movd        xmm1, [rax+rcx]             ; Predictor + 3*pitch
    punpcklbw   xmm0, xmm2
    punpcklbw   xmm1, xmm2
    psubw       xmm0, xmm1
    movq        [rdi+rcx*2], xmm0           ; diff + 3*pitch shorts

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
|
72 |
|
73 |
|
;-----------------------------------------------------------------------------
; void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride,
;                            unsigned char *pred, int pred_stride)
;
; Writes diff = src - pred for a block 16 pixels wide and 16 rows high.
; Output rows are stored back to back (16 shorts = 32 bytes per row).
;
; Every vector load and store is a movdqa, so each accessed row of src and
; pred, as well as diff, must be 16-byte aligned.
;
; Method: psubb yields the low byte of each difference. The high byte is
; 0xff exactly when pred > src (unsigned). That condition is obtained by
; XORing both operands with 0x80 and using the signed compare pcmpgtb.
; Interleaving low bytes with that mask forms the signed 16-bit result.
;
; Register roles inside the loop:
;   rsi = src row, rax = pred row, rdi = diff write pointer
;   rdx = src_stride, rbx = pred_stride, rcx = row pairs remaining
;   xmm4 = 0x80 in every byte, xmm0-xmm3 and xmm5 = scratch
;-----------------------------------------------------------------------------
global sym(vp8_subtract_mby_sse2) PRIVATE
sym(vp8_subtract_mby_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ; Fetch the constant first: rbx is given a new role just below.
    movdqa      xmm4, [GLOBAL(t80)]
    push        rbx

    mov         rdi, arg(0)                 ; diff
    mov         rsi, arg(1)                 ; src
    mov         rax, arg(3)                 ; pred
    movsxd      rdx, dword ptr arg(2)       ; src_stride
    movsxd      rbx, dword ptr arg(4)       ; pred_stride
    mov         rcx, 8                      ; 8 iterations, two rows each

.row_pair:
    ; Load both rows up front, then step the input pointers two rows.
    movdqa      xmm0, [rsi]                 ; src,  first row
    movdqa      xmm1, [rax]                 ; pred, first row
    movdqa      xmm2, [rsi+rdx]             ; src,  second row
    movdqa      xmm3, [rax+rbx]             ; pred, second row
    lea         rsi, [rsi+rdx*2]
    lea         rax, [rax+rbx*2]

    ; ---- first row ----
    movdqa      xmm5, xmm0
    psubb       xmm0, xmm1                  ; low bytes of src - pred
    pxor        xmm5, xmm4                  ; src  with sign bit flipped
    pxor        xmm1, xmm4                  ; pred with sign bit flipped
    pcmpgtb     xmm1, xmm5                  ; 0xff where pred > src
    movdqa      xmm5, xmm0
    punpcklbw   xmm0, xmm1                  ; pixels 0..7  as shorts
    punpckhbw   xmm5, xmm1                  ; pixels 8..15 as shorts
    movdqa      [rdi], xmm0
    movdqa      [rdi+16], xmm5

    ; ---- second row ----
    movdqa      xmm5, xmm2
    psubb       xmm2, xmm3
    pxor        xmm5, xmm4
    pxor        xmm3, xmm4
    pcmpgtb     xmm3, xmm5
    movdqa      xmm5, xmm2
    punpcklbw   xmm2, xmm3
    punpckhbw   xmm5, xmm3
    movdqa      [rdi+32], xmm2
    movdqa      [rdi+48], xmm5

    add         rdi, 64                     ; two output rows written
    dec         rcx
    jnz         .row_pair

    ; begin epilog
    pop         rbx
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
|
145 |
|
;-----------------------------------------------------------------------------
; SUBTRACT_CHROMA_8X8 src_arg, pred_arg
;
; Private helper for vp8_subtract_mbuv_sse2: subtracts one plane that is
; 8 pixels wide and 8 rows high, two rows per iteration.
;
;   %1  index of the stack argument holding the source plane pointer
;   %2  index of the stack argument holding the prediction plane pointer
;
; On entry: rdi = output pointer, rdx = src_stride, rbx = pred_stride,
;           xmm4 = 0x80 in every byte.
; On exit:  rdi has advanced by 128 bytes (64 shorts).
; Clobbers: rsi, rax, rcx, xmm0-xmm3, flags.
;-----------------------------------------------------------------------------
%macro SUBTRACT_CHROMA_8X8 2
    mov         rsi, arg(%1)                ; source plane
    mov         rax, arg(%2)                ; prediction plane
    mov         rcx, 4                      ; 4 iterations, two rows each

%%row_pair:
    movq        xmm0, [rsi]                 ; src
    movq        xmm2, [rsi+rdx]             ; src -- next line
    movq        xmm1, [rax]                 ; pred
    movq        xmm3, [rax+rbx]             ; pred -- next line
    lea         rsi, [rsi+rdx*2]
    lea         rax, [rax+rbx*2]

    punpcklqdq  xmm0, xmm2                  ; two src rows in one register
    punpcklqdq  xmm1, xmm3                  ; two pred rows in one register

    movdqa      xmm2, xmm0
    psubb       xmm0, xmm1                  ; low bytes of src - pred

    pxor        xmm1, xmm4                  ; flip sign bits so the signed
    pxor        xmm2, xmm4                  ; compare orders unsigned bytes
    pcmpgtb     xmm1, xmm2                  ; 0xff where pred > src

    movdqa      xmm2, xmm0
    punpcklbw   xmm0, xmm1                  ; first row as shorts
    punpckhbw   xmm2, xmm1                  ; second row as shorts

    movdqa      [rdi], xmm0                 ; store difference
    movdqa      [rdi+16], xmm2              ; store difference
    add         rdi, 32
    sub         rcx, 1
    jnz         %%row_pair
%endmacro

;-----------------------------------------------------------------------------
; vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc,
;                        int src_stride, unsigned char *upred,
;                        unsigned char *vpred, int pred_stride)
;
; Writes the chroma residual of a macroblock: usrc - upred followed directly
; by vsrc - vpred, each plane 8 pixels wide and 8 rows high.
;
; Output begins 256 shorts into diff. Each plane occupies 64 shorts, with
; rows stored back to back. The stores are movdqa, so diff must be 16-byte
; aligned. The source and prediction rows are read with movq and have no
; alignment requirement.
;-----------------------------------------------------------------------------
global sym(vp8_subtract_mbuv_sse2) PRIVATE
sym(vp8_subtract_mbuv_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ; Fetch the constant first: rbx is given a new role just below.
    movdqa      xmm4, [GLOBAL(t80)]
    mov         rdi, arg(0)                 ; diff
    movsxd      rdx, dword ptr arg(3)       ; src_stride
    add         rdi, 256*2                  ; diff = diff + 256 (shorts)
    push        rbx
    movsxd      rbx, dword ptr arg(6)       ; pred_stride

    ; u: usrc is argument 1, upred is argument 4
    SUBTRACT_CHROMA_8X8 1, 4

    ; v: vsrc is argument 2, vpred is argument 5
    SUBTRACT_CHROMA_8X8 2, 5

    pop         rbx
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
|
241 |
|
SECTION_RODATA
align 16
; 0x80 in each of the 16 byte lanes. The subtract routines XOR unsigned
; pixel bytes with this mask before pcmpgtb, which turns the signed byte
; compare into an unsigned ordering test.
t80:
    times 4 dd 0x80808080