;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
|
10 |
|
11 |
|
12 %include "vpx_ports/x86_abi_support.asm" |
|
13 |
|
;-----------------------------------------------------------------------------
; void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride,
;                              short *diff, unsigned char *Predictor,
;                              int pitch);
;
; Subtracts a 4x4 block of Predictor bytes from a 4x4 block of z bytes and
; stores the differences as 16-bit words in diff.
;
;   z          source rows, src_stride bytes apart; 4 bytes are read per row
;   Predictor  prediction rows, pitch bytes apart; 4 bytes are read per row
;   diff       output rows, pitch words (pitch*2 bytes) apart; 4 words
;              (8 bytes) are written per row
;
; Uses mm0, mm1 and mm7. No emms is executed before returning.
;-----------------------------------------------------------------------------
global sym(vp8_subtract_b_mmx_impl) PRIVATE
sym(vp8_subtract_b_mmx_impl):
    push        rbp
    mov         rbp,            rsp
    SHADOW_ARGS_TO_STACK 5
    push        rsi
    push        rdi
    ; end prolog

    pxor        mm7,            mm7                 ; zero: high byte of every unpacked word

    mov         rsi,            arg(0)              ; z
    movsxd      rdx,            dword ptr arg(1)    ; src_stride
    mov         rdi,            arg(2)              ; diff
    mov         rax,            arg(3)              ; Predictor
    movsxd      rcx,            dword ptr arg(4)    ; pitch

    ; row 0
    movd        mm1,            [rax]
    movd        mm0,            [rsi]
    punpcklbw   mm1,            mm7                 ; prediction bytes -> words
    punpcklbw   mm0,            mm7                 ; source bytes     -> words
    psubw       mm0,            mm1
    movq        [rdi],          mm0

    ; row 1
    movd        mm1,            [rax+rcx]
    movd        mm0,            [rsi+rdx]
    punpcklbw   mm1,            mm7
    punpcklbw   mm0,            mm7
    psubw       mm0,            mm1
    movq        [rdi+rcx*2],    mm0                 ; diff row stride is pitch words

    ; row 2
    movd        mm1,            [rax+rcx*2]
    movd        mm0,            [rsi+rdx*2]
    punpcklbw   mm1,            mm7
    punpcklbw   mm0,            mm7
    psubw       mm0,            mm1
    movq        [rdi+rcx*4],    mm0

    ; A factor of 3 cannot be encoded as an address scale, so for row 3
    ; fold two source rows into rsi and turn rcx into 3*pitch.
    lea         rcx,            [rcx+rcx*2]         ; rcx = 3*pitch
    lea         rsi,            [rsi+rdx*2]         ; rsi = z + 2*src_stride

    ; row 3
    movd        mm1,            [rax+rcx]           ; Predictor + 3*pitch
    movd        mm0,            [rsi+rdx]           ; z + 3*src_stride
    punpcklbw   mm1,            mm7
    punpcklbw   mm0,            mm7
    psubw       mm0,            mm1
    movq        [rdi+rcx*2],    mm0                 ; diff + 3*pitch words

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret

;-----------------------------------------------------------------------------
; void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride,
;                           unsigned char *pred, int pred_stride)
;
; Subtracts a 16x16 block of pred bytes from a 16x16 block of src bytes and
; stores the differences as 16-bit words in diff.
;
;   diff         output; rows are written back to back, 16 words (32 bytes)
;                per row, 16 rows in total
;   src          source rows, src_stride bytes apart; 16 bytes read per row
;   pred         prediction rows, pred_stride bytes apart; 16 bytes read
;                per row
;
; Uses mm0-mm4. No emms is executed before returning.
;-----------------------------------------------------------------------------
global sym(vp8_subtract_mby_mmx) PRIVATE
sym(vp8_subtract_mby_mmx):
    push        rbp
    mov         rbp,            rsp
    SHADOW_ARGS_TO_STACK 5
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    pxor        mm0,            mm0                 ; zero: high byte of every unpacked word

    mov         rdi,            arg(0)              ; diff
    mov         rsi,            arg(1)              ; src
    movsxd      rdx,            dword ptr arg(2)    ; src_stride
    mov         rax,            arg(3)              ; pred
    movsxd      rbx,            dword ptr arg(4)    ; pred_stride

    mov         rcx,            16                  ; rows remaining

.row_loop:
    ; pixels 0-7 of this row
    movq        mm3,            [rax]
    movq        mm1,            [rsi]
    movq        mm4,            mm3
    movq        mm2,            mm1

    punpckhbw   mm4,            mm0                 ; pred 4-7 -> words
    punpckhbw   mm2,            mm0                 ; src  4-7 -> words
    punpcklbw   mm3,            mm0                 ; pred 0-3 -> words
    punpcklbw   mm1,            mm0                 ; src  0-3 -> words

    psubw       mm2,            mm4
    psubw       mm1,            mm3

    movq        [rdi],          mm1
    movq        [rdi+8],        mm2

    ; pixels 8-15 of this row
    movq        mm3,            [rax+8]
    movq        mm1,            [rsi+8]
    movq        mm4,            mm3
    movq        mm2,            mm1

    punpckhbw   mm4,            mm0                 ; pred 12-15 -> words
    punpckhbw   mm2,            mm0                 ; src  12-15 -> words
    punpcklbw   mm3,            mm0                 ; pred 8-11  -> words
    punpcklbw   mm1,            mm0                 ; src  8-11  -> words

    psubw       mm2,            mm4
    psubw       mm1,            mm3

    movq        [rdi+16],       mm1
    movq        [rdi+24],       mm2

    ; step to the next row; the flags set here are dead, dec rewrites ZF
    add         rsi,            rdx
    add         rax,            rbx
    add         rdi,            32                  ; 16 words written per row
    dec         rcx
    jnz         .row_loop

    ; begin epilog
    pop         rbx
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret


;-----------------------------------------------------------------------------
; vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc,
;                       int src_stride, unsigned char *upred,
;                       unsigned char *vpred, int pred_stride)
;
; Subtracts two 8x8 prediction blocks from two 8x8 source blocks (U, then V)
; and stores the differences as 16-bit words in diff.
;
;   diff          output base; writing starts 256 words past it. The U rows
;                 are written back to back (8 words per row, 8 rows), and
;                 the V rows follow immediately after them.
;   usrc, vsrc    source rows, src_stride bytes apart; 8 bytes read per row
;   upred, vpred  prediction rows, pred_stride bytes apart; 8 bytes read
;                 per row
;
; Uses mm0, mm1, mm3, mm4 and mm7. No emms is executed before returning.
;-----------------------------------------------------------------------------

global sym(vp8_subtract_mbuv_mmx) PRIVATE
sym(vp8_subtract_mbuv_mmx):
    push        rbp
    mov         rbp,            rsp
    SHADOW_ARGS_TO_STACK 7
    push        rsi
    push        rdi
    push        rbx
    ; end prolog

    pxor        mm7,            mm7                 ; zero: high byte of every unpacked word

    mov         rdi,            arg(0)              ; diff
    mov         rsi,            arg(1)              ; usrc
    movsxd      rdx,            dword ptr arg(3)    ; src_stride
    mov         rax,            arg(4)              ; upred
    movsxd      rbx,            dword ptr arg(6)    ; pred_stride

    add         rdi,            256*2               ; skip the first 256 words of diff
    mov         rcx,            8                   ; U rows remaining

.u_row_loop:
    movq        mm1,            [rax]
    movq        mm0,            [rsi]
    movq        mm4,            mm1
    movq        mm3,            mm0

    punpckhbw   mm4,            mm7                 ; upred 4-7 -> words
    punpckhbw   mm3,            mm7                 ; usrc  4-7 -> words
    punpcklbw   mm1,            mm7                 ; upred 0-3 -> words
    punpcklbw   mm0,            mm7                 ; usrc  0-3 -> words

    psubw       mm3,            mm4
    psubw       mm0,            mm1

    movq        [rdi],          mm0
    movq        [rdi+8],        mm3

    add         rax,            rbx
    add         rsi,            rdx
    add         rdi,            16                  ; 8 words written per row

    dec         rcx
    jnz         .u_row_loop

    ; V pass: rdi keeps advancing, so V lands right after the U output
    mov         rax,            arg(5)              ; vpred
    mov         rsi,            arg(2)              ; vsrc
    mov         rcx,            8                   ; V rows remaining

.v_row_loop:
    movq        mm1,            [rax]
    movq        mm0,            [rsi]
    movq        mm4,            mm1
    movq        mm3,            mm0

    punpckhbw   mm4,            mm7                 ; vpred 4-7 -> words
    punpckhbw   mm3,            mm7                 ; vsrc  4-7 -> words
    punpcklbw   mm1,            mm7                 ; vpred 0-3 -> words
    punpcklbw   mm0,            mm7                 ; vsrc  0-3 -> words

    psubw       mm3,            mm4
    psubw       mm0,            mm1

    movq        [rdi],          mm0
    movq        [rdi+8],        mm3

    add         rax,            rbx
    add         rsi,            rdx
    add         rdi,            16

    dec         rcx
    jnz         .v_row_loop

    ; begin epilog
    pop         rbx
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret