;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"


;void vp8_copy_mem8x8_mmx(
;    unsigned char *src,
;    int src_stride,
;    unsigned char *dst,
;    int dst_stride
;    )
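; Copies an 8x8 block of unsigned chars from src to dst, one 8-byte MMX
; movq load/store per row; rows are addressed as base, base+stride and
; base+2*stride before the row pointers are advanced.
; Minimal C-side call sketch (assuming the prototype above):
;     vp8_copy_mem8x8_mmx(src, src_stride, dst, dst_stride);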
global sym(vp8_copy_mem8x8_mmx) PRIVATE
sym(vp8_copy_mem8x8_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog
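    ; the 8 source rows are copied below with loads and stores interleaved
    ; (presumably to help instruction scheduling)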

    mov         rsi, arg(0) ;src;
    movq        mm0, [rsi]

    movsxd      rax, dword ptr arg(1) ;src_stride;
    mov         rdi, arg(2) ;dst;

    movq        mm1, [rsi+rax]
    movq        mm2, [rsi+rax*2]

    movsxd      rcx, dword ptr arg(3) ;dst_stride
    lea         rsi, [rsi+rax*2]

    movq        [rdi], mm0
    add         rsi, rax

    movq        [rdi+rcx], mm1
    movq        [rdi+rcx*2], mm2

    lea         rdi, [rdi+rcx*2]
    movq        mm3, [rsi]

    add         rdi, rcx
    movq        mm4, [rsi+rax]

    movq        mm5, [rsi+rax*2]
    movq        [rdi], mm3

    lea         rsi, [rsi+rax*2]
    movq        [rdi+rcx], mm4

    movq        [rdi+rcx*2], mm5
    lea         rdi, [rdi+rcx*2]

    movq        mm0, [rsi+rax]
    movq        mm1, [rsi+rax*2]

    movq        [rdi+rcx], mm0
    movq        [rdi+rcx*2], mm1

    ; begin epilog
    pop rdi
    pop rsi
    UNSHADOW_ARGS
    pop rbp
    ret


;void vp8_copy_mem8x4_mmx(
;    unsigned char *src,
;    int src_stride,
;    unsigned char *dst,
;    int dst_stride
;    )
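; Copies an 8x4 block of unsigned chars from src to dst, one 8-byte MMX
; movq load/store per row.
; Minimal C-side call sketch (assuming the prototype above):
;     vp8_copy_mem8x4_mmx(src, src_stride, dst, dst_stride);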
global sym(vp8_copy_mem8x4_mmx) PRIVATE
sym(vp8_copy_mem8x4_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0) ;src;
    movq        mm0, [rsi]

    movsxd      rax, dword ptr arg(1) ;src_stride;
    mov         rdi, arg(2) ;dst;

    movq        mm1, [rsi+rax]
    movq        mm2, [rsi+rax*2]

    movsxd      rcx, dword ptr arg(3) ;dst_stride
    lea         rsi, [rsi+rax*2]

    movq        [rdi], mm0
    movq        [rdi+rcx], mm1

    movq        [rdi+rcx*2], mm2
    lea         rdi, [rdi+rcx*2]

    movq        mm3, [rsi+rax]
    movq        [rdi+rcx], mm3

    ; begin epilog
    pop rdi
    pop rsi
    UNSHADOW_ARGS
    pop rbp
    ret


;void vp8_copy_mem16x16_mmx(
;    unsigned char *src,
;    int src_stride,
;    unsigned char *dst,
;    int dst_stride
;    )
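; Copies a 16x16 block of unsigned chars from src to dst; each 16-byte row
; is moved as two 8-byte MMX movq transfers, fully unrolled in groups of
; three rows.
; Minimal C-side call sketch (assuming the prototype above):
;     vp8_copy_mem16x16_mmx(src, src_stride, dst, dst_stride);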
global sym(vp8_copy_mem16x16_mmx) PRIVATE
sym(vp8_copy_mem16x16_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0) ;src;
    movsxd      rax, dword ptr arg(1) ;src_stride;

    mov         rdi, arg(2) ;dst;
    movsxd      rcx, dword ptr arg(3) ;dst_stride

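    ; rows 0 - 2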
    movq        mm0, [rsi]
    movq        mm3, [rsi+8]

    movq        mm1, [rsi+rax]
    movq        mm4, [rsi+rax+8]

    movq        mm2, [rsi+rax*2]
    movq        mm5, [rsi+rax*2+8]

    lea         rsi, [rsi+rax*2]
    add         rsi, rax

    movq        [rdi], mm0
    movq        [rdi+8], mm3

    movq        [rdi+rcx], mm1
    movq        [rdi+rcx+8], mm4

    movq        [rdi+rcx*2], mm2
    movq        [rdi+rcx*2+8], mm5

    lea         rdi, [rdi+rcx*2]
    add         rdi, rcx

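    ; rows 3 - 5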
    movq        mm0, [rsi]
    movq        mm3, [rsi+8]

    movq        mm1, [rsi+rax]
    movq        mm4, [rsi+rax+8]

    movq        mm2, [rsi+rax*2]
    movq        mm5, [rsi+rax*2+8]

    lea         rsi, [rsi+rax*2]
    add         rsi, rax

    movq        [rdi], mm0
    movq        [rdi+8], mm3

    movq        [rdi+rcx], mm1
    movq        [rdi+rcx+8], mm4

    movq        [rdi+rcx*2], mm2
    movq        [rdi+rcx*2+8], mm5

    lea         rdi, [rdi+rcx*2]
    add         rdi, rcx

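    ; rows 6 - 8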
    movq        mm0, [rsi]
    movq        mm3, [rsi+8]

    movq        mm1, [rsi+rax]
    movq        mm4, [rsi+rax+8]

    movq        mm2, [rsi+rax*2]
    movq        mm5, [rsi+rax*2+8]

    lea         rsi, [rsi+rax*2]
    add         rsi, rax

    movq        [rdi], mm0
    movq        [rdi+8], mm3

    movq        [rdi+rcx], mm1
    movq        [rdi+rcx+8], mm4

    movq        [rdi+rcx*2], mm2
    movq        [rdi+rcx*2+8], mm5

    lea         rdi, [rdi+rcx*2]
    add         rdi, rcx

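    ; rows 9 - 11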
    movq        mm0, [rsi]
    movq        mm3, [rsi+8]

    movq        mm1, [rsi+rax]
    movq        mm4, [rsi+rax+8]

    movq        mm2, [rsi+rax*2]
    movq        mm5, [rsi+rax*2+8]

    lea         rsi, [rsi+rax*2]
    add         rsi, rax

    movq        [rdi], mm0
    movq        [rdi+8], mm3

    movq        [rdi+rcx], mm1
    movq        [rdi+rcx+8], mm4

    movq        [rdi+rcx*2], mm2
    movq        [rdi+rcx*2+8], mm5

    lea         rdi, [rdi+rcx*2]
    add         rdi, rcx

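    ; rows 12 - 14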
    movq        mm0, [rsi]
    movq        mm3, [rsi+8]

    movq        mm1, [rsi+rax]
    movq        mm4, [rsi+rax+8]

    movq        mm2, [rsi+rax*2]
    movq        mm5, [rsi+rax*2+8]

    lea         rsi, [rsi+rax*2]
    add         rsi, rax

    movq        [rdi], mm0
    movq        [rdi+8], mm3

    movq        [rdi+rcx], mm1
    movq        [rdi+rcx+8], mm4

    movq        [rdi+rcx*2], mm2
    movq        [rdi+rcx*2+8], mm5

    lea         rdi, [rdi+rcx*2]
    add         rdi, rcx

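    ; row 15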
    movq        mm0, [rsi]
    movq        mm3, [rsi+8]

    movq        [rdi], mm0
    movq        [rdi+8], mm3

    ; begin epilog
    pop rdi
    pop rsi
    UNSHADOW_ARGS
    pop rbp
    ret