media/libvpx/vp8/common/x86/recon_mmx.asm

branch
TOR_BUG_9701
changeset 10
ac0c01689b40
equal deleted inserted replaced
-1:000000000000 0:1cc6d1a16565
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 %include "vpx_ports/x86_abi_support.asm"
13
14
;-----------------------------------------------------------------------
; void vp8_copy_mem8x8_mmx(
;     unsigned char *src,
;     int            src_stride,
;     unsigned char *dst,
;     int            dst_stride
; )
;
; Copies eight rows of eight bytes from src to dst. Successive source
; rows are src_stride bytes apart, successive destination rows are
; dst_stride bytes apart.
;
; Register roles:
;   rsi      source row pointer        (pushed on entry, popped on exit)
;   rdi      destination row pointer   (pushed on entry, popped on exit)
;   rax      src_stride, loaded with movsxd
;   rcx      dst_stride, loaded with movsxd
;   mm0-mm5  row data in flight (overwritten)
;
; No emms is executed in this routine.
;-----------------------------------------------------------------------
global sym(vp8_copy_mem8x8_mmx) PRIVATE
sym(vp8_copy_mem8x8_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    ; pick up all four arguments before touching pixel data
    mov         rsi, arg(0)                 ; src
    movsxd      rax, dword ptr arg(1)       ; src_stride
    mov         rdi, arg(2)                 ; dst
    movsxd      rcx, dword ptr arg(3)       ; dst_stride

    ; ---- rows 0..2 ----
    movq        mm0, [rsi]
    movq        mm1, [rsi+rax]
    movq        mm2, [rsi+rax*2]
    lea         rsi, [rsi+rax*2]
    add         rsi, rax                    ; rsi = src + 3*src_stride

    movq        [rdi], mm0
    movq        [rdi+rcx], mm1
    movq        [rdi+rcx*2], mm2
    lea         rdi, [rdi+rcx*2]
    add         rdi, rcx                    ; rdi = dst + 3*dst_stride

    ; ---- rows 3..5 ----
    movq        mm3, [rsi]
    movq        mm4, [rsi+rax]
    movq        mm5, [rsi+rax*2]
    lea         rsi, [rsi+rax*2]            ; rsi = src + 5*src_stride

    movq        [rdi], mm3
    movq        [rdi+rcx], mm4
    movq        [rdi+rcx*2], mm5
    lea         rdi, [rdi+rcx*2]            ; rdi = dst + 5*dst_stride

    ; ---- rows 6..7 (addressed relative to row 5) ----
    movq        mm0, [rsi+rax]
    movq        mm1, [rsi+rax*2]
    movq        [rdi+rcx], mm0
    movq        [rdi+rcx*2], mm1

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
76
77
;-----------------------------------------------------------------------
; void vp8_copy_mem8x4_mmx(
;     unsigned char *src,
;     int            src_stride,
;     unsigned char *dst,
;     int            dst_stride
; )
;
; Copies four rows of eight bytes from src to dst. Successive source
; rows are src_stride bytes apart, successive destination rows are
; dst_stride bytes apart.
;
; Register roles:
;   rsi      source row pointer        (pushed on entry, popped on exit)
;   rdi      destination row pointer   (pushed on entry, popped on exit)
;   rax      src_stride, loaded with movsxd
;   rcx      dst_stride, loaded with movsxd
;   mm0-mm3  row data in flight (overwritten)
;
; No emms is executed in this routine.
;-----------------------------------------------------------------------
global sym(vp8_copy_mem8x4_mmx) PRIVATE
sym(vp8_copy_mem8x4_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    ; pick up all four arguments before touching pixel data
    mov         rsi, arg(0)                 ; src
    movsxd      rax, dword ptr arg(1)       ; src_stride
    mov         rdi, arg(2)                 ; dst
    movsxd      rcx, dword ptr arg(3)       ; dst_stride

    ; ---- rows 0..2 ----
    movq        mm0, [rsi]
    movq        mm1, [rsi+rax]
    movq        mm2, [rsi+rax*2]
    lea         rsi, [rsi+rax*2]            ; rsi = src + 2*src_stride

    movq        [rdi], mm0
    movq        [rdi+rcx], mm1
    movq        [rdi+rcx*2], mm2
    lea         rdi, [rdi+rcx*2]            ; rdi = dst + 2*dst_stride

    ; ---- row 3 (one stride past row 2) ----
    movq        mm3, [rsi+rax]
    movq        [rdi+rcx], mm3

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
120
121
;-----------------------------------------------------------------------
; void vp8_copy_mem16x16_mmx(
;     unsigned char *src,
;     int            src_stride,
;     unsigned char *dst,
;     int            dst_stride
; )
;
; Copies sixteen rows of sixteen bytes from src to dst. Each row is
; moved as two 8-byte halves (offsets 0 and 8). Successive source rows
; are src_stride bytes apart, successive destination rows are
; dst_stride bytes apart.
;
; Register roles:
;   rsi      source row pointer        (pushed on entry, popped on exit)
;   rdi      destination row pointer   (pushed on entry, popped on exit)
;   rax      src_stride, loaded with movsxd
;   rcx      dst_stride, loaded with movsxd
;   mm0-mm2  low  8 bytes of the rows in flight (overwritten)
;   mm3-mm5  high 8 bytes of the rows in flight (overwritten)
;
; No emms is executed in this routine.
;-----------------------------------------------------------------------
global sym(vp8_copy_mem16x16_mmx) PRIVATE
sym(vp8_copy_mem16x16_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi, arg(0)                 ; src
    movsxd      rax, dword ptr arg(1)       ; src_stride

    mov         rdi, arg(2)                 ; dst
    movsxd      rcx, dword ptr arg(3)       ; dst_stride

    ; Rows 0..14: five passes of three rows each. The assembler expands
    ; this into straight-line code; there is no run-time loop.
%rep 5
    ; load three rows, low half then high half of each
    movq        mm0, [rsi]
    movq        mm3, [rsi+8]

    movq        mm1, [rsi+rax]
    movq        mm4, [rsi+rax+8]

    movq        mm2, [rsi+rax*2]
    movq        mm5, [rsi+rax*2+8]

    lea         rsi, [rsi+rax*2]
    add         rsi, rax                    ; rsi += 3*src_stride

    ; store the same three rows
    movq        [rdi], mm0
    movq        [rdi+8], mm3

    movq        [rdi+rcx], mm1
    movq        [rdi+rcx+8], mm4

    movq        [rdi+rcx*2], mm2
    movq        [rdi+rcx*2+8], mm5

    lea         rdi, [rdi+rcx*2]
    add         rdi, rcx                    ; rdi += 3*dst_stride
%endrep

    ; Row 15: the single row left over after 5 * 3 rows.
    movq        mm0, [rsi]
    movq        mm3, [rsi+8]

    movq        [rdi], mm0
    movq        [rdi+8], mm3

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret

mercurial