media/libvpx/vp8/encoder/x86/subtract_mmx.asm

branch
TOR_BUG_9701
changeset 10
ac0c01689b40
equal deleted inserted replaced
-1:000000000000 0:6538eeb640e1
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 %include "vpx_ports/x86_abi_support.asm"
13
;-----------------------------------------------------------------------
; void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride,
;                              short *diff, unsigned char *Predictor,
;                              int pitch);
;
; Stores diff = z - Predictor for a block of 4 rows x 4 pixels.
; Every row loads 4 bytes from each input, widens them to 16-bit words
; by interleaving with zero, subtracts word-wise and stores the 4
; resulting words (8 bytes) to diff.
;
; Row steps: z moves by src_stride bytes, Predictor by pitch bytes and
; diff by pitch shorts (pitch*2 bytes).
;
; Register roles:
;   rsi = z            rax = Predictor      rdi = diff
;   rdx = src_stride   rcx = pitch (replaced by 3*pitch for the last row)
;   mm7 = 0            mm0/mm1 = scratch
; rsi and rdi are saved and restored here; rax, rcx, rdx, mm0, mm1 and
; mm7 are overwritten. No emms is executed in this routine.
;-----------------------------------------------------------------------

; SUBTRACT_B_ROW src, pred, dst
;   src/pred/dst are bracketed memory operands of a single 4-pixel row.
;   Requires mm7 == 0; overwrites mm0 and mm1.
%macro SUBTRACT_B_ROW 3
        movd        mm0, %1                     ; 4 source pixels
        movd        mm1, %2                     ; 4 predictor pixels
        punpcklbw   mm0, mm7                    ; bytes -> words
        punpcklbw   mm1, mm7
        psubw       mm0, mm1                    ; source - predictor
        movq        %3, mm0                     ; 4 shorts out
%endmacro

global sym(vp8_subtract_b_mmx_impl) PRIVATE
sym(vp8_subtract_b_mmx_impl):
        push        rbp
        mov         rbp, rsp
        SHADOW_ARGS_TO_STACK 5
        push        rsi
        push        rdi
        ; end prolog

        mov         rdi, arg(2)                 ; diff
        mov         rax, arg(3)                 ; Predictor
        mov         rsi, arg(0)                 ; z
        movsxd      rdx, dword ptr arg(1)       ; src_stride
        movsxd      rcx, dword ptr arg(4)       ; pitch
        pxor        mm7, mm7                    ; zero for byte->word widening

        ; rows 0..2 are reachable with the x1/x2/x4 index scales
        SUBTRACT_B_ROW [rsi], [rax], [rdi]
        SUBTRACT_B_ROW [rsi+rdx], [rax+rcx], [rdi+rcx*2]
        SUBTRACT_B_ROW [rsi+rdx*2], [rax+rcx*2], [rdi+rcx*4]

        ; row 3: step z down two rows and triple rcx so the row-1 style
        ; operands now address z + 3*src_stride, Predictor + 3*pitch and
        ; diff + 3*pitch shorts
        lea         rsi, [rsi+rdx*2]
        lea         rcx, [rcx+rcx*2]
        SUBTRACT_B_ROW [rsi+rdx], [rax+rcx], [rdi+rcx*2]

        ; begin epilog
        pop         rdi
        pop         rsi
        UNSHADOW_ARGS
        pop         rbp
        ret
75
;-----------------------------------------------------------------------
; void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride,
;                           unsigned char *pred, int pred_stride)
;
; Stores diff = src - pred for 16 rows of 16 pixels.
; Each row is handled as two groups of 8 pixels; every group is widened
; from bytes to 16-bit words against a zero register and subtracted
; word-wise. diff is written contiguously (32 bytes = 16 shorts per
; row), while src and pred step by src_stride and pred_stride bytes.
;
; Register roles:
;   rdi = diff         rsi = src          rax = pred
;   rdx = src_stride   rbx = pred_stride  rcx = rows remaining
;   mm0 = 0            mm1..mm4 = scratch
; rsi, rdi and rbx are saved and restored here; rax, rcx, rdx and
; mm0..mm4 are overwritten. No emms is executed in this routine.
;-----------------------------------------------------------------------

; SUBTRACT_MBY_8PX src, pred, dst_lo, dst_hi
;   src/pred   : bracketed operands of 8 source / predictor bytes
;   dst_lo/hi  : bracketed operands receiving pixels 0-3 / 4-7 as shorts
;   Requires mm0 == 0; overwrites mm1..mm4.
%macro SUBTRACT_MBY_8PX 4
        movq        mm1, %1
        movq        mm3, %2

        movq        mm2, mm1                    ; copies for the high halves
        movq        mm4, mm3

        punpcklbw   mm1, mm0                    ; low 4 bytes -> words
        punpcklbw   mm3, mm0

        punpckhbw   mm2, mm0                    ; high 4 bytes -> words
        punpckhbw   mm4, mm0

        psubw       mm1, mm3                    ; src - pred
        psubw       mm2, mm4

        movq        %3, mm1
        movq        %4, mm2
%endmacro

global sym(vp8_subtract_mby_mmx) PRIVATE
sym(vp8_subtract_mby_mmx):
        push        rbp
        mov         rbp, rsp
        SHADOW_ARGS_TO_STACK 5
        push        rsi
        push        rdi
        ; end prolog

        mov         rdi, arg(0)                 ; diff
        mov         rsi, arg(1)                 ; src
        movsxd      rdx, dword ptr arg(2)       ; src_stride
        mov         rax, arg(3)                 ; pred
        push        rbx                         ; rbx carries pred_stride below
        movsxd      rbx, dword ptr arg(4)       ; pred_stride

        pxor        mm0, mm0                    ; zero for byte->word widening
        mov         rcx, 16                     ; row counter

.next_row:
        SUBTRACT_MBY_8PX [rsi], [rax], [rdi], [rdi+8]           ; pixels 0-7
        SUBTRACT_MBY_8PX [rsi+8], [rax+8], [rdi+16], [rdi+24]   ; pixels 8-15

        add         rdi, 32                     ; 16 shorts written per row
        lea         rax, [rax+rbx]              ; pred += pred_stride
        lea         rsi, [rsi+rdx]              ; src  += src_stride
        dec         rcx
        jnz         .next_row

        ; begin epilog
        pop         rbx
        pop         rdi
        pop         rsi
        UNSHADOW_ARGS
        pop         rbp
        ret
147
148
;-----------------------------------------------------------------------
; vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc,
;                       int src_stride, unsigned char *upred,
;                       unsigned char *vpred, int pred_stride)
;
; Stores the two 8x8 chroma differences into diff, starting 256 shorts
; past the pointer passed in:
;   first  8 rows x 8 pixels of (usrc - upred),
;   then   8 rows x 8 pixels of (vsrc - vpred), written directly after.
; Each row is widened from bytes to 16-bit words against a zero
; register and subtracted word-wise (16 bytes of output per row).
; The same src_stride is applied to usrc and vsrc, and the same
; pred_stride to upred and vpred.
;
; Register roles:
;   rdi = write cursor in diff    rsi = current source plane row
;   rax = current predictor row   rdx = src_stride
;   rbx = pred_stride             rcx = rows remaining
;   mm7 = 0                       mm0, mm1, mm3, mm4 = scratch
; rsi, rdi and rbx are saved and restored here; rax, rcx, rdx and the
; MMX registers above are overwritten. No emms is executed here.
;-----------------------------------------------------------------------

; SUBTRACT_MBUV_ROW
;   Processes one 8-pixel row at [rsi] / [rax], stores 8 shorts at
;   [rdi], then advances rdi by 16 bytes, rsi by rdx and rax by rbx.
;   Requires mm7 == 0; overwrites mm0, mm1, mm3 and mm4.
%macro SUBTRACT_MBUV_ROW 0
        movq        mm0, [rsi]
        movq        mm1, [rax]
        movq        mm3, mm0                    ; copies for the high halves
        movq        mm4, mm1
        punpcklbw   mm0, mm7                    ; low 4 bytes -> words
        punpcklbw   mm1, mm7
        punpckhbw   mm3, mm7                    ; high 4 bytes -> words
        punpckhbw   mm4, mm7
        psubw       mm0, mm1                    ; src - pred
        psubw       mm3, mm4
        movq        [rdi], mm0
        movq        [rdi+8], mm3
        add         rdi, 16
        add         rsi, rdx
        add         rax, rbx
%endmacro

global sym(vp8_subtract_mbuv_mmx) PRIVATE
sym(vp8_subtract_mbuv_mmx):
        push        rbp
        mov         rbp, rsp
        SHADOW_ARGS_TO_STACK 7
        push        rsi
        push        rdi
        ; end prolog

        mov         rdi, arg(0)                 ; diff
        mov         rsi, arg(1)                 ; usrc
        movsxd      rdx, dword ptr arg(3)       ; src_stride
        mov         rax, arg(4)                 ; upred
        add         rdi, 256*2                  ; diff = diff + 256 (shorts)
        mov         rcx, 8                      ; U rows
        push        rbx                         ; rbx carries pred_stride below
        movsxd      rbx, dword ptr arg(6)       ; pred_stride

        pxor        mm7, mm7                    ; zero for byte->word widening

.u_row:
        SUBTRACT_MBUV_ROW

        dec         rcx
        jnz         .u_row

        ; V plane: rdi keeps running, so its output follows the U output
        mov         rsi, arg(2)                 ; vsrc
        mov         rax, arg(5)                 ; vpred
        mov         rcx, 8                      ; V rows

.v_row:
        SUBTRACT_MBUV_ROW

        dec         rcx
        jnz         .v_row

        ; begin epilog
        pop         rbx
        pop         rdi
        pop         rsi
        UNSHADOW_ARGS
        pop         rbp
        ret

mercurial