/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "vpx_config.h"
#include "vp8/common/variance.h"
#include "vp8/common/pragmas.h"
#include "vpx_ports/mem.h"
#include "vp8/common/x86/filter_x86.h"

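/* Prototypes for the low-level MMX helpers defined outside this file.
 * The vp8_get8x8var/vp8_get4x4var helpers return the sum of squared
 * differences in *SSE and the sum of differences in *Sum for one block;
 * the vp8_filter_block2d_bil*_var helpers interpolate the first block
 * with the given horizontal/vertical bilinear taps and accumulate the
 * same two statistics against the second block.
 */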
extern void filter_block1d_h6_mmx
(
    const unsigned char *src_ptr,
    unsigned short *output_ptr,
    unsigned int src_pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *filter
);
extern void filter_block1d_v6_mmx
(
    const short *src_ptr,
    unsigned char *output_ptr,
    unsigned int pixels_per_line,
    unsigned int pixel_step,
    unsigned int output_height,
    unsigned int output_width,
    short *filter
);

extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr);
extern unsigned int vp8_get8x8var_mmx
(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *SSE,
    int *Sum
);
extern unsigned int vp8_get4x4var_mmx
(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *SSE,
    int *Sum
);
extern void vp8_filter_block2d_bil4x4_var_mmx
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    const short *HFilter,
    const short *VFilter,
    int *sum,
    unsigned int *sumsquared
);
extern void vp8_filter_block2d_bil_var_mmx
(
    const unsigned char *ref_ptr,
    int ref_pixels_per_line,
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    unsigned int Height,
    const short *HFilter,
    const short *VFilter,
    int *sum,
    unsigned int *sumsquared
);

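/* Each variance routine below combines the helper results as
 *   variance = SSE - (Sum * Sum) / N,
 * where N is the number of pixels in the block.  The division is done with
 * a right shift by log2(N): 4 for 4x4 (16 pixels), 6 for 8x8, 7 for 16x8
 * and 8x16, and 8 for 16x16.
 */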
unsigned int vp8_variance4x4_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;

    vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
    *sse = var;
    return (var - (((unsigned int)avg * avg) >> 4));
}

unsigned int vp8_variance8x8_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int var;
    int avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
    *sse = var;

    return (var - (((unsigned int)avg * avg) >> 6));
}

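/* 16x16 MSE: sum the squared differences of the four 8x8 quadrants.  The
 * per-quadrant sums are produced by the helper but not needed here, since
 * MSE does not subtract the mean.
 */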
unsigned int vp8_mse16x16_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    *sse = var;
    return var;
}

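/* 16x16 variance: combine the SSE and Sum of the four 8x8 quadrants, then
 * subtract the squared mean (256 pixels, hence the shift by 8).
 */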
unsigned int vp8_variance16x16_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, sse2, sse3, var;
    int sum0, sum1, sum2, sum3, avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);

    var = sse0 + sse1 + sse2 + sse3;
    avg = sum0 + sum1 + sum2 + sum3;
    *sse = var;
    return (var - (((unsigned int)avg * avg) >> 8));
}

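/* 16x8 variance: two 8x8 blocks side by side (128 pixels, shift by 7). */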
unsigned int vp8_variance16x8_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, var;
    int sum0, sum1, avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);

    var = sse0 + sse1;
    avg = sum0 + sum1;
    *sse = var;
    return (var - (((unsigned int)avg * avg) >> 7));
}

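/* 8x16 variance: two 8x8 blocks stacked vertically (128 pixels, shift by 7). */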
unsigned int vp8_variance8x16_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    unsigned int sse0, sse1, var;
    int sum0, sum1, avg;

    vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
    vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1);

    var = sse0 + sse1;
    avg = sum0 + sum1;
    *sse = var;

    return (var - (((unsigned int)avg * avg) >> 7));
}

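/* Sub-pixel variance: the source block is first interpolated with the
 * bilinear filter pair selected by xoffset/yoffset (eighth-pel positions,
 * 0..7), then the variance against dst_ptr is computed from the
 * accumulated sum and sum of squared differences.
 */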
unsigned int vp8_sub_pixel_variance4x4_mmx
(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse)
{
    int xsum;
    unsigned int xxsum;
    vp8_filter_block2d_bil4x4_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line,
        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
        &xsum, &xxsum
    );
    *sse = xxsum;
    return (xxsum - (((unsigned int)xsum * xsum) >> 4));
}

unsigned int vp8_sub_pixel_variance8x8_mmx
(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum;
    unsigned int xxsum;
    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 8,
        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
        &xsum, &xxsum
    );
    *sse = xxsum;
    return (xxsum - (((unsigned int)xsum * xsum) >> 6));
}

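/* 16-pixel-wide blocks are handled as a left and a right 8-wide half; the
 * two partial sums and sums of squares are combined before the variance is
 * formed.
 */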
unsigned int vp8_sub_pixel_variance16x16_mmx
(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;

    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
        &xsum0, &xxsum0
    );

    vp8_filter_block2d_bil_var_mmx(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 16,
        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;

    *sse = xxsum0;
    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
}

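/* Sub-pixel 16x16 MSE: reuse the sub-pixel variance routine and return the
 * raw SSE it stores in *sse, ignoring the mean-corrected return value.
 */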
unsigned int vp8_sub_pixel_mse16x16_mmx(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
    return *sse;
}

unsigned int vp8_sub_pixel_variance16x8_mmx
(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;

    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 8,
        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
        &xsum0, &xxsum0
    );

    vp8_filter_block2d_bil_var_mmx(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 8,
        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
        &xsum1, &xxsum1
    );

    xsum0 += xsum1;
    xxsum0 += xxsum1;

    *sse = xxsum0;
    return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7));
}

unsigned int vp8_sub_pixel_variance8x16_mmx
(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse
)
{
    int xsum;
    unsigned int xxsum;
    vp8_filter_block2d_bil_var_mmx(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset],
        &xsum, &xxsum
    );
    *sse = xxsum;
    return (xxsum - (((unsigned int)xsum * xsum) >> 7));
}

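/* Half-pixel variants: offset 4 selects the half-pel bilinear filter, so
 * these wrappers cover the horizontal, vertical and diagonal half-pel
 * positions of the 16x16 sub-pixel variance.
 */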
unsigned int vp8_variance_halfpixvar16x16_h_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0,
                                           ref_ptr, recon_stride, sse);
}


unsigned int vp8_variance_halfpixvar16x16_v_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4,
                                           ref_ptr, recon_stride, sse);
}


unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4,
                                           ref_ptr, recon_stride, sse);
}