Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
11 #include "./vpx_config.h"
13 #include "vp9/encoder/vp9_variance.h"
14 #include "vp9/common/vp9_pragmas.h"
15 #include "vpx_ports/mem.h"
/*
 * Block kernels that are only declared in this file; their definitions are
 * not visible here (presumably hand-written assembly -- confirm).
 *
 * The get*var kernels share one signature. Callers in this file ignore the
 * return value and read the results through the SSE and Sum out-parameters.
 */
extern unsigned int vp9_get4x4var_mmx(const unsigned char *src_ptr,
                                      int source_stride,
                                      const unsigned char *ref_ptr,
                                      int recon_stride,
                                      unsigned int *SSE,
                                      int *Sum);

/* Declared here but not called anywhere in this file. */
unsigned int vp9_get_mb_ss_sse2(const int16_t *src_ptr);

unsigned int vp9_get16x16var_sse2(const unsigned char *src_ptr,
                                  int source_stride,
                                  const unsigned char *ref_ptr,
                                  int recon_stride,
                                  unsigned int *SSE,
                                  int *Sum);

unsigned int vp9_get8x8var_sse2(const unsigned char *src_ptr,
                                int source_stride,
                                const unsigned char *ref_ptr,
                                int recon_stride,
                                unsigned int *SSE,
                                int *Sum);
/*
 * Half-pixel kernels, declared only; definitions are not visible in this
 * file (presumably assembly -- confirm).
 *
 * All six share one signature and report their results through the `sum`
 * and `sumsquared` out-parameters. Only the 16x variants are called in this
 * file, each with Height == 16.
 */
void vp9_half_horiz_vert_variance8x_h_sse2(const unsigned char *ref_ptr,
                                           int ref_pixels_per_line,
                                           const unsigned char *src_ptr,
                                           int src_pixels_per_line,
                                           unsigned int Height,
                                           int *sum,
                                           unsigned int *sumsquared);

void vp9_half_horiz_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                            int ref_pixels_per_line,
                                            const unsigned char *src_ptr,
                                            int src_pixels_per_line,
                                            unsigned int Height,
                                            int *sum,
                                            unsigned int *sumsquared);

void vp9_half_horiz_variance8x_h_sse2(const unsigned char *ref_ptr,
                                      int ref_pixels_per_line,
                                      const unsigned char *src_ptr,
                                      int src_pixels_per_line,
                                      unsigned int Height,
                                      int *sum,
                                      unsigned int *sumsquared);

void vp9_half_horiz_variance16x_h_sse2(const unsigned char *ref_ptr,
                                       int ref_pixels_per_line,
                                       const unsigned char *src_ptr,
                                       int src_pixels_per_line,
                                       unsigned int Height,
                                       int *sum,
                                       unsigned int *sumsquared);

void vp9_half_vert_variance8x_h_sse2(const unsigned char *ref_ptr,
                                     int ref_pixels_per_line,
                                     const unsigned char *src_ptr,
                                     int src_pixels_per_line,
                                     unsigned int Height,
                                     int *sum,
                                     unsigned int *sumsquared);

void vp9_half_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                      int ref_pixels_per_line,
                                      const unsigned char *src_ptr,
                                      int src_pixels_per_line,
                                      unsigned int Height,
                                      int *sum,
                                      unsigned int *sumsquared);
/* Signature shared by the per-tile kernels passed to variance_sse2(). */
typedef unsigned int (*get_var_sse2)(const unsigned char *src_ptr,
                                     int source_stride,
                                     const unsigned char *ref_ptr,
                                     int recon_stride,
                                     unsigned int *SSE,
                                     int *Sum);

/*
 * Walks a w x h region in square tiles of block_size pixels, calls var_fn
 * on every tile, and accumulates the per-tile outputs.
 *
 * src_ptr/source_stride, ref_ptr/recon_stride: the two pixel planes and
 *     their row strides; tile (row, col) starts at ptr + stride * row + col.
 * w, h:       region size; if either is <= 0, var_fn is never called.
 * sse, sum:   out-parameters; zeroed first, then incremented per tile.
 * var_fn:     kernel invoked once per tile; its return value is ignored.
 * block_size: tile edge length, also the step of both loops.
 */
static void variance_sse2(const unsigned char *src_ptr, int source_stride,
                          const unsigned char *ref_ptr, int recon_stride,
                          int w, int h, unsigned int *sse, int *sum,
                          get_var_sse2 var_fn, int block_size) {
  int row;

  *sse = 0;
  *sum = 0;

  for (row = 0; row < h; row += block_size) {
    int col;

    for (col = 0; col < w; col += block_size) {
      const unsigned char *src_tile = src_ptr + source_stride * row + col;
      const unsigned char *ref_tile = ref_ptr + recon_stride * row + col;
      unsigned int tile_sse;
      int tile_sum;

      var_fn(src_tile, source_stride, ref_tile, recon_stride,
             &tile_sse, &tile_sum);
      *sse += tile_sse;
      *sum += tile_sum;
    }
  }
}
140 unsigned int vp9_variance4x4_sse2(
141 const unsigned char *src_ptr,
142 int source_stride,
143 const unsigned char *ref_ptr,
144 int recon_stride,
145 unsigned int *sse) {
146 unsigned int var;
147 int avg;
149 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4,
150 &var, &avg, vp9_get4x4var_mmx, 4);
151 *sse = var;
152 return (var - (((unsigned int)avg * avg) >> 4));
153 }
155 unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr,
156 int source_stride,
157 const uint8_t *ref_ptr,
158 int recon_stride,
159 unsigned int *sse) {
160 unsigned int var;
161 int avg;
163 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4,
164 &var, &avg, vp9_get4x4var_mmx, 4);
165 *sse = var;
166 return (var - (((unsigned int)avg * avg) >> 5));
167 }
169 unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr,
170 int source_stride,
171 const uint8_t *ref_ptr,
172 int recon_stride,
173 unsigned int *sse) {
174 unsigned int var;
175 int avg;
177 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8,
178 &var, &avg, vp9_get4x4var_mmx, 4);
179 *sse = var;
180 return (var - (((unsigned int)avg * avg) >> 5));
181 }
183 unsigned int vp9_variance8x8_sse2
184 (
185 const unsigned char *src_ptr,
186 int source_stride,
187 const unsigned char *ref_ptr,
188 int recon_stride,
189 unsigned int *sse) {
190 unsigned int var;
191 int avg;
193 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
194 &var, &avg, vp9_get8x8var_sse2, 8);
195 *sse = var;
196 return (var - (((unsigned int)avg * avg) >> 6));
197 }
199 unsigned int vp9_variance16x8_sse2
200 (
201 const unsigned char *src_ptr,
202 int source_stride,
203 const unsigned char *ref_ptr,
204 int recon_stride,
205 unsigned int *sse) {
206 unsigned int var;
207 int avg;
209 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
210 &var, &avg, vp9_get8x8var_sse2, 8);
211 *sse = var;
212 return (var - (((unsigned int)avg * avg) >> 7));
213 }
215 unsigned int vp9_variance8x16_sse2
216 (
217 const unsigned char *src_ptr,
218 int source_stride,
219 const unsigned char *ref_ptr,
220 int recon_stride,
221 unsigned int *sse) {
222 unsigned int var;
223 int avg;
225 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
226 &var, &avg, vp9_get8x8var_sse2, 8);
227 *sse = var;
228 return (var - (((unsigned int)avg * avg) >> 7));
229 }
231 unsigned int vp9_variance16x16_sse2
232 (
233 const unsigned char *src_ptr,
234 int source_stride,
235 const unsigned char *ref_ptr,
236 int recon_stride,
237 unsigned int *sse) {
238 unsigned int var;
239 int avg;
241 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
242 &var, &avg, vp9_get16x16var_sse2, 16);
243 *sse = var;
244 return (var - (((unsigned int)avg * avg) >> 8));
245 }
/*
 * 16x16 squared-error wrapper.
 *
 * Calls vp9_get16x16var_sse2 once, stores the SSE it reports in *sse, and
 * returns that same value. The sum the kernel also reports is discarded.
 */
unsigned int vp9_mse16x16_sse2(const unsigned char *src_ptr,
                               int source_stride,
                               const unsigned char *ref_ptr,
                               int recon_stride,
                               unsigned int *sse) {
  unsigned int block_sse;
  int discarded_sum;  /* required by the kernel signature, never read */

  vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                       &block_sse, &discarded_sum);
  *sse = block_sse;
  return block_sse;
}
261 unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr,
262 int source_stride,
263 const uint8_t *ref_ptr,
264 int recon_stride,
265 unsigned int *sse) {
266 unsigned int var;
267 int avg;
269 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32,
270 &var, &avg, vp9_get16x16var_sse2, 16);
271 *sse = var;
272 return (var - (((int64_t)avg * avg) >> 10));
273 }
275 unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr,
276 int source_stride,
277 const uint8_t *ref_ptr,
278 int recon_stride,
279 unsigned int *sse) {
280 unsigned int var;
281 int avg;
283 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16,
284 &var, &avg, vp9_get16x16var_sse2, 16);
285 *sse = var;
286 return (var - (((int64_t)avg * avg) >> 9));
287 }
289 unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr,
290 int source_stride,
291 const uint8_t *ref_ptr,
292 int recon_stride,
293 unsigned int *sse) {
294 unsigned int var;
295 int avg;
297 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32,
298 &var, &avg, vp9_get16x16var_sse2, 16);
299 *sse = var;
300 return (var - (((int64_t)avg * avg) >> 9));
301 }
303 unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr,
304 int source_stride,
305 const uint8_t *ref_ptr,
306 int recon_stride,
307 unsigned int *sse) {
308 unsigned int var;
309 int avg;
311 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64,
312 &var, &avg, vp9_get16x16var_sse2, 16);
313 *sse = var;
314 return (var - (((int64_t)avg * avg) >> 12));
315 }
317 unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr,
318 int source_stride,
319 const uint8_t *ref_ptr,
320 int recon_stride,
321 unsigned int *sse) {
322 unsigned int var;
323 int avg;
325 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32,
326 &var, &avg, vp9_get16x16var_sse2, 16);
327 *sse = var;
328 return (var - (((int64_t)avg * avg) >> 11));
329 }
331 unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr,
332 int source_stride,
333 const uint8_t *ref_ptr,
334 int recon_stride,
335 unsigned int *sse) {
336 unsigned int var;
337 int avg;
339 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64,
340 &var, &avg, vp9_get16x16var_sse2, 16);
341 *sse = var;
342 return (var - (((int64_t)avg * avg) >> 11));
343 }
/*
 * Sub-pixel variance wrappers.
 *
 * DECL/DECLS declare the fixed-width, variable-height kernels
 * vp9_sub_pixel_variance{4,8,16}xh_<opt>. Their definitions are not visible
 * in this file (presumably assembly -- confirm). Each kernel returns a
 * signed sum and writes an SSE through its last argument.
 */
#define DECL(w, opt)                                                   \
  int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src,          \
                                          ptrdiff_t src_stride,        \
                                          int x_offset, int y_offset,  \
                                          const uint8_t *dst,          \
                                          ptrdiff_t dst_stride,        \
                                          int height, unsigned int *sse)
#define DECLS(opt1, opt2) \
  DECL(4, opt2);          \
  DECL(8, opt1);          \
  DECL(16, opt1)

DECLS(sse2, sse);
DECLS(ssse3, ssse3);
#undef DECLS
#undef DECL

/*
 * FN defines vp9_sub_pixel_variance<w>x<h>_<opt>.
 *
 *   w, h          block size in pixels
 *   wf            width handled by one kernel call; the block is covered by
 *                 w / wf vertical strips, processed left to right
 *   wlog2, hlog2  log2 of w and h; their sum is the shift applied to the
 *                 squared sum
 *   opt           suffix of both the kernel and the generated function
 *   cast          parenthesised type used to widen `se` before squaring
 *
 * The generated function stores the total SSE in *sse_ptr and returns
 * SSE - ((se * se) >> (wlog2 + hlog2)).
 *
 * Strip offsets are derived from wf rather than hard-coded as 16/32/48, so
 * the macro stays correct if it is ever instantiated with wf != 16 for a
 * multi-strip block. Every instantiation below produces the same calls in
 * the same order as before.
 */
#define FN(w, h, wf, wlog2, hlog2, opt, cast)                               \
  unsigned int vp9_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src,  \
                                                       int src_stride,      \
                                                       int x_offset,        \
                                                       int y_offset,        \
                                                       const uint8_t *dst,  \
                                                       int dst_stride,      \
                                                       unsigned int *sse_ptr) { \
    unsigned int sse = 0;                                                   \
    int se = 0;                                                             \
    int col;                                                                \
    for (col = 0; col < (w); col += (wf)) {                                 \
      unsigned int strip_sse;                                               \
      se += vp9_sub_pixel_variance##wf##xh_##opt(src + col, src_stride,     \
                                                 x_offset, y_offset,        \
                                                 dst + col, dst_stride,     \
                                                 (h), &strip_sse);          \
      sse += strip_sse;                                                     \
    }                                                                       \
    *sse_ptr = sse;                                                         \
    return sse - ((cast se * se) >> ((wlog2) + (hlog2)));                   \
  }

/*
 * No ';' after each FN(...): the expansion is a complete function
 * definition, and a stray ';' at file scope is not valid ISO C.
 */
#define FNS(opt1, opt2)                      \
  FN(64, 64, 16, 6, 6, opt1, (int64_t))      \
  FN(64, 32, 16, 6, 5, opt1, (int64_t))      \
  FN(32, 64, 16, 5, 6, opt1, (int64_t))      \
  FN(32, 32, 16, 5, 5, opt1, (int64_t))      \
  FN(32, 16, 16, 5, 4, opt1, (int64_t))      \
  FN(16, 32, 16, 4, 5, opt1, (int64_t))      \
  FN(16, 16, 16, 4, 4, opt1, (unsigned int)) \
  FN(16, 8, 16, 4, 3, opt1, (unsigned int))  \
  FN(8, 16, 8, 3, 4, opt1, (unsigned int))   \
  FN(8, 8, 8, 3, 3, opt1, (unsigned int))    \
  FN(8, 4, 8, 3, 2, opt1, (unsigned int))    \
  FN(4, 8, 4, 2, 3, opt2, (unsigned int))    \
  FN(4, 4, 4, 2, 2, opt2, (unsigned int))

FNS(sse2, sse)
FNS(ssse3, ssse3)

#undef FNS
#undef FN
/*
 * Sub-pixel "avg" variance wrappers.
 *
 * DECL/DECLS declare the fixed-width, variable-height kernels
 * vp9_sub_pixel_avg_variance{4,8,16}xh_<opt>. Their definitions are not
 * visible in this file (presumably assembly -- confirm). Compared with the
 * plain sub-pixel kernels they take an extra `sec` buffer and its stride.
 * Each kernel returns a signed sum and writes an SSE through its last
 * argument.
 */
#define DECL(w, opt)                                                       \
  int vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src,          \
                                              ptrdiff_t src_stride,        \
                                              int x_offset, int y_offset,  \
                                              const uint8_t *dst,          \
                                              ptrdiff_t dst_stride,        \
                                              const uint8_t *sec,          \
                                              ptrdiff_t sec_stride,        \
                                              int height, unsigned int *sse)
#define DECLS(opt1, opt2) \
  DECL(4, opt2);          \
  DECL(8, opt1);          \
  DECL(16, opt1)

DECLS(sse2, sse);
DECLS(ssse3, ssse3);
#undef DECL
#undef DECLS

/*
 * FN defines vp9_sub_pixel_avg_variance<w>x<h>_<opt>.
 *
 *   w, h          block size in pixels; w is also passed to the kernel as
 *                 the stride of `sec`
 *   wf            width handled by one kernel call; the block is covered by
 *                 w / wf vertical strips, processed left to right
 *   wlog2, hlog2  log2 of w and h; their sum is the shift applied to the
 *                 squared sum
 *   opt           suffix of both the kernel and the generated function
 *   cast          parenthesised type used to widen `se` before squaring
 *
 * The generated function stores the total SSE in *sseptr and returns
 * SSE - ((se * se) >> (wlog2 + hlog2)).
 *
 * Strip offsets are derived from wf rather than hard-coded as 16/32/48, so
 * the macro stays correct if it is ever instantiated with wf != 16 for a
 * multi-strip block. Every instantiation below produces the same calls in
 * the same order as before.
 */
#define FN(w, h, wf, wlog2, hlog2, opt, cast)                                \
  unsigned int vp9_sub_pixel_avg_variance##w##x##h##_##opt(                  \
      const uint8_t *src, int src_stride, int x_offset, int y_offset,        \
      const uint8_t *dst, int dst_stride, unsigned int *sseptr,              \
      const uint8_t *sec) {                                                  \
    unsigned int sse = 0;                                                    \
    int se = 0;                                                              \
    int col;                                                                 \
    for (col = 0; col < (w); col += (wf)) {                                  \
      unsigned int strip_sse;                                                \
      se += vp9_sub_pixel_avg_variance##wf##xh_##opt(src + col, src_stride,  \
                                                     x_offset, y_offset,     \
                                                     dst + col, dst_stride,  \
                                                     sec + col, (w), (h),    \
                                                     &strip_sse);            \
      sse += strip_sse;                                                      \
    }                                                                        \
    *sseptr = sse;                                                           \
    return sse - ((cast se * se) >> ((wlog2) + (hlog2)));                    \
  }

/*
 * No ';' after each FN(...): the expansion is a complete function
 * definition, and a stray ';' at file scope is not valid ISO C.
 */
#define FNS(opt1, opt2)                      \
  FN(64, 64, 16, 6, 6, opt1, (int64_t))      \
  FN(64, 32, 16, 6, 5, opt1, (int64_t))      \
  FN(32, 64, 16, 5, 6, opt1, (int64_t))      \
  FN(32, 32, 16, 5, 5, opt1, (int64_t))      \
  FN(32, 16, 16, 5, 4, opt1, (int64_t))      \
  FN(16, 32, 16, 4, 5, opt1, (int64_t))      \
  FN(16, 16, 16, 4, 4, opt1, (unsigned int)) \
  FN(16, 8, 16, 4, 3, opt1, (unsigned int))  \
  FN(8, 16, 8, 3, 4, opt1, (unsigned int))   \
  FN(8, 8, 8, 3, 3, opt1, (unsigned int))    \
  FN(8, 4, 8, 3, 2, opt1, (unsigned int))    \
  FN(4, 8, 4, 2, 3, opt2, (unsigned int))    \
  FN(4, 4, 4, 2, 2, opt2, (unsigned int))

FNS(sse2, sse)
FNS(ssse3, ssse3)

#undef FNS
#undef FN
/*
 * 16x16 half-pixel variance wrapper, horizontal kernel.
 *
 * Calls vp9_half_horiz_variance16x_h_sse2 with a height of 16, stores the
 * reported sum of squares in *sse, and returns
 * sum_of_squares - ((sum * sum) >> 8).
 *
 * NOTE(review): src_ptr is passed in the kernel's `ref_ptr` slot and
 * dst_ptr in its `src_ptr` slot, going by the prototype's parameter names.
 * Confirm this ordering is intended.
 */
unsigned int vp9_variance_halfpixvar16x16_h_sse2(const unsigned char *src_ptr,
                                                 int src_pixels_per_line,
                                                 const unsigned char *dst_ptr,
                                                 int dst_pixels_per_line,
                                                 unsigned int *sse) {
  int diff_sum;
  unsigned int diff_sum_sq;
  unsigned int sum_sq_scaled;

  vp9_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                    dst_ptr, dst_pixels_per_line,
                                    16, &diff_sum, &diff_sum_sq);
  *sse = diff_sum_sq;

  sum_sq_scaled = ((unsigned int)diff_sum * diff_sum) >> 8;
  return diff_sum_sq - sum_sq_scaled;
}
/*
 * 16x16 half-pixel variance wrapper, vertical kernel.
 *
 * Calls vp9_half_vert_variance16x_h_sse2 with a height of 16, stores the
 * reported sum of squares in *sse, and returns
 * sum_of_squares - ((sum * sum) >> 8).
 *
 * NOTE(review): src_ptr is passed in the kernel's `ref_ptr` slot and
 * dst_ptr in its `src_ptr` slot, going by the prototype's parameter names.
 * Confirm this ordering is intended.
 */
unsigned int vp9_variance_halfpixvar16x16_v_sse2(const unsigned char *src_ptr,
                                                 int src_pixels_per_line,
                                                 const unsigned char *dst_ptr,
                                                 int dst_pixels_per_line,
                                                 unsigned int *sse) {
  int diff_sum;
  unsigned int diff_sum_sq;
  unsigned int sum_sq_scaled;

  vp9_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                   dst_ptr, dst_pixels_per_line,
                                   16, &diff_sum, &diff_sum_sq);
  *sse = diff_sum_sq;

  sum_sq_scaled = ((unsigned int)diff_sum * diff_sum) >> 8;
  return diff_sum_sq - sum_sq_scaled;
}
/*
 * 16x16 half-pixel variance wrapper, horizontal+vertical kernel.
 *
 * Calls vp9_half_horiz_vert_variance16x_h_sse2 with a height of 16, stores
 * the reported sum of squares in *sse, and returns
 * sum_of_squares - ((sum * sum) >> 8).
 *
 * NOTE(review): src_ptr is passed in the kernel's `ref_ptr` slot and
 * dst_ptr in its `src_ptr` slot, going by the prototype's parameter names.
 * Confirm this ordering is intended.
 */
unsigned int vp9_variance_halfpixvar16x16_hv_sse2(const unsigned char *src_ptr,
                                                  int src_pixels_per_line,
                                                  const unsigned char *dst_ptr,
                                                  int dst_pixels_per_line,
                                                  unsigned int *sse) {
  int diff_sum;
  unsigned int diff_sum_sq;
  unsigned int sum_sq_scaled;

  vp9_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         16, &diff_sum, &diff_sum_sq);
  *sse = diff_sum_sq;

  sum_sq_scaled = ((unsigned int)diff_sum * diff_sum) >> 8;
  return diff_sum_sq - sum_sq_scaled;
}