media/libvpx/vp9/encoder/x86/vp9_variance_sse2.c

author       Michael Schloh von Bennewitz <michael@schloh.com>
date         Thu, 15 Jan 2015 15:59:08 +0100
branch       TOR_BUG_9701
changeset    10:ac0c01689b40
permissions  -rw-r--r--

Implement a real Private Browsing Mode condition by changing the API/ABI;
this solves Tor bug #9701, complying with the disk-avoidance requirements
documented at
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_config.h"

#include "vp9/encoder/vp9_variance.h"
#include "vp9/common/vp9_pragmas.h"
#include "vpx_ports/mem.h"

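/*
 * The per-block kernels declared below are implemented outside this file
 * (hand-optimized MMX/SSE2 routines). Each vp9_getNxNvar_* call fills *SSE
 * with the sum of squared differences and *Sum with the sum of differences
 * between one NxN source block and its reference block. The
 * vp9_half_*_variance* helpers produce the same *sum / *sumsquared pair for
 * a block compared at a half-pel horizontal, vertical, or diagonal offset.
 */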
extern unsigned int vp9_get4x4var_mmx
(
  const unsigned char *src_ptr,
  int source_stride,
  const unsigned char *ref_ptr,
  int recon_stride,
  unsigned int *SSE,
  int *Sum
);

unsigned int vp9_get_mb_ss_sse2
(
  const int16_t *src_ptr
);
unsigned int vp9_get16x16var_sse2
(
  const unsigned char *src_ptr,
  int source_stride,
  const unsigned char *ref_ptr,
  int recon_stride,
  unsigned int *SSE,
  int *Sum
);
unsigned int vp9_get8x8var_sse2
(
  const unsigned char *src_ptr,
  int source_stride,
  const unsigned char *ref_ptr,
  int recon_stride,
  unsigned int *SSE,
  int *Sum
);
void vp9_half_horiz_vert_variance8x_h_sse2
(
  const unsigned char *ref_ptr,
  int ref_pixels_per_line,
  const unsigned char *src_ptr,
  int src_pixels_per_line,
  unsigned int Height,
  int *sum,
  unsigned int *sumsquared
);
void vp9_half_horiz_vert_variance16x_h_sse2
(
  const unsigned char *ref_ptr,
  int ref_pixels_per_line,
  const unsigned char *src_ptr,
  int src_pixels_per_line,
  unsigned int Height,
  int *sum,
  unsigned int *sumsquared
);
void vp9_half_horiz_variance8x_h_sse2
(
  const unsigned char *ref_ptr,
  int ref_pixels_per_line,
  const unsigned char *src_ptr,
  int src_pixels_per_line,
  unsigned int Height,
  int *sum,
  unsigned int *sumsquared
);
void vp9_half_horiz_variance16x_h_sse2
(
  const unsigned char *ref_ptr,
  int ref_pixels_per_line,
  const unsigned char *src_ptr,
  int src_pixels_per_line,
  unsigned int Height,
  int *sum,
  unsigned int *sumsquared
);
void vp9_half_vert_variance8x_h_sse2
(
  const unsigned char *ref_ptr,
  int ref_pixels_per_line,
  const unsigned char *src_ptr,
  int src_pixels_per_line,
  unsigned int Height,
  int *sum,
  unsigned int *sumsquared
);
void vp9_half_vert_variance16x_h_sse2
(
  const unsigned char *ref_ptr,
  int ref_pixels_per_line,
  const unsigned char *src_ptr,
  int src_pixels_per_line,
  unsigned int Height,
  int *sum,
  unsigned int *sumsquared
);

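/*
 * variance_sse2() walks a w x h region in block_size x block_size tiles,
 * invokes the supplied per-block kernel on each tile, and accumulates the
 * per-tile SSE and sum into *sse and *sum.
 */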
typedef unsigned int (*get_var_sse2) (
  const unsigned char *src_ptr,
  int source_stride,
  const unsigned char *ref_ptr,
  int recon_stride,
  unsigned int *SSE,
  int *Sum
);

static void variance_sse2(const unsigned char *src_ptr, int source_stride,
                          const unsigned char *ref_ptr, int recon_stride,
                          int w, int h, unsigned int *sse, int *sum,
                          get_var_sse2 var_fn, int block_size) {
  unsigned int sse0;
  int sum0;
  int i, j;

  *sse = 0;
  *sum = 0;

  for (i = 0; i < h; i += block_size) {
    for (j = 0; j < w; j += block_size) {
      var_fn(src_ptr + source_stride * i + j, source_stride,
             ref_ptr + recon_stride * i + j, recon_stride, &sse0, &sum0);
      *sse += sse0;
      *sum += sum0;
    }
  }
}

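/*
 * The fixed-size wrappers below convert the accumulated (sse, sum) pair into
 * a variance: variance = sse - (sum * sum) / (w * h), with the division done
 * as a right shift by log2(w * h); e.g. the 16x16 case shifts by 8 since
 * 16 * 16 == 2^8. The 32x32 and larger sizes cast the sum to int64_t first
 * so that sum * sum cannot overflow 32 bits.
 */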
unsigned int vp9_variance4x4_sse2(
  const unsigned char *src_ptr,
  int source_stride,
  const unsigned char *ref_ptr,
  int recon_stride,
  unsigned int *sse) {
  unsigned int var;
  int avg;

  variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4,
                &var, &avg, vp9_get4x4var_mmx, 4);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 4));
}

unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr,
                                  int source_stride,
                                  const uint8_t *ref_ptr,
                                  int recon_stride,
                                  unsigned int *sse) {
  unsigned int var;
  int avg;

  variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4,
                &var, &avg, vp9_get4x4var_mmx, 4);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 5));
}

unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr,
                                  int source_stride,
                                  const uint8_t *ref_ptr,
                                  int recon_stride,
                                  unsigned int *sse) {
  unsigned int var;
  int avg;

  variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8,
                &var, &avg, vp9_get4x4var_mmx, 4);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 5));
}

unsigned int vp9_variance8x8_sse2
(
  const unsigned char *src_ptr,
  int source_stride,
  const unsigned char *ref_ptr,
  int recon_stride,
  unsigned int *sse) {
  unsigned int var;
  int avg;

  variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
                &var, &avg, vp9_get8x8var_sse2, 8);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 6));
}

unsigned int vp9_variance16x8_sse2
(
  const unsigned char *src_ptr,
  int source_stride,
  const unsigned char *ref_ptr,
  int recon_stride,
  unsigned int *sse) {
  unsigned int var;
  int avg;

  variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
                &var, &avg, vp9_get8x8var_sse2, 8);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 7));
}

unsigned int vp9_variance8x16_sse2
(
  const unsigned char *src_ptr,
  int source_stride,
  const unsigned char *ref_ptr,
  int recon_stride,
  unsigned int *sse) {
  unsigned int var;
  int avg;

  variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
                &var, &avg, vp9_get8x8var_sse2, 8);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 7));
}

unsigned int vp9_variance16x16_sse2
(
  const unsigned char *src_ptr,
  int source_stride,
  const unsigned char *ref_ptr,
  int recon_stride,
  unsigned int *sse) {
  unsigned int var;
  int avg;

  variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
                &var, &avg, vp9_get16x16var_sse2, 16);
  *sse = var;
  return (var - (((unsigned int)avg * avg) >> 8));
}

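/*
 * vp9_mse16x16_sse2() reports the raw sum of squared errors for a 16x16
 * block: unlike the variance wrappers, the mean term is not subtracted, and
 * the return value equals *sse.
 */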
unsigned int vp9_mse16x16_sse2(
  const unsigned char *src_ptr,
  int source_stride,
  const unsigned char *ref_ptr,
  int recon_stride,
  unsigned int *sse) {
  unsigned int sse0;
  int sum0;
  vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0,
                       &sum0);
  *sse = sse0;
  return sse0;
}

unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr,
                                    int source_stride,
                                    const uint8_t *ref_ptr,
                                    int recon_stride,
                                    unsigned int *sse) {
  unsigned int var;
  int avg;

  variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32,
                &var, &avg, vp9_get16x16var_sse2, 16);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 10));
}

unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr,
                                    int source_stride,
                                    const uint8_t *ref_ptr,
                                    int recon_stride,
                                    unsigned int *sse) {
  unsigned int var;
  int avg;

  variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16,
                &var, &avg, vp9_get16x16var_sse2, 16);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 9));
}

unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr,
                                    int source_stride,
                                    const uint8_t *ref_ptr,
                                    int recon_stride,
                                    unsigned int *sse) {
  unsigned int var;
  int avg;

  variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32,
                &var, &avg, vp9_get16x16var_sse2, 16);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 9));
}

unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr,
                                    int source_stride,
                                    const uint8_t *ref_ptr,
                                    int recon_stride,
                                    unsigned int *sse) {
  unsigned int var;
  int avg;

  variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64,
                &var, &avg, vp9_get16x16var_sse2, 16);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 12));
}

unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr,
                                    int source_stride,
                                    const uint8_t *ref_ptr,
                                    int recon_stride,
                                    unsigned int *sse) {
  unsigned int var;
  int avg;

  variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32,
                &var, &avg, vp9_get16x16var_sse2, 16);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 11));
}

unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr,
                                    int source_stride,
                                    const uint8_t *ref_ptr,
                                    int recon_stride,
                                    unsigned int *sse) {
  unsigned int var;
  int avg;

  variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64,
                &var, &avg, vp9_get16x16var_sse2, 16);
  *sse = var;
  return (var - (((int64_t)avg * avg) >> 11));
}

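/*
 * Sub-pixel variance. The strip kernels declared through DECL/DECLS are
 * implemented outside this file; each one handles a column strip of width w
 * (4, 8 or 16) and arbitrary height at the given sub-pel x/y offsets. The FN
 * macro builds every vp9_sub_pixel_variance<w>x<h>_<opt> entry point by
 * running the wf-wide strip kernel once, then again at column offsets 16, 32
 * and 48 when the block is wider, summing the partial sums and SSEs, and
 * finishing with the usual sse - (se * se) >> (wlog2 + hlog2) step; the cast
 * argument widens the square to int64_t for block sizes where it could
 * overflow 32 bits.
 */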
#define DECL(w, opt) \
int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \
                                        ptrdiff_t src_stride, \
                                        int x_offset, int y_offset, \
                                        const uint8_t *dst, \
                                        ptrdiff_t dst_stride, \
                                        int height, unsigned int *sse)
#define DECLS(opt1, opt2) \
DECL(4, opt2); \
DECL(8, opt1); \
DECL(16, opt1)

DECLS(sse2, sse);
DECLS(ssse3, ssse3);
#undef DECLS
#undef DECL

#define FN(w, h, wf, wlog2, hlog2, opt, cast) \
unsigned int vp9_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \
                                                     int src_stride, \
                                                     int x_offset, \
                                                     int y_offset, \
                                                     const uint8_t *dst, \
                                                     int dst_stride, \
                                                     unsigned int *sse_ptr) { \
  unsigned int sse; \
  int se = vp9_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \
                                                y_offset, dst, dst_stride, \
                                                h, &sse); \
  if (w > wf) { \
    unsigned int sse2; \
    int se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 16, src_stride, \
                                                   x_offset, y_offset, \
                                                   dst + 16, dst_stride, \
                                                   h, &sse2); \
    se += se2; \
    sse += sse2; \
    if (w > wf * 2) { \
      se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \
                                                 x_offset, y_offset, \
                                                 dst + 32, dst_stride, \
                                                 h, &sse2); \
      se += se2; \
      sse += sse2; \
      se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \
                                                 x_offset, y_offset, \
                                                 dst + 48, dst_stride, \
                                                 h, &sse2); \
      se += se2; \
      sse += sse2; \
    } \
  } \
  *sse_ptr = sse; \
  return sse - ((cast se * se) >> (wlog2 + hlog2)); \
}

#define FNS(opt1, opt2) \
FN(64, 64, 16, 6, 6, opt1, (int64_t)); \
FN(64, 32, 16, 6, 5, opt1, (int64_t)); \
FN(32, 64, 16, 5, 6, opt1, (int64_t)); \
FN(32, 32, 16, 5, 5, opt1, (int64_t)); \
FN(32, 16, 16, 5, 4, opt1, (int64_t)); \
FN(16, 32, 16, 4, 5, opt1, (int64_t)); \
FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \
FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \
FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \
FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \
FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \
FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \
FN(4, 4, 4, 2, 2, opt2, (unsigned int))

FNS(sse2, sse);
FNS(ssse3, ssse3);

#undef FNS
#undef FN

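/*
 * The "avg" variant repeats the same construction for compound prediction:
 * the strip kernels take an additional second predictor (sec, with stride
 * sec_stride) that is averaged with the sub-pel filtered block before the
 * differences are computed. The generated wrappers pass the block width w as
 * the second-predictor stride, i.e. they expect sec to be packed at the
 * block width.
 */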
#define DECL(w, opt) \
int vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \
                                            ptrdiff_t src_stride, \
                                            int x_offset, int y_offset, \
                                            const uint8_t *dst, \
                                            ptrdiff_t dst_stride, \
                                            const uint8_t *sec, \
                                            ptrdiff_t sec_stride, \
                                            int height, unsigned int *sse)
#define DECLS(opt1, opt2) \
DECL(4, opt2); \
DECL(8, opt1); \
DECL(16, opt1)

DECLS(sse2, sse);
DECLS(ssse3, ssse3);
#undef DECL
#undef DECLS

#define FN(w, h, wf, wlog2, hlog2, opt, cast) \
unsigned int vp9_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \
                                                         int src_stride, \
                                                         int x_offset, \
                                                         int y_offset, \
                                                         const uint8_t *dst, \
                                                         int dst_stride, \
                                                         unsigned int *sseptr, \
                                                         const uint8_t *sec) { \
  unsigned int sse; \
  int se = vp9_sub_pixel_avg_variance##wf##xh_##opt(src, src_stride, x_offset, \
                                                    y_offset, dst, dst_stride, \
                                                    sec, w, h, &sse); \
  if (w > wf) { \
    unsigned int sse2; \
    int se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 16, src_stride, \
                                                       x_offset, y_offset, \
                                                       dst + 16, dst_stride, \
                                                       sec + 16, w, h, &sse2); \
    se += se2; \
    sse += sse2; \
    if (w > wf * 2) { \
      se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 32, src_stride, \
                                                     x_offset, y_offset, \
                                                     dst + 32, dst_stride, \
                                                     sec + 32, w, h, &sse2); \
      se += se2; \
      sse += sse2; \
      se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 48, src_stride, \
                                                     x_offset, y_offset, \
                                                     dst + 48, dst_stride, \
                                                     sec + 48, w, h, &sse2); \
      se += se2; \
      sse += sse2; \
    } \
  } \
  *sseptr = sse; \
  return sse - ((cast se * se) >> (wlog2 + hlog2)); \
}

#define FNS(opt1, opt2) \
FN(64, 64, 16, 6, 6, opt1, (int64_t)); \
FN(64, 32, 16, 6, 5, opt1, (int64_t)); \
FN(32, 64, 16, 5, 6, opt1, (int64_t)); \
FN(32, 32, 16, 5, 5, opt1, (int64_t)); \
FN(32, 16, 16, 5, 4, opt1, (int64_t)); \
FN(16, 32, 16, 4, 5, opt1, (int64_t)); \
FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \
FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \
FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \
FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \
FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \
FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \
FN(4, 4, 4, 2, 2, opt2, (unsigned int))

FNS(sse2, sse);
FNS(ssse3, ssse3);

#undef FNS
#undef FN

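/*
 * Fast paths for the three half-pel offsets of a 16x16 block (horizontal,
 * vertical, diagonal). Each one calls the matching vp9_half_*_variance16x_h
 * kernel declared above and then applies the same
 * sse - (sum * sum) >> 8 variance step (16 * 16 == 2^8).
 */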
unsigned int vp9_variance_halfpixvar16x16_h_sse2(
  const unsigned char *src_ptr,
  int src_pixels_per_line,
  const unsigned char *dst_ptr,
  int dst_pixels_per_line,
  unsigned int *sse) {
  int xsum0;
  unsigned int xxsum0;

  vp9_half_horiz_variance16x_h_sse2(
      src_ptr, src_pixels_per_line,
      dst_ptr, dst_pixels_per_line, 16,
      &xsum0, &xxsum0);

  *sse = xxsum0;
  return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
}


unsigned int vp9_variance_halfpixvar16x16_v_sse2(
  const unsigned char *src_ptr,
  int src_pixels_per_line,
  const unsigned char *dst_ptr,
  int dst_pixels_per_line,
  unsigned int *sse) {
  int xsum0;
  unsigned int xxsum0;
  vp9_half_vert_variance16x_h_sse2(
      src_ptr, src_pixels_per_line,
      dst_ptr, dst_pixels_per_line, 16,
      &xsum0, &xxsum0);

  *sse = xxsum0;
  return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
}


unsigned int vp9_variance_halfpixvar16x16_hv_sse2(
  const unsigned char *src_ptr,
  int src_pixels_per_line,
  const unsigned char *dst_ptr,
  int dst_pixels_per_line,
  unsigned int *sse) {
  int xsum0;
  unsigned int xxsum0;

  vp9_half_horiz_vert_variance16x_h_sse2(
      src_ptr, src_pixels_per_line,
      dst_ptr, dst_pixels_per_line, 16,
      &xsum0, &xxsum0);

  *sse = xxsum0;
  return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
}
