media/libyuv/source/compare.cc

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /*
michael@0 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
michael@0 3 *
michael@0 4 * Use of this source code is governed by a BSD-style license
michael@0 5 * that can be found in the LICENSE file in the root of the source
michael@0 6 * tree. An additional intellectual property rights grant can be found
michael@0 7 * in the file PATENTS. All contributing project authors may
michael@0 8 * be found in the AUTHORS file in the root of the source tree.
michael@0 9 */
michael@0 10
michael@0 11 #include "libyuv/compare.h"
michael@0 12
michael@0 13 #include <float.h>
michael@0 14 #include <math.h>
michael@0 15 #ifdef _OPENMP
michael@0 16 #include <omp.h>
michael@0 17 #endif
michael@0 18
michael@0 19 #include "libyuv/basic_types.h"
michael@0 20 #include "libyuv/cpu_id.h"
michael@0 21 #include "libyuv/row.h"
michael@0 22
michael@0 23 #ifdef __cplusplus
michael@0 24 namespace libyuv {
michael@0 25 extern "C" {
michael@0 26 #endif
michael@0 27
michael@0 28 // hash seed of 5381 recommended.
michael@0 29 // Internal C version of HashDjb2 with int sized count for efficiency.
michael@0 30 uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
michael@0 31
michael@0 32 // This module is for Visual C x86
michael@0 33 #if !defined(LIBYUV_DISABLE_X86) && \
michael@0 34 (defined(_M_IX86) || \
michael@0 35 (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
michael@0 36 #define HAS_HASHDJB2_SSE41
michael@0 37 uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
michael@0 38
michael@0 39 #if _MSC_VER >= 1700
michael@0 40 #define HAS_HASHDJB2_AVX2
michael@0 41 uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
michael@0 42 #endif
michael@0 43
michael@0 44 #endif // HAS_HASHDJB2_SSE41
michael@0 45
michael@0 46 // hash seed of 5381 recommended.
michael@0 47 LIBYUV_API
michael@0 48 uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
michael@0 49 const int kBlockSize = 1 << 15; // 32768;
michael@0 50 int remainder;
michael@0 51 uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
michael@0 52 #if defined(HAS_HASHDJB2_SSE41)
michael@0 53 if (TestCpuFlag(kCpuHasSSE41)) {
michael@0 54 HashDjb2_SSE = HashDjb2_SSE41;
michael@0 55 }
michael@0 56 #endif
michael@0 57 #if defined(HAS_HASHDJB2_AVX2)
michael@0 58 if (TestCpuFlag(kCpuHasAVX2)) {
michael@0 59 HashDjb2_SSE = HashDjb2_AVX2;
michael@0 60 }
michael@0 61 #endif
michael@0 62
michael@0 63 while (count >= (uint64)(kBlockSize)) {
michael@0 64 seed = HashDjb2_SSE(src, kBlockSize, seed);
michael@0 65 src += kBlockSize;
michael@0 66 count -= kBlockSize;
michael@0 67 }
michael@0 68 remainder = (int)(count) & ~15;
michael@0 69 if (remainder) {
michael@0 70 seed = HashDjb2_SSE(src, remainder, seed);
michael@0 71 src += remainder;
michael@0 72 count -= remainder;
michael@0 73 }
michael@0 74 remainder = (int)(count) & 15;
michael@0 75 if (remainder) {
michael@0 76 seed = HashDjb2_C(src, remainder, seed);
michael@0 77 }
michael@0 78 return seed;
michael@0 79 }
michael@0 80
michael@0 81 uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
michael@0 82 #if !defined(LIBYUV_DISABLE_NEON) && \
michael@0 83 (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
michael@0 84 #define HAS_SUMSQUAREERROR_NEON
michael@0 85 uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
michael@0 86 #endif
michael@0 87 #if !defined(LIBYUV_DISABLE_X86) && \
michael@0 88 (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
michael@0 89 #define HAS_SUMSQUAREERROR_SSE2
michael@0 90 uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
michael@0 91 #endif
michael@0 92 // Visual C 2012 required for AVX2.
michael@0 93 #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700
michael@0 94 #define HAS_SUMSQUAREERROR_AVX2
michael@0 95 uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
michael@0 96 #endif
michael@0 97
michael@0 98 // TODO(fbarchard): Refactor into row function.
michael@0 99 LIBYUV_API
michael@0 100 uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
michael@0 101 int count) {
michael@0 102 // SumSquareError returns values 0 to 65535 for each squared difference.
michael@0 103 // Up to 65536 of those can be summed and remain within a uint32.
michael@0 104 // After each block of 65536 pixels, accumulate into a uint64.
michael@0 105 const int kBlockSize = 65536;
michael@0 106 int remainder = count & (kBlockSize - 1) & ~31;
michael@0 107 uint64 sse = 0;
michael@0 108 int i;
michael@0 109 uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
michael@0 110 SumSquareError_C;
michael@0 111 #if defined(HAS_SUMSQUAREERROR_NEON)
michael@0 112 if (TestCpuFlag(kCpuHasNEON)) {
michael@0 113 SumSquareError = SumSquareError_NEON;
michael@0 114 }
michael@0 115 #endif
michael@0 116 #if defined(HAS_SUMSQUAREERROR_SSE2)
michael@0 117 if (TestCpuFlag(kCpuHasSSE2) &&
michael@0 118 IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
michael@0 119 // Note only used for multiples of 16 so count is not checked.
michael@0 120 SumSquareError = SumSquareError_SSE2;
michael@0 121 }
michael@0 122 #endif
michael@0 123 #if defined(HAS_SUMSQUAREERROR_AVX2)
michael@0 124 if (TestCpuFlag(kCpuHasAVX2)) {
michael@0 125 // Note only used for multiples of 32 so count is not checked.
michael@0 126 SumSquareError = SumSquareError_AVX2;
michael@0 127 }
michael@0 128 #endif
michael@0 129 #ifdef _OPENMP
michael@0 130 #pragma omp parallel for reduction(+: sse)
michael@0 131 #endif
michael@0 132 for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
michael@0 133 sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
michael@0 134 }
michael@0 135 src_a += count & ~(kBlockSize - 1);
michael@0 136 src_b += count & ~(kBlockSize - 1);
michael@0 137 if (remainder) {
michael@0 138 sse += SumSquareError(src_a, src_b, remainder);
michael@0 139 src_a += remainder;
michael@0 140 src_b += remainder;
michael@0 141 }
michael@0 142 remainder = count & 31;
michael@0 143 if (remainder) {
michael@0 144 sse += SumSquareError_C(src_a, src_b, remainder);
michael@0 145 }
michael@0 146 return sse;
michael@0 147 }
michael@0 148
michael@0 149 LIBYUV_API
michael@0 150 uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
michael@0 151 const uint8* src_b, int stride_b,
michael@0 152 int width, int height) {
michael@0 153 uint64 sse = 0;
michael@0 154 int h;
michael@0 155 // Coalesce rows.
michael@0 156 if (stride_a == width &&
michael@0 157 stride_b == width) {
michael@0 158 width *= height;
michael@0 159 height = 1;
michael@0 160 stride_a = stride_b = 0;
michael@0 161 }
michael@0 162 for (h = 0; h < height; ++h) {
michael@0 163 sse += ComputeSumSquareError(src_a, src_b, width);
michael@0 164 src_a += stride_a;
michael@0 165 src_b += stride_b;
michael@0 166 }
michael@0 167 return sse;
michael@0 168 }
michael@0 169
michael@0 170 LIBYUV_API
michael@0 171 double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
michael@0 172 double psnr;
michael@0 173 if (sse > 0) {
michael@0 174 double mse = (double)(count) / (double)(sse);
michael@0 175 psnr = 10.0 * log10(255.0 * 255.0 * mse);
michael@0 176 } else {
michael@0 177 psnr = kMaxPsnr; // Limit to prevent divide by 0
michael@0 178 }
michael@0 179
michael@0 180 if (psnr > kMaxPsnr)
michael@0 181 psnr = kMaxPsnr;
michael@0 182
michael@0 183 return psnr;
michael@0 184 }
michael@0 185
michael@0 186 LIBYUV_API
michael@0 187 double CalcFramePsnr(const uint8* src_a, int stride_a,
michael@0 188 const uint8* src_b, int stride_b,
michael@0 189 int width, int height) {
michael@0 190 const uint64 samples = width * height;
michael@0 191 const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a,
michael@0 192 src_b, stride_b,
michael@0 193 width, height);
michael@0 194 return SumSquareErrorToPsnr(sse, samples);
michael@0 195 }
michael@0 196
michael@0 197 LIBYUV_API
michael@0 198 double I420Psnr(const uint8* src_y_a, int stride_y_a,
michael@0 199 const uint8* src_u_a, int stride_u_a,
michael@0 200 const uint8* src_v_a, int stride_v_a,
michael@0 201 const uint8* src_y_b, int stride_y_b,
michael@0 202 const uint8* src_u_b, int stride_u_b,
michael@0 203 const uint8* src_v_b, int stride_v_b,
michael@0 204 int width, int height) {
michael@0 205 const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a,
michael@0 206 src_y_b, stride_y_b,
michael@0 207 width, height);
michael@0 208 const int width_uv = (width + 1) >> 1;
michael@0 209 const int height_uv = (height + 1) >> 1;
michael@0 210 const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a,
michael@0 211 src_u_b, stride_u_b,
michael@0 212 width_uv, height_uv);
michael@0 213 const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a,
michael@0 214 src_v_b, stride_v_b,
michael@0 215 width_uv, height_uv);
michael@0 216 const uint64 samples = width * height + 2 * (width_uv * height_uv);
michael@0 217 const uint64 sse = sse_y + sse_u + sse_v;
michael@0 218 return SumSquareErrorToPsnr(sse, samples);
michael@0 219 }
michael@0 220
michael@0 221 static const int64 cc1 = 26634; // (64^2*(.01*255)^2
michael@0 222 static const int64 cc2 = 239708; // (64^2*(.03*255)^2
michael@0 223
michael@0 224 static double Ssim8x8_C(const uint8* src_a, int stride_a,
michael@0 225 const uint8* src_b, int stride_b) {
michael@0 226 int64 sum_a = 0;
michael@0 227 int64 sum_b = 0;
michael@0 228 int64 sum_sq_a = 0;
michael@0 229 int64 sum_sq_b = 0;
michael@0 230 int64 sum_axb = 0;
michael@0 231
michael@0 232 int i;
michael@0 233 for (i = 0; i < 8; ++i) {
michael@0 234 int j;
michael@0 235 for (j = 0; j < 8; ++j) {
michael@0 236 sum_a += src_a[j];
michael@0 237 sum_b += src_b[j];
michael@0 238 sum_sq_a += src_a[j] * src_a[j];
michael@0 239 sum_sq_b += src_b[j] * src_b[j];
michael@0 240 sum_axb += src_a[j] * src_b[j];
michael@0 241 }
michael@0 242
michael@0 243 src_a += stride_a;
michael@0 244 src_b += stride_b;
michael@0 245 }
michael@0 246
michael@0 247 {
michael@0 248 const int64 count = 64;
michael@0 249 // scale the constants by number of pixels
michael@0 250 const int64 c1 = (cc1 * count * count) >> 12;
michael@0 251 const int64 c2 = (cc2 * count * count) >> 12;
michael@0 252
michael@0 253 const int64 sum_a_x_sum_b = sum_a * sum_b;
michael@0 254
michael@0 255 const int64 ssim_n = (2 * sum_a_x_sum_b + c1) *
michael@0 256 (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
michael@0 257
michael@0 258 const int64 sum_a_sq = sum_a*sum_a;
michael@0 259 const int64 sum_b_sq = sum_b*sum_b;
michael@0 260
michael@0 261 const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) *
michael@0 262 (count * sum_sq_a - sum_a_sq +
michael@0 263 count * sum_sq_b - sum_b_sq + c2);
michael@0 264
michael@0 265 if (ssim_d == 0.0) {
michael@0 266 return DBL_MAX;
michael@0 267 }
michael@0 268 return ssim_n * 1.0 / ssim_d;
michael@0 269 }
michael@0 270 }
michael@0 271
michael@0 272 // We are using a 8x8 moving window with starting location of each 8x8 window
michael@0 273 // on the 4x4 pixel grid. Such arrangement allows the windows to overlap
michael@0 274 // block boundaries to penalize blocking artifacts.
michael@0 275 LIBYUV_API
michael@0 276 double CalcFrameSsim(const uint8* src_a, int stride_a,
michael@0 277 const uint8* src_b, int stride_b,
michael@0 278 int width, int height) {
michael@0 279 int samples = 0;
michael@0 280 double ssim_total = 0;
michael@0 281 double (*Ssim8x8)(const uint8* src_a, int stride_a,
michael@0 282 const uint8* src_b, int stride_b) = Ssim8x8_C;
michael@0 283
michael@0 284 // sample point start with each 4x4 location
michael@0 285 int i;
michael@0 286 for (i = 0; i < height - 8; i += 4) {
michael@0 287 int j;
michael@0 288 for (j = 0; j < width - 8; j += 4) {
michael@0 289 ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b);
michael@0 290 samples++;
michael@0 291 }
michael@0 292
michael@0 293 src_a += stride_a * 4;
michael@0 294 src_b += stride_b * 4;
michael@0 295 }
michael@0 296
michael@0 297 ssim_total /= samples;
michael@0 298 return ssim_total;
michael@0 299 }
michael@0 300
michael@0 301 LIBYUV_API
michael@0 302 double I420Ssim(const uint8* src_y_a, int stride_y_a,
michael@0 303 const uint8* src_u_a, int stride_u_a,
michael@0 304 const uint8* src_v_a, int stride_v_a,
michael@0 305 const uint8* src_y_b, int stride_y_b,
michael@0 306 const uint8* src_u_b, int stride_u_b,
michael@0 307 const uint8* src_v_b, int stride_v_b,
michael@0 308 int width, int height) {
michael@0 309 const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a,
michael@0 310 src_y_b, stride_y_b, width, height);
michael@0 311 const int width_uv = (width + 1) >> 1;
michael@0 312 const int height_uv = (height + 1) >> 1;
michael@0 313 const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a,
michael@0 314 src_u_b, stride_u_b,
michael@0 315 width_uv, height_uv);
michael@0 316 const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a,
michael@0 317 src_v_b, stride_v_b,
michael@0 318 width_uv, height_uv);
michael@0 319 return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v);
michael@0 320 }
michael@0 321
michael@0 322 #ifdef __cplusplus
michael@0 323 } // extern "C"
michael@0 324 } // namespace libyuv
michael@0 325 #endif

mercurial