media/libyuv/source/compare.cc

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libyuv/source/compare.cc	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,325 @@
     1.4 +/*
     1.5 + *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
     1.6 + *
     1.7 + *  Use of this source code is governed by a BSD-style license
     1.8 + *  that can be found in the LICENSE file in the root of the source
     1.9 + *  tree. An additional intellectual property rights grant can be found
    1.10 + *  in the file PATENTS. All contributing project authors may
    1.11 + *  be found in the AUTHORS file in the root of the source tree.
    1.12 + */
    1.13 +
    1.14 +#include "libyuv/compare.h"
    1.15 +
    1.16 +#include <float.h>
    1.17 +#include <math.h>
    1.18 +#ifdef _OPENMP
    1.19 +#include <omp.h>
    1.20 +#endif
    1.21 +
    1.22 +#include "libyuv/basic_types.h"
    1.23 +#include "libyuv/cpu_id.h"
    1.24 +#include "libyuv/row.h"
    1.25 +
    1.26 +#ifdef __cplusplus
    1.27 +namespace libyuv {
    1.28 +extern "C" {
    1.29 +#endif
    1.30 +
    1.31 +// hash seed of 5381 recommended.
    1.32 +// Internal C version of HashDjb2 with int sized count for efficiency.
    1.33 +uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
    1.34 +
    1.35 +// This module is for Visual C x86
    1.36 +#if !defined(LIBYUV_DISABLE_X86) && \
    1.37 +    (defined(_M_IX86) || \
    1.38 +    (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
    1.39 +#define HAS_HASHDJB2_SSE41
    1.40 +uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
    1.41 +
    1.42 +#if _MSC_VER >= 1700
    1.43 +#define HAS_HASHDJB2_AVX2
    1.44 +uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
    1.45 +#endif
    1.46 +
    1.47 +#endif  // HAS_HASHDJB2_SSE41
    1.48 +
    1.49 +// hash seed of 5381 recommended.
    1.50 +LIBYUV_API
    1.51 +uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
    1.52 +  const int kBlockSize = 1 << 15;  // 32768;
    1.53 +  int remainder;
    1.54 +  uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
    1.55 +#if defined(HAS_HASHDJB2_SSE41)
    1.56 +  if (TestCpuFlag(kCpuHasSSE41)) {
    1.57 +    HashDjb2_SSE = HashDjb2_SSE41;
    1.58 +  }
    1.59 +#endif
    1.60 +#if defined(HAS_HASHDJB2_AVX2)
    1.61 +  if (TestCpuFlag(kCpuHasAVX2)) {
    1.62 +    HashDjb2_SSE = HashDjb2_AVX2;
    1.63 +  }
    1.64 +#endif
    1.65 +
    1.66 +  while (count >= (uint64)(kBlockSize)) {
    1.67 +    seed = HashDjb2_SSE(src, kBlockSize, seed);
    1.68 +    src += kBlockSize;
    1.69 +    count -= kBlockSize;
    1.70 +  }
    1.71 +  remainder = (int)(count) & ~15;
    1.72 +  if (remainder) {
    1.73 +    seed = HashDjb2_SSE(src, remainder, seed);
    1.74 +    src += remainder;
    1.75 +    count -= remainder;
    1.76 +  }
    1.77 +  remainder = (int)(count) & 15;
    1.78 +  if (remainder) {
    1.79 +    seed = HashDjb2_C(src, remainder, seed);
    1.80 +  }
    1.81 +  return seed;
    1.82 +}
    1.83 +
    1.84 +uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
    1.85 +#if !defined(LIBYUV_DISABLE_NEON) && \
    1.86 +    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
    1.87 +#define HAS_SUMSQUAREERROR_NEON
    1.88 +uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
    1.89 +#endif
    1.90 +#if !defined(LIBYUV_DISABLE_X86) && \
    1.91 +    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
    1.92 +#define HAS_SUMSQUAREERROR_SSE2
    1.93 +uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
    1.94 +#endif
    1.95 +// Visual C 2012 required for AVX2.
    1.96 +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700
    1.97 +#define HAS_SUMSQUAREERROR_AVX2
    1.98 +uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
    1.99 +#endif
   1.100 +
   1.101 +// TODO(fbarchard): Refactor into row function.
   1.102 +LIBYUV_API
   1.103 +uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
   1.104 +                             int count) {
   1.105 +  // SumSquareError returns values 0 to 65535 for each squared difference.
   1.106 +  // Up to 65536 of those can be summed and remain within a uint32.
   1.107 +  // After each block of 65536 pixels, accumulate into a uint64.
   1.108 +  const int kBlockSize = 65536;
   1.109 +  int remainder = count & (kBlockSize - 1) & ~31;
   1.110 +  uint64 sse = 0;
   1.111 +  int i;
   1.112 +  uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
   1.113 +      SumSquareError_C;
   1.114 +#if defined(HAS_SUMSQUAREERROR_NEON)
   1.115 +  if (TestCpuFlag(kCpuHasNEON)) {
   1.116 +    SumSquareError = SumSquareError_NEON;
   1.117 +  }
   1.118 +#endif
   1.119 +#if defined(HAS_SUMSQUAREERROR_SSE2)
   1.120 +  if (TestCpuFlag(kCpuHasSSE2) &&
   1.121 +      IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
   1.122 +    // Note only used for multiples of 16 so count is not checked.
   1.123 +    SumSquareError = SumSquareError_SSE2;
   1.124 +  }
   1.125 +#endif
   1.126 +#if defined(HAS_SUMSQUAREERROR_AVX2)
   1.127 +  if (TestCpuFlag(kCpuHasAVX2)) {
   1.128 +    // Note only used for multiples of 32 so count is not checked.
   1.129 +    SumSquareError = SumSquareError_AVX2;
   1.130 +  }
   1.131 +#endif
   1.132 +#ifdef _OPENMP
   1.133 +#pragma omp parallel for reduction(+: sse)
   1.134 +#endif
   1.135 +  for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
   1.136 +    sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
   1.137 +  }
   1.138 +  src_a += count & ~(kBlockSize - 1);
   1.139 +  src_b += count & ~(kBlockSize - 1);
   1.140 +  if (remainder) {
   1.141 +    sse += SumSquareError(src_a, src_b, remainder);
   1.142 +    src_a += remainder;
   1.143 +    src_b += remainder;
   1.144 +  }
   1.145 +  remainder = count & 31;
   1.146 +  if (remainder) {
   1.147 +    sse += SumSquareError_C(src_a, src_b, remainder);
   1.148 +  }
   1.149 +  return sse;
   1.150 +}
   1.151 +
   1.152 +LIBYUV_API
   1.153 +uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
   1.154 +                                  const uint8* src_b, int stride_b,
   1.155 +                                  int width, int height) {
   1.156 +  uint64 sse = 0;
   1.157 +  int h;
   1.158 +  // Coalesce rows.
   1.159 +  if (stride_a == width &&
   1.160 +      stride_b == width) {
   1.161 +    width *= height;
   1.162 +    height = 1;
   1.163 +    stride_a = stride_b = 0;
   1.164 +  }
   1.165 +  for (h = 0; h < height; ++h) {
   1.166 +    sse += ComputeSumSquareError(src_a, src_b, width);
   1.167 +    src_a += stride_a;
   1.168 +    src_b += stride_b;
   1.169 +  }
   1.170 +  return sse;
   1.171 +}
   1.172 +
   1.173 +LIBYUV_API
   1.174 +double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
   1.175 +  double psnr;
   1.176 +  if (sse > 0) {
   1.177 +    double mse = (double)(count) / (double)(sse);
   1.178 +    psnr = 10.0 * log10(255.0 * 255.0 * mse);
   1.179 +  } else {
   1.180 +    psnr = kMaxPsnr;      // Limit to prevent divide by 0
   1.181 +  }
   1.182 +
   1.183 +  if (psnr > kMaxPsnr)
   1.184 +    psnr = kMaxPsnr;
   1.185 +
   1.186 +  return psnr;
   1.187 +}
   1.188 +
   1.189 +LIBYUV_API
   1.190 +double CalcFramePsnr(const uint8* src_a, int stride_a,
   1.191 +                     const uint8* src_b, int stride_b,
   1.192 +                     int width, int height) {
   1.193 +  const uint64 samples = width * height;
   1.194 +  const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a,
   1.195 +                                                src_b, stride_b,
   1.196 +                                                width, height);
   1.197 +  return SumSquareErrorToPsnr(sse, samples);
   1.198 +}
   1.199 +
   1.200 +LIBYUV_API
   1.201 +double I420Psnr(const uint8* src_y_a, int stride_y_a,
   1.202 +                const uint8* src_u_a, int stride_u_a,
   1.203 +                const uint8* src_v_a, int stride_v_a,
   1.204 +                const uint8* src_y_b, int stride_y_b,
   1.205 +                const uint8* src_u_b, int stride_u_b,
   1.206 +                const uint8* src_v_b, int stride_v_b,
   1.207 +                int width, int height) {
   1.208 +  const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a,
   1.209 +                                                  src_y_b, stride_y_b,
   1.210 +                                                  width, height);
   1.211 +  const int width_uv = (width + 1) >> 1;
   1.212 +  const int height_uv = (height + 1) >> 1;
   1.213 +  const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a,
   1.214 +                                                  src_u_b, stride_u_b,
   1.215 +                                                  width_uv, height_uv);
   1.216 +  const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a,
   1.217 +                                                  src_v_b, stride_v_b,
   1.218 +                                                  width_uv, height_uv);
   1.219 +  const uint64 samples = width * height + 2 * (width_uv * height_uv);
   1.220 +  const uint64 sse = sse_y + sse_u + sse_v;
   1.221 +  return SumSquareErrorToPsnr(sse, samples);
   1.222 +}
   1.223 +
   1.224 +static const int64 cc1 =  26634;  // (64^2*(.01*255)^2
   1.225 +static const int64 cc2 = 239708;  // (64^2*(.03*255)^2
   1.226 +
   1.227 +static double Ssim8x8_C(const uint8* src_a, int stride_a,
   1.228 +                        const uint8* src_b, int stride_b) {
   1.229 +  int64 sum_a = 0;
   1.230 +  int64 sum_b = 0;
   1.231 +  int64 sum_sq_a = 0;
   1.232 +  int64 sum_sq_b = 0;
   1.233 +  int64 sum_axb = 0;
   1.234 +
   1.235 +  int i;
   1.236 +  for (i = 0; i < 8; ++i) {
   1.237 +    int j;
   1.238 +    for (j = 0; j < 8; ++j) {
   1.239 +      sum_a += src_a[j];
   1.240 +      sum_b += src_b[j];
   1.241 +      sum_sq_a += src_a[j] * src_a[j];
   1.242 +      sum_sq_b += src_b[j] * src_b[j];
   1.243 +      sum_axb += src_a[j] * src_b[j];
   1.244 +    }
   1.245 +
   1.246 +    src_a += stride_a;
   1.247 +    src_b += stride_b;
   1.248 +  }
   1.249 +
   1.250 +  {
   1.251 +    const int64 count = 64;
   1.252 +    // scale the constants by number of pixels
   1.253 +    const int64 c1 = (cc1 * count * count) >> 12;
   1.254 +    const int64 c2 = (cc2 * count * count) >> 12;
   1.255 +
   1.256 +    const int64 sum_a_x_sum_b = sum_a * sum_b;
   1.257 +
   1.258 +    const int64 ssim_n = (2 * sum_a_x_sum_b + c1) *
   1.259 +                         (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
   1.260 +
   1.261 +    const int64 sum_a_sq = sum_a*sum_a;
   1.262 +    const int64 sum_b_sq = sum_b*sum_b;
   1.263 +
   1.264 +    const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) *
   1.265 +                         (count * sum_sq_a - sum_a_sq +
   1.266 +                          count * sum_sq_b - sum_b_sq + c2);
   1.267 +
   1.268 +    if (ssim_d == 0.0) {
   1.269 +      return DBL_MAX;
   1.270 +    }
   1.271 +    return ssim_n * 1.0 / ssim_d;
   1.272 +  }
   1.273 +}
   1.274 +
   1.275 +// We are using a 8x8 moving window with starting location of each 8x8 window
   1.276 +// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
   1.277 +// block boundaries to penalize blocking artifacts.
   1.278 +LIBYUV_API
   1.279 +double CalcFrameSsim(const uint8* src_a, int stride_a,
   1.280 +                     const uint8* src_b, int stride_b,
   1.281 +                     int width, int height) {
   1.282 +  int samples = 0;
   1.283 +  double ssim_total = 0;
   1.284 +  double (*Ssim8x8)(const uint8* src_a, int stride_a,
   1.285 +                    const uint8* src_b, int stride_b) = Ssim8x8_C;
   1.286 +
   1.287 +  // sample point start with each 4x4 location
   1.288 +  int i;
   1.289 +  for (i = 0; i < height - 8; i += 4) {
   1.290 +    int j;
   1.291 +    for (j = 0; j < width - 8; j += 4) {
   1.292 +      ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b);
   1.293 +      samples++;
   1.294 +    }
   1.295 +
   1.296 +    src_a += stride_a * 4;
   1.297 +    src_b += stride_b * 4;
   1.298 +  }
   1.299 +
   1.300 +  ssim_total /= samples;
   1.301 +  return ssim_total;
   1.302 +}
   1.303 +
   1.304 +LIBYUV_API
   1.305 +double I420Ssim(const uint8* src_y_a, int stride_y_a,
   1.306 +                const uint8* src_u_a, int stride_u_a,
   1.307 +                const uint8* src_v_a, int stride_v_a,
   1.308 +                const uint8* src_y_b, int stride_y_b,
   1.309 +                const uint8* src_u_b, int stride_u_b,
   1.310 +                const uint8* src_v_b, int stride_v_b,
   1.311 +                int width, int height) {
   1.312 +  const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a,
   1.313 +                                      src_y_b, stride_y_b, width, height);
   1.314 +  const int width_uv = (width + 1) >> 1;
   1.315 +  const int height_uv = (height + 1) >> 1;
   1.316 +  const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a,
   1.317 +                                      src_u_b, stride_u_b,
   1.318 +                                      width_uv, height_uv);
   1.319 +  const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a,
   1.320 +                                      src_v_b, stride_v_b,
   1.321 +                                      width_uv, height_uv);
   1.322 +  return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v);
   1.323 +}
   1.324 +
   1.325 +#ifdef __cplusplus
   1.326 +}  // extern "C"
   1.327 +}  // namespace libyuv
   1.328 +#endif

mercurial