1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libyuv/source/compare.cc Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,325 @@ 1.4 +/* 1.5 + * Copyright 2011 The LibYuv Project Authors. All rights reserved. 1.6 + * 1.7 + * Use of this source code is governed by a BSD-style license 1.8 + * that can be found in the LICENSE file in the root of the source 1.9 + * tree. An additional intellectual property rights grant can be found 1.10 + * in the file PATENTS. All contributing project authors may 1.11 + * be found in the AUTHORS file in the root of the source tree. 1.12 + */ 1.13 + 1.14 +#include "libyuv/compare.h" 1.15 + 1.16 +#include <float.h> 1.17 +#include <math.h> 1.18 +#ifdef _OPENMP 1.19 +#include <omp.h> 1.20 +#endif 1.21 + 1.22 +#include "libyuv/basic_types.h" 1.23 +#include "libyuv/cpu_id.h" 1.24 +#include "libyuv/row.h" 1.25 + 1.26 +#ifdef __cplusplus 1.27 +namespace libyuv { 1.28 +extern "C" { 1.29 +#endif 1.30 + 1.31 +// hash seed of 5381 recommended. 1.32 +// Internal C version of HashDjb2 with int sized count for efficiency. 1.33 +uint32 HashDjb2_C(const uint8* src, int count, uint32 seed); 1.34 + 1.35 +// This module is for Visual C x86 1.36 +#if !defined(LIBYUV_DISABLE_X86) && \ 1.37 + (defined(_M_IX86) || \ 1.38 + (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))) 1.39 +#define HAS_HASHDJB2_SSE41 1.40 +uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed); 1.41 + 1.42 +#if _MSC_VER >= 1700 1.43 +#define HAS_HASHDJB2_AVX2 1.44 +uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed); 1.45 +#endif 1.46 + 1.47 +#endif // HAS_HASHDJB2_SSE41 1.48 + 1.49 +// hash seed of 5381 recommended. 1.50 +LIBYUV_API 1.51 +uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) { 1.52 + const int kBlockSize = 1 << 15; // 32768; 1.53 + int remainder; 1.54 + uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C; 1.55 +#if defined(HAS_HASHDJB2_SSE41) 1.56 + if (TestCpuFlag(kCpuHasSSE41)) { 1.57 + HashDjb2_SSE = HashDjb2_SSE41; 1.58 + } 1.59 +#endif 1.60 +#if defined(HAS_HASHDJB2_AVX2) 1.61 + if (TestCpuFlag(kCpuHasAVX2)) { 1.62 + HashDjb2_SSE = HashDjb2_AVX2; 1.63 + } 1.64 +#endif 1.65 + 1.66 + while (count >= (uint64)(kBlockSize)) { 1.67 + seed = HashDjb2_SSE(src, kBlockSize, seed); 1.68 + src += kBlockSize; 1.69 + count -= kBlockSize; 1.70 + } 1.71 + remainder = (int)(count) & ~15; 1.72 + if (remainder) { 1.73 + seed = HashDjb2_SSE(src, remainder, seed); 1.74 + src += remainder; 1.75 + count -= remainder; 1.76 + } 1.77 + remainder = (int)(count) & 15; 1.78 + if (remainder) { 1.79 + seed = HashDjb2_C(src, remainder, seed); 1.80 + } 1.81 + return seed; 1.82 +} 1.83 + 1.84 +uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count); 1.85 +#if !defined(LIBYUV_DISABLE_NEON) && \ 1.86 + (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) 1.87 +#define HAS_SUMSQUAREERROR_NEON 1.88 +uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count); 1.89 +#endif 1.90 +#if !defined(LIBYUV_DISABLE_X86) && \ 1.91 + (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) 1.92 +#define HAS_SUMSQUAREERROR_SSE2 1.93 +uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count); 1.94 +#endif 1.95 +// Visual C 2012 required for AVX2. 1.96 +#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700 1.97 +#define HAS_SUMSQUAREERROR_AVX2 1.98 +uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count); 1.99 +#endif 1.100 + 1.101 +// TODO(fbarchard): Refactor into row function. 1.102 +LIBYUV_API 1.103 +uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b, 1.104 + int count) { 1.105 + // SumSquareError returns values 0 to 65535 for each squared difference. 1.106 + // Up to 65536 of those can be summed and remain within a uint32. 1.107 + // After each block of 65536 pixels, accumulate into a uint64. 1.108 + const int kBlockSize = 65536; 1.109 + int remainder = count & (kBlockSize - 1) & ~31; 1.110 + uint64 sse = 0; 1.111 + int i; 1.112 + uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) = 1.113 + SumSquareError_C; 1.114 +#if defined(HAS_SUMSQUAREERROR_NEON) 1.115 + if (TestCpuFlag(kCpuHasNEON)) { 1.116 + SumSquareError = SumSquareError_NEON; 1.117 + } 1.118 +#endif 1.119 +#if defined(HAS_SUMSQUAREERROR_SSE2) 1.120 + if (TestCpuFlag(kCpuHasSSE2) && 1.121 + IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) { 1.122 + // Note only used for multiples of 16 so count is not checked. 1.123 + SumSquareError = SumSquareError_SSE2; 1.124 + } 1.125 +#endif 1.126 +#if defined(HAS_SUMSQUAREERROR_AVX2) 1.127 + if (TestCpuFlag(kCpuHasAVX2)) { 1.128 + // Note only used for multiples of 32 so count is not checked. 1.129 + SumSquareError = SumSquareError_AVX2; 1.130 + } 1.131 +#endif 1.132 +#ifdef _OPENMP 1.133 +#pragma omp parallel for reduction(+: sse) 1.134 +#endif 1.135 + for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) { 1.136 + sse += SumSquareError(src_a + i, src_b + i, kBlockSize); 1.137 + } 1.138 + src_a += count & ~(kBlockSize - 1); 1.139 + src_b += count & ~(kBlockSize - 1); 1.140 + if (remainder) { 1.141 + sse += SumSquareError(src_a, src_b, remainder); 1.142 + src_a += remainder; 1.143 + src_b += remainder; 1.144 + } 1.145 + remainder = count & 31; 1.146 + if (remainder) { 1.147 + sse += SumSquareError_C(src_a, src_b, remainder); 1.148 + } 1.149 + return sse; 1.150 +} 1.151 + 1.152 +LIBYUV_API 1.153 +uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a, 1.154 + const uint8* src_b, int stride_b, 1.155 + int width, int height) { 1.156 + uint64 sse = 0; 1.157 + int h; 1.158 + // Coalesce rows. 1.159 + if (stride_a == width && 1.160 + stride_b == width) { 1.161 + width *= height; 1.162 + height = 1; 1.163 + stride_a = stride_b = 0; 1.164 + } 1.165 + for (h = 0; h < height; ++h) { 1.166 + sse += ComputeSumSquareError(src_a, src_b, width); 1.167 + src_a += stride_a; 1.168 + src_b += stride_b; 1.169 + } 1.170 + return sse; 1.171 +} 1.172 + 1.173 +LIBYUV_API 1.174 +double SumSquareErrorToPsnr(uint64 sse, uint64 count) { 1.175 + double psnr; 1.176 + if (sse > 0) { 1.177 + double mse = (double)(count) / (double)(sse); 1.178 + psnr = 10.0 * log10(255.0 * 255.0 * mse); 1.179 + } else { 1.180 + psnr = kMaxPsnr; // Limit to prevent divide by 0 1.181 + } 1.182 + 1.183 + if (psnr > kMaxPsnr) 1.184 + psnr = kMaxPsnr; 1.185 + 1.186 + return psnr; 1.187 +} 1.188 + 1.189 +LIBYUV_API 1.190 +double CalcFramePsnr(const uint8* src_a, int stride_a, 1.191 + const uint8* src_b, int stride_b, 1.192 + int width, int height) { 1.193 + const uint64 samples = width * height; 1.194 + const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a, 1.195 + src_b, stride_b, 1.196 + width, height); 1.197 + return SumSquareErrorToPsnr(sse, samples); 1.198 +} 1.199 + 1.200 +LIBYUV_API 1.201 +double I420Psnr(const uint8* src_y_a, int stride_y_a, 1.202 + const uint8* src_u_a, int stride_u_a, 1.203 + const uint8* src_v_a, int stride_v_a, 1.204 + const uint8* src_y_b, int stride_y_b, 1.205 + const uint8* src_u_b, int stride_u_b, 1.206 + const uint8* src_v_b, int stride_v_b, 1.207 + int width, int height) { 1.208 + const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a, 1.209 + src_y_b, stride_y_b, 1.210 + width, height); 1.211 + const int width_uv = (width + 1) >> 1; 1.212 + const int height_uv = (height + 1) >> 1; 1.213 + const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a, 1.214 + src_u_b, stride_u_b, 1.215 + width_uv, height_uv); 1.216 + const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a, 1.217 + src_v_b, stride_v_b, 1.218 + width_uv, height_uv); 1.219 + const uint64 samples = width * height + 2 * (width_uv * height_uv); 1.220 + const uint64 sse = sse_y + sse_u + sse_v; 1.221 + return SumSquareErrorToPsnr(sse, samples); 1.222 +} 1.223 + 1.224 +static const int64 cc1 = 26634; // (64^2*(.01*255)^2 1.225 +static const int64 cc2 = 239708; // (64^2*(.03*255)^2 1.226 + 1.227 +static double Ssim8x8_C(const uint8* src_a, int stride_a, 1.228 + const uint8* src_b, int stride_b) { 1.229 + int64 sum_a = 0; 1.230 + int64 sum_b = 0; 1.231 + int64 sum_sq_a = 0; 1.232 + int64 sum_sq_b = 0; 1.233 + int64 sum_axb = 0; 1.234 + 1.235 + int i; 1.236 + for (i = 0; i < 8; ++i) { 1.237 + int j; 1.238 + for (j = 0; j < 8; ++j) { 1.239 + sum_a += src_a[j]; 1.240 + sum_b += src_b[j]; 1.241 + sum_sq_a += src_a[j] * src_a[j]; 1.242 + sum_sq_b += src_b[j] * src_b[j]; 1.243 + sum_axb += src_a[j] * src_b[j]; 1.244 + } 1.245 + 1.246 + src_a += stride_a; 1.247 + src_b += stride_b; 1.248 + } 1.249 + 1.250 + { 1.251 + const int64 count = 64; 1.252 + // scale the constants by number of pixels 1.253 + const int64 c1 = (cc1 * count * count) >> 12; 1.254 + const int64 c2 = (cc2 * count * count) >> 12; 1.255 + 1.256 + const int64 sum_a_x_sum_b = sum_a * sum_b; 1.257 + 1.258 + const int64 ssim_n = (2 * sum_a_x_sum_b + c1) * 1.259 + (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2); 1.260 + 1.261 + const int64 sum_a_sq = sum_a*sum_a; 1.262 + const int64 sum_b_sq = sum_b*sum_b; 1.263 + 1.264 + const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) * 1.265 + (count * sum_sq_a - sum_a_sq + 1.266 + count * sum_sq_b - sum_b_sq + c2); 1.267 + 1.268 + if (ssim_d == 0.0) { 1.269 + return DBL_MAX; 1.270 + } 1.271 + return ssim_n * 1.0 / ssim_d; 1.272 + } 1.273 +} 1.274 + 1.275 +// We are using a 8x8 moving window with starting location of each 8x8 window 1.276 +// on the 4x4 pixel grid. Such arrangement allows the windows to overlap 1.277 +// block boundaries to penalize blocking artifacts. 1.278 +LIBYUV_API 1.279 +double CalcFrameSsim(const uint8* src_a, int stride_a, 1.280 + const uint8* src_b, int stride_b, 1.281 + int width, int height) { 1.282 + int samples = 0; 1.283 + double ssim_total = 0; 1.284 + double (*Ssim8x8)(const uint8* src_a, int stride_a, 1.285 + const uint8* src_b, int stride_b) = Ssim8x8_C; 1.286 + 1.287 + // sample point start with each 4x4 location 1.288 + int i; 1.289 + for (i = 0; i < height - 8; i += 4) { 1.290 + int j; 1.291 + for (j = 0; j < width - 8; j += 4) { 1.292 + ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b); 1.293 + samples++; 1.294 + } 1.295 + 1.296 + src_a += stride_a * 4; 1.297 + src_b += stride_b * 4; 1.298 + } 1.299 + 1.300 + ssim_total /= samples; 1.301 + return ssim_total; 1.302 +} 1.303 + 1.304 +LIBYUV_API 1.305 +double I420Ssim(const uint8* src_y_a, int stride_y_a, 1.306 + const uint8* src_u_a, int stride_u_a, 1.307 + const uint8* src_v_a, int stride_v_a, 1.308 + const uint8* src_y_b, int stride_y_b, 1.309 + const uint8* src_u_b, int stride_u_b, 1.310 + const uint8* src_v_b, int stride_v_b, 1.311 + int width, int height) { 1.312 + const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a, 1.313 + src_y_b, stride_y_b, width, height); 1.314 + const int width_uv = (width + 1) >> 1; 1.315 + const int height_uv = (height + 1) >> 1; 1.316 + const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a, 1.317 + src_u_b, stride_u_b, 1.318 + width_uv, height_uv); 1.319 + const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a, 1.320 + src_v_b, stride_v_b, 1.321 + width_uv, height_uv); 1.322 + return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v); 1.323 +} 1.324 + 1.325 +#ifdef __cplusplus 1.326 +} // extern "C" 1.327 +} // namespace libyuv 1.328 +#endif