Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.
/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/compare.h"

#include <float.h>
#include <math.h>
#ifdef _OPENMP
#include <omp.h>
#endif

#include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
#include "libyuv/row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// A hash seed of 5381 is recommended.
// Internal C version of HashDjb2 with int-sized count for efficiency.
uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);

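// Illustrative sketch, not part of libyuv: the classic djb2 recurrence that
// HashDjb2_C is expected to implement, hash = hash * 33 + byte, written out
// under a hypothetical name so it does not collide with the real definition
// elsewhere in the library.
static uint32 HashDjb2_Sketch(const uint8* src, int count, uint32 seed) {
  uint32 hash = seed;  // Typically 5381, the canonical djb2 seed.
  int i;
  for (i = 0; i < count; ++i) {
    hash = hash * 33 + src[i];  // Equivalent to (hash << 5) + hash + src[i].
  }
  return hash;
}
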
// This module is for Visual C x86 and GCC x86/x86_64 (non-PIC on 32 bit).
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(_M_IX86) || \
    (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
#define HAS_HASHDJB2_SSE41
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);

#if _MSC_VER >= 1700
#define HAS_HASHDJB2_AVX2
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
#endif

#endif  // HAS_HASHDJB2_SSE41

// A hash seed of 5381 is recommended.
LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
  const int kBlockSize = 1 << 15;  // 32768
  int remainder;
  uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
#if defined(HAS_HASHDJB2_SSE41)
  if (TestCpuFlag(kCpuHasSSE41)) {
    HashDjb2_SSE = HashDjb2_SSE41;
  }
#endif
#if defined(HAS_HASHDJB2_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    HashDjb2_SSE = HashDjb2_AVX2;
  }
#endif

  while (count >= (uint64)(kBlockSize)) {
    seed = HashDjb2_SSE(src, kBlockSize, seed);
    src += kBlockSize;
    count -= kBlockSize;
  }
  remainder = (int)(count) & ~15;
  if (remainder) {
    seed = HashDjb2_SSE(src, remainder, seed);
    src += remainder;
    count -= remainder;
  }
  remainder = (int)(count) & 15;
  if (remainder) {
    seed = HashDjb2_C(src, remainder, seed);
  }
  return seed;
}

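// Usage sketch, not part of libyuv: hashing a buffer with the recommended
// seed of 5381. The function name and parameters here are hypothetical.
static uint32 ExampleHashFrame(const uint8* frame, uint64 frame_size) {
  return HashDjb2(frame, frame_size, 5381);
}
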
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_SUMSQUAREERROR_NEON
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
#endif
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_SUMSQUAREERROR_SSE2
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
#endif
// Visual C 2012 required for AVX2.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700
#define HAS_SUMSQUAREERROR_AVX2
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
#endif

// TODO(fbarchard): Refactor into row function.
LIBYUV_API
uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
                             int count) {
  // SumSquareError returns values 0 to 65535 for each squared difference.
  // Up to 65536 of those can be summed and remain within a uint32.
  // After each block of 65536 pixels, accumulate into a uint64.
  const int kBlockSize = 65536;
  int remainder = count & (kBlockSize - 1) & ~31;
  uint64 sse = 0;
  int i;
  uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
      SumSquareError_C;
#if defined(HAS_SUMSQUAREERROR_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    SumSquareError = SumSquareError_NEON;
  }
#endif
#if defined(HAS_SUMSQUAREERROR_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
    // Note: only used for multiples of 16, so count is not checked.
    SumSquareError = SumSquareError_SSE2;
  }
#endif
#if defined(HAS_SUMSQUAREERROR_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    // Note: only used for multiples of 32, so count is not checked.
    SumSquareError = SumSquareError_AVX2;
  }
#endif
#ifdef _OPENMP
#pragma omp parallel for reduction(+: sse)
#endif
  for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
    sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
  }
  src_a += count & ~(kBlockSize - 1);
  src_b += count & ~(kBlockSize - 1);
  if (remainder) {
    sse += SumSquareError(src_a, src_b, remainder);
    src_a += remainder;
    src_b += remainder;
  }
  remainder = count & 31;
  if (remainder) {
    sse += SumSquareError_C(src_a, src_b, remainder);
  }
  return sse;
}

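// Usage sketch, not part of libyuv: mean squared error of two equally sized
// byte buffers via ComputeSumSquareError. The name and guard are hypothetical.
static double ExampleMse(const uint8* a, const uint8* b, int count) {
  const uint64 sse = ComputeSumSquareError(a, b, count);
  return count > 0 ? (double)(sse) / (double)(count) : 0.0;
}
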
LIBYUV_API
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
                                  const uint8* src_b, int stride_b,
                                  int width, int height) {
  uint64 sse = 0;
  int h;
  // Coalesce rows.
  if (stride_a == width &&
      stride_b == width) {
    width *= height;
    height = 1;
    stride_a = stride_b = 0;
  }
  for (h = 0; h < height; ++h) {
    sse += ComputeSumSquareError(src_a, src_b, width);
    src_a += stride_a;
    src_b += stride_b;
  }
  return sse;
}

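// Usage sketch, not part of libyuv: for planes with row padding the stride is
// larger than the width, so the row-coalescing shortcut above does not apply
// and each row is summed separately. The 640x480 geometry and 704-byte stride
// below are hypothetical.
static uint64 ExamplePaddedPlaneSse(const uint8* plane_a, const uint8* plane_b) {
  return ComputeSumSquareErrorPlane(plane_a, 704, plane_b, 704, 640, 480);
}
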
LIBYUV_API
double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
  double psnr;
  if (sse > 0) {
    double mse = (double)(count) / (double)(sse);
    psnr = 10.0 * log10(255.0 * 255.0 * mse);
  } else {
    psnr = kMaxPsnr;  // Limit to prevent divide by 0.
  }

  if (psnr > kMaxPsnr)
    psnr = kMaxPsnr;

  return psnr;
}

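// Worked example, not part of libyuv: SumSquareErrorToPsnr computes
// 10 * log10(255^2 * count / sse), i.e. 10 * log10(255^2 / MSE). With
// sse == count (an MSE of exactly 1.0) that is 10 * log10(65025), roughly
// 48.13 dB; identical buffers (sse == 0) are clamped to kMaxPsnr. The helper
// name below is hypothetical.
static double ExampleUnitMsePsnr(void) {
  return SumSquareErrorToPsnr(10000, 10000);  // ~48.13 dB.
}
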
LIBYUV_API
double CalcFramePsnr(const uint8* src_a, int stride_a,
                     const uint8* src_b, int stride_b,
                     int width, int height) {
  const uint64 samples = width * height;
  const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a,
                                                src_b, stride_b,
                                                width, height);
  return SumSquareErrorToPsnr(sse, samples);
}

LIBYUV_API
double I420Psnr(const uint8* src_y_a, int stride_y_a,
                const uint8* src_u_a, int stride_u_a,
                const uint8* src_v_a, int stride_v_a,
                const uint8* src_y_b, int stride_y_b,
                const uint8* src_u_b, int stride_u_b,
                const uint8* src_v_b, int stride_v_b,
                int width, int height) {
  const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a,
                                                  src_y_b, stride_y_b,
                                                  width, height);
  const int width_uv = (width + 1) >> 1;
  const int height_uv = (height + 1) >> 1;
  const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a,
                                                  src_u_b, stride_u_b,
                                                  width_uv, height_uv);
  const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a,
                                                  src_v_b, stride_v_b,
                                                  width_uv, height_uv);
  const uint64 samples = width * height + 2 * (width_uv * height_uv);
  const uint64 sse = sse_y + sse_u + sse_v;
  return SumSquareErrorToPsnr(sse, samples);
}

static const int64 cc1 =  26634;  // 64^2 * (0.01 * 255)^2
static const int64 cc2 = 239708;  // 64^2 * (0.03 * 255)^2

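// Derivation, not from the original source: the standard SSIM constants are
// C1 = (0.01 * 255)^2 = 6.5025 and C2 = (0.03 * 255)^2 = 58.5225. Because the
// 8x8 SSIM code below works with sums over 64 pixels rather than means, the
// constants are pre-scaled by 64^2 = 4096: 6.5025 * 4096 = 26634.24 -> 26634
// and 58.5225 * 4096 = 239708.16 -> 239708. Note that with count = 64 the
// (cc * count * count) >> 12 expression in Ssim8x8_C reduces to cc1 and cc2
// themselves.
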
static double Ssim8x8_C(const uint8* src_a, int stride_a,
                        const uint8* src_b, int stride_b) {
  int64 sum_a = 0;
  int64 sum_b = 0;
  int64 sum_sq_a = 0;
  int64 sum_sq_b = 0;
  int64 sum_axb = 0;

  int i;
  for (i = 0; i < 8; ++i) {
    int j;
    for (j = 0; j < 8; ++j) {
      sum_a += src_a[j];
      sum_b += src_b[j];
      sum_sq_a += src_a[j] * src_a[j];
      sum_sq_b += src_b[j] * src_b[j];
      sum_axb += src_a[j] * src_b[j];
    }

    src_a += stride_a;
    src_b += stride_b;
  }

  {
    const int64 count = 64;
    // Scale the constants by the number of pixels.
    const int64 c1 = (cc1 * count * count) >> 12;
    const int64 c2 = (cc2 * count * count) >> 12;

    const int64 sum_a_x_sum_b = sum_a * sum_b;

    const int64 ssim_n = (2 * sum_a_x_sum_b + c1) *
                         (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);

    const int64 sum_a_sq = sum_a * sum_a;
    const int64 sum_b_sq = sum_b * sum_b;

    const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) *
                         (count * sum_sq_a - sum_a_sq +
                          count * sum_sq_b - sum_b_sq + c2);

    if (ssim_d == 0) {
      return DBL_MAX;
    }
    return ssim_n * 1.0 / ssim_d;
  }
}

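// Reference sketch, not part of libyuv: a floating-point counterpart of
// Ssim8x8_C using the usual SSIM formulation with means, variances and
// covariance. Multiplying the numerator and denominator factors of this
// expression by 64^2 = 4096 gives the integer arithmetic above, which is why
// cc1 and cc2 are pre-scaled. The function name is hypothetical.
static double ExampleSsim8x8Float(const uint8* src_a, int stride_a,
                                  const uint8* src_b, int stride_b) {
  const double c1 = (0.01 * 255.0) * (0.01 * 255.0);
  const double c2 = (0.03 * 255.0) * (0.03 * 255.0);
  double sum_a = 0.0, sum_b = 0.0;
  double sum_sq_a = 0.0, sum_sq_b = 0.0, sum_axb = 0.0;
  int i, j;
  for (i = 0; i < 8; ++i) {
    for (j = 0; j < 8; ++j) {
      sum_a += src_a[j];
      sum_b += src_b[j];
      sum_sq_a += src_a[j] * src_a[j];
      sum_sq_b += src_b[j] * src_b[j];
      sum_axb += src_a[j] * src_b[j];
    }
    src_a += stride_a;
    src_b += stride_b;
  }
  {
    const double mean_a = sum_a / 64.0;
    const double mean_b = sum_b / 64.0;
    const double var_a = sum_sq_a / 64.0 - mean_a * mean_a;
    const double var_b = sum_sq_b / 64.0 - mean_b * mean_b;
    const double cov_ab = sum_axb / 64.0 - mean_a * mean_b;
    return ((2.0 * mean_a * mean_b + c1) * (2.0 * cov_ab + c2)) /
           ((mean_a * mean_a + mean_b * mean_b + c1) * (var_a + var_b + c2));
  }
}
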
// We use an 8x8 moving window whose starting locations lie on a 4x4 pixel
// grid. This arrangement lets the windows overlap block boundaries, which
// penalizes blocking artifacts.
LIBYUV_API
double CalcFrameSsim(const uint8* src_a, int stride_a,
                     const uint8* src_b, int stride_b,
                     int width, int height) {
  int samples = 0;
  double ssim_total = 0;
  double (*Ssim8x8)(const uint8* src_a, int stride_a,
                    const uint8* src_b, int stride_b) = Ssim8x8_C;

  // Sample points start at each 4x4 grid location.
  int i;
  for (i = 0; i < height - 8; i += 4) {
    int j;
    for (j = 0; j < width - 8; j += 4) {
      ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b);
      samples++;
    }

    src_a += stride_a * 4;
    src_b += stride_b * 4;
  }

  ssim_total /= samples;
  return ssim_total;
}

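// Usage sketch, not part of libyuv: SSIM of two luma planes. Note that the
// loops above take no samples unless width and height are both at least 9;
// for smaller planes samples stays 0 and the final division is 0.0 / 0,
// i.e. NaN. The helper name below is hypothetical.
static double ExampleLumaSsim(const uint8* y_a, int stride_a,
                              const uint8* y_b, int stride_b,
                              int width, int height) {
  return CalcFrameSsim(y_a, stride_a, y_b, stride_b, width, height);
}
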
LIBYUV_API
double I420Ssim(const uint8* src_y_a, int stride_y_a,
                const uint8* src_u_a, int stride_u_a,
                const uint8* src_v_a, int stride_v_a,
                const uint8* src_y_b, int stride_y_b,
                const uint8* src_u_b, int stride_u_b,
                const uint8* src_v_b, int stride_v_b,
                int width, int height) {
  const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a,
                                      src_y_b, stride_y_b, width, height);
  const int width_uv = (width + 1) >> 1;
  const int height_uv = (height + 1) >> 1;
  const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a,
                                      src_u_b, stride_u_b,
                                      width_uv, height_uv);
  const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a,
                                      src_v_b, stride_v_b,
                                      width_uv, height_uv);
  return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v);
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif