media/libyuv/util/psnr.cc

author      Michael Schloh von Bennewitz <michael@schloh.com>
date        Thu, 22 Jan 2015 13:21:57 +0100
branch      TOR_BUG_9701
changeset   15:b8a032363ba2
permissions -rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

/*
 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./psnr.h"  // NOLINT

#include <math.h>

#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef _MSC_VER
#include <intrin.h>  // For __cpuid()
#endif

#ifdef __cplusplus
extern "C" {
#endif

typedef unsigned int uint32;  // NOLINT
#ifdef _MSC_VER
typedef unsigned __int64 uint64;
#else  // COMPILER_MSVC
#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
typedef unsigned long uint64;  // NOLINT
#else  // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
typedef unsigned long long uint64;  // NOLINT
#endif  // __LP64__
#endif  // _MSC_VER

// PSNR formula: psnr = 10 * log10 (Peak Signal^2 * size / sse)
double ComputePSNR(double sse, double size) {
  const double kMINSSE = 255.0 * 255.0 * size / pow(10., kMaxPSNR / 10.);
  if (sse <= kMINSSE)
    sse = kMINSSE;  // Produces max PSNR of 128
  return 10.0 * log10(65025.0 * size / sse);
}
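
// Worked example (added note): 65025.0 is 255 * 255, the squared peak
// signal for 8-bit samples. For a hypothetical 640x480 plane
// (size = 307200) with sse == size, i.e. a mean squared error of 1.0,
// psnr = 10 * log10(65025.0) ~= 48.13 dB. Identical buffers give
// sse == 0, which the kMINSSE clamp above maps to kMaxPSNR (128 dB).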

#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
#define HAS_SUMSQUAREERROR_NEON
static uint32 SumSquareError_NEON(const uint8* src_a,
                                  const uint8* src_b, int count) {
  volatile uint32 sse;
  asm volatile (  // NOLINT
    "vmov.u8    q7, #0                         \n"
    "vmov.u8    q9, #0                         \n"
    "vmov.u8    q8, #0                         \n"
    "vmov.u8    q10, #0                        \n"

    "1:                                        \n"
    "vld1.u8    {q0}, [%0]!                    \n"
    "vld1.u8    {q1}, [%1]!                    \n"
    "vsubl.u8   q2, d0, d2                     \n"
    "vsubl.u8   q3, d1, d3                     \n"
    "vmlal.s16  q7, d4, d4                     \n"
    "vmlal.s16  q8, d6, d6                     \n"
    "vmlal.s16  q8, d5, d5                     \n"
    "vmlal.s16  q10, d7, d7                    \n"
    "subs       %2, %2, #16                    \n"
    "bhi        1b                             \n"

    "vadd.u32   q7, q7, q8                     \n"
    "vadd.u32   q9, q9, q10                    \n"
    "vadd.u32   q10, q7, q9                    \n"
    "vpaddl.u32 q1, q10                        \n"
    "vadd.u64   d0, d2, d3                     \n"
    "vmov.32    %3, d0[0]                      \n"
    : "+r"(src_a),
      "+r"(src_b),
      "+r"(count),
      "=r"(sse)
    :
    : "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10");
  return sse;
}
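
// Added note: the loop above consumes 16 bytes per iteration
// ("subs %2, %2, #16" / "bhi 1b"), so count must be a positive multiple
// of 16; ComputeSumSquareError below guarantees this by routing the
// final 0..15 bytes to SumSquareError_C. No absolute value is needed
// because vsubl.u8 widens each byte difference to 16 bits and
// vmlal.s16 squares it, so the sign cancels.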
#elif !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
#define HAS_SUMSQUAREERROR_SSE2
__declspec(naked)
static uint32 SumSquareError_SSE2(const uint8* /*src_a*/,
                                  const uint8* /*src_b*/, int /*count*/) {
  __asm {
    mov        eax, [esp + 4]   // src_a
    mov        edx, [esp + 8]   // src_b
    mov        ecx, [esp + 12]  // count
    pxor       xmm0, xmm0
    pxor       xmm5, xmm5
    sub        edx, eax

  wloop:
    movdqu     xmm1, [eax]
    movdqu     xmm2, [eax + edx]
    lea        eax,  [eax + 16]
    movdqu     xmm3, xmm1
    psubusb    xmm1, xmm2
    psubusb    xmm2, xmm3
    por        xmm1, xmm2
    movdqu     xmm2, xmm1
    punpcklbw  xmm1, xmm5
    punpckhbw  xmm2, xmm5
    pmaddwd    xmm1, xmm1
    pmaddwd    xmm2, xmm2
    paddd      xmm0, xmm1
    paddd      xmm0, xmm2
    sub        ecx, 16
    ja         wloop

    pshufd     xmm1, xmm0, 0EEh
    paddd      xmm0, xmm1
    pshufd     xmm1, xmm0, 01h
    paddd      xmm0, xmm1
    movd       eax, xmm0
    ret
  }
}
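
// Added note: this kernel and the GCC inline-asm variant that follows
// share the same tricks. "sub edx, eax" turns src_b into an offset from
// src_a so a single pointer increment advances both streams. psubusb
// saturates negative byte differences to zero, so OR-ing the two
// opposing subtractions yields |a - b| per byte; punpcklbw/punpckhbw
// widen to 16 bits and pmaddwd squares and pairwise-adds into 32-bit
// lanes, which the trailing pshufd/paddd pair reduces to one sum.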
#elif !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_SUMSQUAREERROR_SSE2
static uint32 SumSquareError_SSE2(const uint8* src_a,
                                  const uint8* src_b, int count) {
  uint32 sse;
  asm volatile (  // NOLINT
    "pxor      %%xmm0,%%xmm0                   \n"
    "pxor      %%xmm5,%%xmm5                   \n"
    "sub       %0,%1                           \n"

    "1:                                        \n"
    "movdqu    (%0),%%xmm1                     \n"
    "movdqu    (%0,%1,1),%%xmm2                \n"
    "lea       0x10(%0),%0                     \n"
    "movdqu    %%xmm1,%%xmm3                   \n"
    "psubusb   %%xmm2,%%xmm1                   \n"
    "psubusb   %%xmm3,%%xmm2                   \n"
    "por       %%xmm2,%%xmm1                   \n"
    "movdqu    %%xmm1,%%xmm2                   \n"
    "punpcklbw %%xmm5,%%xmm1                   \n"
    "punpckhbw %%xmm5,%%xmm2                   \n"
    "pmaddwd   %%xmm1,%%xmm1                   \n"
    "pmaddwd   %%xmm2,%%xmm2                   \n"
    "paddd     %%xmm1,%%xmm0                   \n"
    "paddd     %%xmm2,%%xmm0                   \n"
    "sub       $0x10,%2                        \n"
    "ja        1b                              \n"

    "pshufd    $0xee,%%xmm0,%%xmm1             \n"
    "paddd     %%xmm1,%%xmm0                   \n"
    "pshufd    $0x1,%%xmm0,%%xmm1              \n"
    "paddd     %%xmm1,%%xmm0                   \n"
    "movd      %%xmm0,%3                       \n"

    : "+r"(src_a),  // %0
      "+r"(src_b),  // %1
      "+r"(count),  // %2
      "=g"(sse)     // %3
    :
    : "memory", "cc"
#if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
  );  // NOLINT
  return sse;
}
#endif  // LIBYUV_DISABLE_X86 etc

#if defined(HAS_SUMSQUAREERROR_SSE2)
#if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
static __inline void __cpuid(int cpu_info[4], int info_type) {
  asm volatile (  // NOLINT
    // Preserve ebx across cpuid; under i386 PIC it holds the GOT pointer.
    "mov %%ebx, %%edi                          \n"
    "cpuid                                     \n"
    "xchg %%edi, %%ebx                         \n"
    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type));
}
#elif defined(__i386__) || defined(__x86_64__)
static __inline void __cpuid(int cpu_info[4], int info_type) {
  asm volatile (  // NOLINT
    "cpuid                                     \n"
    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type));
}
#endif

static int CpuHasSSE2() {
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)
  int cpu_info[4];
  __cpuid(cpu_info, 1);
  // CPUID leaf 1: EDX bit 26 indicates SSE2 support.
  if (cpu_info[3] & 0x04000000) {
    return 1;
  }
#endif
  return 0;
}
#endif  // HAS_SUMSQUAREERROR_SSE2

// Portable reference implementation; also used for the final tail of
// fewer than 16 bytes.
static uint32 SumSquareError_C(const uint8* src_a,
                               const uint8* src_b, int count) {
  uint32 sse = 0u;
  for (int x = 0; x < count; ++x) {
    int diff = src_a[x] - src_b[x];
    sse += static_cast<uint32>(diff * diff);
  }
  return sse;
}
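
// Worked example (added note): for src_a = {0, 2, 255} and
// src_b = {1, 0, 250}, the diffs are -1, 2, 5 and
// sse = 1 + 4 + 25 = 30.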

double ComputeSumSquareError(const uint8* src_a,
                             const uint8* src_b, int count) {
  // Pick the fastest available kernel for 16-byte multiples.
  uint32 (*SumSquareError)(const uint8* src_a,
                           const uint8* src_b, int count) = SumSquareError_C;
#if defined(HAS_SUMSQUAREERROR_NEON)
  SumSquareError = SumSquareError_NEON;
#endif
#if defined(HAS_SUMSQUAREERROR_SSE2)
  if (CpuHasSSE2()) {
    SumSquareError = SumSquareError_SSE2;
  }
#endif
  // Accumulate in 32 KB blocks so the 32-bit per-block sums cannot
  // overflow (32768 * 255^2 < 2^32), then widen to 64 bits.
  const int kBlockSize = 1 << 15;
  uint64 sse = 0;
#ifdef _OPENMP
#pragma omp parallel for reduction(+: sse)
#endif
  for (int i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
    sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
  }
  // Handle what remains after the full blocks: first the 16-byte
  // multiples with the selected kernel, then the last 0..15 bytes in C.
  src_a += count & ~(kBlockSize - 1);
  src_b += count & ~(kBlockSize - 1);
  int remainder = count & (kBlockSize - 1) & ~15;
  if (remainder) {
    sse += SumSquareError(src_a, src_b, remainder);
    src_a += remainder;
    src_b += remainder;
  }
  remainder = count & 15;
  if (remainder) {
    sse += SumSquareError_C(src_a, src_b, remainder);
  }
  return static_cast<double>(sse);
}
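
// Usage sketch (added note; kWidth, kHeight, frame_a and frame_b are
// hypothetical): global PSNR over two same-sized buffers, e.g. two
// I420 frames:
//
//   const int kSize = kWidth * kHeight * 3 / 2;  // samples per I420 frame
//   const double sse = ComputeSumSquareError(frame_a, frame_b, kSize);
//   const double psnr = ComputePSNR(sse, static_cast<double>(kSize));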

#ifdef __cplusplus
}  // extern "C"
#endif
