media/libyuv/util/psnr.cc

changeset 0:6474c204b198
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/media/libyuv/util/psnr.cc	Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,247 @@
+/*
+ *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./psnr.h"  // NOLINT
+
+#include <math.h>
+
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+#ifdef _MSC_VER
+#include <intrin.h>  // For __cpuid()
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef unsigned int uint32;  // NOLINT
+#ifdef _MSC_VER
+typedef unsigned __int64 uint64;
+#else  // COMPILER_MSVC
+#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
+typedef unsigned long uint64;  // NOLINT
+#else  // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
+typedef unsigned long long uint64;  // NOLINT
+#endif  // __LP64__
+#endif  // _MSC_VER
+
+// PSNR formula: psnr = 10 * log10 (Peak Signal^2 * size / sse)
+double ComputePSNR(double sse, double size) {
+  const double kMINSSE = 255.0 * 255.0 * size / pow(10., kMaxPSNR / 10.);
+  if (sse <= kMINSSE)
+    sse = kMINSSE;  // Produces max PSNR of 128
+  return 10.0 * log10(65025.0 * size / sse);
+}
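
ComputePSNR clamps the sum of squared errors to kMINSSE so that identical frames (sse == 0) report the finite kMaxPSNR mentioned in the comment rather than infinity; 65025.0 is simply 255^2, the squared peak value for 8-bit samples. A minimal standalone sketch of the same clamp, assuming kMaxPSNR is 128.0 as the "max PSNR of 128" comment implies (psnr.h supplies the real constant):

// Illustration only; not part of psnr.cc. kMaxPSNR = 128.0 is an assumption
// taken from the comment above.
#include <math.h>
#include <stdio.h>

static double ClampedPSNR(double sse, double size) {
  const double kMaxPSNR = 128.0;
  const double kMINSSE = 255.0 * 255.0 * size / pow(10., kMaxPSNR / 10.);
  if (sse <= kMINSSE)
    sse = kMINSSE;  // Cap at kMaxPSNR.
  return 10.0 * log10(255.0 * 255.0 * size / sse);
}

int main() {
  const double size = 640.0 * 480.0;
  printf("identical frames:    %.2f dB\n", ClampedPSNR(0.0, size));   // 128.00
  printf("off by 1 everywhere: %.2f dB\n", ClampedPSNR(size, size));  // ~48.13
  return 0;
}
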
+
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
+#define HAS_SUMSQUAREERROR_NEON
+static uint32 SumSquareError_NEON(const uint8* src_a,
+                                  const uint8* src_b, int count) {
+  volatile uint32 sse;
+  asm volatile (  // NOLINT
+    "vmov.u8    q7, #0                         \n"
+    "vmov.u8    q9, #0                         \n"
+    "vmov.u8    q8, #0                         \n"
+    "vmov.u8    q10, #0                        \n"
+
+    "1:                                        \n"
+    "vld1.u8    {q0}, [%0]!                    \n"
+    "vld1.u8    {q1}, [%1]!                    \n"
+    "vsubl.u8   q2, d0, d2                     \n"
+    "vsubl.u8   q3, d1, d3                     \n"
+    "vmlal.s16  q7, d4, d4                     \n"
+    "vmlal.s16  q8, d6, d6                     \n"
+    "vmlal.s16  q8, d5, d5                     \n"
+    "vmlal.s16  q10, d7, d7                    \n"
+    "subs       %2, %2, #16                    \n"
+    "bhi        1b                             \n"
+
+    "vadd.u32   q7, q7, q8                     \n"
+    "vadd.u32   q9, q9, q10                    \n"
+    "vadd.u32   q10, q7, q9                    \n"
+    "vpaddl.u32 q1, q10                        \n"
+    "vadd.u64   d0, d2, d3                     \n"
+    "vmov.32    %3, d0[0]                      \n"
+    : "+r"(src_a),
+      "+r"(src_b),
+      "+r"(count),
+      "=r"(sse)
+    :
+    : "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10");
+  return sse;
+}
+#elif !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+#define HAS_SUMSQUAREERROR_SSE2
+__declspec(naked)
+static uint32 SumSquareError_SSE2(const uint8* /*src_a*/,
+                                  const uint8* /*src_b*/, int /*count*/) {
+  __asm {
+    mov        eax, [esp + 4]    // src_a
+    mov        edx, [esp + 8]    // src_b
+    mov        ecx, [esp + 12]   // count
+    pxor       xmm0, xmm0
+    pxor       xmm5, xmm5
+    sub        edx, eax
+
+  wloop:
+    movdqu     xmm1, [eax]
+    movdqu     xmm2, [eax + edx]
+    lea        eax,  [eax + 16]
+    movdqu     xmm3, xmm1
+    psubusb    xmm1, xmm2
+    psubusb    xmm2, xmm3
+    por        xmm1, xmm2
+    movdqu     xmm2, xmm1
+    punpcklbw  xmm1, xmm5
+    punpckhbw  xmm2, xmm5
+    pmaddwd    xmm1, xmm1
+    pmaddwd    xmm2, xmm2
+    paddd      xmm0, xmm1
+    paddd      xmm0, xmm2
+    sub        ecx, 16
+    ja         wloop
+
+    pshufd     xmm1, xmm0, 0EEh
+    paddd      xmm0, xmm1
+    pshufd     xmm1, xmm0, 01h
+    paddd      xmm0, xmm1
+    movd       eax, xmm0
+    ret
+  }
+}
+#elif !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
+#define HAS_SUMSQUAREERROR_SSE2
+static uint32 SumSquareError_SSE2(const uint8* src_a,
+                                  const uint8* src_b, int count) {
+  uint32 sse;
+  asm volatile (  // NOLINT
+    "pxor      %%xmm0,%%xmm0                   \n"
+    "pxor      %%xmm5,%%xmm5                   \n"
+    "sub       %0,%1                           \n"
+
+  "1:                                          \n"
+    "movdqu    (%0),%%xmm1                     \n"
+    "movdqu    (%0,%1,1),%%xmm2                \n"
+    "lea       0x10(%0),%0                     \n"
+    "movdqu    %%xmm1,%%xmm3                   \n"
+    "psubusb   %%xmm2,%%xmm1                   \n"
+    "psubusb   %%xmm3,%%xmm2                   \n"
+    "por       %%xmm2,%%xmm1                   \n"
+    "movdqu    %%xmm1,%%xmm2                   \n"
+    "punpcklbw %%xmm5,%%xmm1                   \n"
+    "punpckhbw %%xmm5,%%xmm2                   \n"
+    "pmaddwd   %%xmm1,%%xmm1                   \n"
+    "pmaddwd   %%xmm2,%%xmm2                   \n"
+    "paddd     %%xmm1,%%xmm0                   \n"
+    "paddd     %%xmm2,%%xmm0                   \n"
+    "sub       $0x10,%2                        \n"
+    "ja        1b                              \n"
+
+    "pshufd    $0xee,%%xmm0,%%xmm1             \n"
+    "paddd     %%xmm1,%%xmm0                   \n"
+    "pshufd    $0x1,%%xmm0,%%xmm1              \n"
+    "paddd     %%xmm1,%%xmm0                   \n"
+    "movd      %%xmm0,%3                       \n"
+
+  : "+r"(src_a),      // %0
+    "+r"(src_b),      // %1
+    "+r"(count),      // %2
+    "=g"(sse)         // %3
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
+  );  // NOLINT
+  return sse;
+}
+#endif  // LIBYUV_DISABLE_X86 etc
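
The two SSE2 paths above process 16 bytes per iteration: psubusb in both directions followed by por yields the absolute difference of unsigned bytes (one of the two saturating subtractions is always zero), punpcklbw/punpckhbw zero-extend the bytes to 16 bits against xmm5, and pmaddwd squares and pairwise-adds into 32-bit accumulators. The NEON path reaches the same result with widening subtracts (vsubl.u8) and multiply-accumulates (vmlal.s16). A scalar sketch of the absolute-difference trick, for illustration only and not part of the file:

// Scalar model of the SSE2 loop body; comments map back to the asm above.
static unsigned char AbsDiffU8(unsigned char a, unsigned char b) {
  // psubusb in both directions, then por: one saturating difference is
  // always zero, so the OR is |a - b|.
  unsigned char d0 = a > b ? a - b : 0;
  unsigned char d1 = b > a ? b - a : 0;
  return d0 | d1;
}

static unsigned int SumSquareError_Scalar(const unsigned char* a,
                                          const unsigned char* b, int count) {
  unsigned int sse = 0;
  for (int i = 0; i < count; ++i) {
    unsigned int diff = AbsDiffU8(a[i], b[i]);
    sse += diff * diff;  // pmaddwd squares and pairwise-adds; paddd accumulates
  }
  return sse;
}
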
+
+#if defined(HAS_SUMSQUAREERROR_SSE2)
+#if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
+static __inline void __cpuid(int cpu_info[4], int info_type) {
+  asm volatile (  // NOLINT
+    "mov %%ebx, %%edi                          \n"
+    "cpuid                                     \n"
+    "xchg %%edi, %%ebx                         \n"
+    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+    : "a"(info_type));
+}
+#elif defined(__i386__) || defined(__x86_64__)
+static __inline void __cpuid(int cpu_info[4], int info_type) {
+  asm volatile (  // NOLINT
+    "cpuid                                     \n"
+    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+    : "a"(info_type));
+}
+#endif
+
+static int CpuHasSSE2() {
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)
+  int cpu_info[4];
+  __cpuid(cpu_info, 1);
+  if (cpu_info[3] & 0x04000000) {
+    return 1;
+  }
+#endif
+  return 0;
+}
+#endif  // HAS_SUMSQUAREERROR_SSE2
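
CpuHasSSE2() tests bit 26 (0x04000000) of EDX returned by CPUID leaf 1, which is the SSE2 feature flag; the 32-bit PIC variant of __cpuid saves and restores EBX because EBX holds the GOT pointer in position-independent i386 code. On GCC/Clang the same check could be written with the compiler-provided <cpuid.h> helper; psnr.cc keeps its own __cpuid so the one name also covers MSVC's intrinsic. A hedged alternative sketch, not used by this file:

// GCC/Clang-only alternative; __get_cpuid() handles the EBX/PIC issue itself.
#include <cpuid.h>

static int CpuHasSSE2_Alt() {
  unsigned int eax, ebx, ecx, edx;
  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
    return (edx & 0x04000000) ? 1 : 0;  // EDX bit 26: SSE2
  }
  return 0;
}
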
+
+static uint32 SumSquareError_C(const uint8* src_a,
+                               const uint8* src_b, int count) {
+  uint32 sse = 0u;
+  for (int x = 0; x < count; ++x) {
+    int diff = src_a[x] - src_b[x];
+    sse += static_cast<uint32>(diff * diff);
+  }
+  return sse;
+}
+
+double ComputeSumSquareError(const uint8* src_a,
+                             const uint8* src_b, int count) {
+  uint32 (*SumSquareError)(const uint8* src_a,
+                           const uint8* src_b, int count) = SumSquareError_C;
+#if defined(HAS_SUMSQUAREERROR_NEON)
+  SumSquareError = SumSquareError_NEON;
+#endif
+#if defined(HAS_SUMSQUAREERROR_SSE2)
+  if (CpuHasSSE2()) {
+    SumSquareError = SumSquareError_SSE2;
+  }
+#endif
+  const int kBlockSize = 1 << 15;
+  uint64 sse = 0;
+#ifdef _OPENMP
+#pragma omp parallel for reduction(+: sse)
+#endif
+  for (int i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
+    sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
+  }
+  src_a += count & ~(kBlockSize - 1);
+  src_b += count & ~(kBlockSize - 1);
+  int remainder = count & (kBlockSize - 1) & ~15;
+  if (remainder) {
+    sse += SumSquareError(src_a, src_b, remainder);
+    src_a += remainder;
+    src_b += remainder;
+  }
+  remainder = count & 15;
+  if (remainder) {
+    sse += SumSquareError_C(src_a, src_b, remainder);
+  }
+  return static_cast<double>(sse);
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
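
ComputeSumSquareError() feeds the selected kernel in 32 KiB blocks (kBlockSize = 1 << 15) so the OpenMP loop can split the work, then finishes the tail in two steps: the multiple-of-16 remainder goes through the same kernel, and the final count & 15 bytes always go through SumSquareError_C. A hypothetical caller tying the two entry points together for a single 8-bit plane; it assumes "./psnr.h" declares uint8, ComputeSumSquareError() and ComputePSNR() as used above:

// Usage sketch; not part of the changeset.
#include "./psnr.h"

double PlanePSNR(const uint8* plane_a, const uint8* plane_b,
                 int width, int height) {
  const int count = width * height;
  const double sse = ComputeSumSquareError(plane_a, plane_b, count);
  return ComputePSNR(sse, static_cast<double>(count));
}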
