media/libyuv/util/psnr.cc

author       Michael Schloh von Bennewitz <michael@schloh.com>
date         Wed, 31 Dec 2014 06:09:35 +0100
changeset    0:6474c204b198
permissions  -rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

/*
 *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./psnr.h"  // NOLINT

#include <math.h>

#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef _MSC_VER
#include <intrin.h>  // For __cpuid()
#endif

#ifdef __cplusplus
extern "C" {
#endif

typedef unsigned int uint32;  // NOLINT
#ifdef _MSC_VER
typedef unsigned __int64 uint64;
#else  // COMPILER_MSVC
#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
typedef unsigned long uint64;  // NOLINT
#else  // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
typedef unsigned long long uint64;  // NOLINT
#endif  // __LP64__
#endif  // _MSC_VER

// PSNR formula: psnr = 10 * log10 (Peak Signal^2 * size / sse)
double ComputePSNR(double sse, double size) {
  const double kMINSSE = 255.0 * 255.0 * size / pow(10., kMaxPSNR / 10.);
  if (sse <= kMINSSE)
    sse = kMINSSE;  // Produces max PSNR of 128
  return 10.0 * log10(65025.0 * size / sse);
}
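
// Worked example (illustrative, not part of the upstream source): for a
// 640x480 plane, size = 307200. An accumulated sse of 307200 (an average
// squared error of 1 per sample) gives
//   psnr = 10 * log10(65025.0 * 307200 / 307200) = 10 * log10(65025) ~= 48.13 dB.
// When sse drops below kMINSSE (e.g. for identical buffers, where sse would
// be 0), the clamp above caps the result at kMaxPSNR, i.e. 128 dB.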

#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
#define HAS_SUMSQUAREERROR_NEON
static uint32 SumSquareError_NEON(const uint8* src_a,
                                  const uint8* src_b, int count) {
  volatile uint32 sse;
  asm volatile (  // NOLINT
    "vmov.u8    q7, #0                         \n"
    "vmov.u8    q9, #0                         \n"
    "vmov.u8    q8, #0                         \n"
    "vmov.u8    q10, #0                        \n"

    "1:                                        \n"
    "vld1.u8    {q0}, [%0]!                    \n"
    "vld1.u8    {q1}, [%1]!                    \n"
    "vsubl.u8   q2, d0, d2                     \n"
    "vsubl.u8   q3, d1, d3                     \n"
    "vmlal.s16  q7, d4, d4                     \n"
    "vmlal.s16  q8, d6, d6                     \n"
    "vmlal.s16  q8, d5, d5                     \n"
    "vmlal.s16  q10, d7, d7                    \n"
    "subs       %2, %2, #16                    \n"
    "bhi        1b                             \n"

    "vadd.u32   q7, q7, q8                     \n"
    "vadd.u32   q9, q9, q10                    \n"
    "vadd.u32   q10, q7, q9                    \n"
    "vpaddl.u32 q1, q10                        \n"
    "vadd.u64   d0, d2, d3                     \n"
    "vmov.32    %3, d0[0]                      \n"
    : "+r"(src_a),
      "+r"(src_b),
      "+r"(count),
      "=r"(sse)
    :
    : "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10");
  return sse;
}
#elif !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
#define HAS_SUMSQUAREERROR_SSE2
__declspec(naked)
static uint32 SumSquareError_SSE2(const uint8* /*src_a*/,
                                  const uint8* /*src_b*/, int /*count*/) {
  __asm {
    mov        eax, [esp + 4]    // src_a
    mov        edx, [esp + 8]    // src_b
    mov        ecx, [esp + 12]   // count
    pxor       xmm0, xmm0
    pxor       xmm5, xmm5
    sub        edx, eax

  wloop:
    movdqu     xmm1, [eax]
    movdqu     xmm2, [eax + edx]
    lea        eax,  [eax + 16]
    movdqu     xmm3, xmm1
    psubusb    xmm1, xmm2
    psubusb    xmm2, xmm3
    por        xmm1, xmm2
    movdqu     xmm2, xmm1
    punpcklbw  xmm1, xmm5
    punpckhbw  xmm2, xmm5
    pmaddwd    xmm1, xmm1
    pmaddwd    xmm2, xmm2
    paddd      xmm0, xmm1
    paddd      xmm0, xmm2
    sub        ecx, 16
    ja         wloop

    pshufd     xmm1, xmm0, 0EEh
    paddd      xmm0, xmm1
    pshufd     xmm1, xmm0, 01h
    paddd      xmm0, xmm1
    movd       eax, xmm0
    ret
  }
}
#elif !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_SUMSQUAREERROR_SSE2
static uint32 SumSquareError_SSE2(const uint8* src_a,
                                  const uint8* src_b, int count) {
  uint32 sse;
  asm volatile (  // NOLINT
    "pxor      %%xmm0,%%xmm0                   \n"
    "pxor      %%xmm5,%%xmm5                   \n"
    "sub       %0,%1                           \n"

  "1:                                          \n"
    "movdqu    (%0),%%xmm1                     \n"
    "movdqu    (%0,%1,1),%%xmm2                \n"
    "lea       0x10(%0),%0                     \n"
    "movdqu    %%xmm1,%%xmm3                   \n"
    "psubusb   %%xmm2,%%xmm1                   \n"
    "psubusb   %%xmm3,%%xmm2                   \n"
    "por       %%xmm2,%%xmm1                   \n"
    "movdqu    %%xmm1,%%xmm2                   \n"
    "punpcklbw %%xmm5,%%xmm1                   \n"
    "punpckhbw %%xmm5,%%xmm2                   \n"
    "pmaddwd   %%xmm1,%%xmm1                   \n"
    "pmaddwd   %%xmm2,%%xmm2                   \n"
    "paddd     %%xmm1,%%xmm0                   \n"
    "paddd     %%xmm2,%%xmm0                   \n"
    "sub       $0x10,%2                        \n"
    "ja        1b                              \n"

    "pshufd    $0xee,%%xmm0,%%xmm1             \n"
    "paddd     %%xmm1,%%xmm0                   \n"
    "pshufd    $0x1,%%xmm0,%%xmm1              \n"
    "paddd     %%xmm1,%%xmm0                   \n"
    "movd      %%xmm0,%3                       \n"

  : "+r"(src_a),      // %0
    "+r"(src_b),      // %1
    "+r"(count),      // %2
    "=g"(sse)         // %3
  :
  : "memory", "cc"
#if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
  );  // NOLINT
  return sse;
}
#endif  // LIBYUV_DISABLE_X86 etc

#if defined(HAS_SUMSQUAREERROR_SSE2)
#if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
// With PIC on 32-bit x86, ebx is reserved, so save and restore it around cpuid.
static __inline void __cpuid(int cpu_info[4], int info_type) {
  asm volatile (  // NOLINT
    "mov %%ebx, %%edi                          \n"
    "cpuid                                     \n"
    "xchg %%edi, %%ebx                         \n"
    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type));
}
#elif defined(__i386__) || defined(__x86_64__)
static __inline void __cpuid(int cpu_info[4], int info_type) {
  asm volatile (  // NOLINT
    "cpuid                                     \n"
    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type));
}
#endif

static int CpuHasSSE2() {
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)
  int cpu_info[4];
  __cpuid(cpu_info, 1);
  if (cpu_info[3] & 0x04000000) {  // Bit 26 of EDX from CPUID leaf 1 is SSE2.
    return 1;
  }
#endif
  return 0;
}
#endif  // HAS_SUMSQUAREERROR_SSE2

static uint32 SumSquareError_C(const uint8* src_a,
                               const uint8* src_b, int count) {
  uint32 sse = 0u;
  for (int x = 0; x < count; ++x) {
    int diff = src_a[x] - src_b[x];
    sse += static_cast<uint32>(diff * diff);
  }
  return sse;
}

double ComputeSumSquareError(const uint8* src_a,
                             const uint8* src_b, int count) {
  // Pick the fastest SumSquareError implementation available at runtime.
  uint32 (*SumSquareError)(const uint8* src_a,
                           const uint8* src_b, int count) = SumSquareError_C;
#if defined(HAS_SUMSQUAREERROR_NEON)
  SumSquareError = SumSquareError_NEON;
#endif
#if defined(HAS_SUMSQUAREERROR_SSE2)
  if (CpuHasSSE2()) {
    SumSquareError = SumSquareError_SSE2;
  }
#endif
  // Accumulate whole 32 KB blocks, in parallel when OpenMP is enabled.
  const int kBlockSize = 1 << 15;
  uint64 sse = 0;
#ifdef _OPENMP
#pragma omp parallel for reduction(+: sse)
#endif
  for (int i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
    sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
  }
  src_a += count & ~(kBlockSize - 1);
  src_b += count & ~(kBlockSize - 1);
  // Process the 16-byte-multiple remainder with the selected implementation.
  int remainder = count & (kBlockSize - 1) & ~15;
  if (remainder) {
    sse += SumSquareError(src_a, src_b, remainder);
    src_a += remainder;
    src_b += remainder;
  }
  // The final tail of fewer than 16 bytes always uses the portable C version.
  remainder = count & 15;
  if (remainder) {
    sse += SumSquareError_C(src_a, src_b, remainder);
  }
  return static_cast<double>(sse);
}
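
// Example use of the two entry points above (illustrative only; the plane
// names and dimensions below are assumptions made for this sketch):
//
//   const int size = width * height;  // number of 8-bit samples compared
//   double sse = ComputeSumSquareError(plane_a, plane_b, size);
//   double psnr = ComputePSNR(sse, static_cast<double>(size));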

#ifdef __cplusplus
}  // extern "C"
#endif
