Thu, 22 Jan 2015 13:21:57 +0100
Incorporate the changes requested in the Mozilla review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
/*
 *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./psnr.h"  // NOLINT

#include <math.h>

#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef _MSC_VER
#include <intrin.h>  // For __cpuid()
#endif

#ifdef __cplusplus
extern "C" {
#endif

typedef unsigned int uint32;  // NOLINT
#ifdef _MSC_VER
typedef unsigned __int64 uint64;
#else  // COMPILER_MSVC
#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
typedef unsigned long uint64;  // NOLINT
#else  // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
typedef unsigned long long uint64;  // NOLINT
#endif  // __LP64__
#endif  // _MSC_VER

// PSNR formula: psnr = 10 * log10 (Peak Signal^2 * size / sse)
double ComputePSNR(double sse, double size) {
  const double kMINSSE = 255.0 * 255.0 * size / pow(10., kMaxPSNR / 10.);
  if (sse <= kMINSSE)
    sse = kMINSSE;  // Produces max PSNR of 128
  return 10.0 * log10(65025.0 * size / sse);
}
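
// Worked example (illustrative, not part of the original file): for a
// 640x480 plane, size = 307200. If sse == size (an average squared error
// of 1 per pixel), ComputePSNR(sse, size) = 10 * log10(65025.0) ~= 48.13 dB.
// Identical planes (sse = 0) are clamped to kMINSSE and yield kMaxPSNR.
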
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
#define HAS_SUMSQUAREERROR_NEON
static uint32 SumSquareError_NEON(const uint8* src_a,
                                  const uint8* src_b, int count) {
  volatile uint32 sse;
  asm volatile (  // NOLINT
    // Zero the four accumulator registers.
    "vmov.u8    q7, #0                         \n"
    "vmov.u8    q9, #0                         \n"
    "vmov.u8    q8, #0                         \n"
    "vmov.u8    q10, #0                        \n"

  "1:                                          \n"
    // Load 16 bytes from each source and compute 16 bit differences.
    "vld1.u8    {q0}, [%0]!                    \n"
    "vld1.u8    {q1}, [%1]!                    \n"
    "vsubl.u8   q2, d0, d2                     \n"
    "vsubl.u8   q3, d1, d3                     \n"
    // Multiply-accumulate the squared differences, 4 lanes per accumulator.
    "vmlal.s16  q7, d4, d4                     \n"
    "vmlal.s16  q9, d6, d6                     \n"
    "vmlal.s16  q8, d5, d5                     \n"
    "vmlal.s16  q10, d7, d7                    \n"
    "subs       %2, %2, #16                    \n"
    "bhi        1b                             \n"

    // Reduce the four accumulators to a single 32 bit sum.
    "vadd.u32   q7, q7, q8                     \n"
    "vadd.u32   q9, q9, q10                    \n"
    "vadd.u32   q10, q7, q9                    \n"
    "vpaddl.u32 q1, q10                        \n"
    "vadd.u64   d0, d2, d3                     \n"
    "vmov.32    %3, d0[0]                      \n"
    : "+r"(src_a),
      "+r"(src_b),
      "+r"(count),
      "=r"(sse)
    :
    : "memory", "cc", "q0", "q1", "q2", "q3", "q7", "q8", "q9", "q10");
  return sse;
}
#elif !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
#define HAS_SUMSQUAREERROR_SSE2
__declspec(naked)
static uint32 SumSquareError_SSE2(const uint8* /*src_a*/,
                                  const uint8* /*src_b*/, int /*count*/) {
  __asm {
    mov        eax, [esp + 4]    // src_a
    mov        edx, [esp + 8]    // src_b
    mov        ecx, [esp + 12]   // count
    pxor       xmm0, xmm0        // running sum of squared differences
    pxor       xmm5, xmm5        // zero, for unpacking bytes to words
    sub        edx, eax

  wloop:
    movdqu     xmm1, [eax]
    movdqu     xmm2, [eax + edx]
    lea        eax, [eax + 16]
    movdqu     xmm3, xmm1        // absolute difference via saturated
    psubusb    xmm1, xmm2        // subtraction in both directions,
    psubusb    xmm2, xmm3        // then or the results together
    por        xmm1, xmm2
    movdqu     xmm2, xmm1
    punpcklbw  xmm1, xmm5        // widen to 16 bit
    punpckhbw  xmm2, xmm5
    pmaddwd    xmm1, xmm1        // square and add adjacent word pairs
    pmaddwd    xmm2, xmm2
    paddd      xmm0, xmm1
    paddd      xmm0, xmm2
    sub        ecx, 16
    ja         wloop

    pshufd     xmm1, xmm0, 0EEh  // reduce the four dword sums to one
    paddd      xmm0, xmm1
    pshufd     xmm1, xmm0, 01h
    paddd      xmm0, xmm1
    movd       eax, xmm0
    ret
  }
}
#elif !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
#define HAS_SUMSQUAREERROR_SSE2
static uint32 SumSquareError_SSE2(const uint8* src_a,
                                  const uint8* src_b, int count) {
  uint32 sse;
  asm volatile (  // NOLINT
    "pxor      %%xmm0,%%xmm0                   \n"
    "pxor      %%xmm5,%%xmm5                   \n"
    "sub       %0,%1                           \n"

  "1:                                          \n"
    "movdqu    (%0),%%xmm1                     \n"
    "movdqu    (%0,%1,1),%%xmm2                \n"
    "lea       0x10(%0),%0                     \n"
    "movdqu    %%xmm1,%%xmm3                   \n"
    "psubusb   %%xmm2,%%xmm1                   \n"
    "psubusb   %%xmm3,%%xmm2                   \n"
    "por       %%xmm2,%%xmm1                   \n"
    "movdqu    %%xmm1,%%xmm2                   \n"
    "punpcklbw %%xmm5,%%xmm1                   \n"
    "punpckhbw %%xmm5,%%xmm2                   \n"
    "pmaddwd   %%xmm1,%%xmm1                   \n"
    "pmaddwd   %%xmm2,%%xmm2                   \n"
    "paddd     %%xmm1,%%xmm0                   \n"
    "paddd     %%xmm2,%%xmm0                   \n"
    "sub       $0x10,%2                        \n"
    "ja        1b                              \n"

    "pshufd    $0xee,%%xmm0,%%xmm1             \n"
    "paddd     %%xmm1,%%xmm0                   \n"
    "pshufd    $0x1,%%xmm0,%%xmm1              \n"
    "paddd     %%xmm1,%%xmm0                   \n"
    "movd      %%xmm0,%3                       \n"

    : "+r"(src_a),  // %0
      "+r"(src_b),  // %1
      "+r"(count),  // %2
      "=g"(sse)     // %3
    :
    : "memory", "cc"
#if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
  );  // NOLINT
  return sse;
}
#endif  // LIBYUV_DISABLE_X86 etc

#if defined(HAS_SUMSQUAREERROR_SSE2)
#if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__)
// 32 bit PIC code reserves ebx, so save and restore it around cpuid.
static __inline void __cpuid(int cpu_info[4], int info_type) {
  asm volatile (  // NOLINT
    "mov  %%ebx, %%edi                         \n"
    "cpuid                                     \n"
    "xchg %%edi, %%ebx                         \n"
    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type));
}
#elif defined(__i386__) || defined(__x86_64__)
static __inline void __cpuid(int cpu_info[4], int info_type) {
  asm volatile (  // NOLINT
    "cpuid                                     \n"
    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type));
}
#endif

static int CpuHasSSE2() {
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)
  int cpu_info[4];
  __cpuid(cpu_info, 1);
  // Bit 26 of edx from CPUID leaf 1 is the SSE2 feature flag.
  if (cpu_info[3] & 0x04000000) {
    return 1;
  }
#endif
  return 0;
}
#endif  // HAS_SUMSQUAREERROR_SSE2

static uint32 SumSquareError_C(const uint8* src_a,
                               const uint8* src_b, int count) {
  uint32 sse = 0u;
  for (int x = 0; x < count; ++x) {
    int diff = src_a[x] - src_b[x];
    sse += static_cast<uint32>(diff * diff);
  }
  return sse;
}

double ComputeSumSquareError(const uint8* src_a,
                             const uint8* src_b, int count) {
  // Pick the fastest SumSquareError available for whole 16-byte blocks.
  uint32 (*SumSquareError)(const uint8* src_a,
                           const uint8* src_b, int count) = SumSquareError_C;
#if defined(HAS_SUMSQUAREERROR_NEON)
  SumSquareError = SumSquareError_NEON;
#endif
#if defined(HAS_SUMSQUAREERROR_SSE2)
  if (CpuHasSSE2()) {
    SumSquareError = SumSquareError_SSE2;
  }
#endif
  // Accumulate in 32 KB blocks: the per-block 32 bit sums cannot overflow
  // (32768 * 255 * 255 < 2^32), and OpenMP can split the blocks across
  // threads.
  const int kBlockSize = 1 << 15;
  uint64 sse = 0;
#ifdef _OPENMP
#pragma omp parallel for reduction(+: sse)
#endif
  for (int i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
    sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
  }
  src_a += count & ~(kBlockSize - 1);
  src_b += count & ~(kBlockSize - 1);
  // Whole 16-byte groups of the remainder use the specialized version;
  // the final 0..15 bytes always use the C version.
  int remainder = count & (kBlockSize - 1) & ~15;
  if (remainder) {
    sse += SumSquareError(src_a, src_b, remainder);
    src_a += remainder;
    src_b += remainder;
  }
  remainder = count & 15;
  if (remainder) {
    sse += SumSquareError_C(src_a, src_b, remainder);
  }
  return static_cast<double>(sse);
}
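
// Usage sketch (illustrative; frame_a, frame_b, width and height are
// hypothetical, and the function declarations come from psnr.h):
//   int size = width * height;  // e.g. compare one Y plane
//   double sse = ComputeSumSquareError(frame_a, frame_b, size);
//   double psnr = ComputePSNR(sse, static_cast<double>(size));
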
#ifdef __cplusplus
}  // extern "C"
#endif