media/libyuv/source/compare_posix.cc

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /*
     2  *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
     3  *
     4  *  Use of this source code is governed by a BSD-style license
     5  *  that can be found in the LICENSE file in the root of the source
     6  *  tree. An additional intellectual property rights grant can be found
     7  *  in the file PATENTS. All contributing project authors may
     8  *  be found in the AUTHORS file in the root of the source tree.
     9  */
    11 #include "libyuv/basic_types.h"
    12 #include "libyuv/row.h"
    14 #ifdef __cplusplus
    15 namespace libyuv {
    16 extern "C" {
    17 #endif
    19 #if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
    21 uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
         // Returns the sum of squared differences between corresponding bytes of
         // src_a and src_b, accumulated in 32-bit lanes, 16 bytes per iteration.
         // Both buffers are loaded with movdqa, which requires 16-byte aligned
         // addresses. The loop body runs once before count is tested and consumes
         // 16 bytes per pass, so count is presumably expected to be a positive
         // multiple of 16 -- TODO confirm against callers.
    22   uint32 sse;
    23   asm volatile (  // NOLINT
           // xmm0 = four 32-bit partial sums; xmm5 = zero used to widen bytes.
    24     "pxor      %%xmm0,%%xmm0                   \n"
    25     "pxor      %%xmm5,%%xmm5                   \n"
    26     LABELALIGN
    27   "1:                                          \n"
           // Load 16 bytes from each source and advance both pointers.
    28     "movdqa    " MEMACCESS(0) ",%%xmm1         \n"
    29     "lea       " MEMLEA(0x10, 0) ",%0          \n"
    30     "movdqa    " MEMACCESS(1) ",%%xmm2         \n"
    31     "lea       " MEMLEA(0x10, 1) ",%1          \n"
           // The flags set here are consumed by the jg at the bottom of the loop;
           // the SSE instructions in between do not modify them.
    32     "sub       $0x10,%2                        \n"
           // |a - b| per byte: saturating subtract in both directions, then OR.
    33     "movdqa    %%xmm1,%%xmm3                   \n"
    34     "psubusb   %%xmm2,%%xmm1                   \n"
    35     "psubusb   %%xmm3,%%xmm2                   \n"
    36     "por       %%xmm2,%%xmm1                   \n"
           // Widen the differences to 16-bit words (low half in xmm1, high half
           // in xmm2), then pmaddwd squares each word and adds adjacent pairs
           // into 32-bit lanes.
    37     "movdqa    %%xmm1,%%xmm2                   \n"
    38     "punpcklbw %%xmm5,%%xmm1                   \n"
    39     "punpckhbw %%xmm5,%%xmm2                   \n"
    40     "pmaddwd   %%xmm1,%%xmm1                   \n"
    41     "pmaddwd   %%xmm2,%%xmm2                   \n"
    42     "paddd     %%xmm1,%%xmm0                   \n"
    43     "paddd     %%xmm2,%%xmm0                   \n"
    44     "jg        1b                              \n"
           // Horizontal add of the four 32-bit lanes of xmm0 into lane 0.
    46     "pshufd    $0xee,%%xmm0,%%xmm1             \n"
    47     "paddd     %%xmm1,%%xmm0                   \n"
    48     "pshufd    $0x1,%%xmm0,%%xmm1              \n"
    49     "paddd     %%xmm1,%%xmm0                   \n"
    50     "movd      %%xmm0,%3                       \n"
    52   : "+r"(src_a),      // %0
    53     "+r"(src_b),      // %1
    54     "+r"(count),      // %2
    55     "=g"(sse)         // %3
    56   :
    57   : "memory", "cc"
    58 #if defined(__SSE2__)
    59     , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
    60 #endif
    61   );  // NOLINT
    62   return sse;
    63 }
    65 #endif  // defined(__x86_64__) || defined(__i386__)
    67 #if !defined(LIBYUV_DISABLE_X86) && \
    68     (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
    69 #define HAS_HASHDJB2_SSE41
    70 static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 };  // low 32 bits of 33 ^ 16; lanes 1-3 are zero
    71 static uvec32 kHashMul0 = {  // multipliers for bytes 0-3 of each 16-byte block
    72   0x0c3525e1,  // 33 ^ 15 (low 32 bits)
    73   0xa3476dc1,  // 33 ^ 14 (low 32 bits)
    74   0x3b4039a1,  // 33 ^ 13 (low 32 bits)
    75   0x4f5f0981,  // 33 ^ 12 (low 32 bits)
    76 };
    77 static uvec32 kHashMul1 = {  // multipliers for bytes 4-7 of each 16-byte block
    78   0x30f35d61,  // 33 ^ 11 (low 32 bits)
    79   0x855cb541,  // 33 ^ 10 (low 32 bits)
    80   0x040a9121,  // 33 ^ 9 (low 32 bits)
    81   0x747c7101,  // 33 ^ 8 (low 32 bits)
    82 };
    83 static uvec32 kHashMul2 = {  // multipliers for bytes 8-11 of each 16-byte block
    84   0xec41d4e1,  // 33 ^ 7 (low 32 bits)
    85   0x4cfa3cc1,  // 33 ^ 6
    86   0x025528a1,  // 33 ^ 5
    87   0x00121881,  // 33 ^ 4
    88 };
    89 static uvec32 kHashMul3 = {  // multipliers for bytes 12-15 of each 16-byte block
    90   0x00008c61,  // 33 ^ 3
    91   0x00000441,  // 33 ^ 2
    92   0x00000021,  // 33 ^ 1
    93   0x00000001,  // 33 ^ 0
    94 };
    96 uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
         // Hashes src 16 bytes per iteration, starting from seed. Each pass
         // computes, in 32-bit wrapping arithmetic,
         //   hash = hash * 33^16 + sum(src[i] * 33^(15 - i)) for i = 0..15,
         // which equals sixteen successive steps of hash = hash * 33 + src[i].
         // The loop body runs once before count is tested and consumes 16 bytes
         // per pass, so count is presumably expected to be a positive multiple
         // of 16 -- TODO confirm against callers. src is loaded with movdqu, so
         // it does not need to be aligned.
    97   uint32 hash;
    98   asm volatile (  // NOLINT
           // xmm0 lane 0 = running hash; xmm7 = zero for widening;
           // xmm6 = kHash16x33.
    99     "movd      %2,%%xmm0                       \n"
   100     "pxor      %%xmm7,%%xmm7                   \n"
   101     "movdqa    %4,%%xmm6                       \n"
   102     LABELALIGN
   103   "1:                                          \n"
   104     "movdqu    " MEMACCESS(0) ",%%xmm1         \n"
   105     "lea       " MEMLEA(0x10, 0) ",%0          \n"
           // hash *= 33^16. Lanes 1-3 of kHash16x33 are zero, so this also clears
           // whatever the previous iteration left in the upper lanes of xmm0.
   106     "pmulld    %%xmm6,%%xmm0                   \n"
           // Bytes 0-3 widened to 32 bits in xmm3, times kHashMul0.
   107     "movdqa    %5,%%xmm5                       \n"
   108     "movdqa    %%xmm1,%%xmm2                   \n"
   109     "punpcklbw %%xmm7,%%xmm2                   \n"
   110     "movdqa    %%xmm2,%%xmm3                   \n"
   111     "punpcklwd %%xmm7,%%xmm3                   \n"
   112     "pmulld    %%xmm5,%%xmm3                   \n"
           // Bytes 4-7 widened to 32 bits in xmm4, times kHashMul1.
   113     "movdqa    %6,%%xmm5                       \n"
   114     "movdqa    %%xmm2,%%xmm4                   \n"
   115     "punpckhwd %%xmm7,%%xmm4                   \n"
   116     "pmulld    %%xmm5,%%xmm4                   \n"
           // Bytes 8-11 widened to 32 bits in xmm2, times kHashMul2.
   117     "movdqa    %7,%%xmm5                       \n"
   118     "punpckhbw %%xmm7,%%xmm1                   \n"
   119     "movdqa    %%xmm1,%%xmm2                   \n"
   120     "punpcklwd %%xmm7,%%xmm2                   \n"
   121     "pmulld    %%xmm5,%%xmm2                   \n"
           // Bytes 12-15 widened to 32 bits in xmm1, times kHashMul3.
   122     "movdqa    %8,%%xmm5                       \n"
   123     "punpckhwd %%xmm7,%%xmm1                   \n"
   124     "pmulld    %%xmm5,%%xmm1                   \n"
           // Sum the four product vectors into xmm1.
   125     "paddd     %%xmm4,%%xmm3                   \n"
   126     "paddd     %%xmm2,%%xmm1                   \n"
           // The flags set here are consumed by the jg at the bottom of the loop;
           // the SSE instructions in between do not modify them.
   127     "sub       $0x10,%1                        \n"
   128     "paddd     %%xmm3,%%xmm1                   \n"
           // Horizontal add of xmm1 into its lane 0, then fold into the hash.
   129     "pshufd    $0xe,%%xmm1,%%xmm2              \n"
   130     "paddd     %%xmm2,%%xmm1                   \n"
   131     "pshufd    $0x1,%%xmm1,%%xmm2              \n"
   132     "paddd     %%xmm2,%%xmm1                   \n"
   133     "paddd     %%xmm1,%%xmm0                   \n"
   134     "jg        1b                              \n"
   135     "movd      %%xmm0,%3                       \n"
   136   : "+r"(src),        // %0
   137     "+r"(count),      // %1
   138     "+rm"(seed),      // %2
   139     "=g"(hash)        // %3
   140   : "m"(kHash16x33),  // %4
   141     "m"(kHashMul0),   // %5
   142     "m"(kHashMul1),   // %6
   143     "m"(kHashMul2),   // %7
   144     "m"(kHashMul3)    // %8
   145   : "memory", "cc"
   146 #if defined(__SSE2__)
   147     , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
   148 #endif
   149   );  // NOLINT
   150   return hash;
   151 }
   152 #endif  // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))
   154 #ifdef __cplusplus
   155 }  // extern "C"
   156 }  // namespace libyuv
   157 #endif

mercurial