media/libyuv/source/compare_posix.cc

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/media/libyuv/source/compare_posix.cc	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,158 @@
     1.4 +/*
     1.5 + *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
     1.6 + *
     1.7 + *  Use of this source code is governed by a BSD-style license
     1.8 + *  that can be found in the LICENSE file in the root of the source
     1.9 + *  tree. An additional intellectual property rights grant can be found
    1.10 + *  in the file PATENTS. All contributing project authors may
    1.11 + *  be found in the AUTHORS file in the root of the source tree.
    1.12 + */
    1.13 +
    1.14 +#include "libyuv/basic_types.h"
    1.15 +#include "libyuv/row.h"
    1.16 +
    1.17 +#ifdef __cplusplus
    1.18 +namespace libyuv {
    1.19 +extern "C" {
    1.20 +#endif
    1.21 +
    1.22 +#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
    1.23 +
     1.24 +uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
             // Returns the sum over all processed bytes of (src_a[i] - src_b[i])^2,
             // accumulated in 32-bit lanes.
             //
             // src_a, src_b: input byte buffers. They are read with movdqa, which
             //               faults unless the address is 16-byte aligned.
             // count:        number of bytes to compare. The loop consumes 16 bytes
             //               per pass and tests the counter only after the first
             //               pass, so at least 16 bytes are always read and a count
             //               that is not a multiple of 16 is effectively rounded up.
             //               NOTE(review): presumably callers pass a positive
             //               multiple of 16 - confirm against call sites.
             //
             // The three "+r" operands are advanced/decremented in place by the asm.
     1.25 +  uint32 sse;
     1.26 +  asm volatile (  // NOLINT
     1.27 +    "pxor      %%xmm0,%%xmm0                   \n"  // xmm0 = 0: four 32-bit partial sums
     1.28 +    "pxor      %%xmm5,%%xmm5                   \n"  // xmm5 = 0: zero source for byte->word unpack
     1.29 +    LABELALIGN
     1.30 +  "1:                                          \n"
     1.31 +    "movdqa    " MEMACCESS(0) ",%%xmm1         \n"  // xmm1 = 16 bytes of src_a (aligned load)
     1.32 +    "lea       " MEMLEA(0x10, 0) ",%0          \n"  // src_a += 16 (MEMLEA presumably yields 0x10(%0))
     1.33 +    "movdqa    " MEMACCESS(1) ",%%xmm2         \n"  // xmm2 = 16 bytes of src_b (aligned load)
     1.34 +    "lea       " MEMLEA(0x10, 1) ",%1          \n"  // src_b += 16
     1.35 +    "sub       $0x10,%2                        \n"  // count -= 16; sets the flags tested by jg below
     1.36 +    "movdqa    %%xmm1,%%xmm3                   \n"  // keep a copy of a for the second subtract
     1.37 +    "psubusb   %%xmm2,%%xmm1                   \n"  // xmm1 = max(a - b, 0) per byte
     1.38 +    "psubusb   %%xmm3,%%xmm2                   \n"  // xmm2 = max(b - a, 0) per byte
     1.39 +    "por       %%xmm2,%%xmm1                   \n"  // xmm1 = |a - b| (one of the two is zero)
     1.40 +    "movdqa    %%xmm1,%%xmm2                   \n"
     1.41 +    "punpcklbw %%xmm5,%%xmm1                   \n"  // low 8 differences widened to 16 bits
     1.42 +    "punpckhbw %%xmm5,%%xmm2                   \n"  // high 8 differences widened to 16 bits
     1.43 +    "pmaddwd   %%xmm1,%%xmm1                   \n"  // square each word, add adjacent pairs -> 4 dwords
     1.44 +    "pmaddwd   %%xmm2,%%xmm2                   \n"
     1.45 +    "paddd     %%xmm1,%%xmm0                   \n"  // accumulate into the partial sums
     1.46 +    "paddd     %%xmm2,%%xmm0                   \n"
     1.47 +    "jg        1b                              \n"  // loop while count > 0; SSE ops above leave EFLAGS intact
     1.48 +
     1.49 +    "pshufd    $0xee,%%xmm0,%%xmm1             \n"  // bring upper two lanes down to lanes 0..1
     1.50 +    "paddd     %%xmm1,%%xmm0                   \n"
     1.51 +    "pshufd    $0x1,%%xmm0,%%xmm1              \n"  // bring lane 1 down to lane 0
     1.52 +    "paddd     %%xmm1,%%xmm0                   \n"  // lane 0 now holds the total
     1.53 +    "movd      %%xmm0,%3                       \n"  // sse = low 32 bits of xmm0
     1.54 +
     1.55 +  : "+r"(src_a),      // %0
     1.56 +    "+r"(src_b),      // %1
     1.57 +    "+r"(count),      // %2
     1.58 +    "=g"(sse)         // %3
     1.59 +  :
     1.60 +  : "memory", "cc"
     1.61 +#if defined(__SSE2__)
     1.62 +    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
     1.63 +#endif
     1.64 +  );  // NOLINT
     1.65 +  return sse;
     1.66 +}
    1.67 +
    1.68 +#endif  // defined(__x86_64__) || defined(__i386__)
    1.69 +
    1.70 +#if !defined(LIBYUV_DISABLE_X86) && \
    1.71 +    (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
    1.72 +#define HAS_HASHDJB2_SSE41
    1.73 +static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 };  // 33 ^ 16
    1.74 +static uvec32 kHashMul0 = {
    1.75 +  0x0c3525e1,  // 33 ^ 15
    1.76 +  0xa3476dc1,  // 33 ^ 14
    1.77 +  0x3b4039a1,  // 33 ^ 13
    1.78 +  0x4f5f0981,  // 33 ^ 12
    1.79 +};
    1.80 +static uvec32 kHashMul1 = {
    1.81 +  0x30f35d61,  // 33 ^ 11
    1.82 +  0x855cb541,  // 33 ^ 10
    1.83 +  0x040a9121,  // 33 ^ 9
    1.84 +  0x747c7101,  // 33 ^ 8
    1.85 +};
    1.86 +static uvec32 kHashMul2 = {
    1.87 +  0xec41d4e1,  // 33 ^ 7
    1.88 +  0x4cfa3cc1,  // 33 ^ 6
    1.89 +  0x025528a1,  // 33 ^ 5
    1.90 +  0x00121881,  // 33 ^ 4
    1.91 +};
    1.92 +static uvec32 kHashMul3 = {
    1.93 +  0x00008c61,  // 33 ^ 3
    1.94 +  0x00000441,  // 33 ^ 2
    1.95 +  0x00000021,  // 33 ^ 1
    1.96 +  0x00000001,  // 33 ^ 0
    1.97 +};
    1.98 +
     1.99 +uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
             // DJB2-style hash of a byte buffer, 16 bytes per loop pass, using the
             // SSE4.1 pmulld instruction.
             //
             // Per 16-byte block b[0..15] the update is
             //   hash = hash * 33^16 + b[0] * 33^15 + b[1] * 33^14 + ... + b[15] * 33^0
             // in 32-bit wrapping arithmetic, i.e. sixteen steps of
             // hash = hash * 33 + byte folded into one.
             //
             // src:   input bytes; loaded with movdqu, so no alignment is required.
             // count: number of bytes. The counter is tested only after the first
             //        pass, so at least 16 bytes are always read and a count that
             //        is not a multiple of 16 is effectively rounded up.
             //        NOTE(review): presumably callers pass a positive multiple of
             //        16 - confirm against call sites.
             // seed:  initial hash value.
             // Returns the low 32-bit lane of the accumulator after the last block.
    1.100 +  uint32 hash;
    1.101 +  asm volatile (  // NOLINT
    1.102 +    "movd      %2,%%xmm0                       \n"  // xmm0 lane 0 = seed, upper lanes zeroed
    1.103 +    "pxor      %%xmm7,%%xmm7                   \n"  // xmm7 = 0: zero source for the unpacks
    1.104 +    "movdqa    %4,%%xmm6                       \n"  // xmm6 = kHash16x33 (33^16 in lane 0)
    1.105 +    LABELALIGN
    1.106 +  "1:                                          \n"
    1.107 +    "movdqu    " MEMACCESS(0) ",%%xmm1         \n"  // xmm1 = next 16 source bytes (unaligned load)
    1.108 +    "lea       " MEMLEA(0x10, 0) ",%0          \n"  // src += 16 (MEMLEA presumably yields 0x10(%0))
    1.109 +    "pmulld    %%xmm6,%%xmm0                   \n"  // hash *= 33^16; zero lanes of xmm6 clear lanes 1..3
    1.110 +    "movdqa    %5,%%xmm5                       \n"  // xmm5 = kHashMul0
    1.111 +    "movdqa    %%xmm1,%%xmm2                   \n"
    1.112 +    "punpcklbw %%xmm7,%%xmm2                   \n"  // xmm2 = bytes 0..7 as 16-bit words
    1.113 +    "movdqa    %%xmm2,%%xmm3                   \n"
    1.114 +    "punpcklwd %%xmm7,%%xmm3                   \n"  // xmm3 = bytes 0..3 as 32-bit lanes
    1.115 +    "pmulld    %%xmm5,%%xmm3                   \n"  // bytes 0..3 * 33^15..33^12
    1.116 +    "movdqa    %6,%%xmm5                       \n"  // xmm5 = kHashMul1
    1.117 +    "movdqa    %%xmm2,%%xmm4                   \n"
    1.118 +    "punpckhwd %%xmm7,%%xmm4                   \n"  // xmm4 = bytes 4..7 as 32-bit lanes
    1.119 +    "pmulld    %%xmm5,%%xmm4                   \n"  // bytes 4..7 * 33^11..33^8
    1.120 +    "movdqa    %7,%%xmm5                       \n"  // xmm5 = kHashMul2
    1.121 +    "punpckhbw %%xmm7,%%xmm1                   \n"  // xmm1 = bytes 8..15 as 16-bit words
    1.122 +    "movdqa    %%xmm1,%%xmm2                   \n"
    1.123 +    "punpcklwd %%xmm7,%%xmm2                   \n"  // xmm2 = bytes 8..11 as 32-bit lanes
    1.124 +    "pmulld    %%xmm5,%%xmm2                   \n"  // bytes 8..11 * 33^7..33^4
    1.125 +    "movdqa    %8,%%xmm5                       \n"  // xmm5 = kHashMul3
    1.126 +    "punpckhwd %%xmm7,%%xmm1                   \n"  // xmm1 = bytes 12..15 as 32-bit lanes
    1.127 +    "pmulld    %%xmm5,%%xmm1                   \n"  // bytes 12..15 * 33^3..33^0
    1.128 +    "paddd     %%xmm4,%%xmm3                   \n"  // combine the four weighted groups...
    1.129 +    "paddd     %%xmm2,%%xmm1                   \n"
    1.130 +    "sub       $0x10,%1                        \n"  // count -= 16; sets the flags tested by jg below
    1.131 +    "paddd     %%xmm3,%%xmm1                   \n"  // ...into xmm1 (four partial sums)
    1.132 +    "pshufd    $0xe,%%xmm1,%%xmm2              \n"  // bring lanes 2..3 down to lanes 0..1
    1.133 +    "paddd     %%xmm2,%%xmm1                   \n"
    1.134 +    "pshufd    $0x1,%%xmm1,%%xmm2              \n"  // bring lane 1 down to lane 0
    1.135 +    "paddd     %%xmm2,%%xmm1                   \n"  // lane 0 of xmm1 = block contribution
    1.136 +    "paddd     %%xmm1,%%xmm0                   \n"  // hash += block contribution (lane 0 is the result)
    1.137 +    "jg        1b                              \n"  // loop while count > 0; SSE ops above leave EFLAGS intact
    1.138 +    "movd      %%xmm0,%3                       \n"  // hash = low 32 bits of xmm0
    1.139 +  : "+r"(src),        // %0
    1.140 +    "+r"(count),      // %1
    1.141 +    "+rm"(seed),      // %2
    1.142 +    "=g"(hash)        // %3
    1.143 +  : "m"(kHash16x33),  // %4
    1.144 +    "m"(kHashMul0),   // %5
    1.145 +    "m"(kHashMul1),   // %6
    1.146 +    "m"(kHashMul2),   // %7
    1.147 +    "m"(kHashMul3)    // %8
    1.148 +  : "memory", "cc"
    1.149 +#if defined(__SSE2__)
    1.150 +    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
    1.151 +#endif
    1.152 +  );  // NOLINT
    1.153 +  return hash;
    1.154 +}
   1.155 +#endif  // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
   1.156 +
   1.157 +#ifdef __cplusplus
   1.158 +}  // extern "C"
   1.159 +}  // namespace libyuv
   1.160 +#endif
   1.161 +

mercurial