gfx/cairo/libpixman/src/pixman-x64-mmx-emulation.h

author       Michael Schloh von Bennewitz <michael@schloh.com>
date         Thu, 22 Jan 2015 13:21:57 +0100
branch       TOR_BUG_9701
changeset    15:b8a032363ba2
permissions  -rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

#ifndef MMX_X64_H_INCLUDED
#define MMX_X64_H_INCLUDED

/* Implementation of x64 MMX substitution functions, before
 * pixman is reimplemented not to use __m64 type on Visual C++
 *
 * Copyright (C)2009 by George Yohng
 * Released in public domain.
 */

#include <intrin.h>

#define M64C(a) (*(const __m64 *)(&a))
#define M64U(a) (*(const unsigned long long *)(&a))
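
/* M64C() and M64U() reinterpret the storage of a 64-bit integer as an
 * __m64 or as an unsigned long long without copying; the emulation
 * routines below compute their results in plain integers and return them
 * through these casts.
 */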
__inline __m64
_m_from_int (int a)
{
    long long i64 = a;

    return M64C (i64);
}

__inline __m64
_mm_setzero_si64 ()
{
    long long i64 = 0;

    return M64C (i64);
}

__inline __m64
_mm_set_pi32 (int i1,   int i0)
{
    unsigned long long i64 = ((unsigned)i0) + (((unsigned long long)(unsigned)i1) << 32);

    return M64C (i64);
}

__inline void
_m_empty ()
{
}

__inline __m64
_mm_set1_pi16 (short w)
{
    unsigned long long i64 = ((unsigned long long)(unsigned short)(w)) * 0x0001000100010001ULL;

    return M64C (i64);
}
__inline int
_m_to_int (__m64 m)
{
    return m.m64_i32[0];
}

__inline __m64
_mm_movepi64_pi64 (__m128i a)
{
    return M64C (a.m128i_i64[0]);
}

__inline __m64
_m_pand (__m64 a, __m64 b)
{
    unsigned long long i64 = M64U (a) & M64U (b);

    return M64C (i64);
}

__inline __m64
_m_por (__m64 a, __m64 b)
{
    unsigned long long i64 = M64U (a) | M64U (b);

    return M64C (i64);
}

__inline __m64
_m_pxor (__m64 a, __m64 b)
{
    unsigned long long i64 = M64U (a) ^ M64U (b);

    return M64C (i64);
}

__inline __m64
_m_pmulhuw (__m64 a, __m64 b)        /* unoptimized */
{
    unsigned short d[4] =
    {
        (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]) >> 16),
        (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]) >> 16),
        (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]) >> 16),
        (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]) >> 16)
    };

    return M64C (d[0]);
}
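
/* _m_pmulhuw above keeps the high 16 bits of each unsigned 16x16 product;
 * the two variants below keep the low 16 bits instead, once through a
 * local array and once by assembling the 64-bit result directly.
 */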
__inline __m64
_m_pmullw2 (__m64 a, __m64 b)        /* unoptimized */
{
    unsigned short d[4] =
    {
        (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0])),
        (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1])),
        (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2])),
        (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))
    };

    return M64C (d[0]);
}

__inline __m64
_m_pmullw (__m64 a, __m64 b)        /* unoptimized */
{
    unsigned long long x =
        ((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0])))  +
        (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]))) << 16)  +
        (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]))) << 32)  +
        (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))) << 48);

    return M64C (x);
}
__inline __m64
_m_paddusb (__m64 a, __m64 b)        /* unoptimized */
{
    unsigned long long x = (M64U (a) & 0x00FF00FF00FF00FFULL) +
                           (M64U (b) & 0x00FF00FF00FF00FFULL);

    unsigned long long y = ((M64U (a) >> 8) & 0x00FF00FF00FF00FFULL) +
                           ((M64U (b) >> 8) & 0x00FF00FF00FF00FFULL);

    x |= ((x & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;
    y |= ((y & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;

    x = (x & 0x00FF00FF00FF00FFULL) | ((y & 0x00FF00FF00FF00FFULL) << 8);

    return M64C (x);
}
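
/* In _m_paddusb, even and odd bytes are summed in separate 16-bit lanes,
 * so each per-byte carry lands in the otherwise empty high byte of its
 * lane; multiplying those carries by 0xFF and ORing them back forces any
 * lane that overflowed to 0xFF, which is exactly unsigned saturation.
 */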
__inline __m64
_m_paddusw (__m64 a, __m64 b)        /* unoptimized */
{
    unsigned long long x = (M64U (a) & 0x0000FFFF0000FFFFULL) +
                           (M64U (b) & 0x0000FFFF0000FFFFULL);

    unsigned long long y = ((M64U (a) >> 16) & 0x0000FFFF0000FFFFULL) +
                           ((M64U (b) >> 16) & 0x0000FFFF0000FFFFULL);

    x |= ((x & 0xFFFF0000FFFF0000ULL) >> 16) * 0xFFFF;
    y |= ((y & 0xFFFF0000FFFF0000ULL) >> 16) * 0xFFFF;

    x = (x & 0x0000FFFF0000FFFFULL) | ((y & 0x0000FFFF0000FFFFULL) << 16);

    return M64C (x);
}
__inline __m64
_m_pshufw (__m64 a, int n)         /* unoptimized */
{
    unsigned short d[4] =
    {
        a.m64_u16[n & 3],
        a.m64_u16[(n >> 2) & 3],
        a.m64_u16[(n >> 4) & 3],
        a.m64_u16[(n >> 6) & 3]
    };

    return M64C (d[0]);
}
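
/* In _m_pshufw, each 2-bit field of n selects the source word for the
 * corresponding destination word, matching the pshufw immediate encoding.
 */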
__inline unsigned char
sat16 (unsigned short d)
{
    if (d > 0xFF) return 0xFF;
    else return d & 0xFF;
}

__inline __m64
_m_packuswb (__m64 m1, __m64 m2)          /* unoptimized */
{
    unsigned char d[8] =
    {
        sat16 (m1.m64_u16[0]),
        sat16 (m1.m64_u16[1]),
        sat16 (m1.m64_u16[2]),
        sat16 (m1.m64_u16[3]),
        sat16 (m2.m64_u16[0]),
        sat16 (m2.m64_u16[1]),
        sat16 (m2.m64_u16[2]),
        sat16 (m2.m64_u16[3])
    };

    return M64C (d[0]);
}

__inline __m64 _m_punpcklbw (__m64 m1, __m64 m2)          /* unoptimized */
{
    unsigned char d[8] =
    {
        m1.m64_u8[0],
        m2.m64_u8[0],
        m1.m64_u8[1],
        m2.m64_u8[1],
        m1.m64_u8[2],
        m2.m64_u8[2],
        m1.m64_u8[3],
        m2.m64_u8[3],
    };

    return M64C (d[0]);
}

__inline __m64 _m_punpckhbw (__m64 m1, __m64 m2)          /* unoptimized */
{
    unsigned char d[8] =
    {
        m1.m64_u8[4],
        m2.m64_u8[4],
        m1.m64_u8[5],
        m2.m64_u8[5],
        m1.m64_u8[6],
        m2.m64_u8[6],
        m1.m64_u8[7],
        m2.m64_u8[7],
    };

    return M64C (d[0]);
}

__inline __m64 _m_psrlwi (__m64 a, int n)       /* unoptimized */
{
    unsigned short d[4] =
    {
        a.m64_u16[0] >> n,
        a.m64_u16[1] >> n,
        a.m64_u16[2] >> n,
        a.m64_u16[3] >> n
    };

    return M64C (d[0]);
}

__inline __m64 _m_psrlqi (__m64 m, int n)
{
    unsigned long long x = M64U (m) >> n;

    return M64C (x);
}

__inline __m64 _m_psllqi (__m64 m, int n)
{
    unsigned long long x = M64U (m) << n;

    return M64C (x);
}

#endif /* MMX_X64_H_INCLUDED */
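
For reference, a minimal sketch of how this shim is exercised on an x64 MSVC
build, where <intrin.h> supplies the __m64 union but not the MMX intrinsics
themselves; the test program and pixel values are illustrative only and are
not part of pixman:

    #include <stdio.h>
    #include "pixman-x64-mmx-emulation.h"

    int
    main (void)
    {
        /* Two packed groups of 8-bit channels (values chosen only for illustration). */
        __m64 a = _mm_set_pi32 (0x80FF40C0, 0x01020304);
        __m64 b = _mm_set_pi32 (0x90011050, 0x05060708);

        /* Saturating unsigned byte add; the 0x80+0x90, 0xFF+0x01 and
         * 0xC0+0x50 lanes clamp to 0xFF instead of wrapping. */
        __m64 sum = _m_paddusb (a, b);

        printf ("sum = %016llx\n", M64U (sum));

        _m_empty ();
        return 0;
    }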
