gfx/cairo/libpixman/src/pixman-x64-mmx-emulation.h

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 #ifndef MMX_X64_H_INCLUDED
michael@0 2 #define MMX_X64_H_INCLUDED
michael@0 3
michael@0 4 /* Implementation of x64 MMX substitution functions, before
michael@0 5 * pixman is reimplemented not to use __m64 type on Visual C++
michael@0 6 *
michael@0 7 * Copyright (C)2009 by George Yohng
michael@0 8 * Released in public domain.
michael@0 9 */
michael@0 10
michael@0 11 #include <intrin.h>
michael@0 12
/* M64C(a): read the storage of lvalue `a` as a const __m64.
 * M64U(a): read the storage of lvalue `a` as a const unsigned 64-bit integer.
 * In both cases `a` must be an addressable object of at least 8 bytes; the
 * argument is parenthesized so any lvalue expression can be passed safely. */
#define M64C(a) (*(const __m64 *)(&(a)))
#define M64U(a) (*(const unsigned long long *)(&(a)))
michael@0 15
michael@0 16 __inline __m64
michael@0 17 _m_from_int (int a)
michael@0 18 {
michael@0 19 long long i64 = a;
michael@0 20
michael@0 21 return M64C (i64);
michael@0 22 }
michael@0 23
michael@0 24 __inline __m64
michael@0 25 _mm_setzero_si64 ()
michael@0 26 {
michael@0 27 long long i64 = 0;
michael@0 28
michael@0 29 return M64C (i64);
michael@0 30 }
michael@0 31
michael@0 32 __inline __m64
michael@0 33 _mm_set_pi32 (int i1, int i0)
michael@0 34 {
michael@0 35 unsigned long long i64 = ((unsigned)i0) + (((unsigned long long)(unsigned)i1) << 32);
michael@0 36
michael@0 37 return M64C (i64);
michael@0 38 }
michael@0 39
/* No-op stand-in for _m_empty.  The functions in this header operate on
 * ordinary integer values, so there is nothing to reset here.
 * Declared with (void) so the definition also serves as a prototype. */
__inline void
_m_empty (void)
{
}
michael@0 44
michael@0 45 __inline __m64
michael@0 46 _mm_set1_pi16 (short w)
michael@0 47 {
michael@0 48 unsigned long long i64 = ((unsigned long long)(unsigned short)(w)) * 0x0001000100010001ULL;
michael@0 49
michael@0 50 return M64C (i64);
michael@0 51 }
michael@0 52
michael@0 53 __inline int
michael@0 54 _m_to_int (__m64 m)
michael@0 55 {
michael@0 56 return m.m64_i32[0];
michael@0 57 }
michael@0 58
michael@0 59 __inline __m64
michael@0 60 _mm_movepi64_pi64 (__m128i a)
michael@0 61 {
michael@0 62 return M64C (a.m128i_i64[0]);
michael@0 63 }
michael@0 64
michael@0 65 __inline __m64
michael@0 66 _m_pand (__m64 a, __m64 b)
michael@0 67 {
michael@0 68 unsigned long long i64 = M64U (a) & M64U (b);
michael@0 69
michael@0 70 return M64C (i64);
michael@0 71 }
michael@0 72
michael@0 73 __inline __m64
michael@0 74 _m_por (__m64 a, __m64 b)
michael@0 75 {
michael@0 76 unsigned long long i64 = M64U (a) | M64U (b);
michael@0 77
michael@0 78 return M64C (i64);
michael@0 79 }
michael@0 80
michael@0 81 __inline __m64
michael@0 82 _m_pxor (__m64 a, __m64 b)
michael@0 83 {
michael@0 84 unsigned long long i64 = M64U (a) ^ M64U (b);
michael@0 85
michael@0 86 return M64C (i64);
michael@0 87 }
michael@0 88
michael@0 89 __inline __m64
michael@0 90 _m_pmulhuw (__m64 a, __m64 b) /* unoptimized */
michael@0 91 {
michael@0 92 unsigned short d[4] =
michael@0 93 {
michael@0 94 (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]) >> 16),
michael@0 95 (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]) >> 16),
michael@0 96 (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]) >> 16),
michael@0 97 (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]) >> 16)
michael@0 98 };
michael@0 99
michael@0 100 return M64C (d[0]);
michael@0 101 }
michael@0 102
michael@0 103 __inline __m64
michael@0 104 _m_pmullw2 (__m64 a, __m64 b) /* unoptimized */
michael@0 105 {
michael@0 106 unsigned short d[4] =
michael@0 107 {
michael@0 108 (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0])),
michael@0 109 (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1])),
michael@0 110 (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2])),
michael@0 111 (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))
michael@0 112 };
michael@0 113
michael@0 114 return M64C (d[0]);
michael@0 115 }
michael@0 116
michael@0 117 __inline __m64
michael@0 118 _m_pmullw (__m64 a, __m64 b) /* unoptimized */
michael@0 119 {
michael@0 120 unsigned long long x =
michael@0 121 ((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]))) +
michael@0 122 (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]))) << 16) +
michael@0 123 (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]))) << 32) +
michael@0 124 (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))) << 48);
michael@0 125
michael@0 126 return M64C (x);
michael@0 127 }
michael@0 128
michael@0 129 __inline __m64
michael@0 130 _m_paddusb (__m64 a, __m64 b) /* unoptimized */
michael@0 131 {
michael@0 132 unsigned long long x = (M64U (a) & 0x00FF00FF00FF00FFULL) +
michael@0 133 (M64U (b) & 0x00FF00FF00FF00FFULL);
michael@0 134
michael@0 135 unsigned long long y = ((M64U (a) >> 8) & 0x00FF00FF00FF00FFULL) +
michael@0 136 ((M64U (b) >> 8) & 0x00FF00FF00FF00FFULL);
michael@0 137
michael@0 138 x |= ((x & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;
michael@0 139 y |= ((y & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;
michael@0 140
michael@0 141 x = (x & 0x00FF00FF00FF00FFULL) | ((y & 0x00FF00FF00FF00FFULL) << 8);
michael@0 142
michael@0 143 return M64C (x);
michael@0 144 }
michael@0 145
michael@0 146 __inline __m64
michael@0 147 _m_paddusw (__m64 a, __m64 b) /* unoptimized */
michael@0 148 {
michael@0 149 unsigned long long x = (M64U (a) & 0x0000FFFF0000FFFFULL) +
michael@0 150 (M64U (b) & 0x0000FFFF0000FFFFULL);
michael@0 151
michael@0 152 unsigned long long y = ((M64U (a) >> 16) & 0x0000FFFF0000FFFFULL) +
michael@0 153 ((M64U (b) >> 16) & 0x0000FFFF0000FFFFULL);
michael@0 154
michael@0 155 x |= ((x & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF;
michael@0 156 y |= ((y & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF;
michael@0 157
michael@0 158 x = (x & 0x0000FFFF0000FFFFULL) | ((y & 0x0000FFFF0000FFFFULL) << 16);
michael@0 159
michael@0 160 return M64C (x);
michael@0 161 }
michael@0 162
michael@0 163 __inline __m64
michael@0 164 _m_pshufw (__m64 a, int n) /* unoptimized */
michael@0 165 {
michael@0 166 unsigned short d[4] =
michael@0 167 {
michael@0 168 a.m64_u16[n & 3],
michael@0 169 a.m64_u16[(n >> 2) & 3],
michael@0 170 a.m64_u16[(n >> 4) & 3],
michael@0 171 a.m64_u16[(n >> 6) & 3]
michael@0 172 };
michael@0 173
michael@0 174 return M64C (d[0]);
michael@0 175 }
michael@0 176
/* Narrows one 16-bit word to an unsigned byte the way PACKUSWB does.
 *
 * `d` carries the raw bits of a SIGNED 16-bit word:
 *   - sign bit set (0x8000..0xFFFF, i.e. negative) -> 0x00
 *   - 0x0100..0x7FFF                               -> 0xFF
 *   - 0x0000..0x00FF                               -> unchanged
 *
 * Internal helper: given internal linkage so this generic name is not
 * exported from every translation unit that includes the header. */
static __inline unsigned char
sat16 (unsigned short d)
{
    if (d & 0x8000)
        return 0x00;

    if (d > 0xFF)
        return 0xFF;

    return (unsigned char)d;
}
michael@0 183
michael@0 184 __inline __m64
michael@0 185 _m_packuswb (__m64 m1, __m64 m2) /* unoptimized */
michael@0 186 {
michael@0 187 unsigned char d[8] =
michael@0 188 {
michael@0 189 sat16 (m1.m64_u16[0]),
michael@0 190 sat16 (m1.m64_u16[1]),
michael@0 191 sat16 (m1.m64_u16[2]),
michael@0 192 sat16 (m1.m64_u16[3]),
michael@0 193 sat16 (m2.m64_u16[0]),
michael@0 194 sat16 (m2.m64_u16[1]),
michael@0 195 sat16 (m2.m64_u16[2]),
michael@0 196 sat16 (m2.m64_u16[3])
michael@0 197 };
michael@0 198
michael@0 199 return M64C (d[0]);
michael@0 200 }
michael@0 201
michael@0 202 __inline __m64 _m_punpcklbw (__m64 m1, __m64 m2) /* unoptimized */
michael@0 203 {
michael@0 204 unsigned char d[8] =
michael@0 205 {
michael@0 206 m1.m64_u8[0],
michael@0 207 m2.m64_u8[0],
michael@0 208 m1.m64_u8[1],
michael@0 209 m2.m64_u8[1],
michael@0 210 m1.m64_u8[2],
michael@0 211 m2.m64_u8[2],
michael@0 212 m1.m64_u8[3],
michael@0 213 m2.m64_u8[3],
michael@0 214 };
michael@0 215
michael@0 216 return M64C (d[0]);
michael@0 217 }
michael@0 218
michael@0 219 __inline __m64 _m_punpckhbw (__m64 m1, __m64 m2) /* unoptimized */
michael@0 220 {
michael@0 221 unsigned char d[8] =
michael@0 222 {
michael@0 223 m1.m64_u8[4],
michael@0 224 m2.m64_u8[4],
michael@0 225 m1.m64_u8[5],
michael@0 226 m2.m64_u8[5],
michael@0 227 m1.m64_u8[6],
michael@0 228 m2.m64_u8[6],
michael@0 229 m1.m64_u8[7],
michael@0 230 m2.m64_u8[7],
michael@0 231 };
michael@0 232
michael@0 233 return M64C (d[0]);
michael@0 234 }
michael@0 235
michael@0 236 __inline __m64 _m_psrlwi (__m64 a, int n) /* unoptimized */
michael@0 237 {
michael@0 238 unsigned short d[4] =
michael@0 239 {
michael@0 240 a.m64_u16[0] >> n,
michael@0 241 a.m64_u16[1] >> n,
michael@0 242 a.m64_u16[2] >> n,
michael@0 243 a.m64_u16[3] >> n
michael@0 244 };
michael@0 245
michael@0 246 return M64C (d[0]);
michael@0 247 }
michael@0 248
michael@0 249 __inline __m64 _m_psrlqi (__m64 m, int n)
michael@0 250 {
michael@0 251 unsigned long long x = M64U (m) >> n;
michael@0 252
michael@0 253 return M64C (x);
michael@0 254 }
michael@0 255
michael@0 256 __inline __m64 _m_psllqi (__m64 m, int n)
michael@0 257 {
michael@0 258 unsigned long long x = M64U (m) << n;
michael@0 259
michael@0 260 return M64C (x);
michael@0 261 }
michael@0 262
michael@0 263 #endif /* MMX_X64_H_INCLUDED */

mercurial