1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/gfx/cairo/libpixman/src/pixman-x64-mmx-emulation.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,263 @@ 1.4 +#ifndef MMX_X64_H_INCLUDED 1.5 +#define MMX_X64_H_INCLUDED 1.6 + 1.7 +/* Implementation of x64 MMX substitition functions, before 1.8 + * pixman is reimplemented not to use __m64 type on Visual C++ 1.9 + * 1.10 + * Copyright (C)2009 by George Yohng 1.11 + * Released in public domain. 1.12 + */ 1.13 + 1.14 +#include <intrin.h> 1.15 + 1.16 +#define M64C(a) (*(const __m64 *)(&a)) 1.17 +#define M64U(a) (*(const unsigned long long *)(&a)) 1.18 + 1.19 +__inline __m64 1.20 +_m_from_int (int a) 1.21 +{ 1.22 + long long i64 = a; 1.23 + 1.24 + return M64C (i64); 1.25 +} 1.26 + 1.27 +__inline __m64 1.28 +_mm_setzero_si64 () 1.29 +{ 1.30 + long long i64 = 0; 1.31 + 1.32 + return M64C (i64); 1.33 +} 1.34 + 1.35 +__inline __m64 1.36 +_mm_set_pi32 (int i1, int i0) 1.37 +{ 1.38 + unsigned long long i64 = ((unsigned)i0) + (((unsigned long long)(unsigned)i1) << 32); 1.39 + 1.40 + return M64C (i64); 1.41 +} 1.42 + 1.43 +__inline void 1.44 +_m_empty () 1.45 +{ 1.46 +} 1.47 + 1.48 +__inline __m64 1.49 +_mm_set1_pi16 (short w) 1.50 +{ 1.51 + unsigned long long i64 = ((unsigned long long)(unsigned short)(w)) * 0x0001000100010001ULL; 1.52 + 1.53 + return M64C (i64); 1.54 +} 1.55 + 1.56 +__inline int 1.57 +_m_to_int (__m64 m) 1.58 +{ 1.59 + return m.m64_i32[0]; 1.60 +} 1.61 + 1.62 +__inline __m64 1.63 +_mm_movepi64_pi64 (__m128i a) 1.64 +{ 1.65 + return M64C (a.m128i_i64[0]); 1.66 +} 1.67 + 1.68 +__inline __m64 1.69 +_m_pand (__m64 a, __m64 b) 1.70 +{ 1.71 + unsigned long long i64 = M64U (a) & M64U (b); 1.72 + 1.73 + return M64C (i64); 1.74 +} 1.75 + 1.76 +__inline __m64 1.77 +_m_por (__m64 a, __m64 b) 1.78 +{ 1.79 + unsigned long long i64 = M64U (a) | M64U (b); 1.80 + 1.81 + return M64C (i64); 1.82 +} 1.83 + 1.84 +__inline __m64 1.85 +_m_pxor (__m64 a, __m64 b) 1.86 +{ 1.87 + unsigned long long i64 = M64U (a) ^ M64U (b); 1.88 + 1.89 + return M64C (i64); 1.90 +} 1.91 + 1.92 +__inline __m64 1.93 +_m_pmulhuw (__m64 a, __m64 b) /* unoptimized */ 1.94 +{ 1.95 + unsigned short d[4] = 1.96 + { 1.97 + (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]) >> 16), 1.98 + (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]) >> 16), 1.99 + (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]) >> 16), 1.100 + (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]) >> 16) 1.101 + }; 1.102 + 1.103 + return M64C (d[0]); 1.104 +} 1.105 + 1.106 +__inline __m64 1.107 +_m_pmullw2 (__m64 a, __m64 b) /* unoptimized */ 1.108 +{ 1.109 + unsigned short d[4] = 1.110 + { 1.111 + (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0])), 1.112 + (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1])), 1.113 + (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2])), 1.114 + (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3])) 1.115 + }; 1.116 + 1.117 + return M64C (d[0]); 1.118 +} 1.119 + 1.120 +__inline __m64 1.121 +_m_pmullw (__m64 a, __m64 b) /* unoptimized */ 1.122 +{ 1.123 + unsigned long long x = 1.124 + ((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]))) + 1.125 + (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]))) << 16) + 1.126 + (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]))) << 32) + 1.127 + (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))) << 48); 1.128 + 1.129 + return M64C (x); 1.130 +} 1.131 + 1.132 +__inline __m64 1.133 +_m_paddusb (__m64 a, __m64 b) /* unoptimized */ 1.134 +{ 1.135 + unsigned long long x = (M64U (a) & 0x00FF00FF00FF00FFULL) + 1.136 + (M64U (b) & 0x00FF00FF00FF00FFULL); 1.137 + 1.138 + unsigned long long y = ((M64U (a) >> 8) & 0x00FF00FF00FF00FFULL) + 1.139 + ((M64U (b) >> 8) & 0x00FF00FF00FF00FFULL); 1.140 + 1.141 + x |= ((x & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF; 1.142 + y |= ((y & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF; 1.143 + 1.144 + x = (x & 0x00FF00FF00FF00FFULL) | ((y & 0x00FF00FF00FF00FFULL) << 8); 1.145 + 1.146 + return M64C (x); 1.147 +} 1.148 + 1.149 +__inline __m64 1.150 +_m_paddusw (__m64 a, __m64 b) /* unoptimized */ 1.151 +{ 1.152 + unsigned long long x = (M64U (a) & 0x0000FFFF0000FFFFULL) + 1.153 + (M64U (b) & 0x0000FFFF0000FFFFULL); 1.154 + 1.155 + unsigned long long y = ((M64U (a) >> 16) & 0x0000FFFF0000FFFFULL) + 1.156 + ((M64U (b) >> 16) & 0x0000FFFF0000FFFFULL); 1.157 + 1.158 + x |= ((x & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF; 1.159 + y |= ((y & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF; 1.160 + 1.161 + x = (x & 0x0000FFFF0000FFFFULL) | ((y & 0x0000FFFF0000FFFFULL) << 16); 1.162 + 1.163 + return M64C (x); 1.164 +} 1.165 + 1.166 +__inline __m64 1.167 +_m_pshufw (__m64 a, int n) /* unoptimized */ 1.168 +{ 1.169 + unsigned short d[4] = 1.170 + { 1.171 + a.m64_u16[n & 3], 1.172 + a.m64_u16[(n >> 2) & 3], 1.173 + a.m64_u16[(n >> 4) & 3], 1.174 + a.m64_u16[(n >> 6) & 3] 1.175 + }; 1.176 + 1.177 + return M64C (d[0]); 1.178 +} 1.179 + 1.180 +__inline unsigned char 1.181 +sat16 (unsigned short d) 1.182 +{ 1.183 + if (d > 0xFF) return 0xFF; 1.184 + else return d & 0xFF; 1.185 +} 1.186 + 1.187 +__inline __m64 1.188 +_m_packuswb (__m64 m1, __m64 m2) /* unoptimized */ 1.189 +{ 1.190 + unsigned char d[8] = 1.191 + { 1.192 + sat16 (m1.m64_u16[0]), 1.193 + sat16 (m1.m64_u16[1]), 1.194 + sat16 (m1.m64_u16[2]), 1.195 + sat16 (m1.m64_u16[3]), 1.196 + sat16 (m2.m64_u16[0]), 1.197 + sat16 (m2.m64_u16[1]), 1.198 + sat16 (m2.m64_u16[2]), 1.199 + sat16 (m2.m64_u16[3]) 1.200 + }; 1.201 + 1.202 + return M64C (d[0]); 1.203 +} 1.204 + 1.205 +__inline __m64 _m_punpcklbw (__m64 m1, __m64 m2) /* unoptimized */ 1.206 +{ 1.207 + unsigned char d[8] = 1.208 + { 1.209 + m1.m64_u8[0], 1.210 + m2.m64_u8[0], 1.211 + m1.m64_u8[1], 1.212 + m2.m64_u8[1], 1.213 + m1.m64_u8[2], 1.214 + m2.m64_u8[2], 1.215 + m1.m64_u8[3], 1.216 + m2.m64_u8[3], 1.217 + }; 1.218 + 1.219 + return M64C (d[0]); 1.220 +} 1.221 + 1.222 +__inline __m64 _m_punpckhbw (__m64 m1, __m64 m2) /* unoptimized */ 1.223 +{ 1.224 + unsigned char d[8] = 1.225 + { 1.226 + m1.m64_u8[4], 1.227 + m2.m64_u8[4], 1.228 + m1.m64_u8[5], 1.229 + m2.m64_u8[5], 1.230 + m1.m64_u8[6], 1.231 + m2.m64_u8[6], 1.232 + m1.m64_u8[7], 1.233 + m2.m64_u8[7], 1.234 + }; 1.235 + 1.236 + return M64C (d[0]); 1.237 +} 1.238 + 1.239 +__inline __m64 _m_psrlwi (__m64 a, int n) /* unoptimized */ 1.240 +{ 1.241 + unsigned short d[4] = 1.242 + { 1.243 + a.m64_u16[0] >> n, 1.244 + a.m64_u16[1] >> n, 1.245 + a.m64_u16[2] >> n, 1.246 + a.m64_u16[3] >> n 1.247 + }; 1.248 + 1.249 + return M64C (d[0]); 1.250 +} 1.251 + 1.252 +__inline __m64 _m_psrlqi (__m64 m, int n) 1.253 +{ 1.254 + unsigned long long x = M64U (m) >> n; 1.255 + 1.256 + return M64C (x); 1.257 +} 1.258 + 1.259 +__inline __m64 _m_psllqi (__m64 m, int n) 1.260 +{ 1.261 + unsigned long long x = M64U (m) << n; 1.262 + 1.263 + return M64C (x); 1.264 +} 1.265 + 1.266 +#endif /* MMX_X64_H_INCLUDED */