gfx/cairo/libpixman/src/pixman-x64-mmx-emulation.h

#ifndef MMX_X64_H_INCLUDED
#define MMX_X64_H_INCLUDED

/* Implementation of x64 MMX substitution functions, used until
 * pixman is reimplemented not to use the __m64 type on Visual C++.
 *
 * Copyright (C) 2009 by George Yohng
 * Released into the public domain.
 */

#include <intrin.h>

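/* Both macros reinterpret the storage of a 64-bit integer lvalue in
 * place: M64C as an __m64 value and M64U as an unsigned long long,
 * avoiding a copy through memcpy or a union.
 */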
#define M64C(a) (*(const __m64 *)(&a))
#define M64U(a) (*(const unsigned long long *)(&a))

__inline __m64
_m_from_int (int a)
{
    long long i64 = a;

    return M64C (i64);
}

__inline __m64
_mm_setzero_si64 (void)
{
    long long i64 = 0;

    return M64C (i64);
}

__inline __m64
_mm_set_pi32 (int i1, int i0)
{
    unsigned long long i64 = ((unsigned)i0) + (((unsigned long long)(unsigned)i1) << 32);

    return M64C (i64);
}

__inline void
_m_empty (void)
{
}

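/* Broadcast one 16-bit value to all four words: multiplying by
 * 0x0001000100010001 replicates the low word into each 16-bit lane.
 */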
__inline __m64
_mm_set1_pi16 (short w)
{
    unsigned long long i64 = ((unsigned long long)(unsigned short)(w)) * 0x0001000100010001ULL;

    return M64C (i64);
}

__inline int
_m_to_int (__m64 m)
{
    return m.m64_i32[0];
}

__inline __m64
_mm_movepi64_pi64 (__m128i a)
{
    return M64C (a.m128i_i64[0]);
}

__inline __m64
_m_pand (__m64 a, __m64 b)
{
    unsigned long long i64 = M64U (a) & M64U (b);

    return M64C (i64);
}

__inline __m64
_m_por (__m64 a, __m64 b)
{
    unsigned long long i64 = M64U (a) | M64U (b);

    return M64C (i64);
}

__inline __m64
_m_pxor (__m64 a, __m64 b)
{
    unsigned long long i64 = M64U (a) ^ M64U (b);

    return M64C (i64);
}

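/* PMULHUW: keep the high 16 bits of each unsigned 16x16-bit product. */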
__inline __m64
_m_pmulhuw (__m64 a, __m64 b) /* unoptimized */
{
    unsigned short d[4] =
    {
        (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]) >> 16),
        (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]) >> 16),
        (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]) >> 16),
        (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]) >> 16)
    };

    return M64C (d[0]);
}

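/* PMULLW: keep the low 16 bits of each product.  The two variants below
 * compute the same result, one through a temporary array and one by
 * assembling the packed 64-bit value directly.
 */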
__inline __m64
_m_pmullw2 (__m64 a, __m64 b) /* unoptimized */
{
    unsigned short d[4] =
    {
        (unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0])),
        (unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1])),
        (unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2])),
        (unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))
    };

    return M64C (d[0]);
}

__inline __m64
_m_pmullw (__m64 a, __m64 b) /* unoptimized */
{
    unsigned long long x =
        ((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]))) +
        (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]))) << 16) +
        (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]))) << 32) +
        (((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))) << 48);

    return M64C (x);
}

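/* PADDUSB: per-byte add with unsigned saturation, done SWAR-style.
 * Even and odd bytes are summed separately with 8 bits of headroom;
 * a sum that carries into bit 8 sets that lane's high byte, and
 * multiplying the carry byte by 0xFF smears 0xFF back over the lane,
 * e.g. 0xF0 + 0x20 = 0x110 saturates to 0xFF.
 */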
__inline __m64
_m_paddusb (__m64 a, __m64 b) /* unoptimized */
{
    unsigned long long x = (M64U (a) & 0x00FF00FF00FF00FFULL) +
                           (M64U (b) & 0x00FF00FF00FF00FFULL);

    unsigned long long y = ((M64U (a) >> 8) & 0x00FF00FF00FF00FFULL) +
                           ((M64U (b) >> 8) & 0x00FF00FF00FF00FFULL);

    x |= ((x & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;
    y |= ((y & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;

    x = (x & 0x00FF00FF00FF00FFULL) | ((y & 0x00FF00FF00FF00FFULL) << 8);

    return M64C (x);
}

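/* PADDUSW: the same saturation technique applied to 16-bit lanes. */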
__inline __m64
_m_paddusw (__m64 a, __m64 b) /* unoptimized */
{
    unsigned long long x = (M64U (a) & 0x0000FFFF0000FFFFULL) +
                           (M64U (b) & 0x0000FFFF0000FFFFULL);

    unsigned long long y = ((M64U (a) >> 16) & 0x0000FFFF0000FFFFULL) +
                           ((M64U (b) >> 16) & 0x0000FFFF0000FFFFULL);

    x |= ((x & 0xFFFF0000FFFF0000ULL) >> 16) * 0xFFFF;
    y |= ((y & 0xFFFF0000FFFF0000ULL) >> 16) * 0xFFFF;

    x = (x & 0x0000FFFF0000FFFFULL) | ((y & 0x0000FFFF0000FFFFULL) << 16);

    return M64C (x);
}

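/* PSHUFW: each 2-bit field of n selects which source word to place in
 * the corresponding destination word, lowest field first.
 */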
__inline __m64
_m_pshufw (__m64 a, int n) /* unoptimized */
{
    unsigned short d[4] =
    {
        a.m64_u16[n & 3],
        a.m64_u16[(n >> 2) & 3],
        a.m64_u16[(n >> 4) & 3],
        a.m64_u16[(n >> 6) & 3]
    };

    return M64C (d[0]);
}

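/* Clamp a 16-bit value to the 0..255 range; helper for _m_packuswb. */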
__inline unsigned char
sat16 (unsigned short d)
{
    if (d > 0xFF) return 0xFF;
    else return d & 0xFF;
}

__inline __m64
_m_packuswb (__m64 m1, __m64 m2) /* unoptimized */
{
    unsigned char d[8] =
    {
        sat16 (m1.m64_u16[0]),
        sat16 (m1.m64_u16[1]),
        sat16 (m1.m64_u16[2]),
        sat16 (m1.m64_u16[3]),
        sat16 (m2.m64_u16[0]),
        sat16 (m2.m64_u16[1]),
        sat16 (m2.m64_u16[2]),
        sat16 (m2.m64_u16[3])
    };

    return M64C (d[0]);
}

__inline __m64
_m_punpcklbw (__m64 m1, __m64 m2) /* unoptimized */
{
    unsigned char d[8] =
    {
        m1.m64_u8[0],
        m2.m64_u8[0],
        m1.m64_u8[1],
        m2.m64_u8[1],
        m1.m64_u8[2],
        m2.m64_u8[2],
        m1.m64_u8[3],
        m2.m64_u8[3],
    };

    return M64C (d[0]);
}

__inline __m64
_m_punpckhbw (__m64 m1, __m64 m2) /* unoptimized */
{
    unsigned char d[8] =
    {
        m1.m64_u8[4],
        m2.m64_u8[4],
        m1.m64_u8[5],
        m2.m64_u8[5],
        m1.m64_u8[6],
        m2.m64_u8[6],
        m1.m64_u8[7],
        m2.m64_u8[7],
    };

    return M64C (d[0]);
}

__inline __m64
_m_psrlwi (__m64 a, int n) /* unoptimized */
{
    unsigned short d[4] =
    {
        (unsigned short)(a.m64_u16[0] >> n),
        (unsigned short)(a.m64_u16[1] >> n),
        (unsigned short)(a.m64_u16[2] >> n),
        (unsigned short)(a.m64_u16[3] >> n)
    };

    return M64C (d[0]);
}

__inline __m64
_m_psrlqi (__m64 m, int n)
{
    unsigned long long x = M64U (m) >> n;

    return M64C (x);
}

__inline __m64
_m_psllqi (__m64 m, int n)
{
    unsigned long long x = M64U (m) << n;

    return M64C (x);
}

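/* Usage sketch (illustrative; the pixel values are made up and not from
 * the original header): add two ARGB32 pixels with per-channel unsigned
 * saturation using the emulated intrinsics.
 *
 *     __m64 a = _m_from_int ((int)0x80FF4010);
 *     __m64 b = _m_from_int ((int)0x90203040);
 *     int   r = _m_to_int (_m_paddusb (a, b));   // bits: 0xFFFF7050
 *     _m_empty ();                               // no-op in the emulation
 */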
#endif /* MMX_X64_H_INCLUDED */
