gfx/cairo/libpixman/src/pixman-x64-mmx-emulation.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/gfx/cairo/libpixman/src/pixman-x64-mmx-emulation.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,263 @@
     1.4 +#ifndef MMX_X64_H_INCLUDED
     1.5 +#define MMX_X64_H_INCLUDED
     1.6 +
     1.7 +/* Implementation of x64 MMX substitution functions, before
     1.8 + * pixman is reimplemented not to use __m64 type on Visual C++
     1.9 + *
    1.10 + * Copyright (C)2009 by George Yohng
    1.11 + * Released in public domain.
    1.12 + */
    1.13 +
    1.14 +#include <intrin.h>
    1.15 +
    /* Reinterpret the storage of lvalue `a` as a read-only __m64.
     * The macro takes the address of its argument, so `a` must be an
     * addressable object that is at least as large as the target type. */
    #define M64C(a) (*(const __m64 *)(&(a)))
    /* Reinterpret the storage of lvalue `a` as a read-only
     * unsigned long long; the same addressing requirement applies. */
    #define M64U(a) (*(const unsigned long long *)(&(a)))
    1.18 +
    1.19 +__inline __m64
    1.20 +_m_from_int (int a)
    1.21 +{
    1.22 +    long long i64 = a;
    1.23 +
    1.24 +    return M64C (i64);
    1.25 +}
    1.26 +
    1.27 +__inline __m64
    1.28 +_mm_setzero_si64 ()
    1.29 +{
    1.30 +    long long i64 = 0;
    1.31 +
    1.32 +    return M64C (i64);
    1.33 +}
    1.34 +
    1.35 +__inline __m64
    1.36 +_mm_set_pi32 (int i1,   int i0)
    1.37 +{
    1.38 +    unsigned long long i64 = ((unsigned)i0) + (((unsigned long long)(unsigned)i1) << 32);
    1.39 +
    1.40 +    return M64C (i64);
    1.41 +}
    1.42 +
    /* No-op: performs no work and returns. */
    __inline void
    _m_empty ()
    {
        return;
    }
    1.47 +
    1.48 +__inline __m64
    1.49 +_mm_set1_pi16 (short w)
    1.50 +{
    1.51 +    unsigned long long i64 = ((unsigned long long)(unsigned short)(w)) * 0x0001000100010001ULL;
    1.52 +
    1.53 +    return M64C (i64);
    1.54 +}
    1.55 +
    1.56 +__inline int
    1.57 +_m_to_int (__m64 m)
    1.58 +{
    1.59 +    return m.m64_i32[0];
    1.60 +}
    1.61 +
    1.62 +__inline __m64
    1.63 +_mm_movepi64_pi64 (__m128i a)
    1.64 +{
    1.65 +    return M64C (a.m128i_i64[0]);
    1.66 +}
    1.67 +
    1.68 +__inline __m64
    1.69 +_m_pand (__m64 a, __m64 b)
    1.70 +{
    1.71 +    unsigned long long i64 = M64U (a) & M64U (b);
    1.72 +
    1.73 +    return M64C (i64);
    1.74 +}
    1.75 +
    1.76 +__inline __m64
    1.77 +_m_por (__m64 a, __m64 b)
    1.78 +{
    1.79 +    unsigned long long i64 = M64U (a) | M64U (b);
    1.80 +
    1.81 +    return M64C (i64);
    1.82 +}
    1.83 +
    1.84 +__inline __m64
    1.85 +_m_pxor (__m64 a, __m64 b)
    1.86 +{
    1.87 +    unsigned long long i64 = M64U (a) ^ M64U (b);
    1.88 +
    1.89 +    return M64C (i64);
    1.90 +}
    1.91 +
    1.92 +__inline __m64
    1.93 +_m_pmulhuw (__m64 a, __m64 b)        /* unoptimized */
    1.94 +{
    1.95 +    unsigned short d[4] =
    1.96 +    {
    1.97 +	(unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0]) >> 16),
    1.98 +	(unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]) >> 16),
    1.99 +	(unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]) >> 16),
   1.100 +	(unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]) >> 16)
   1.101 +    };
   1.102 +
   1.103 +    return M64C (d[0]);
   1.104 +}
   1.105 +
   1.106 +__inline __m64
   1.107 +_m_pmullw2 (__m64 a, __m64 b)        /* unoptimized */
   1.108 +{
   1.109 +    unsigned short d[4] =
   1.110 +    {
   1.111 +	(unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0])),
   1.112 +	(unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1])),
   1.113 +	(unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2])),
   1.114 +	(unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))
   1.115 +    };
   1.116 +
   1.117 +    return M64C (d[0]);
   1.118 +}
   1.119 +
   1.120 +__inline __m64
   1.121 +_m_pmullw (__m64 a, __m64 b)        /* unoptimized */
   1.122 +{
   1.123 +    unsigned long long x =
   1.124 +	((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[0]) * b.m64_u16[0])))  +
   1.125 +	(((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[1]) * b.m64_u16[1]))) << 16)  +
   1.126 +	(((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[2]) * b.m64_u16[2]))) << 32)  +
   1.127 +	(((unsigned long long)(unsigned short)((((unsigned)a.m64_u16[3]) * b.m64_u16[3]))) << 48);
   1.128 +
   1.129 +    return M64C (x);
   1.130 +}
   1.131 +
   1.132 +__inline __m64
   1.133 +_m_paddusb (__m64 a, __m64 b)        /* unoptimized */
   1.134 +{
   1.135 +    unsigned long long x = (M64U (a) & 0x00FF00FF00FF00FFULL) +
   1.136 +                           (M64U (b) & 0x00FF00FF00FF00FFULL);
   1.137 +
   1.138 +    unsigned long long y = ((M64U (a) >> 8) & 0x00FF00FF00FF00FFULL) +
   1.139 +                           ((M64U (b) >> 8) & 0x00FF00FF00FF00FFULL);
   1.140 +
   1.141 +    x |= ((x & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;
   1.142 +    y |= ((y & 0xFF00FF00FF00FF00ULL) >> 8) * 0xFF;
   1.143 +
   1.144 +    x = (x & 0x00FF00FF00FF00FFULL) | ((y & 0x00FF00FF00FF00FFULL) << 8);
   1.145 +
   1.146 +    return M64C (x);
   1.147 +}
   1.148 +
   1.149 +__inline __m64
   1.150 +_m_paddusw (__m64 a, __m64 b)        /* unoptimized */
   1.151 +{
   1.152 +    unsigned long long x = (M64U (a) & 0x0000FFFF0000FFFFULL) +
   1.153 +                           (M64U (b) & 0x0000FFFF0000FFFFULL);
   1.154 +
   1.155 +    unsigned long long y = ((M64U (a) >> 16) & 0x0000FFFF0000FFFFULL) +
   1.156 +                           ((M64U (b) >> 16) & 0x0000FFFF0000FFFFULL);
   1.157 +
   1.158 +    x |= ((x & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF;
   1.159 +    y |= ((y & 0xFFFF0000FFFF0000) >> 16) * 0xFFFF;
   1.160 +
   1.161 +    x = (x & 0x0000FFFF0000FFFFULL) | ((y & 0x0000FFFF0000FFFFULL) << 16);
   1.162 +
   1.163 +    return M64C (x);
   1.164 +}
   1.165 +
   1.166 +__inline __m64
   1.167 +_m_pshufw (__m64 a, int n)         /* unoptimized */
   1.168 +{
   1.169 +    unsigned short d[4] =
   1.170 +    {
   1.171 +	a.m64_u16[n & 3],
   1.172 +	a.m64_u16[(n >> 2) & 3],
   1.173 +	a.m64_u16[(n >> 4) & 3],
   1.174 +	a.m64_u16[(n >> 6) & 3]
   1.175 +    };
   1.176 +
   1.177 +    return M64C (d[0]);
   1.178 +}
   1.179 +
   /* Clamp an unsigned 16-bit value to the range of an unsigned byte:
    * values above 0xFF become 0xFF, everything else is returned unchanged.
    *
    * Declared static because this is a private helper with a generic name
    * living in a header; internal linkage keeps it out of the global
    * namespace and gives each including file its own definition. */
   static __inline unsigned char
   sat16 (unsigned short d)
   {
       return (unsigned char)(d > 0xFF ? 0xFF : d);
   }
   1.186 +
   1.187 +__inline __m64
   1.188 +_m_packuswb (__m64 m1, __m64 m2)          /* unoptimized */
   1.189 +{
   1.190 +    unsigned char d[8] =
   1.191 +    {
   1.192 +	sat16 (m1.m64_u16[0]),
   1.193 +	sat16 (m1.m64_u16[1]),
   1.194 +	sat16 (m1.m64_u16[2]),
   1.195 +	sat16 (m1.m64_u16[3]),
   1.196 +	sat16 (m2.m64_u16[0]),
   1.197 +	sat16 (m2.m64_u16[1]),
   1.198 +	sat16 (m2.m64_u16[2]),
   1.199 +	sat16 (m2.m64_u16[3])
   1.200 +    };
   1.201 +
   1.202 +    return M64C (d[0]);
   1.203 +}
   1.204 +
   1.205 +__inline __m64 _m_punpcklbw (__m64 m1, __m64 m2)          /* unoptimized */
   1.206 +{
   1.207 +    unsigned char d[8] =
   1.208 +    {
   1.209 +	m1.m64_u8[0],
   1.210 +	m2.m64_u8[0],
   1.211 +	m1.m64_u8[1],
   1.212 +	m2.m64_u8[1],
   1.213 +	m1.m64_u8[2],
   1.214 +	m2.m64_u8[2],
   1.215 +	m1.m64_u8[3],
   1.216 +	m2.m64_u8[3],
   1.217 +    };
   1.218 +
   1.219 +    return M64C (d[0]);
   1.220 +}
   1.221 +
   1.222 +__inline __m64 _m_punpckhbw (__m64 m1, __m64 m2)          /* unoptimized */
   1.223 +{
   1.224 +    unsigned char d[8] =
   1.225 +    {
   1.226 +	m1.m64_u8[4],
   1.227 +	m2.m64_u8[4],
   1.228 +	m1.m64_u8[5],
   1.229 +	m2.m64_u8[5],
   1.230 +	m1.m64_u8[6],
   1.231 +	m2.m64_u8[6],
   1.232 +	m1.m64_u8[7],
   1.233 +	m2.m64_u8[7],
   1.234 +    };
   1.235 +
   1.236 +    return M64C (d[0]);
   1.237 +}
   1.238 +
   1.239 +__inline __m64 _m_psrlwi (__m64 a, int n)       /* unoptimized */
   1.240 +{
   1.241 +    unsigned short d[4] =
   1.242 +    {
   1.243 +	a.m64_u16[0] >> n,
   1.244 +	a.m64_u16[1] >> n,
   1.245 +	a.m64_u16[2] >> n,
   1.246 +	a.m64_u16[3] >> n
   1.247 +    };
   1.248 +
   1.249 +    return M64C (d[0]);
   1.250 +}
   1.251 +
   1.252 +__inline __m64 _m_psrlqi (__m64 m, int n)
   1.253 +{
   1.254 +    unsigned long long x = M64U (m) >> n;
   1.255 +
   1.256 +    return M64C (x);
   1.257 +}
   1.258 +
   1.259 +__inline __m64 _m_psllqi (__m64 m, int n)
   1.260 +{
   1.261 +    unsigned long long x = M64U (m) << n;
   1.262 +
   1.263 +    return M64C (x);
   1.264 +}
   1.265 +
   1.266 +#endif /* MMX_X64_H_INCLUDED */

mercurial