|
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
|
10 |
|
11 |
|
#ifndef VPX_PORTS_X86_H
#define VPX_PORTS_X86_H
#include <stdlib.h>      /* getenv(), strtol() */
#include "vpx_config.h"  /* build configuration; presumably defines ARCH_X86_64 -- confirm */
|
/* Identifiers for x86 CPU vendors, as returned by vpx_x86_vendor(). */
typedef enum {
  VPX_CPU_UNKNOWN = -1,
  VPX_CPU_AMD,
  VPX_CPU_AMD_OLD,
  VPX_CPU_CENTAUR,
  VPX_CPU_CYRIX,
  VPX_CPU_INTEL,
  VPX_CPU_NEXGEN,
  VPX_CPU_NSC,
  VPX_CPU_RISE,
  VPX_CPU_SIS,
  VPX_CPU_TRANSMETA,
  VPX_CPU_TRANSMETA_OLD,
  VPX_CPU_UMC,
  VPX_CPU_VIA,

  VPX_CPU_LAST  /* count of named vendors; keep this entry last */
} vpx_cpu_t;
|
35 |
|
/* cpuid(func, ax, bx, cx, dx): execute CPUID for leaf 'func' and store the
 * resulting EAX/EBX/ECX/EDX into ax/bx/cx/dx.
 * NOTE(review): none of these variants sets ECX before executing CPUID, so
 * for leaves with sub-leaves (e.g. leaf 7) the sub-leaf selected is
 * unspecified -- confirm callers account for this. */
#if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__)
#if ARCH_X86_64
#define cpuid(func,ax,bx,cx,dx)\
  __asm__ __volatile__ (\
                        "cpuid \n\t" \
                        : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \
                        : "a" (func));
#else
/* 32-bit: EBX may be reserved (PIC GOT pointer), so preserve it around
 * CPUID and return its value through EDI instead. */
#define cpuid(func,ax,bx,cx,dx)\
  __asm__ __volatile__ (\
                        "mov %%ebx, %%edi \n\t" \
                        "cpuid \n\t" \
                        "xchg %%edi, %%ebx \n\t" \
                        : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
                        : "a" (func));
#endif
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
#if ARCH_X86_64
/* Sun Studio x64: save/restore RBX via RSI; result EBX copied to EDI. */
#define cpuid(func,ax,bx,cx,dx)\
  asm volatile (\
                "xchg %rsi, %rbx \n\t" \
                "cpuid \n\t" \
                "movl %ebx, %edi \n\t" \
                "xchg %rsi, %rbx \n\t" \
                : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
                : "a" (func));
#else
/* Sun Studio x86: preserve EBX on the stack; result EBX copied to EDI. */
#define cpuid(func,ax,bx,cx,dx)\
  asm volatile (\
                "pushl %ebx \n\t" \
                "cpuid \n\t" \
                "movl %ebx, %edi \n\t" \
                "popl %ebx \n\t" \
                : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
                : "a" (func));
#endif
#else /* end __SUNPRO__ */
#if ARCH_X86_64
/* MSVC x64 has no inline asm; use the __cpuid intrinsic. */
void __cpuid(int CPUInfo[4], int info_type);
#pragma intrinsic(__cpuid)
#define cpuid(func,a,b,c,d) do{\
    int regs[4];\
    __cpuid(regs,func); a=regs[0]; b=regs[1]; c=regs[2]; d=regs[3];\
  } while(0)
#else
/* MSVC x86: classic __asm block. */
#define cpuid(func,a,b,c,d)\
  __asm mov eax, func\
  __asm cpuid\
  __asm mov a, eax\
  __asm mov b, ebx\
  __asm mov c, ecx\
  __asm mov d, edx
#endif
#endif /* end others */
|
90 |
|
/* Capability bit-flags returned by x86_simd_caps(). */
#define HAS_MMX    0x01
#define HAS_SSE    0x02
#define HAS_SSE2   0x04
#define HAS_SSE3   0x08
#define HAS_SSSE3  0x10
#define HAS_SSE4_1 0x20
#define HAS_AVX    0x40
#define HAS_AVX2   0x80
#ifndef BIT
/* Fix: parenthesize the macro argument so expressions such as
 * BIT(a & b) expand with the intended precedence.  (All in-file uses
 * pass literals, so existing behavior is unchanged.) */
#define BIT(n) (1 << (n))
#endif
|
102 |
|
103 static int |
|
104 x86_simd_caps(void) { |
|
105 unsigned int flags = 0; |
|
106 unsigned int mask = ~0; |
|
107 unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx; |
|
108 char *env; |
|
109 (void)reg_ebx; |
|
110 |
|
111 /* See if the CPU capabilities are being overridden by the environment */ |
|
112 env = getenv("VPX_SIMD_CAPS"); |
|
113 |
|
114 if (env && *env) |
|
115 return (int)strtol(env, NULL, 0); |
|
116 |
|
117 env = getenv("VPX_SIMD_CAPS_MASK"); |
|
118 |
|
119 if (env && *env) |
|
120 mask = strtol(env, NULL, 0); |
|
121 |
|
122 /* Ensure that the CPUID instruction supports extended features */ |
|
123 cpuid(0, reg_eax, reg_ebx, reg_ecx, reg_edx); |
|
124 |
|
125 if (reg_eax < 1) |
|
126 return 0; |
|
127 |
|
128 /* Get the standard feature flags */ |
|
129 cpuid(1, reg_eax, reg_ebx, reg_ecx, reg_edx); |
|
130 |
|
131 if (reg_edx & BIT(23)) flags |= HAS_MMX; |
|
132 |
|
133 if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ |
|
134 |
|
135 if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ |
|
136 |
|
137 if (reg_ecx & BIT(0)) flags |= HAS_SSE3; |
|
138 |
|
139 if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; |
|
140 |
|
141 if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; |
|
142 |
|
143 if (reg_ecx & BIT(28)) flags |= HAS_AVX; |
|
144 |
|
145 if (reg_ebx & BIT(5)) flags |= HAS_AVX2; |
|
146 |
|
147 return flags & mask; |
|
148 } |
|
149 |
|
/* Returns the host CPU vendor as one of the vpx_cpu_t values above
 * (implemented elsewhere in the port layer). */
vpx_cpu_t vpx_x86_vendor(void);
|
151 |
|
#if ARCH_X86_64 && defined(_MSC_VER)
unsigned __int64 __rdtsc(void);
#pragma intrinsic(__rdtsc)
#endif
/* Read the low 32 bits of the CPU time-stamp counter (RDTSC).  Intended
 * for coarse relative timing only; the value truncates/wraps at 2^32. */
static unsigned int
x86_readtsc(void) {
#if defined(__GNUC__) && __GNUC__
  unsigned int tsc;
  __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):);
  return tsc;
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
  unsigned int tsc;
  asm volatile("rdtsc\n\t":"=a"(tsc):);
  return tsc;
#else
#if ARCH_X86_64
  return (unsigned int)__rdtsc();
#else
  /* MSVC x86: RDTSC leaves its low word in EAX, which is also the return
   * register; the missing 'return' is the classic MSVC value-in-EAX
   * idiom (compiler warning C4035). */
  __asm rdtsc;
#endif
#endif
}
|
174 |
|
175 |
|
/* x86_pause_hint(): issue the PAUSE instruction, a spin-wait hint used
 * inside busy-wait loops. */
#if defined(__GNUC__) && __GNUC__
#define x86_pause_hint()\
  __asm__ __volatile__ ("pause \n\t")
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#define x86_pause_hint()\
  asm volatile ("pause \n\t")
#else
#if ARCH_X86_64
/* NOTE(review): relies on _mm_pause() being declared by an intrinsics
 * header included elsewhere -- confirm. */
#define x86_pause_hint()\
  _mm_pause();
#else
#define x86_pause_hint()\
  __asm pause
#endif
#endif
|
191 |
|
/* x87_set_control_word(mode): load 'mode' into the x87 FPU control word
 * (FLDCW).
 * x87_get_control_word(): return the current x87 FPU control word
 * (FSTCW).
 * The "*&mode" form forces a memory operand, as FLDCW/FSTCW require. */
#if defined(__GNUC__) && __GNUC__
static void
x87_set_control_word(unsigned short mode) {
  __asm__ __volatile__("fldcw %0" : : "m"(*&mode));
}
static unsigned short
x87_get_control_word(void) {
  unsigned short mode;
  __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):);
  return mode;
}
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
static void
x87_set_control_word(unsigned short mode) {
  asm volatile("fldcw %0" : : "m"(*&mode));
}
static unsigned short
x87_get_control_word(void) {
  unsigned short mode;
  asm volatile("fstcw %0\n\t":"=m"(*&mode):);
  return mode;
}
#elif ARCH_X86_64
/* No fldcw intrinsics on Windows x64, punt to external asm */
extern void vpx_winx64_fldcw(unsigned short mode);
extern unsigned short vpx_winx64_fstcw(void);
#define x87_set_control_word vpx_winx64_fldcw
#define x87_get_control_word vpx_winx64_fstcw
#else
/* MSVC x86: inline __asm fallback. */
static void
x87_set_control_word(unsigned short mode) {
  __asm { fldcw mode }
}
static unsigned short
x87_get_control_word(void) {
  unsigned short mode;
  __asm { fstcw mode }
  return mode;
}
#endif
|
232 |
|
/* Force the x87 FPU into 53-bit (double) precision and return the
 * previous control word so the caller can restore it afterwards. */
static unsigned short
x87_set_double_precision(void) {
  const unsigned short saved_cw = x87_get_control_word();
  /* Bits 8-9 form the precision-control field; 10b selects 53-bit. */
  x87_set_control_word((unsigned short)((saved_cw & ~0x300) | 0x200));
  return saved_cw;
}
|
239 |
|
240 |
|
/* Defined elsewhere (asm); presumably resets the MMX/x87 state (EMMS) so
 * x87 floating-point code can run safely -- confirm against the asm
 * implementation. */
extern void vpx_reset_mmx_state(void);
#endif /* VPX_PORTS_X86_H */
|
243 |