security/nss/lib/freebl/mpi/mpcpucache.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 4
michael@0 5 #include "mpi.h"
michael@0 6
michael@0 7 /*
michael@0 8 * This file implements a single function: s_mpi_getProcessorLineSize();
michael@0 9 * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
michael@0 10 * if a cache exists, or zero if there is no cache. If more than one
michael@0 11 * cache line exists, it should return the smallest line size (which is
michael@0 12 * usually the L1 cache).
michael@0 13 *
michael@0 14 * mp_modexp uses this information to make sure that private key information
michael@0 15 * isn't being leaked through the cache.
michael@0 16 *
michael@0 17 * Currently the file returns good data for most modern x86 processors, and
michael@0 18 * reasonable data on 64-bit ppc processors. All other processors are assumed
michael@0 19 * to have a cache line size of 32 bytes unless modified by target.mk.
michael@0 20 *
michael@0 21 */
michael@0 22
michael@0 23 #if defined(i386) || defined(__i386) || defined(__X86__) || defined (_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
michael@0 24 /* X86 processors have special instructions that tell us about the cache */
michael@0 25 #include "string.h"
michael@0 26
michael@0 27 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
michael@0 28 #define AMD_64 1
michael@0 29 #endif
michael@0 30
michael@0 31 /* Generic CPUID function */
michael@0 32 #if defined(AMD_64)
michael@0 33
michael@0 34 #if defined(__GNUC__)
michael@0 35
/*
 * Execute CPUID with leaf 'op' in eax and store the resulting eax, ebx,
 * ecx and edx values through the supplied pointers.
 *
 * The asm is volatile on purpose: this file issues the same leaf more than
 * once (leaf 2 may need to be queried repeatedly), and a non-volatile asm
 * with identical inputs may be merged or removed by the optimizer.
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    __asm__ __volatile__("cpuid\n\t"
                         : "=a" (*eax),
                           "=b" (*ebx),
                           "=c" (*ecx),
                           "=d" (*edx)
                         : "0" (op));
}
michael@0 47
michael@0 48 #elif defined(_MSC_VER)
michael@0 49
michael@0 50 #include <intrin.h>
michael@0 51
/*
 * CPUID wrapper for 64-bit MSVC builds.  The __cpuid intrinsic fills a
 * four-element int array in eax, ebx, ecx, edx order; each element is
 * copied out through the matching pointer.
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    int regs[4]; /* [0]=eax, [1]=ebx, [2]=ecx, [3]=edx */

    __cpuid(regs, op);

    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];
}
michael@0 64
michael@0 65 #endif
michael@0 66
michael@0 67 #else /* !defined(AMD_64) */
michael@0 68
michael@0 69 /* x86 */
michael@0 70
michael@0 71 #if defined(__GNUC__)
/*
 * Execute CPUID with leaf 'op' on 32-bit x86 and store the resulting eax,
 * ebx, ecx and edx values through the supplied pointers.
 *
 * The asm is volatile so that repeated queries of the same leaf are not
 * merged or removed by the optimizer.
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    /* sigh GCC isn't smart enough to save the ebx PIC register on its own
     * in this case, so do it by hand. Use edi to store ebx and pass the
     * value returned in ebx from cpuid through edi. */
    __asm__ __volatile__("mov %%ebx,%%edi\n\t"
                         "cpuid\n\t"
                         "xchgl %%ebx,%%edi\n\t"
                         : "=a" (*eax),
                           "=D" (*ebx),
                           "=c" (*ecx),
                           "=d" (*edx)
                         : "0" (op));
}
michael@0 88
/*
 * Try flipping a processor flag to determine CPU type.
 *
 * XORs 'flag' into EFLAGS, reads EFLAGS back, restores the original value,
 * and returns the bits that actually changed (0 if the CPU refused to
 * toggle the requested flag).
 *
 * The asm is volatile because it rewrites EFLAGS and uses the stack, which
 * are effects the compiler cannot see from the output operands alone.
 */
static unsigned long changeFlag(unsigned long flag)
{
    unsigned long changedFlags, originalFlags;
    __asm__ __volatile__(
            "pushfl\n\t"            /* get the flags */
            "popl %0\n\t"
            "movl %0,%1\n\t"        /* save the original flags */
            "xorl %2,%0\n\t"        /* flip the bit */
            "pushl %0\n\t"          /* set the flags */
            "popfl\n\t"
            "pushfl\n\t"            /* get the flags again (for return) */
            "popl %0\n\t"
            "pushl %1\n\t"          /* restore the original flags */
            "popfl\n\t"
            : "=r" (changedFlags),
              "=r" (originalFlags),
              "=r" (flag)
            : "2" (flag));
    return changedFlags ^ originalFlags;
}
michael@0 111
michael@0 112 #elif defined(_MSC_VER)
michael@0 113
michael@0 114 /*
michael@0 115 * windows versions of the above assembler
michael@0 116 */
michael@0 117 #define wcpuid __asm __emit 0fh __asm __emit 0a2h
michael@0 118 void freebl_cpuid(unsigned long op, unsigned long *Reax,
michael@0 119 unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
michael@0 120 {
michael@0 121 unsigned long Leax, Lebx, Lecx, Ledx;
michael@0 122 __asm {
michael@0 123 pushad
michael@0 124 mov eax,op
michael@0 125 wcpuid
michael@0 126 mov Leax,eax
michael@0 127 mov Lebx,ebx
michael@0 128 mov Lecx,ecx
michael@0 129 mov Ledx,edx
michael@0 130 popad
michael@0 131 }
michael@0 132 *Reax = Leax;
michael@0 133 *Rebx = Lebx;
michael@0 134 *Recx = Lecx;
michael@0 135 *Redx = Ledx;
michael@0 136 }
michael@0 137
michael@0 138 static unsigned long changeFlag(unsigned long flag)
michael@0 139 {
michael@0 140 unsigned long changedFlags, originalFlags;
michael@0 141 __asm {
michael@0 142 push eax
michael@0 143 push ebx
michael@0 144 pushfd /* get the flags */
michael@0 145 pop eax
michael@0 146 push eax /* save the flags on the stack */
michael@0 147 mov originalFlags,eax /* save the original flags */
michael@0 148 mov ebx,flag
michael@0 149 xor eax,ebx /* flip the bit */
michael@0 150 push eax /* set the flags */
michael@0 151 popfd
michael@0 152 pushfd /* get the flags again (for return) */
michael@0 153 pop eax
michael@0 154 popfd /* restore the original flags */
michael@0 155 mov changedFlags,eax
michael@0 156 pop ebx
michael@0 157 pop eax
michael@0 158 }
michael@0 159 return changedFlags ^ originalFlags;
michael@0 160 }
michael@0 161 #endif
michael@0 162
michael@0 163 #endif
michael@0 164
michael@0 165 #if !defined(AMD_64)
michael@0 166 #define AC_FLAG 0x40000
michael@0 167 #define ID_FLAG 0x200000
michael@0 168
michael@0 169 /* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */
michael@0 170 static int is386()
michael@0 171 {
michael@0 172 return changeFlag(AC_FLAG) == 0;
michael@0 173 }
michael@0 174
michael@0 175 /* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */
michael@0 176 static int is486()
michael@0 177 {
michael@0 178 return changeFlag(ID_FLAG) == 0;
michael@0 179 }
michael@0 180 #endif
michael@0 181
michael@0 182
/*
 * Lookup table for the Intel cache descriptor bytes returned by CPUID
 * leaf 2.  See Intel Application Note AP-485 for more information.
 */

/* Storage type for a CacheType value inside the table (one byte). */
typedef unsigned char CacheTypeEntry;

/* Kind of structure a descriptor byte refers to; values run 0..14. */
typedef enum {
    Cache_NONE = 0,
    Cache_UNKNOWN,
    Cache_TLB,
    Cache_TLBi,
    Cache_TLBd,
    Cache_Trace,
    Cache_L1,
    Cache_L1i,
    Cache_L1d,
    Cache_L2,
    Cache_L2i,
    Cache_L2d,
    Cache_L3,
    Cache_L3i,
    Cache_L3d
} CacheType;

/* One table entry: what the descriptor describes and its line size. */
struct _cache {
    CacheTypeEntry type;
    unsigned char lineSize;
};

/* Indexed by descriptor byte; four entries per row, row start in comment. */
static const struct _cache CacheMap[256] = {
    /* 00 */ {Cache_NONE, 0},     {Cache_TLBi, 0},     {Cache_TLBi, 0},     {Cache_TLBd, 0},
    /* 04 */ {Cache_TLBd, 0},     {Cache_UNKNOWN, 0},  {Cache_L1i, 32},     {Cache_UNKNOWN, 0},
    /* 08 */ {Cache_L1i, 32},     {Cache_UNKNOWN, 0},  {Cache_L1d, 32},     {Cache_UNKNOWN, 0},
    /* 0c */ {Cache_L1d, 32},     {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 10 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 14 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 18 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 1c */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 20 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_L3, 64},      {Cache_L3, 64},
    /* 24 */ {Cache_UNKNOWN, 0},  {Cache_L3, 64},      {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 28 */ {Cache_UNKNOWN, 0},  {Cache_L3, 64},      {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 2c */ {Cache_L1d, 64},     {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 30 */ {Cache_L1i, 64},     {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 34 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 38 */ {Cache_UNKNOWN, 0},  {Cache_L2, 64},      {Cache_UNKNOWN, 0},  {Cache_L2, 64},
    /* 3c */ {Cache_L2, 64},      {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 40 */ {Cache_L2, 0},       {Cache_L2, 32},      {Cache_L2, 32},      {Cache_L2, 32},
    /* 44 */ {Cache_L2, 32},      {Cache_L2, 32},      {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 48 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 4c */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 50 */ {Cache_TLBi, 0},     {Cache_TLBi, 0},     {Cache_TLBi, 0},     {Cache_UNKNOWN, 0},
    /* 54 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 58 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_TLBd, 0},
    /* 5c */ {Cache_TLBd, 0},     {Cache_TLBd, 0},     {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 60 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 64 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_L1d, 64},     {Cache_L1d, 64},
    /* 68 */ {Cache_L1d, 64},     {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 6c */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 70 */ {Cache_Trace, 1},    {Cache_Trace, 1},    {Cache_Trace, 1},    {Cache_UNKNOWN, 0},
    /* 74 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 78 */ {Cache_UNKNOWN, 0},  {Cache_L2, 64},      {Cache_L2, 64},      {Cache_L2, 64},
    /* 7c */ {Cache_L2, 64},      {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 80 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_L2, 32},      {Cache_L2, 32},
    /* 84 */ {Cache_L2, 32},      {Cache_L2, 32},      {Cache_L2, 64},      {Cache_L2, 64},
    /* 88 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 8c */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 90 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 94 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 98 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* 9c */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* a0 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* a4 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* a8 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* ac */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* b0 */ {Cache_TLBi, 0},     {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_TLBd, 0},
    /* b4 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* b8 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* bc */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* c0 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* c4 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* c8 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* cc */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* d0 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* d4 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* d8 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* dc */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* e0 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* e4 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* e8 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* ec */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* f0 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* f4 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* f8 */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},
    /* fc */ {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0},  {Cache_UNKNOWN, 0}
};
michael@0 470
michael@0 471
michael@0 472 /*
michael@0 473 * use the above table to determine the CacheEntryLineSize.
michael@0 474 */
michael@0 475 static void
michael@0 476 getIntelCacheEntryLineSize(unsigned long val, int *level,
michael@0 477 unsigned long *lineSize)
michael@0 478 {
michael@0 479 CacheType type;
michael@0 480
michael@0 481 type = CacheMap[val].type;
michael@0 482 /* only interested in data caches */
michael@0 483 /* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
michael@0 484 * this data check has the side effect of rejecting that entry. If
michael@0 485 * that wasn't the case, we could have to reject it explicitly */
michael@0 486 if (CacheMap[val].lineSize == 0) {
michael@0 487 return;
michael@0 488 }
michael@0 489 /* look at the caches, skip types we aren't interested in.
michael@0 490 * if we already have a value for a lower level cache, skip the
michael@0 491 * current entry */
michael@0 492 if ((type == Cache_L1)|| (type == Cache_L1d)) {
michael@0 493 *level = 1;
michael@0 494 *lineSize = CacheMap[val].lineSize;
michael@0 495 } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
michael@0 496 *level = 2;
michael@0 497 *lineSize = CacheMap[val].lineSize;
michael@0 498 } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
michael@0 499 *level = 3;
michael@0 500 *lineSize = CacheMap[val].lineSize;
michael@0 501 }
michael@0 502 return;
michael@0 503 }
michael@0 504
michael@0 505
/*
 * Split the low 32 bits of a register value into its four descriptor
 * bytes and feed each one, most significant first, to
 * getIntelCacheEntryLineSize().
 */
static void
getIntelRegisterCacheLineSize(unsigned long val,
                              int *level, unsigned long *lineSize)
{
    int shift;

    for (shift = 24; shift >= 0; shift -= 8) {
        getIntelCacheEntryLineSize((val >> shift) & 0xff, level, lineSize);
    }
}
michael@0 515
/*
 * Returns the line size of the lowest-level data cache reported by CPUID
 * leaf 2, or '0' if no recognized cache is found or if the cache
 * information leaf is not supported by this processor (cpuidLevel < 2).
 */
static unsigned long
getIntelCacheLineSize(int cpuidLevel)
{
    int level = 4; /* above any level the entry parser assigns (1..3) */
    unsigned long lineSize = 0;
    unsigned long eax, ebx, ecx, edx;
    int repeat, count;

    if (cpuidLevel < 2) {
        return 0;
    }

    /* command '2' of the cpuid is intel's cache info call. Each byte of the
     * 4 registers contain a potential descriptor for the cache. The CacheMap
     * table maps the cache entry with the processor cache. Register 'al'
     * contains a count value that cpuid '2' needs to be called in order to
     * find all the cache descriptors. Only registers with the high bit set
     * to 'zero' have valid descriptors. This code loops through all the
     * required calls to cpuid '2' and passes any valid descriptors it finds
     * to the getIntelRegisterCacheLineSize code, which breaks the registers
     * down into their component descriptors. In the end the lineSize of the
     * lowest level cache data cache is returned. */
    freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
    /* the count occupies all of 'al', so keep the full low byte */
    repeat = (int)(eax & 0xff);
    for (count = 0; count < repeat; count++) {
        if ((eax & 0x80000000) == 0) {
            /* mask off 'al': it is the count, not a descriptor */
            getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
        }
        if ((ebx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
        }
        if ((ecx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
        }
        if ((edx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(edx, &level, &lineSize);
        }
        /* fetch the next batch unless this was the last required call */
        if (count + 1 != repeat) {
            freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
        }
    }
    return lineSize;
}
michael@0 563
/*
 * Line size of the L1 data cache as reported by the AMD-style extended
 * cpuid commands, or '0' when the processor does not support them.
 * (see "AMD Processor Recognition Application Note" Publication 20734).
 * Some other processors use the identical scheme.
 * (see "Processor Recognition, Transmeta Corporation").
 *
 * The incoming cpuidLevel value is not consulted; it is replaced by the
 * extended level read from command 0x80000000.
 */
static unsigned long
getOtherCacheLineSize(unsigned long cpuidLevel)
{
    unsigned long eax, ebx, ecx, edx;

    /* highest extended command the processor implements */
    freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
    cpuidLevel = eax;

    if (cpuidLevel < 0x80000005) {
        return 0;
    }

    /* low byte of ecx: line size, L1 data cache */
    freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
    return ecx & 0xff;
}
michael@0 587
/*
 * CPUID vendor identification strings.  Each #define is the index of the
 * string that follows it, so the macros and the array must stay in step.
 */
static const char * const manMap[] = {
#define INTEL 0
    "GenuineIntel",
#define AMD 1
    "AuthenticAMD",
#define CYRIX 2
    "CyrixInstead",
#define CENTAUR 3
    "CentaurHauls",
#define NEXGEN 4
    "NexGenDriven",
#define TRANSMETA 5
    "GenuineTMx86",
#define RISE 6
    "RiseRiseRise",
#define UMC 7
    "UMC UMC UMC ",
#define SIS 8
    "SiS SiS SiS ",
#define NATIONAL 9
    "Geode by NSC",
};

static const int n_manufacturers = sizeof(manMap)/sizeof(manMap[0]);


/* One past the last valid manMap index, so it can never match an entry. */
#define MAN_UNKNOWN 10
michael@0 615
michael@0 616 #if !defined(AMD_64)
michael@0 617 #define SSE2_FLAG (1<<26)
michael@0 618 unsigned long
michael@0 619 s_mpi_is_sse2()
michael@0 620 {
michael@0 621 unsigned long eax, ebx, ecx, edx;
michael@0 622 int manufacturer = MAN_UNKNOWN;
michael@0 623 int i;
michael@0 624 char string[13];
michael@0 625
michael@0 626 if (is386() || is486()) {
michael@0 627 return 0;
michael@0 628 }
michael@0 629 freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
michael@0 630 /* string holds the CPU's manufacturer ID string - a twelve
michael@0 631 * character ASCII string stored in ebx, edx, ecx, and
michael@0 632 * the 32-bit extended feature flags are in edx, ecx.
michael@0 633 */
michael@0 634 *(int *)string = ebx;
michael@0 635 *(int *)&string[4] = (int)edx;
michael@0 636 *(int *)&string[8] = (int)ecx;
michael@0 637 string[12] = 0;
michael@0 638
michael@0 639 /* has no SSE2 extensions */
michael@0 640 if (eax == 0) {
michael@0 641 return 0;
michael@0 642 }
michael@0 643
michael@0 644 for (i=0; i < n_manufacturers; i++) {
michael@0 645 if ( strcmp(manMap[i],string) == 0) {
michael@0 646 manufacturer = i;
michael@0 647 break;
michael@0 648 }
michael@0 649 }
michael@0 650
michael@0 651 freebl_cpuid(1,&eax,&ebx,&ecx,&edx);
michael@0 652 return (edx & SSE2_FLAG) == SSE2_FLAG;
michael@0 653 }
michael@0 654 #endif
michael@0 655
michael@0 656 unsigned long
michael@0 657 s_mpi_getProcessorLineSize()
michael@0 658 {
michael@0 659 unsigned long eax, ebx, ecx, edx;
michael@0 660 unsigned long cpuidLevel;
michael@0 661 unsigned long cacheLineSize = 0;
michael@0 662 int manufacturer = MAN_UNKNOWN;
michael@0 663 int i;
michael@0 664 char string[65];
michael@0 665
michael@0 666 #if !defined(AMD_64)
michael@0 667 if (is386()) {
michael@0 668 return 0; /* 386 had no cache */
michael@0 669 } if (is486()) {
michael@0 670 return 32; /* really? need more info */
michael@0 671 }
michael@0 672 #endif
michael@0 673
michael@0 674 /* Pentium, cpuid command is available */
michael@0 675 freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
michael@0 676 cpuidLevel = eax;
michael@0 677 /* string holds the CPU's manufacturer ID string - a twelve
michael@0 678 * character ASCII string stored in ebx, edx, ecx, and
michael@0 679 * the 32-bit extended feature flags are in edx, ecx.
michael@0 680 */
michael@0 681 *(int *)string = ebx;
michael@0 682 *(int *)&string[4] = (int)edx;
michael@0 683 *(int *)&string[8] = (int)ecx;
michael@0 684 string[12] = 0;
michael@0 685
michael@0 686 manufacturer = MAN_UNKNOWN;
michael@0 687 for (i=0; i < n_manufacturers; i++) {
michael@0 688 if ( strcmp(manMap[i],string) == 0) {
michael@0 689 manufacturer = i;
michael@0 690 }
michael@0 691 }
michael@0 692
michael@0 693 if (manufacturer == INTEL) {
michael@0 694 cacheLineSize = getIntelCacheLineSize(cpuidLevel);
michael@0 695 } else {
michael@0 696 cacheLineSize = getOtherCacheLineSize(cpuidLevel);
michael@0 697 }
michael@0 698 /* doesn't support cache info based on cpuid. This means
michael@0 699 * an old pentium class processor, which have cache lines of
michael@0 700 * 32. If we learn differently, we can use a switch based on
michael@0 701 * the Manufacturer id */
michael@0 702 if (cacheLineSize == 0) {
michael@0 703 cacheLineSize = 32;
michael@0 704 }
michael@0 705 return cacheLineSize;
michael@0 706 }
michael@0 707 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
michael@0 708 #endif
michael@0 709
michael@0 710 #if defined(__ppc64__)
michael@0 711 /*
michael@0 712 * Sigh, The PPC has some really nice features to help us determine cache
michael@0 713 * size, since it had lots of direct control functions to do so. The POWER
michael@0 714 * processor even has an instruction to do this, but it was dropped in
michael@0 715 * PowerPC. Unfortunately most of them are not available in user mode.
michael@0 716 *
michael@0 717 * The dcbz function would be a great way to determine cache line size except
michael@0 718 * 1) it only works on write-back memory (it throws an exception otherwise),
michael@0 719 * and 2) because so many mac programs 'knew' the processor cache size was
michael@0 720 * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
michael@0 721 * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
michael@0 722 * these programs happy. dcbzl works if 64-bit instructions are supported.
michael@0 723 * If you know 64 bit instructions are supported, and that stack is
michael@0 724 * write-back, you can use this code.
michael@0 725 */
michael@0 726 #include "memory.h"
michael@0 727
michael@0 728 /* clear the cache line that contains 'array' */
michael@0 729 static inline void dcbzl(char *array)
michael@0 730 {
michael@0 731 register char *a asm("r2") = array;
michael@0 732 __asm__ __volatile__( "dcbzl %0,r0" : "=r" (a): "0"(a) );
michael@0 733 }
michael@0 734
michael@0 735
/* round pointer x up to a multiple of y; the mask requires y to be a
 * power of two */
#define PPC_DO_ALIGN(x,y) ((char *)\
    ((((long long) (x))+((y)-1))&~((y)-1)))

/* largest line size, in bytes, that the probe below can report */
#define PPC_MAX_LINE_SIZE 256

/*
 * ppc64 implementation: fill an aligned buffer with 0xff, clear one cache
 * block at its start with dcbzl(), and report how many bytes were zeroed.
 * Returns 0 if nothing in the probed range was cleared.
 */
unsigned long
s_mpi_getProcessorLineSize()
{
    char probe[2*PPC_MAX_LINE_SIZE+1];
    /* start on a maximum-line-size boundary so the cleared block begins
     * at the first probed address */
    char *aligned = PPC_DO_ALIGN(probe, PPC_MAX_LINE_SIZE);
    int span = PPC_MAX_LINE_SIZE;

    memset(aligned, 0xff, PPC_MAX_LINE_SIZE);
    dcbzl(aligned);

    /* walk down through the powers of two; the first span whose last
     * byte reads as zero is the size of the cleared block */
    while (span != 0) {
        if (aligned[span-1] == 0) {
            return span;
        }
        span = span/2;
    }
    return 0;
}
michael@0 763
michael@0 764 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
michael@0 765 #endif
michael@0 766
michael@0 767
michael@0 768 /*
michael@0 769 * put other processor and platform specific cache code here
michael@0 770 * return the smallest cache line size in bytes on the processor
michael@0 771 * (usually the L1 cache). If the OS has a call, this would be
michael@0 772 * a great place to put it.
michael@0 773 *
michael@0 774 * If there is no cache, return 0;
michael@0 775 *
michael@0 776 * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
michael@0 777 * below aren't compiled.
michael@0 778 *
michael@0 779 */
michael@0 780
michael@0 781
michael@0 782 /* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or
michael@0 783 * OS */
michael@0 784 #if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED)
michael@0 785
michael@0 786 unsigned long
michael@0 787 s_mpi_getProcessorLineSize()
michael@0 788 {
michael@0 789 return MPI_CACHE_LINE_SIZE;
michael@0 790 }
michael@0 791 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
michael@0 792 #endif
michael@0 793
michael@0 794
michael@0 795 /* If no way to get the processor cache line size has been defined, assume
michael@0 796 * it's 32 bytes (most common value, does not significantly impact performance)
michael@0 797 */
michael@0 798 #ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
/* Generic fallback used when no detection method was compiled in. */
unsigned long
s_mpi_getProcessorLineSize()
{
    const unsigned long fallbackLineSize = 32;

    return fallbackLineSize;
}
michael@0 804 #endif
michael@0 805
michael@0 806 #ifdef TEST_IT
michael@0 807 #include <stdio.h>
michael@0 808
/*
 * Stand-alone check (built only with TEST_IT): print the line size that
 * s_mpi_getProcessorLineSize() reports.  The value is an unsigned long,
 * so it is printed with %lu.
 */
int
main(void)
{
    printf("line size = %lu\n", s_mpi_getProcessorLineSize());
    return 0;
}
michael@0 813 #endif

mercurial