security/nss/lib/freebl/mpi/mpcpucache.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* This Source Code Form is subject to the terms of the Mozilla Public
     2  * License, v. 2.0. If a copy of the MPL was not distributed with this
     3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 #include "mpi.h"
     7 /*
     8  * This file implements a single function: s_mpi_getProcessorLineSize();
     9  * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
    10  * if a cache exists, or zero if there is no cache. If more than one
    11  * cache line exists, it should return the smallest line size (which is 
    12  * usually the L1 cache).
    13  *
    14  * mp_modexp uses this information to make sure that private key information
    15  * isn't being leaked through the cache.
    16  *
    17  * Currently the file returns good data for most modern x86 processors, and
    18  * reasonable data on 64-bit ppc processors. All other processors are assumed
    19  * to have a cache line size of 32 bytes unless modified by target.mk.
    20  * 
    21  */
    23 #if defined(i386) || defined(__i386) || defined(__X86__) || defined (_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
    24 /* X86 processors have special instructions that tell us about the cache */
    25 #include "string.h"
    27 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
    28 #define AMD_64 1
    29 #endif
    31 /* Generic CPUID function */
    32 #if defined(AMD_64)
    34 #if defined(__GNUC__)
    36 void freebl_cpuid(unsigned long op, unsigned long *eax, 
    37 	                 unsigned long *ebx, unsigned long *ecx, 
    38                          unsigned long *edx)
    39 {
    40 	__asm__("cpuid\n\t"
    41 		: "=a" (*eax),
    42 		  "=b" (*ebx),
    43 		  "=c" (*ecx),
    44 		  "=d" (*edx)
    45 		: "0" (op));
    46 }
    48 #elif defined(_MSC_VER)
    50 #include <intrin.h>
    52 void freebl_cpuid(unsigned long op, unsigned long *eax, 
    53            unsigned long *ebx, unsigned long *ecx, 
    54            unsigned long *edx)
    55 {
    56     int intrinsic_out[4];
    58     __cpuid(intrinsic_out, op);
    59     *eax = intrinsic_out[0];
    60     *ebx = intrinsic_out[1];
    61     *ecx = intrinsic_out[2];
    62     *edx = intrinsic_out[3];
    63 }
    65 #endif
    67 #else /* !defined(AMD_64) */
    69 /* x86 */
    71 #if defined(__GNUC__)
    72 void freebl_cpuid(unsigned long op, unsigned long *eax, 
    73 	                 unsigned long *ebx, unsigned long *ecx, 
    74                          unsigned long *edx)
    75 {
    76 /* sigh GCC isn't smart enough to save the ebx PIC register on it's own
    77  * in this case, so do it by hand. Use edi to store ebx and pass the
    78  * value returned in ebx from cpuid through edi. */
    79 	__asm__("mov %%ebx,%%edi\n\t"
    80 		  "cpuid\n\t"
    81 		  "xchgl %%ebx,%%edi\n\t"
    82 		: "=a" (*eax),
    83 		  "=D" (*ebx),
    84 		  "=c" (*ecx),
    85 		  "=d" (*edx)
    86 		: "0" (op));
    87 }
    89 /*
    90  * try flipping a processor flag to determine CPU type
    91  */
    92 static unsigned long changeFlag(unsigned long flag)
    93 {
    94 	unsigned long changedFlags, originalFlags;
    95 	__asm__("pushfl\n\t"            /* get the flags */
    96 	        "popl %0\n\t"
    97 	        "movl %0,%1\n\t"	/* save the original flags */
    98 	        "xorl %2,%0\n\t" 	/* flip the bit */
    99 		"pushl %0\n\t"  	/* set the flags */
   100 	        "popfl\n\t"
   101 		"pushfl\n\t"		/* get the flags again (for return) */
   102 		"popl %0\n\t"
   103 		"pushl %1\n\t"		/* restore the original flags */
   104 		 "popfl\n\t"
   105 		: "=r" (changedFlags),
   106 		  "=r" (originalFlags),
   107 		  "=r" (flag)
   108 		: "2" (flag));
   109 	return changedFlags ^ originalFlags;
   110 }
   112 #elif defined(_MSC_VER)
   114 /*
   115  * windows versions of the above assembler
   116  */
   117 #define wcpuid __asm __emit 0fh __asm __emit 0a2h
   118 void freebl_cpuid(unsigned long op,    unsigned long *Reax, 
   119     unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
   120 {
   121         unsigned long  Leax, Lebx, Lecx, Ledx;
   122         __asm {
   123         pushad
   124         mov     eax,op
   125         wcpuid
   126         mov     Leax,eax
   127         mov     Lebx,ebx
   128         mov     Lecx,ecx
   129         mov     Ledx,edx
   130         popad
   131         }
   132         *Reax = Leax;
   133         *Rebx = Lebx;
   134         *Recx = Lecx;
   135         *Redx = Ledx;
   136 }
   138 static unsigned long changeFlag(unsigned long flag)
   139 {
   140 	unsigned long changedFlags, originalFlags;
   141 	__asm {
   142 		push eax
   143 		push ebx
   144 		pushfd 	                /* get the flags */
   145 	        pop  eax
   146 		push eax		/* save the flags on the stack */
   147 	        mov  originalFlags,eax  /* save the original flags */
   148 		mov  ebx,flag
   149 	        xor  eax,ebx            /* flip the bit */
   150 		push eax                /* set the flags */
   151 	        popfd
   152 		pushfd                  /* get the flags again (for return) */
   153 		pop  eax	
   154 		popfd                   /* restore the original flags */
   155 		mov changedFlags,eax
   156 		pop ebx
   157 		pop eax
   158 	}
   159 	return changedFlags ^ originalFlags;
   160 }
   161 #endif
   163 #endif
   165 #if !defined(AMD_64)
   166 #define AC_FLAG 0x40000
   167 #define ID_FLAG 0x200000
   169 /* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */
   170 static int is386()
   171 {
   172     return changeFlag(AC_FLAG) == 0;
   173 }
   175 /* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */
   176 static int is486()
   177 {
   178     return changeFlag(ID_FLAG) == 0;
   179 }
   180 #endif
   183 /*
   184  * table for Intel Cache.
   185  * See Intel Application Note AP-485 for more information 
   186  */
   188 typedef unsigned char CacheTypeEntry;
   190 typedef enum {
   191     Cache_NONE    = 0,
   192     Cache_UNKNOWN = 1,
   193     Cache_TLB     = 2,
   194     Cache_TLBi    = 3,
   195     Cache_TLBd    = 4,
   196     Cache_Trace   = 5,
   197     Cache_L1      = 6,
   198     Cache_L1i     = 7,
   199     Cache_L1d     = 8,
   200     Cache_L2      = 9 ,
   201     Cache_L2i     = 10 ,
   202     Cache_L2d     = 11 ,
   203     Cache_L3      = 12 ,
   204     Cache_L3i     = 13,
   205     Cache_L3d     = 14
   206 } CacheType;
   208 struct _cache {
   209     CacheTypeEntry type;
   210     unsigned char lineSize;
   211 };
   212 static const struct _cache CacheMap[256] = {
   213 /* 00 */ {Cache_NONE,    0   },
   214 /* 01 */ {Cache_TLBi,    0   },
   215 /* 02 */ {Cache_TLBi,    0   },
   216 /* 03 */ {Cache_TLBd,    0   },
   217 /* 04 */ {Cache_TLBd,        },
   218 /* 05 */ {Cache_UNKNOWN, 0   },
   219 /* 06 */ {Cache_L1i,     32  },
   220 /* 07 */ {Cache_UNKNOWN, 0   },
   221 /* 08 */ {Cache_L1i,     32  },
   222 /* 09 */ {Cache_UNKNOWN, 0   },
   223 /* 0a */ {Cache_L1d,     32  },
   224 /* 0b */ {Cache_UNKNOWN, 0   },
   225 /* 0c */ {Cache_L1d,     32  },
   226 /* 0d */ {Cache_UNKNOWN, 0   },
   227 /* 0e */ {Cache_UNKNOWN, 0   },
   228 /* 0f */ {Cache_UNKNOWN, 0   },
   229 /* 10 */ {Cache_UNKNOWN, 0   },
   230 /* 11 */ {Cache_UNKNOWN, 0   },
   231 /* 12 */ {Cache_UNKNOWN, 0   },
   232 /* 13 */ {Cache_UNKNOWN, 0   },
   233 /* 14 */ {Cache_UNKNOWN, 0   },
   234 /* 15 */ {Cache_UNKNOWN, 0   },
   235 /* 16 */ {Cache_UNKNOWN, 0   },
   236 /* 17 */ {Cache_UNKNOWN, 0   },
   237 /* 18 */ {Cache_UNKNOWN, 0   },
   238 /* 19 */ {Cache_UNKNOWN, 0   },
   239 /* 1a */ {Cache_UNKNOWN, 0   },
   240 /* 1b */ {Cache_UNKNOWN, 0   },
   241 /* 1c */ {Cache_UNKNOWN, 0   },
   242 /* 1d */ {Cache_UNKNOWN, 0   },
   243 /* 1e */ {Cache_UNKNOWN, 0   },
   244 /* 1f */ {Cache_UNKNOWN, 0   },
   245 /* 20 */ {Cache_UNKNOWN, 0   },
   246 /* 21 */ {Cache_UNKNOWN, 0   },
   247 /* 22 */ {Cache_L3,      64  },
   248 /* 23 */ {Cache_L3,      64  },
   249 /* 24 */ {Cache_UNKNOWN, 0   },
   250 /* 25 */ {Cache_L3,      64  },
   251 /* 26 */ {Cache_UNKNOWN, 0   },
   252 /* 27 */ {Cache_UNKNOWN, 0   },
   253 /* 28 */ {Cache_UNKNOWN, 0   },
   254 /* 29 */ {Cache_L3,      64  },
   255 /* 2a */ {Cache_UNKNOWN, 0   },
   256 /* 2b */ {Cache_UNKNOWN, 0   },
   257 /* 2c */ {Cache_L1d,     64  },
   258 /* 2d */ {Cache_UNKNOWN, 0   },
   259 /* 2e */ {Cache_UNKNOWN, 0   },
   260 /* 2f */ {Cache_UNKNOWN, 0   },
   261 /* 30 */ {Cache_L1i,     64  },
   262 /* 31 */ {Cache_UNKNOWN, 0   },
   263 /* 32 */ {Cache_UNKNOWN, 0   },
   264 /* 33 */ {Cache_UNKNOWN, 0   },
   265 /* 34 */ {Cache_UNKNOWN, 0   },
   266 /* 35 */ {Cache_UNKNOWN, 0   },
   267 /* 36 */ {Cache_UNKNOWN, 0   },
   268 /* 37 */ {Cache_UNKNOWN, 0   },
   269 /* 38 */ {Cache_UNKNOWN, 0   },
   270 /* 39 */ {Cache_L2,      64  },
   271 /* 3a */ {Cache_UNKNOWN, 0   },
   272 /* 3b */ {Cache_L2,      64  },
   273 /* 3c */ {Cache_L2,      64  },
   274 /* 3d */ {Cache_UNKNOWN, 0   },
   275 /* 3e */ {Cache_UNKNOWN, 0   },
   276 /* 3f */ {Cache_UNKNOWN, 0   },
   277 /* 40 */ {Cache_L2,      0   },
   278 /* 41 */ {Cache_L2,      32  },
   279 /* 42 */ {Cache_L2,      32  },
   280 /* 43 */ {Cache_L2,      32  },
   281 /* 44 */ {Cache_L2,      32  },
   282 /* 45 */ {Cache_L2,      32  },
   283 /* 46 */ {Cache_UNKNOWN, 0   },
   284 /* 47 */ {Cache_UNKNOWN, 0   },
   285 /* 48 */ {Cache_UNKNOWN, 0   },
   286 /* 49 */ {Cache_UNKNOWN, 0   },
   287 /* 4a */ {Cache_UNKNOWN, 0   },
   288 /* 4b */ {Cache_UNKNOWN, 0   },
   289 /* 4c */ {Cache_UNKNOWN, 0   },
   290 /* 4d */ {Cache_UNKNOWN, 0   },
   291 /* 4e */ {Cache_UNKNOWN, 0   },
   292 /* 4f */ {Cache_UNKNOWN, 0   },
   293 /* 50 */ {Cache_TLBi,    0   },
   294 /* 51 */ {Cache_TLBi,    0   },
   295 /* 52 */ {Cache_TLBi,    0   },
   296 /* 53 */ {Cache_UNKNOWN, 0   },
   297 /* 54 */ {Cache_UNKNOWN, 0   },
   298 /* 55 */ {Cache_UNKNOWN, 0   },
   299 /* 56 */ {Cache_UNKNOWN, 0   },
   300 /* 57 */ {Cache_UNKNOWN, 0   },
   301 /* 58 */ {Cache_UNKNOWN, 0   },
   302 /* 59 */ {Cache_UNKNOWN, 0   },
   303 /* 5a */ {Cache_UNKNOWN, 0   },
   304 /* 5b */ {Cache_TLBd,    0   },
   305 /* 5c */ {Cache_TLBd,    0   },
   306 /* 5d */ {Cache_TLBd,    0   },
   307 /* 5e */ {Cache_UNKNOWN, 0   },
   308 /* 5f */ {Cache_UNKNOWN, 0   },
   309 /* 60 */ {Cache_UNKNOWN, 0   },
   310 /* 61 */ {Cache_UNKNOWN, 0   },
   311 /* 62 */ {Cache_UNKNOWN, 0   },
   312 /* 63 */ {Cache_UNKNOWN, 0   },
   313 /* 64 */ {Cache_UNKNOWN, 0   },
   314 /* 65 */ {Cache_UNKNOWN, 0   },
   315 /* 66 */ {Cache_L1d,     64  },
   316 /* 67 */ {Cache_L1d,     64  },
   317 /* 68 */ {Cache_L1d,     64  },
   318 /* 69 */ {Cache_UNKNOWN, 0   },
   319 /* 6a */ {Cache_UNKNOWN, 0   },
   320 /* 6b */ {Cache_UNKNOWN, 0   },
   321 /* 6c */ {Cache_UNKNOWN, 0   },
   322 /* 6d */ {Cache_UNKNOWN, 0   },
   323 /* 6e */ {Cache_UNKNOWN, 0   },
   324 /* 6f */ {Cache_UNKNOWN, 0   },
   325 /* 70 */ {Cache_Trace,   1   },
   326 /* 71 */ {Cache_Trace,   1   },
   327 /* 72 */ {Cache_Trace,   1   },
   328 /* 73 */ {Cache_UNKNOWN, 0   },
   329 /* 74 */ {Cache_UNKNOWN, 0   },
   330 /* 75 */ {Cache_UNKNOWN, 0   },
   331 /* 76 */ {Cache_UNKNOWN, 0   },
   332 /* 77 */ {Cache_UNKNOWN, 0   },
   333 /* 78 */ {Cache_UNKNOWN, 0   },
   334 /* 79 */ {Cache_L2,      64  },
   335 /* 7a */ {Cache_L2,      64  },
   336 /* 7b */ {Cache_L2,      64  },
   337 /* 7c */ {Cache_L2,      64  },
   338 /* 7d */ {Cache_UNKNOWN, 0   },
   339 /* 7e */ {Cache_UNKNOWN, 0   },
   340 /* 7f */ {Cache_UNKNOWN, 0   },
   341 /* 80 */ {Cache_UNKNOWN, 0   },
   342 /* 81 */ {Cache_UNKNOWN, 0   },
   343 /* 82 */ {Cache_L2,      32  },
   344 /* 83 */ {Cache_L2,      32  },
   345 /* 84 */ {Cache_L2,      32  },
   346 /* 85 */ {Cache_L2,      32  },
   347 /* 86 */ {Cache_L2,      64  },
   348 /* 87 */ {Cache_L2,      64  },
   349 /* 88 */ {Cache_UNKNOWN, 0   },
   350 /* 89 */ {Cache_UNKNOWN, 0   },
   351 /* 8a */ {Cache_UNKNOWN, 0   },
   352 /* 8b */ {Cache_UNKNOWN, 0   },
   353 /* 8c */ {Cache_UNKNOWN, 0   },
   354 /* 8d */ {Cache_UNKNOWN, 0   },
   355 /* 8e */ {Cache_UNKNOWN, 0   },
   356 /* 8f */ {Cache_UNKNOWN, 0   },
   357 /* 90 */ {Cache_UNKNOWN, 0   },
   358 /* 91 */ {Cache_UNKNOWN, 0   },
   359 /* 92 */ {Cache_UNKNOWN, 0   },
   360 /* 93 */ {Cache_UNKNOWN, 0   },
   361 /* 94 */ {Cache_UNKNOWN, 0   },
   362 /* 95 */ {Cache_UNKNOWN, 0   },
   363 /* 96 */ {Cache_UNKNOWN, 0   },
   364 /* 97 */ {Cache_UNKNOWN, 0   },
   365 /* 98 */ {Cache_UNKNOWN, 0   },
   366 /* 99 */ {Cache_UNKNOWN, 0   },
   367 /* 9a */ {Cache_UNKNOWN, 0   },
   368 /* 9b */ {Cache_UNKNOWN, 0   },
   369 /* 9c */ {Cache_UNKNOWN, 0   },
   370 /* 9d */ {Cache_UNKNOWN, 0   },
   371 /* 9e */ {Cache_UNKNOWN, 0   },
   372 /* 9f */ {Cache_UNKNOWN, 0   },
   373 /* a0 */ {Cache_UNKNOWN, 0   },
   374 /* a1 */ {Cache_UNKNOWN, 0   },
   375 /* a2 */ {Cache_UNKNOWN, 0   },
   376 /* a3 */ {Cache_UNKNOWN, 0   },
   377 /* a4 */ {Cache_UNKNOWN, 0   },
   378 /* a5 */ {Cache_UNKNOWN, 0   },
   379 /* a6 */ {Cache_UNKNOWN, 0   },
   380 /* a7 */ {Cache_UNKNOWN, 0   },
   381 /* a8 */ {Cache_UNKNOWN, 0   },
   382 /* a9 */ {Cache_UNKNOWN, 0   },
   383 /* aa */ {Cache_UNKNOWN, 0   },
   384 /* ab */ {Cache_UNKNOWN, 0   },
   385 /* ac */ {Cache_UNKNOWN, 0   },
   386 /* ad */ {Cache_UNKNOWN, 0   },
   387 /* ae */ {Cache_UNKNOWN, 0   },
   388 /* af */ {Cache_UNKNOWN, 0   },
   389 /* b0 */ {Cache_TLBi,    0   },
   390 /* b1 */ {Cache_UNKNOWN, 0   },
   391 /* b2 */ {Cache_UNKNOWN, 0   },
   392 /* b3 */ {Cache_TLBd,    0   },
   393 /* b4 */ {Cache_UNKNOWN, 0   },
   394 /* b5 */ {Cache_UNKNOWN, 0   },
   395 /* b6 */ {Cache_UNKNOWN, 0   },
   396 /* b7 */ {Cache_UNKNOWN, 0   },
   397 /* b8 */ {Cache_UNKNOWN, 0   },
   398 /* b9 */ {Cache_UNKNOWN, 0   },
   399 /* ba */ {Cache_UNKNOWN, 0   },
   400 /* bb */ {Cache_UNKNOWN, 0   },
   401 /* bc */ {Cache_UNKNOWN, 0   },
   402 /* bd */ {Cache_UNKNOWN, 0   },
   403 /* be */ {Cache_UNKNOWN, 0   },
   404 /* bf */ {Cache_UNKNOWN, 0   },
   405 /* c0 */ {Cache_UNKNOWN, 0   },
   406 /* c1 */ {Cache_UNKNOWN, 0   },
   407 /* c2 */ {Cache_UNKNOWN, 0   },
   408 /* c3 */ {Cache_UNKNOWN, 0   },
   409 /* c4 */ {Cache_UNKNOWN, 0   },
   410 /* c5 */ {Cache_UNKNOWN, 0   },
   411 /* c6 */ {Cache_UNKNOWN, 0   },
   412 /* c7 */ {Cache_UNKNOWN, 0   },
   413 /* c8 */ {Cache_UNKNOWN, 0   },
   414 /* c9 */ {Cache_UNKNOWN, 0   },
   415 /* ca */ {Cache_UNKNOWN, 0   },
   416 /* cb */ {Cache_UNKNOWN, 0   },
   417 /* cc */ {Cache_UNKNOWN, 0   },
   418 /* cd */ {Cache_UNKNOWN, 0   },
   419 /* ce */ {Cache_UNKNOWN, 0   },
   420 /* cf */ {Cache_UNKNOWN, 0   },
   421 /* d0 */ {Cache_UNKNOWN, 0   },
   422 /* d1 */ {Cache_UNKNOWN, 0   },
   423 /* d2 */ {Cache_UNKNOWN, 0   },
   424 /* d3 */ {Cache_UNKNOWN, 0   },
   425 /* d4 */ {Cache_UNKNOWN, 0   },
   426 /* d5 */ {Cache_UNKNOWN, 0   },
   427 /* d6 */ {Cache_UNKNOWN, 0   },
   428 /* d7 */ {Cache_UNKNOWN, 0   },
   429 /* d8 */ {Cache_UNKNOWN, 0   },
   430 /* d9 */ {Cache_UNKNOWN, 0   },
   431 /* da */ {Cache_UNKNOWN, 0   },
   432 /* db */ {Cache_UNKNOWN, 0   },
   433 /* dc */ {Cache_UNKNOWN, 0   },
   434 /* dd */ {Cache_UNKNOWN, 0   },
   435 /* de */ {Cache_UNKNOWN, 0   },
   436 /* df */ {Cache_UNKNOWN, 0   },
   437 /* e0 */ {Cache_UNKNOWN, 0   },
   438 /* e1 */ {Cache_UNKNOWN, 0   },
   439 /* e2 */ {Cache_UNKNOWN, 0   },
   440 /* e3 */ {Cache_UNKNOWN, 0   },
   441 /* e4 */ {Cache_UNKNOWN, 0   },
   442 /* e5 */ {Cache_UNKNOWN, 0   },
   443 /* e6 */ {Cache_UNKNOWN, 0   },
   444 /* e7 */ {Cache_UNKNOWN, 0   },
   445 /* e8 */ {Cache_UNKNOWN, 0   },
   446 /* e9 */ {Cache_UNKNOWN, 0   },
   447 /* ea */ {Cache_UNKNOWN, 0   },
   448 /* eb */ {Cache_UNKNOWN, 0   },
   449 /* ec */ {Cache_UNKNOWN, 0   },
   450 /* ed */ {Cache_UNKNOWN, 0   },
   451 /* ee */ {Cache_UNKNOWN, 0   },
   452 /* ef */ {Cache_UNKNOWN, 0   },
   453 /* f0 */ {Cache_UNKNOWN, 0   },
   454 /* f1 */ {Cache_UNKNOWN, 0   },
   455 /* f2 */ {Cache_UNKNOWN, 0   },
   456 /* f3 */ {Cache_UNKNOWN, 0   },
   457 /* f4 */ {Cache_UNKNOWN, 0   },
   458 /* f5 */ {Cache_UNKNOWN, 0   },
   459 /* f6 */ {Cache_UNKNOWN, 0   },
   460 /* f7 */ {Cache_UNKNOWN, 0   },
   461 /* f8 */ {Cache_UNKNOWN, 0   },
   462 /* f9 */ {Cache_UNKNOWN, 0   },
   463 /* fa */ {Cache_UNKNOWN, 0   },
   464 /* fb */ {Cache_UNKNOWN, 0   },
   465 /* fc */ {Cache_UNKNOWN, 0   },
   466 /* fd */ {Cache_UNKNOWN, 0   },
   467 /* fe */ {Cache_UNKNOWN, 0   },
   468 /* ff */ {Cache_UNKNOWN, 0   }
   469 };
   472 /*
   473  * use the above table to determine the CacheEntryLineSize.
   474  */
   475 static void
   476 getIntelCacheEntryLineSize(unsigned long val, int *level, 
   477 						unsigned long *lineSize)
   478 {
   479     CacheType type;
   481     type = CacheMap[val].type;
   482     /* only interested in data caches */
   483     /* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
   484      * this data check has the side effect of rejecting that entry. If
   485      * that wasn't the case, we could have to reject it explicitly */
   486     if (CacheMap[val].lineSize == 0) {
   487 	return;
   488     }
   489     /* look at the caches, skip types we aren't interested in.
   490      * if we already have a value for a lower level cache, skip the
   491      * current entry */
   492     if ((type == Cache_L1)|| (type == Cache_L1d)) {
   493 	*level = 1;
   494 	*lineSize = CacheMap[val].lineSize;
   495     } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
   496 	*level = 2;
   497 	*lineSize = CacheMap[val].lineSize;
   498     } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
   499 	*level = 3;
   500 	*lineSize = CacheMap[val].lineSize;
   501     }
   502     return;
   503 }
   506 static void
   507 getIntelRegisterCacheLineSize(unsigned long val, 
   508 			int *level, unsigned long *lineSize)
   509 {
   510     getIntelCacheEntryLineSize(val >> 24 & 0xff, level, lineSize);
   511     getIntelCacheEntryLineSize(val >> 16 & 0xff, level, lineSize);
   512     getIntelCacheEntryLineSize(val >> 8 & 0xff, level, lineSize);
   513     getIntelCacheEntryLineSize(val & 0xff, level, lineSize);
   514 }
   516 /*
   517  * returns '0' if no recognized cache is found, or if the cache
   518  * information is supported by this processor 
   519  */
   520 static unsigned long
   521 getIntelCacheLineSize(int cpuidLevel)
   522 {
   523     int level = 4;
   524     unsigned long lineSize = 0;
   525     unsigned long eax, ebx, ecx, edx;
   526     int repeat, count;
   528     if (cpuidLevel < 2) {
   529 	return 0;
   530     }
   532     /* command '2' of the cpuid is intel's cache info call. Each byte of the
   533      * 4 registers contain a potential descriptor for the cache. The CacheMap	
   534      * table maps the cache entry with the processor cache. Register 'al'
   535      * contains a count value that cpuid '2' needs to be called in order to 
   536      * find all the cache descriptors. Only registers with the high bit set
   537      * to 'zero' have valid descriptors. This code loops through all the
   538      * required calls to cpuid '2' and passes any valid descriptors it finds
   539      * to the getIntelRegisterCacheLineSize code, which breaks the registers
   540      * down into their component descriptors. In the end the lineSize of the
   541      * lowest level cache data cache is returned. */
   542     freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
   543     repeat = eax & 0xf;
   544     for (count = 0; count < repeat; count++) {
   545 	if ((eax & 0x80000000) == 0) {
   546 	    getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
   547 	}
   548 	if ((ebx & 0x80000000) == 0) {
   549 	    getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
   550 	}
   551 	if ((ecx & 0x80000000) == 0) {
   552 	    getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
   553 	}
   554 	if ((edx & 0x80000000) == 0) {
   555 	    getIntelRegisterCacheLineSize(edx, &level, &lineSize);
   556 	}
   557 	if (count+1 != repeat) {
   558 	    freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
   559 	}
   560     }
   561     return lineSize;
   562 }
   564 /*
   565  * returns '0' if the cache info is not supported by this processor.
   566  * This is based on the AMD extended cache commands for cpuid. 
   567  * (see "AMD Processor Recognition Application Note" Publication 20734).
   568  * Some other processors use the identical scheme.
   569  * (see "Processor Recognition, Transmeta Corporation").
   570  */
   571 static unsigned long
   572 getOtherCacheLineSize(unsigned long cpuidLevel)
   573 {
   574     unsigned long lineSize = 0;
   575     unsigned long eax, ebx, ecx, edx;
   577     /* get the Extended CPUID level */
   578     freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
   579     cpuidLevel = eax;
   581     if (cpuidLevel >= 0x80000005) {
   582 	freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
   583 	lineSize = ecx & 0xff; /* line Size, L1 Data Cache */
   584     }
   585     return lineSize;
   586 }
   588 static const char * const manMap[] = {
   589 #define INTEL     0
   590     "GenuineIntel",
   591 #define AMD       1
   592     "AuthenticAMD",
   593 #define CYRIX     2
   594     "CyrixInstead",
   595 #define CENTAUR   2
   596     "CentaurHauls",
   597 #define NEXGEN    3
   598     "NexGenDriven",
   599 #define TRANSMETA 4
   600     "GenuineTMx86",
   601 #define RISE      5
   602     "RiseRiseRise",
   603 #define UMC       6
   604     "UMC UMC UMC ",
   605 #define SIS       7
   606     "Sis Sis Sis ",
   607 #define NATIONAL  8
   608     "Geode by NSC",
   609 };
   611 static const int n_manufacturers = sizeof(manMap)/sizeof(manMap[0]);
   614 #define MAN_UNKNOWN 9
   616 #if !defined(AMD_64)
   617 #define SSE2_FLAG (1<<26)
   618 unsigned long
   619 s_mpi_is_sse2()
   620 {
   621     unsigned long eax, ebx, ecx, edx;
   622     int manufacturer = MAN_UNKNOWN;
   623     int i;
   624     char string[13];
   626     if (is386() || is486()) {
   627 	return 0;
   628     }
   629     freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
   630     /* string holds the CPU's manufacturer ID string - a twelve
   631      * character ASCII string stored in ebx, edx, ecx, and
   632      * the 32-bit extended feature flags are in edx, ecx.
   633      */
   634     *(int *)string = ebx;
   635     *(int *)&string[4] = (int)edx;
   636     *(int *)&string[8] = (int)ecx;
   637     string[12] = 0;
   639     /* has no SSE2 extensions */
   640     if (eax == 0) {
   641 	return 0;
   642     }
   644     for (i=0; i < n_manufacturers; i++) {
   645 	if ( strcmp(manMap[i],string) == 0) {
   646 	    manufacturer = i;
   647 	    break;
   648 	}
   649     }
   651     freebl_cpuid(1,&eax,&ebx,&ecx,&edx);
   652     return (edx & SSE2_FLAG) == SSE2_FLAG;
   653 }
   654 #endif
   656 unsigned long
   657 s_mpi_getProcessorLineSize()
   658 {
   659     unsigned long eax, ebx, ecx, edx;
   660     unsigned long cpuidLevel;
   661     unsigned long cacheLineSize = 0;
   662     int manufacturer = MAN_UNKNOWN;
   663     int i;
   664     char string[65];
   666 #if !defined(AMD_64)
   667     if (is386()) {
   668 	return 0; /* 386 had no cache */
   669     } if (is486()) {
   670 	return 32; /* really? need more info */
   671     }
   672 #endif
   674     /* Pentium, cpuid command is available */
   675     freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
   676     cpuidLevel = eax;
   677     /* string holds the CPU's manufacturer ID string - a twelve
   678      * character ASCII string stored in ebx, edx, ecx, and
   679      * the 32-bit extended feature flags are in edx, ecx.
   680      */
   681     *(int *)string = ebx;
   682     *(int *)&string[4] = (int)edx;
   683     *(int *)&string[8] = (int)ecx;
   684     string[12] = 0;
   686     manufacturer = MAN_UNKNOWN;
   687     for (i=0; i < n_manufacturers; i++) {
   688 	if ( strcmp(manMap[i],string) == 0) {
   689 	    manufacturer = i;
   690 	}
   691     }
   693     if (manufacturer == INTEL) {
   694 	cacheLineSize = getIntelCacheLineSize(cpuidLevel);
   695     } else {
   696 	cacheLineSize = getOtherCacheLineSize(cpuidLevel);
   697     }
   698     /* doesn't support cache info based on cpuid. This means
   699      * an old pentium class processor, which have cache lines of
   700      * 32. If we learn differently, we can use a switch based on
   701      * the Manufacturer id  */
   702     if (cacheLineSize == 0) {
   703 	cacheLineSize = 32;
   704     }
   705     return cacheLineSize;
   706 }
   707 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
   708 #endif
   710 #if defined(__ppc64__) 
   711 /*
   712  *  Sigh, The PPC has some really nice features to help us determine cache
   713  *  size, since it had lots of direct control functions to do so. The POWER
   714  *  processor even has an instruction to do this, but it was dropped in
   715  *  PowerPC. Unfortunately most of them are not available in user mode.
   716  *
   717  *  The dcbz function would be a great way to determine cache line size except
   718  *  1) it only works on write-back memory (it throws an exception otherwise), 
   719  *  and 2) because so many mac programs 'knew' the processor cache size was
   720  *  32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
   721  *  G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
   722  *  these programs happy. dcbzl work if 64 bit instructions are supported.
   723  *  If you know 64 bit instructions are supported, and that stack is 
   724  *  write-back, you can use this code.
   725  */
   726 #include "memory.h"
   728 /* clear the cache line that contains 'array' */
   729 static inline void dcbzl(char *array)
   730 {
   731 	register char *a asm("r2") = array;
   732 	__asm__ __volatile__( "dcbzl %0,r0" : "=r" (a): "0"(a) );
   733 }
   736 #define PPC_DO_ALIGN(x,y) ((char *)\
   737 			((((long long) (x))+((y)-1))&~((y)-1)))
   739 #define PPC_MAX_LINE_SIZE 256
   740 unsigned long
   741 s_mpi_getProcessorLineSize()
   742 {
   743     char testArray[2*PPC_MAX_LINE_SIZE+1];
   744     char *test;
   745     int i;
   747     /* align the array on a maximum line size boundary, so we
   748      * know we are starting to clear from the first address */
   749     test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE); 
   750     /* set all the values to 1's */
   751     memset(test, 0xff, PPC_MAX_LINE_SIZE);
   752     /* clear one cache block starting at 'test' */
   753     dcbzl(test);
   755     /* find the size of the cleared area, that's our block size */
   756     for (i=PPC_MAX_LINE_SIZE; i != 0; i = i/2) {
   757 	if (test[i-1] == 0) {
   758 	    return i;
   759 	}
   760     }
   761     return 0;
   762 }
   764 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
   765 #endif
/*
 * Put other processor- and platform-specific cache code here. It should
 * return the smallest cache line size in bytes on the processor
 * (usually the L1 cache). If the OS has a call for this, this would be
 * a great place to put it.
 *
 * If there is no cache, return 0.
 *
 * Define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
 * below aren't compiled.
 */
   779  */
   782 /* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or 
   783  * OS */
   784 #if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED)
   786 unsigned long
   787 s_mpi_getProcessorLineSize()
   788 {
   789    return MPI_CACHE_LINE_SIZE;
   790 }
   791 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
   792 #endif
   795 /* If no way to get the processor cache line size has been defined, assume
   796  * it's 32 bytes (most common value, does not significantly impact performance)
   797  */ 
   798 #ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
   799 unsigned long
   800 s_mpi_getProcessorLineSize()
   801 {
   802    return 32;
   803 }
   804 #endif
   806 #ifdef TEST_IT
   807 #include <stdio.h>
   809 main()
   810 {
   811     printf("line size = %d\n", s_mpi_getProcessorLineSize());
   812 } 
   813 #endif

mercurial