Skip to content

Use cpuid 4 with subleafs to query L1 cache size on Intel processors #1236

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 24, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 105 additions & 15 deletions cpuid_x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,23 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
*edx = cpuInfo[3];
}

void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx)
{
int cpuInfo[4] = {-1};
__cpuidex(cpuInfo, op, count);
*eax = cpuInfo[0];
*ebx = cpuInfo[1];
*ecx = cpuInfo[2];
*edx = cpuInfo[3];
}

#else

#ifndef CPUIDEMU

#if defined(__APPLE__) && defined(__i386__)
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx);
#else
static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#if defined(__i386__) && defined(__PIC__)
Expand All @@ -90,6 +101,19 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
#endif
}

static C_INLINE void cpuid_count(int op, int count ,int *eax, int *ebx, int *ecx, int *edx){
#if defined(__i386__) && defined(__PIC__)
__asm__ __volatile__
("mov %%ebx, %%edi;"
"cpuid;"
"xchgl %%ebx, %%edi;"
: "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
#else
__asm__ __volatile__
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
#endif
}
#endif

#else
Expand Down Expand Up @@ -133,6 +157,10 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *
*edx = idlist[current].d;
}

void cpuid_count (unsigned int op, unsigned int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
return cpuid (op, eax, ebx, ecx, edx);
}

#endif

#endif // _MSC_VER
Expand Down Expand Up @@ -312,9 +340,9 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
cpuid(0, &cpuid_level, &ebx, &ecx, &edx);

if (cpuid_level > 1) {

int numcalls =0 ;
cpuid(2, &eax, &ebx, &ecx, &edx);

numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries
info[ 0] = BITMASK(eax, 8, 0xff);
info[ 1] = BITMASK(eax, 16, 0xff);
info[ 2] = BITMASK(eax, 24, 0xff);
Expand All @@ -335,7 +363,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
info[14] = BITMASK(edx, 24, 0xff);

for (i = 0; i < 15; i++){

switch (info[i]){

/* This table is from http://www.sandpile.org/ia32/cpuid.htm */
Expand Down Expand Up @@ -637,12 +664,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
LD1.linesize = 64;
break;
case 0x63 :
DTB.size = 2048;
DTB.associative = 4;
DTB.linesize = 32;
LDTB.size = 4096;
LDTB.associative= 4;
LDTB.linesize = 32;
DTB.size = 2048;
DTB.associative = 4;
DTB.linesize = 32;
LDTB.size = 4096;
LDTB.associative= 4;
LDTB.linesize = 32;
break;
case 0x66 :
LD1.size = 8;
LD1.associative = 4;
Expand Down Expand Up @@ -675,12 +703,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
LC1.associative = 8;
break;
case 0x76 :
ITB.size = 2048;
ITB.associative = 0;
ITB.linesize = 8;
LITB.size = 4096;
LITB.associative= 0;
LITB.linesize = 8;
ITB.size = 2048;
ITB.associative = 0;
ITB.linesize = 8;
LITB.size = 4096;
LITB.associative= 0;
LITB.linesize = 8;
break;
case 0x77 :
LC1.size = 16;
LC1.associative = 4;
Expand Down Expand Up @@ -891,6 +920,67 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
}

if (get_vendor() == VENDOR_INTEL) {
if(LD1.size<=0 || LC1.size<=0){
//If we didn't detect L1 correctly before,
int count;
for (count=0;count <4;count++) {
cpuid_count(4, count, &eax, &ebx, &ecx, &edx);
switch (eax &0x1f) {
case 0:
continue;
case 1:
case 3:
{
switch ((eax >>5) &0x07)
{
case 1:
{
// fprintf(stderr,"L1 data cache...\n");
int sets = ecx+1;
int lines = (ebx & 0x0fff) +1;
ebx>>=12;
int part = (ebx&0x03ff)+1;
ebx >>=10;
int assoc = (ebx&0x03ff)+1;
LD1.size = (assoc*part*lines*sets)/1024;
LD1.associative = assoc;
LD1.linesize= lines;
break;
}
default:
break;
}
break;
}
case 2:
{
switch ((eax >>5) &0x07)
{
case 1:
{
// fprintf(stderr,"L1 instruction cache...\n");
int sets = ecx+1;
int lines = (ebx & 0x0fff) +1;
ebx>>=12;
int part = (ebx&0x03ff)+1;
ebx >>=10;
int assoc = (ebx&0x03ff)+1;
LC1.size = (assoc*part*lines*sets)/1024;
LC1.associative = assoc;
LC1.linesize= lines;
break;
}
default:
break;
}
break;

}
default:
break;
}
}
}
cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
if (cpuid_level >= 0x80000006) {
if(L2.size<=0){
Expand Down