Home > Back-end >  Using CPUID to get cache size in Ryzen CPU
Using CPUID to get cache size in Ryzen CPU

Time:08-20

I wanted to use the CPUID instruction to get the size for each cache level (L1, L2, L3).

I've been reading the AMD CPUID documentation (the original post linked it here; the image is not available).

On page 616 there is the information relevant to L2 and L3.

[Image from the original post not available]

I have written a simple C program that uses inline assembly to print out these values.

/*
 * Translate the 4-bit associativity code that CPUID reports for the L2 and
 * L3 caches into a human-readable string.
 *
 * code: the raw field value as extracted by the caller (expected 0..15).
 *
 * Returns a pointer to a string literal; never NULL, so the result can be
 * handed straight to printf("%s").
 *
 * Codes without a named meaning yield "Reserved" instead of asserting:
 * the hardware can legitimately report them, and with NDEBUG defined an
 * assert would vanish and a null pointer would reach printf.
 */
const char* amd_L2_L3_associotivity_str(uint32_t code)
{
    switch(code) {
        case 0: return "Disabled";
        case 1: return "1 way (direct mapped)";
        case 2: return "2 way";
        case 3: return "3 way";
        case 4: return "4 way";
        case 5: return "6 way";
        case 6: return "8 way";
        case 8: return "16 way";
        // Code 9 means this leaf does not carry the real value; the caller
        // has to query the extended cache-topology leaf instead.
        case 9: return "Value for all fields should be determined from Fn8000_001D";
        case 10: return "32 way";
        case 11: return "48 way";
        case 12: return "64 way";
        case 13: return "96 way";
        case 14: return "128 way";
        case 15: return "Fully Associative";
        default: return "Reserved";
    }
}

/*
 * Query CPUID leaves 0x80000005 (L1) and 0x80000006 (L2/L3) and print the
 * cache parameters they report to stdout.
 *
 * NOTE: this is the version from the question. Its bit-field extraction is
 * intentionally left as posted: it reads the fields in reverse order (the
 * size is taken from the low bits, the line size from the high bits), which
 * is what produces the implausible output shown after the program. The
 * answer further down explains the correct layout.
 */
void cpuid_caches_amd()
{
    uint32_t eax, ebx, ecx, edx;
    { // L1
        eax = 0x80000005; // CPUID leaf that reports the L1 caches

        __asm__ (
            "cpuid" // cpuid is the name of the instruction that queries the info we want
            : "+a" (eax) // read-write: carries the leaf number in and a result out
            , "=b" (ebx) // written by cpuid; not used below but must be declared
            , "=c" (ecx)
            , "=d" (edx)
        );

        // BUG (kept on purpose, see the note above): field order is reversed.
        uint32_t
            dataCache_size = ecx & 0xFF,
            dataCache_associativity = (ecx >> 8) & 0xFF,
            dataCache_linesPerTag = (ecx >> 16) & 0xFF,
            dataCache_lineSize = (ecx >> 24) & 0xFF;

        uint32_t
            instrCache_size = edx & 0xFF,
            instrCache_associativity = (edx >> 8) & 0xFF,
            instrCache_linesPerTag = (edx >> 16) & 0xFF,
            instrCache_lineSize = (edx >> 24) & 0xFF;

        // %u because every value printed here is an unsigned 32-bit quantity.
        printf(
            "L1 Data Cache:\n"
            "\tSize: %u KB\n"
            "\tAssociativity: %u\n"
            "\tLines per Tag: %u\n"
            "\tLine Size: %u B\n"
            "\n"
            "L1 Instruction Cache:\n"
            "\tSize: %u KB\n"
            "\tAssociativity: %u\n"
            "\tLines per Tag: %u\n"
            "\tLine Size: %u B\n"
            ,
            dataCache_size,
            dataCache_associativity,
            dataCache_linesPerTag,
            dataCache_lineSize,
            instrCache_size,
            instrCache_associativity,
            instrCache_linesPerTag,
            instrCache_lineSize
        );
    }

    { // L2, L3
        eax = 0x80000006; // CPUID leaf that reports the L2 and L3 caches

        __asm__ (
            "cpuid" // cpuid is the name of the instruction that queries the info we want
            : "+a" (eax) // read-write: carries the leaf number in and a result out
            , "=b" (ebx)
            , "=c" (ecx)
            , "=d" (edx)
        );

        // BUG (kept on purpose, see the note above): field order is reversed.
        uint32_t
            L2_size = ecx & 0xFFFF,
            L2_associativity = (ecx >> 16) & 0xF,
            L2_linesPerTag = (ecx >> 20) & 0xF,
            L2_lineSize = (ecx >> 24) & 0xFF;


        uint32_t
            L3_size = edx & 0x3FFF,
            L3_associativity = (edx >> 16) & 0xF,
            L3_linesPerTag = (edx >> 20) & 0xF,
            L3_lineSize = (edx >> 24) & 0xFF;

        printf(
            "L2 Cache:\n"
            "\tSize: %u KB\n"
            "\tAssociativity: %s\n"
            "\tLines per Tag: %u\n"
            "\tLine Size: %u B\n"
            "\n"
            "L3 Cache:\n"
            "\tSize: %u KB\n"
            "\tAssociativity: %s\n"
            "\tLines per Tag: %u\n"
            "\tLine Size: %u B\n"
            ,
            L2_size,
            amd_L2_L3_associotivity_str(L2_associativity),
            L2_linesPerTag,
            L2_lineSize,
            L3_size * 512, // the raw L3 field is scaled by 512 to print it in KB
            amd_L2_L3_associotivity_str(L3_associativity),
            L3_linesPerTag,
            L3_lineSize
        );
    }
}

// Program entry point: print the cache information and exit successfully.
int main()
{
    cpuid_caches_amd();
    return 0;
}

This is the output of the program for my Ryzen 3700X:

L1 Data Cache:
        Size: 64 KB
        Associativity: 1
        Lines per Tag: 8
        Line Size: 32 B

L1 Instruction Cache:
        Size: 64 KB
        Associativity: 1
        Lines per Tag: 8
        Line Size: 32 B
L2 Cache:
        Size: 24896 KB
        Associativity: Disabled
        Lines per Tag: 0
        Line Size: 2 B

L3 Cache:
        Size: 2260992 KB
        Associativity: Disabled
        Lines per Tag: 0
        Line Size: 1 B

According to this I have 2GB of L3 cache which is not possible. According to the official specs, it should have L1: 512KB, L2: 4MB, L3: 32MB.

Another thing that puzzles me is that L1DcSize is only 8 bits wide. That only allows representing up to 255 KB of L1 cache, although my CPU should have 512 KB of L1!

What's wrong with my code? How can I get the actual cache sizes?

EDIT:

Thanks all for the replies. As people pointed out, my bit shifting was inverted. Still there is the question about cache L1. This is what the code looks like now:

/*
 * Query CPUID leaves 0x80000005 (L1) and 0x80000006 (L2/L3) and print the
 * cache parameters they report to stdout.
 *
 * In each result register the size sits in the uppermost bits and the line
 * size in the lowest byte; every field is extracted by shifting right by
 * the position of its lowest bit and masking to its width.
 *
 * The values describe the caches as seen by the core executing the
 * instruction, not chip-wide totals.
 */
void cpuid_caches_amd()
{
    uint32_t eax, ebx, ecx, edx;
    { // L1
        eax = 0x80000005; // CPUID leaf that reports the L1 caches

        __asm__ (
            "cpuid" // cpuid is the name of the instruction that queries the info we want
            : "+a" (eax) // read-write: carries the leaf number in and a result out
            , "=b" (ebx) // written by cpuid; not used below but must be declared
            , "=c" (ecx)
            , "=d" (edx)
        );

        // ECX describes the L1 data cache, one byte per field.
        uint32_t
            dataCache_size = (ecx >> 24) & 0xFF,
            dataCache_associativity = (ecx >> 16) & 0xFF,
            dataCache_linesPerTag = (ecx >> 8) & 0xFF,
            dataCache_lineSize = ecx & 0xFF;

        // EDX describes the L1 instruction cache with the same layout.
        uint32_t
            instrCache_size = (edx >> 24) & 0xFF,
            instrCache_associativity = (edx >> 16) & 0xFF,
            instrCache_linesPerTag = (edx >> 8) & 0xFF,
            instrCache_lineSize = edx & 0xFF;

        // %u because every value printed here is an unsigned 32-bit quantity.
        printf(
            "L1 Data Cache:\n"
            "\tSize: %u KB\n"
            "\tAssociativity: %u\n"
            "\tLines per Tag: %u\n"
            "\tLine Size: %u B\n"
            "\n"
            "L1 Instruction Cache:\n"
            "\tSize: %u KB\n"
            "\tAssociativity: %u\n"
            "\tLines per Tag: %u\n"
            "\tLine Size: %u B\n"
            ,
            dataCache_size,
            dataCache_associativity,
            dataCache_linesPerTag,
            dataCache_lineSize,
            instrCache_size,
            instrCache_associativity,
            instrCache_linesPerTag,
            instrCache_lineSize
        );
    }

    { // L2, L3
        eax = 0x80000006; // CPUID leaf that reports the L2 and L3 caches

        __asm__ (
            "cpuid" // cpuid is the name of the instruction that queries the info we want
            : "+a" (eax) // read-write: carries the leaf number in and a result out
            , "=b" (ebx)
            , "=c" (ecx)
            , "=d" (edx)
        );

        // ECX describes L2: size in bits 31:16, then two 4-bit fields, then
        // the line size in the low byte.
        uint32_t
            L2_size = (ecx >> 16) & 0xFFFF,
            L2_associativity = (ecx >> 12) & 0xF,
            L2_linesPerTag = (ecx >> 8) & 0xF,
            L2_lineSize = ecx & 0xFF;


        // EDX describes L3: size in bits 31:18, remaining fields laid out
        // like L2.
        uint32_t
            L3_size = (edx >> 18) & 0x3FFF,
            L3_associativity = (edx >> 12) & 0xF,
            L3_linesPerTag = (edx >> 8) & 0xF,
            L3_lineSize = (edx >> 0) & 0xFF;

        printf(
            "L2 Cache:\n"
            "\tSize: %u KB\n"
            "\tAssociativity: %s\n"
            "\tLines per Tag: %u\n"
            "\tLine Size: %u B\n"
            "\n"
            "L3 Cache:\n"
            "\tSize: %u KB\n"
            "\tAssociativity: %s\n"
            "\tLines per Tag: %u\n"
            "\tLine Size: %u B\n"
            ,
            L2_size,
            amd_L2_L3_associotivity_str(L2_associativity),
            L2_linesPerTag,
            L2_lineSize,
            L3_size * 512, // the raw L3 field is scaled by 512 to print it in KB
            amd_L2_L3_associotivity_str(L3_associativity),
            L3_linesPerTag,
            L3_lineSize
        );
    }
}

And the new output:

L1 Data Cache:
        Size: 32 KB
        Associativity: 8
        Lines per Tag: 1
        Line Size: 64 B

L1 Instruction Cache:
        Size: 32 KB
        Associativity: 8
        Lines per Tag: 1
        Line Size: 64 B
L2 Cache:
        Size: 512 KB
        Associativity: 8 way
        Lines per Tag: 1
        Line Size: 64 B

L3 Cache:
        Size: 32768 KB
        Associativity: Value for all fields should be determined from Fn8000_001D
        Lines per Tag: 1
        Line Size: 64 B

CodePudding user response:

It seems that you're mistaken in the bit layout of the result. The size results are in the upper bits, but you're extracting the lower bits. The other fields are similarly reversed in order.

For example, L3Size is in 31:18, but you use a mask of 0x3FFF with no shift, which uses 13:0.

Instead of:

L3_size = edx & 0x3FFF,
L3_associativity = (edx >> 16) & 0xF,
L3_linesPerTag = (edx >> 20) & 0xF,
L3_lineSize = (edx >> 24) & 0xFF;

Write:

L3_size = (edx >> 18) & 0x3FFF,
L3_associativity = (edx >> 12) & 0xF,
L3_linesPerTag = (edx >> 8) & 0xF,
L3_lineSize = (edx >> 0) & 0xFF;

And do similarly for the other registers. The lowest bit of a field is the amount by which you have to shift.


As for the second question about L1 size, the spec sheet has the chip total. However, what matters from the CPUID perspective is that core's cache. If you multiply the 32 kiB of L1i and 32 kiB of L1d by the 8 cores in the processor, you get the expected 512 kiB.

  • Related