// Copyright 2018 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // +build i386 amd64 // Package cpuid provides basic functionality for creating and adjusting CPU // feature sets. // // To use FeatureSets, one should start with an existing FeatureSet (either a // known platform, or HostFeatureSet()) and then add, remove, and test for // features as desired. // // For example: Test for hardware extended state saving, and if we don't have // it, don't expose AVX, which cannot be saved with fxsave. // // if !HostFeatureSet().HasFeature(X86FeatureXSAVE) { // exposedFeatures.Remove(X86FeatureAVX) // } package cpuid import ( "bytes" "fmt" "io/ioutil" "strconv" "strings" "gvisor.dev/gvisor/pkg/log" ) // Common references for CPUID leaves and bits: // // Intel: // * Intel SDM Volume 2, Chapter 3.2 "CPUID" (more up-to-date) // * Intel Application Note 485 (more detailed) // // AMD: // * AMD64 APM Volume 3, Appendix 3 "Obtaining Processor Information ..." // Feature is a unique identifier for a particular cpu feature. We just use an // int as a feature number on x86. // // Features are numbered according to "blocks". Each block is 32 bits, and // feature bits from the same source (cpuid leaf/level) are in the same block. type Feature int // block is a collection of 32 Feature bits. type block int const blockSize = 32 // Feature bits are numbered according to "blocks". Each block is 32 bits, and // feature bits from the same source (cpuid leaf/level) are in the same block. func featureID(b block, bit int) Feature { return Feature(32*int(b) + bit) } // Block 0 constants are all of the "basic" feature bits returned by a cpuid in // ecx with eax=1. const ( X86FeatureSSE3 Feature = iota X86FeaturePCLMULDQ X86FeatureDTES64 X86FeatureMONITOR X86FeatureDSCPL X86FeatureVMX X86FeatureSMX X86FeatureEST X86FeatureTM2 X86FeatureSSSE3 // Not a typo, "supplemental" SSE3. X86FeatureCNXTID X86FeatureSDBG X86FeatureFMA X86FeatureCX16 X86FeatureXTPR X86FeaturePDCM _ // ecx bit 16 is reserved. X86FeaturePCID X86FeatureDCA X86FeatureSSE4_1 X86FeatureSSE4_2 X86FeatureX2APIC X86FeatureMOVBE X86FeaturePOPCNT X86FeatureTSCD X86FeatureAES X86FeatureXSAVE X86FeatureOSXSAVE X86FeatureAVX X86FeatureF16C X86FeatureRDRAND _ // ecx bit 31 is reserved. ) // Block 1 constants are all of the "basic" feature bits returned by a cpuid in // edx with eax=1. const ( X86FeatureFPU Feature = 32 + iota X86FeatureVME X86FeatureDE X86FeaturePSE X86FeatureTSC X86FeatureMSR X86FeaturePAE X86FeatureMCE X86FeatureCX8 X86FeatureAPIC _ // edx bit 10 is reserved. X86FeatureSEP X86FeatureMTRR X86FeaturePGE X86FeatureMCA X86FeatureCMOV X86FeaturePAT X86FeaturePSE36 X86FeaturePSN X86FeatureCLFSH _ // edx bit 20 is reserved. X86FeatureDS X86FeatureACPI X86FeatureMMX X86FeatureFXSR X86FeatureSSE X86FeatureSSE2 X86FeatureSS X86FeatureHTT X86FeatureTM X86FeatureIA64 X86FeaturePBE ) // Block 2 bits are the "structured extended" features returned in ebx for // eax=7, ecx=0. const ( X86FeatureFSGSBase Feature = 2*32 + iota X86FeatureTSC_ADJUST _ // ebx bit 2 is reserved. X86FeatureBMI1 X86FeatureHLE X86FeatureAVX2 X86FeatureFDP_EXCPTN_ONLY X86FeatureSMEP X86FeatureBMI2 X86FeatureERMS X86FeatureINVPCID X86FeatureRTM X86FeatureCQM X86FeatureFPCSDS X86FeatureMPX X86FeatureRDT X86FeatureAVX512F X86FeatureAVX512DQ X86FeatureRDSEED X86FeatureADX X86FeatureSMAP X86FeatureAVX512IFMA X86FeaturePCOMMIT X86FeatureCLFLUSHOPT X86FeatureCLWB X86FeatureIPT // Intel processor trace. X86FeatureAVX512PF X86FeatureAVX512ER X86FeatureAVX512CD X86FeatureSHA X86FeatureAVX512BW X86FeatureAVX512VL ) // Block 3 bits are the "extended" features returned in ecx for eax=7, ecx=0. const ( X86FeaturePREFETCHWT1 Feature = 3*32 + iota X86FeatureAVX512VBMI X86FeatureUMIP X86FeaturePKU X86FeatureOSPKE X86FeatureWAITPKG X86FeatureAVX512_VBMI2 _ // ecx bit 7 is reserved X86FeatureGFNI X86FeatureVAES X86FeatureVPCLMULQDQ X86FeatureAVX512_VNNI X86FeatureAVX512_BITALG X86FeatureTME X86FeatureAVX512_VPOPCNTDQ _ // ecx bit 15 is reserved X86FeatureLA57 // ecx bits 17-21 are reserved _ _ _ _ _ X86FeatureRDPID // ecx bits 23-24 are reserved _ _ X86FeatureCLDEMOTE _ // ecx bit 26 is reserved X86FeatureMOVDIRI X86FeatureMOVDIR64B ) // Block 4 constants are for xsave capabilities in CPUID.(EAX=0DH,ECX=01H):EAX. // The CPUID leaf is available only if 'X86FeatureXSAVE' is present. const ( X86FeatureXSAVEOPT Feature = 4*32 + iota X86FeatureXSAVEC X86FeatureXGETBV1 X86FeatureXSAVES // EAX[31:4] are reserved. ) // Block 5 constants are the extended feature bits in // CPUID.(EAX=0x80000001):ECX. const ( X86FeatureLAHF64 Feature = 5*32 + iota X86FeatureCMP_LEGACY X86FeatureSVM X86FeatureEXTAPIC X86FeatureCR8_LEGACY X86FeatureLZCNT X86FeatureSSE4A X86FeatureMISALIGNSSE X86FeaturePREFETCHW X86FeatureOSVW X86FeatureIBS X86FeatureXOP X86FeatureSKINIT X86FeatureWDT _ // ecx bit 14 is reserved. X86FeatureLWP X86FeatureFMA4 X86FeatureTCE _ // ecx bit 18 is reserved. _ // ecx bit 19 is reserved. _ // ecx bit 20 is reserved. X86FeatureTBM X86FeatureTOPOLOGY X86FeaturePERFCTR_CORE X86FeaturePERFCTR_NB _ // ecx bit 25 is reserved. X86FeatureBPEXT X86FeaturePERFCTR_TSC X86FeaturePERFCTR_LLC X86FeatureMWAITX // ECX[31:30] are reserved. ) // Block 6 constants are the extended feature bits in // CPUID.(EAX=0x80000001):EDX. // // These are sparse, and so the bit positions are assigned manually. const ( // On AMD, EDX[24:23] | EDX[17:12] | EDX[9:0] are duplicate features // also defined in block 1 (in identical bit positions). Those features // are not listed here. block6DuplicateMask = 0x183f3ff X86FeatureSYSCALL Feature = 6*32 + 11 X86FeatureNX Feature = 6*32 + 20 X86FeatureMMXEXT Feature = 6*32 + 22 X86FeatureFXSR_OPT Feature = 6*32 + 25 X86FeatureGBPAGES Feature = 6*32 + 26 X86FeatureRDTSCP Feature = 6*32 + 27 X86FeatureLM Feature = 6*32 + 29 X86Feature3DNOWEXT Feature = 6*32 + 30 X86Feature3DNOW Feature = 6*32 + 31 ) // linuxBlockOrder defines the order in which linux organizes the feature // blocks. Linux also tracks feature bits in 32-bit blocks, but in an order // which doesn't match well here, so for the /proc/cpuinfo generation we simply // re-map the blocks to Linux's ordering and then go through the bits in each // block. var linuxBlockOrder = []block{1, 6, 0, 5, 2, 4, 3} // To make emulation of /proc/cpuinfo easy, these names match the names of the // basic features in Linux defined in arch/x86/kernel/cpu/capflags.c. var x86FeatureStrings = map[Feature]string{ // Block 0. X86FeatureSSE3: "pni", X86FeaturePCLMULDQ: "pclmulqdq", X86FeatureDTES64: "dtes64", X86FeatureMONITOR: "monitor", X86FeatureDSCPL: "ds_cpl", X86FeatureVMX: "vmx", X86FeatureSMX: "smx", X86FeatureEST: "est", X86FeatureTM2: "tm2", X86FeatureSSSE3: "ssse3", X86FeatureCNXTID: "cid", X86FeatureSDBG: "sdbg", X86FeatureFMA: "fma", X86FeatureCX16: "cx16", X86FeatureXTPR: "xtpr", X86FeaturePDCM: "pdcm", X86FeaturePCID: "pcid", X86FeatureDCA: "dca", X86FeatureSSE4_1: "sse4_1", X86FeatureSSE4_2: "sse4_2", X86FeatureX2APIC: "x2apic", X86FeatureMOVBE: "movbe", X86FeaturePOPCNT: "popcnt", X86FeatureTSCD: "tsc_deadline_timer", X86FeatureAES: "aes", X86FeatureXSAVE: "xsave", X86FeatureAVX: "avx", X86FeatureF16C: "f16c", X86FeatureRDRAND: "rdrand", // Block 1. X86FeatureFPU: "fpu", X86FeatureVME: "vme", X86FeatureDE: "de", X86FeaturePSE: "pse", X86FeatureTSC: "tsc", X86FeatureMSR: "msr", X86FeaturePAE: "pae", X86FeatureMCE: "mce", X86FeatureCX8: "cx8", X86FeatureAPIC: "apic", X86FeatureSEP: "sep", X86FeatureMTRR: "mtrr", X86FeaturePGE: "pge", X86FeatureMCA: "mca", X86FeatureCMOV: "cmov", X86FeaturePAT: "pat", X86FeaturePSE36: "pse36", X86FeaturePSN: "pn", X86FeatureCLFSH: "clflush", X86FeatureDS: "dts", X86FeatureACPI: "acpi", X86FeatureMMX: "mmx", X86FeatureFXSR: "fxsr", X86FeatureSSE: "sse", X86FeatureSSE2: "sse2", X86FeatureSS: "ss", X86FeatureHTT: "ht", X86FeatureTM: "tm", X86FeatureIA64: "ia64", X86FeaturePBE: "pbe", // Block 2. X86FeatureFSGSBase: "fsgsbase", X86FeatureTSC_ADJUST: "tsc_adjust", X86FeatureBMI1: "bmi1", X86FeatureHLE: "hle", X86FeatureAVX2: "avx2", X86FeatureSMEP: "smep", X86FeatureBMI2: "bmi2", X86FeatureERMS: "erms", X86FeatureINVPCID: "invpcid", X86FeatureRTM: "rtm", X86FeatureCQM: "cqm", X86FeatureMPX: "mpx", X86FeatureRDT: "rdt_a", X86FeatureAVX512F: "avx512f", X86FeatureAVX512DQ: "avx512dq", X86FeatureRDSEED: "rdseed", X86FeatureADX: "adx", X86FeatureSMAP: "smap", X86FeatureCLWB: "clwb", X86FeatureAVX512PF: "avx512pf", X86FeatureAVX512ER: "avx512er", X86FeatureAVX512CD: "avx512cd", X86FeatureSHA: "sha_ni", X86FeatureAVX512BW: "avx512bw", X86FeatureAVX512VL: "avx512vl", // Block 3. X86FeatureAVX512VBMI: "avx512vbmi", X86FeatureUMIP: "umip", X86FeaturePKU: "pku", X86FeatureOSPKE: "ospke", X86FeatureWAITPKG: "waitpkg", X86FeatureAVX512_VBMI2: "avx512_vbmi2", X86FeatureGFNI: "gfni", X86FeatureVAES: "vaes", X86FeatureVPCLMULQDQ: "vpclmulqdq", X86FeatureAVX512_VNNI: "avx512_vnni", X86FeatureAVX512_BITALG: "avx512_bitalg", X86FeatureTME: "tme", X86FeatureAVX512_VPOPCNTDQ: "avx512_vpopcntdq", X86FeatureLA57: "la57", X86FeatureRDPID: "rdpid", X86FeatureCLDEMOTE: "cldemote", X86FeatureMOVDIRI: "movdiri", X86FeatureMOVDIR64B: "movdir64b", // Block 4. X86FeatureXSAVEOPT: "xsaveopt", X86FeatureXSAVEC: "xsavec", X86FeatureXGETBV1: "xgetbv1", X86FeatureXSAVES: "xsaves", // Block 5. X86FeatureLAHF64: "lahf_lm", // LAHF/SAHF in long mode X86FeatureCMP_LEGACY: "cmp_legacy", X86FeatureSVM: "svm", X86FeatureEXTAPIC: "extapic", X86FeatureCR8_LEGACY: "cr8_legacy", X86FeatureLZCNT: "abm", // Advanced bit manipulation X86FeatureSSE4A: "sse4a", X86FeatureMISALIGNSSE: "misalignsse", X86FeaturePREFETCHW: "3dnowprefetch", X86FeatureOSVW: "osvw", X86FeatureIBS: "ibs", X86FeatureXOP: "xop", X86FeatureSKINIT: "skinit", X86FeatureWDT: "wdt", X86FeatureLWP: "lwp", X86FeatureFMA4: "fma4", X86FeatureTCE: "tce", X86FeatureTBM: "tbm", X86FeatureTOPOLOGY: "topoext", X86FeaturePERFCTR_CORE: "perfctr_core", X86FeaturePERFCTR_NB: "perfctr_nb", X86FeatureBPEXT: "bpext", X86FeaturePERFCTR_TSC: "ptsc", X86FeaturePERFCTR_LLC: "perfctr_llc", X86FeatureMWAITX: "mwaitx", // Block 6. X86FeatureSYSCALL: "syscall", X86FeatureNX: "nx", X86FeatureMMXEXT: "mmxext", X86FeatureFXSR_OPT: "fxsr_opt", X86FeatureGBPAGES: "pdpe1gb", X86FeatureRDTSCP: "rdtscp", X86FeatureLM: "lm", X86Feature3DNOWEXT: "3dnowext", X86Feature3DNOW: "3dnow", } // These flags are parse only---they can be used for setting / unsetting the // flags, but will not get printed out in /proc/cpuinfo. var x86FeatureParseOnlyStrings = map[Feature]string{ // Block 0. X86FeatureOSXSAVE: "osxsave", // Block 2. X86FeatureFDP_EXCPTN_ONLY: "fdp_excptn_only", X86FeatureFPCSDS: "fpcsds", X86FeatureIPT: "pt", X86FeatureCLFLUSHOPT: "clfushopt", // Block 3. X86FeaturePREFETCHWT1: "prefetchwt1", } // intelCacheDescriptors describe the caches and TLBs on the system. They are // returned in the registers for eax=2. Intel only. type intelCacheDescriptor uint8 // Valid cache/TLB descriptors. All descriptors can be found in Intel SDM Vol. // 2, Ch. 3.2, "CPUID", Table 3-12 "Encoding of CPUID Leaf 2 Descriptors". const ( intelNullDescriptor intelCacheDescriptor = 0 intelNoTLBDescriptor intelCacheDescriptor = 0xfe intelNoCacheDescriptor intelCacheDescriptor = 0xff // Most descriptors omitted for brevity as they are currently unused. ) // CacheType describes the type of a cache, as returned in eax[4:0] for eax=4. type CacheType uint8 const ( // cacheNull indicates that there are no more entries. cacheNull CacheType = iota // CacheData is a data cache. CacheData // CacheInstruction is an instruction cache. CacheInstruction // CacheUnified is a unified instruction and data cache. CacheUnified ) // Cache describes the parameters of a single cache on the system. // // +stateify savable type Cache struct { // Level is the hierarchical level of this cache (L1, L2, etc). Level uint32 // Type is the type of cache. Type CacheType // FullyAssociative indicates that entries may be placed in any block. FullyAssociative bool // Partitions is the number of physical partitions in the cache. Partitions uint32 // Ways is the number of ways of associativity in the cache. Ways uint32 // Sets is the number of sets in the cache. Sets uint32 // InvalidateHierarchical indicates that WBINVD/INVD from threads // sharing this cache acts upon lower level caches for threads sharing // this cache. InvalidateHierarchical bool // Inclusive indicates that this cache is inclusive of lower cache // levels. Inclusive bool // DirectMapped indicates that this cache is directly mapped from // address, rather than using a hash function. DirectMapped bool } // Just a way to wrap cpuid function numbers. type cpuidFunction uint32 // The constants below are the lower or "standard" cpuid functions, ordered as // defined by the hardware. const ( vendorID cpuidFunction = iota // Returns vendor ID and largest standard function. featureInfo // Returns basic feature bits and processor signature. intelCacheDescriptors // Returns list of cache descriptors. Intel only. intelSerialNumber // Returns processor serial number (obsolete on new hardware). Intel only. intelDeterministicCacheParams // Returns deterministic cache information. Intel only. monitorMwaitParams // Returns information about monitor/mwait instructions. powerParams // Returns information about power management and thermal sensors. extendedFeatureInfo // Returns extended feature bits. _ // Function 0x8 is reserved. intelDCAParams // Returns direct cache access information. Intel only. intelPMCInfo // Returns information about performance monitoring features. Intel only. intelX2APICInfo // Returns core/logical processor topology. Intel only. _ // Function 0xc is reserved. xSaveInfo // Returns information about extended state management. ) // The "extended" functions start at 0x80000000. const ( extendedFunctionInfo cpuidFunction = 0x80000000 + iota // Returns highest available extended function in eax. extendedFeatures // Returns some extended feature bits in edx and ecx. ) // These are the extended floating point state features. They are used to // enumerate floating point features in XCR0, XSTATE_BV, etc. const ( XSAVEFeatureX87 = 1 << 0 XSAVEFeatureSSE = 1 << 1 XSAVEFeatureAVX = 1 << 2 XSAVEFeatureBNDREGS = 1 << 3 XSAVEFeatureBNDCSR = 1 << 4 XSAVEFeatureAVX512op = 1 << 5 XSAVEFeatureAVX512zmm0 = 1 << 6 XSAVEFeatureAVX512zmm16 = 1 << 7 XSAVEFeaturePKRU = 1 << 9 ) var cpuFreqMHz float64 // x86FeaturesFromString includes features from x86FeatureStrings and // x86FeatureParseOnlyStrings. var x86FeaturesFromString = make(map[string]Feature) // FeatureFromString returns the Feature associated with the given feature // string plus a bool to indicate if it could find the feature. func FeatureFromString(s string) (Feature, bool) { f, b := x86FeaturesFromString[s] return f, b } // String implements fmt.Stringer. func (f Feature) String() string { if s := f.flagString(false); s != "" { return s } block := int(f) / 32 bit := int(f) % 32 return fmt.Sprintf("<cpuflag %d; block %d bit %d>", f, block, bit) } func (f Feature) flagString(cpuinfoOnly bool) string { if s, ok := x86FeatureStrings[f]; ok { return s } if !cpuinfoOnly { return x86FeatureParseOnlyStrings[f] } return "" } // FeatureSet is a set of Features for a CPU. // // +stateify savable type FeatureSet struct { // Set is the set of features that are enabled in this FeatureSet. Set map[Feature]bool // VendorID is the 12-char string returned in ebx:edx:ecx for eax=0. VendorID string // ExtendedFamily is part of the processor signature. ExtendedFamily uint8 // ExtendedModel is part of the processor signature. ExtendedModel uint8 // ProcessorType is part of the processor signature. ProcessorType uint8 // Family is part of the processor signature. Family uint8 // Model is part of the processor signature. Model uint8 // SteppingID is part of the processor signature. SteppingID uint8 // Caches describes the caches on the CPU. Caches []Cache // CacheLine is the size of a cache line in bytes. // // All caches use the same line size. This is not enforced in the CPUID // encoding, but is true on all known x86 processors. CacheLine uint32 } // FlagsString prints out supported CPU flags. If cpuinfoOnly is true, it is // equivalent to the "flags" field in /proc/cpuinfo. func (fs *FeatureSet) FlagsString(cpuinfoOnly bool) string { var s []string for _, b := range linuxBlockOrder { for i := 0; i < blockSize; i++ { if f := featureID(b, i); fs.Set[f] { if fstr := f.flagString(cpuinfoOnly); fstr != "" { s = append(s, fstr) } } } } return strings.Join(s, " ") } // WriteCPUInfoTo is to generate a section of one cpu in /proc/cpuinfo. This is // a minimal /proc/cpuinfo, it is missing some fields like "microcode" that are // not always printed in Linux. The bogomips field is simply made up. func (fs FeatureSet) WriteCPUInfoTo(cpu uint, b *bytes.Buffer) { fmt.Fprintf(b, "processor\t: %d\n", cpu) fmt.Fprintf(b, "vendor_id\t: %s\n", fs.VendorID) fmt.Fprintf(b, "cpu family\t: %d\n", ((fs.ExtendedFamily<<4)&0xff)|fs.Family) fmt.Fprintf(b, "model\t\t: %d\n", ((fs.ExtendedModel<<4)&0xff)|fs.Model) fmt.Fprintf(b, "model name\t: %s\n", "unknown") // Unknown for now. fmt.Fprintf(b, "stepping\t: %s\n", "unknown") // Unknown for now. fmt.Fprintf(b, "cpu MHz\t\t: %.3f\n", cpuFreqMHz) fmt.Fprintln(b, "fpu\t\t: yes") fmt.Fprintln(b, "fpu_exception\t: yes") fmt.Fprintf(b, "cpuid level\t: %d\n", uint32(xSaveInfo)) // Same as ax in vendorID. fmt.Fprintln(b, "wp\t\t: yes") fmt.Fprintf(b, "flags\t\t: %s\n", fs.FlagsString(true)) fmt.Fprintf(b, "bogomips\t: %.02f\n", cpuFreqMHz) // It's bogus anyway. fmt.Fprintf(b, "clflush size\t: %d\n", fs.CacheLine) fmt.Fprintf(b, "cache_alignment\t: %d\n", fs.CacheLine) fmt.Fprintf(b, "address sizes\t: %d bits physical, %d bits virtual\n", 46, 48) fmt.Fprintln(b, "power management:") // This is always here, but can be blank. fmt.Fprintln(b, "") // The /proc/cpuinfo file ends with an extra newline. } const ( amdVendorID = "AuthenticAMD" intelVendorID = "GenuineIntel" ) // AMD returns true if fs describes an AMD CPU. func (fs *FeatureSet) AMD() bool { return fs.VendorID == amdVendorID } // Intel returns true if fs describes an Intel CPU. func (fs *FeatureSet) Intel() bool { return fs.VendorID == intelVendorID } // ErrIncompatible is returned by FeatureSet.HostCompatible if fs is not a // subset of the host feature set. type ErrIncompatible struct { message string } // Error implements error. func (e ErrIncompatible) Error() string { return e.message } // CheckHostCompatible returns nil if fs is a subset of the host feature set. func (fs *FeatureSet) CheckHostCompatible() error { hfs := HostFeatureSet() if diff := fs.Subtract(hfs); diff != nil { return ErrIncompatible{fmt.Sprintf("CPU feature set %v incompatible with host feature set %v (missing: %v)", fs.FlagsString(false), hfs.FlagsString(false), diff)} } // The size of a cache line must match, as it is critical to correctly // utilizing CLFLUSH. Other cache properties are allowed to change, as // they are not important to correctness. if fs.CacheLine != hfs.CacheLine { return ErrIncompatible{fmt.Sprintf("CPU cache line size %d incompatible with host cache line size %d", fs.CacheLine, hfs.CacheLine)} } return nil } // Helper to convert 3 regs into 12-byte vendor ID. func vendorIDFromRegs(bx, cx, dx uint32) string { bytes := make([]byte, 0, 12) for i := uint(0); i < 4; i++ { b := byte(bx >> (i * 8)) bytes = append(bytes, b) } for i := uint(0); i < 4; i++ { b := byte(dx >> (i * 8)) bytes = append(bytes, b) } for i := uint(0); i < 4; i++ { b := byte(cx >> (i * 8)) bytes = append(bytes, b) } return string(bytes) } // ExtendedStateSize returns the number of bytes needed to save the "extended // state" for this processor and the boundary it must be aligned to. Extended // state includes floating point registers, and other cpu state that's not // associated with the normal task context. // // Note: We can save some space here with an optimization where we use a // smaller chunk of memory depending on features that are actually enabled. // Currently we just use the largest possible size for simplicity (which is // about 2.5K worst case, with avx512). func (fs *FeatureSet) ExtendedStateSize() (size, align uint) { if fs.UseXsave() { // Leaf 0 of xsaveinfo function returns the size for currently // enabled xsave features in ebx, the maximum size if all valid // features are saved with xsave in ecx, and valid XCR0 bits in // edx:eax. _, _, maxSize, _ := HostID(uint32(xSaveInfo), 0) return uint(maxSize), 64 } // If we don't support xsave, we fall back to fxsave, which requires // 512 bytes aligned to 16 bytes. return 512, 16 } // ValidXCR0Mask returns the bits that may be set to 1 in control register // XCR0. func (fs *FeatureSet) ValidXCR0Mask() uint64 { if !fs.UseXsave() { return 0 } eax, _, _, edx := HostID(uint32(xSaveInfo), 0) return uint64(edx)<<32 | uint64(eax) } // vendorIDRegs returns the 3 register values used to construct the 12-byte // vendor ID string for eax=0. func (fs *FeatureSet) vendorIDRegs() (bx, dx, cx uint32) { for i := uint(0); i < 4; i++ { bx |= uint32(fs.VendorID[i]) << (i * 8) } for i := uint(0); i < 4; i++ { dx |= uint32(fs.VendorID[i+4]) << (i * 8) } for i := uint(0); i < 4; i++ { cx |= uint32(fs.VendorID[i+8]) << (i * 8) } return } // signature returns the signature dword that's returned in eax when eax=1. func (fs *FeatureSet) signature() uint32 { var s uint32 s |= uint32(fs.SteppingID & 0xf) s |= uint32(fs.Model&0xf) << 4 s |= uint32(fs.Family&0xf) << 8 s |= uint32(fs.ProcessorType&0x3) << 12 s |= uint32(fs.ExtendedModel&0xf) << 16 s |= uint32(fs.ExtendedFamily&0xff) << 20 return s } // Helper to deconstruct signature dword. func signatureSplit(v uint32) (ef, em, pt, f, m, sid uint8) { sid = uint8(v & 0xf) m = uint8(v>>4) & 0xf f = uint8(v>>8) & 0xf pt = uint8(v>>12) & 0x3 em = uint8(v>>16) & 0xf ef = uint8(v >> 20) return } // Helper to convert blockwise feature bit masks into a set of features. Masks // must be provided in order for each block, without skipping them. If a block // does not matter for this feature set, 0 is specified. func setFromBlockMasks(blocks ...uint32) map[Feature]bool { s := make(map[Feature]bool) for b, blockMask := range blocks { for i := 0; i < blockSize; i++ { if blockMask&1 != 0 { s[featureID(block(b), i)] = true } blockMask >>= 1 } } return s } // blockMask returns the 32-bit mask associated with a block of features. func (fs *FeatureSet) blockMask(b block) uint32 { var mask uint32 for i := 0; i < blockSize; i++ { if fs.Set[featureID(b, i)] { mask |= 1 << uint(i) } } return mask } // Remove removes a Feature from a FeatureSet. It ignores features // that are not in the FeatureSet. func (fs *FeatureSet) Remove(feature Feature) { delete(fs.Set, feature) } // Add adds a Feature to a FeatureSet. It ignores duplicate features. func (fs *FeatureSet) Add(feature Feature) { fs.Set[feature] = true } // HasFeature tests whether or not a feature is in the given feature set. func (fs *FeatureSet) HasFeature(feature Feature) bool { return fs.Set[feature] } // Subtract returns the features present in fs that are not present in other. // If all features in fs are present in other, Subtract returns nil. func (fs *FeatureSet) Subtract(other *FeatureSet) (diff map[Feature]bool) { for f := range fs.Set { if !other.Set[f] { if diff == nil { diff = make(map[Feature]bool) } diff[f] = true } } return } // EmulateID emulates a cpuid instruction based on the feature set. func (fs *FeatureSet) EmulateID(origAx, origCx uint32) (ax, bx, cx, dx uint32) { switch cpuidFunction(origAx) { case vendorID: ax = uint32(xSaveInfo) // 0xd (xSaveInfo) is the highest function we support. bx, dx, cx = fs.vendorIDRegs() case featureInfo: // CLFLUSH line size is encoded in quadwords. Other fields in bx unsupported. bx = (fs.CacheLine / 8) << 8 cx = fs.blockMask(block(0)) dx = fs.blockMask(block(1)) ax = fs.signature() case intelCacheDescriptors: if !fs.Intel() { // Reserved on non-Intel. return 0, 0, 0, 0 } // "The least-significant byte in register EAX (register AL) // will always return 01H. Software should ignore this value // and not interpret it as an informational descriptor." - SDM // // We only support reporting cache parameters via // intelDeterministicCacheParams; report as much here. // // We do not support exposing TLB information at all. ax = 1 | (uint32(intelNoCacheDescriptor) << 8) case intelDeterministicCacheParams: if !fs.Intel() { // Reserved on non-Intel. return 0, 0, 0, 0 } // cx is the index of the cache to describe. if int(origCx) >= len(fs.Caches) { return uint32(cacheNull), 0, 0, 0 } c := fs.Caches[origCx] ax = uint32(c.Type) ax |= c.Level << 5 ax |= 1 << 8 // Always claim the cache is "self-initializing". if c.FullyAssociative { ax |= 1 << 9 } // Processor topology not supported. bx = fs.CacheLine - 1 bx |= (c.Partitions - 1) << 12 bx |= (c.Ways - 1) << 22 cx = c.Sets - 1 if !c.InvalidateHierarchical { dx |= 1 } if c.Inclusive { dx |= 1 << 1 } if !c.DirectMapped { dx |= 1 << 2 } case xSaveInfo: if !fs.UseXsave() { return 0, 0, 0, 0 } return HostID(uint32(xSaveInfo), origCx) case extendedFeatureInfo: if origCx != 0 { break // Only leaf 0 is supported. } bx = fs.blockMask(block(2)) cx = fs.blockMask(block(3)) case extendedFunctionInfo: // We only support showing the extended features. ax = uint32(extendedFeatures) cx = 0 case extendedFeatures: cx = fs.blockMask(block(5)) dx = fs.blockMask(block(6)) if fs.AMD() { // AMD duplicates some block 1 features in block 6. dx |= fs.blockMask(block(1)) & block6DuplicateMask } } return } // UseXsave returns the choice of fp state saving instruction. func (fs *FeatureSet) UseXsave() bool { return fs.HasFeature(X86FeatureXSAVE) && fs.HasFeature(X86FeatureOSXSAVE) } // UseXsaveopt returns true if 'fs' supports the "xsaveopt" instruction. func (fs *FeatureSet) UseXsaveopt() bool { return fs.UseXsave() && fs.HasFeature(X86FeatureXSAVEOPT) } // HostID executes a native CPUID instruction. func HostID(axArg, cxArg uint32) (ax, bx, cx, dx uint32) // HostFeatureSet uses cpuid to get host values and construct a feature set // that matches that of the host machine. Note that there are several places // where there appear to be some unnecessary assignments between register names // (ax, bx, cx, or dx) and featureBlockN variables. This is to explicitly show // where the different feature blocks come from, to make the code easier to // inspect and read. func HostFeatureSet() *FeatureSet { // eax=0 gets max supported feature and vendor ID. _, bx, cx, dx := HostID(0, 0) vendorID := vendorIDFromRegs(bx, cx, dx) // eax=1 gets basic features in ecx:edx. ax, bx, cx, dx := HostID(1, 0) featureBlock0 := cx featureBlock1 := dx ef, em, pt, f, m, sid := signatureSplit(ax) cacheLine := 8 * (bx >> 8) & 0xff // eax=4, ecx=i gets details about cache index i. Only supported on Intel. var caches []Cache if vendorID == intelVendorID { // ecx selects the cache index until a null type is returned. for i := uint32(0); ; i++ { ax, bx, cx, dx := HostID(4, i) t := CacheType(ax & 0xf) if t == cacheNull { break } lineSize := (bx & 0xfff) + 1 if lineSize != cacheLine { panic(fmt.Sprintf("Mismatched cache line size: %d vs %d", lineSize, cacheLine)) } caches = append(caches, Cache{ Type: t, Level: (ax >> 5) & 0x7, FullyAssociative: ((ax >> 9) & 1) == 1, Partitions: ((bx >> 12) & 0x3ff) + 1, Ways: ((bx >> 22) & 0x3ff) + 1, Sets: cx + 1, InvalidateHierarchical: (dx & 1) == 0, Inclusive: ((dx >> 1) & 1) == 1, DirectMapped: ((dx >> 2) & 1) == 0, }) } } // eax=7, ecx=0 gets extended features in ecx:ebx. _, bx, cx, _ = HostID(7, 0) featureBlock2 := bx featureBlock3 := cx // Leaf 0xd is supported only if CPUID.1:ECX.XSAVE[bit 26] is set. var featureBlock4 uint32 if (featureBlock0 & (1 << 26)) != 0 { featureBlock4, _, _, _ = HostID(uint32(xSaveInfo), 1) } // eax=0x80000000 gets supported extended levels. We use this to // determine if there are any non-zero block 4 or block 6 bits to find. var featureBlock5, featureBlock6 uint32 if ax, _, _, _ := HostID(uint32(extendedFunctionInfo), 0); ax >= uint32(extendedFeatures) { // eax=0x80000001 gets AMD added feature bits. _, _, cx, dx = HostID(uint32(extendedFeatures), 0) featureBlock5 = cx // Ignore features duplicated from block 1 on AMD. These bits // are reserved on Intel. featureBlock6 = dx &^ block6DuplicateMask } set := setFromBlockMasks(featureBlock0, featureBlock1, featureBlock2, featureBlock3, featureBlock4, featureBlock5, featureBlock6) return &FeatureSet{ Set: set, VendorID: vendorID, ExtendedFamily: ef, ExtendedModel: em, ProcessorType: pt, Family: f, Model: m, SteppingID: sid, CacheLine: cacheLine, Caches: caches, } } // Reads max cpu frequency from host /proc/cpuinfo. Must run before // whitelisting. This value is used to create the fake /proc/cpuinfo from a // FeatureSet. func initCPUFreq() { cpuinfob, err := ioutil.ReadFile("/proc/cpuinfo") if err != nil { // Leave it as 0... The standalone VDSO bails out in the same // way. log.Warningf("Could not read /proc/cpuinfo: %v", err) return } cpuinfo := string(cpuinfob) // We get the value straight from host /proc/cpuinfo. On machines with // frequency scaling enabled, this will only get the current value // which will likely be inaccurate. This is fine on machines with // frequency scaling disabled. for _, line := range strings.Split(cpuinfo, "\n") { if strings.Contains(line, "cpu MHz") { splitMHz := strings.Split(line, ":") if len(splitMHz) < 2 { log.Warningf("Could not read /proc/cpuinfo: malformed cpu MHz line") return } // If there was a problem, leave cpuFreqMHz as 0. var err error cpuFreqMHz, err = strconv.ParseFloat(strings.TrimSpace(splitMHz[1]), 64) if err != nil { log.Warningf("Could not parse cpu MHz value %v: %v", splitMHz[1], err) cpuFreqMHz = 0 return } return } } log.Warningf("Could not parse /proc/cpuinfo, it is empty or does not contain cpu MHz") } func initFeaturesFromString() { for f, s := range x86FeatureStrings { x86FeaturesFromString[s] = f } for f, s := range x86FeatureParseOnlyStrings { x86FeaturesFromString[s] = f } } func init() { // initCpuFreq must be run before whitelists are enabled. initCPUFreq() initFeaturesFromString() }