summaryrefslogtreecommitdiffhomepage
path: root/pkg/cpuid/cpuid_x86.go
blob: c236b942d3e55fe30024b77979daf119653ab9eb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build 386 || amd64
// +build 386 amd64

package cpuid

import (
	"bytes"
	"fmt"
	"io/ioutil"
	"strconv"
	"strings"

	"gvisor.dev/gvisor/pkg/log"
)

// Common references for CPUID leaves and bits:
//
// Intel:
//   * Intel SDM Volume 2, Chapter 3.2 "CPUID" (more up-to-date)
//   * Intel Application Note 485 (more detailed)
//
// AMD:
//   * AMD64 APM Volume 3, Appendix 3 "Obtaining Processor Information ..."

// block is a collection of 32 Feature bits.
type block int

const blockSize = 32

// Feature bits are numbered according to "blocks". Each block is 32 bits, and
// feature bits from the same source (cpuid leaf/level) are in the same block.
func featureID(b block, bit int) Feature {
	return Feature(32*int(b) + bit)
}

// Block 0 constants are all of the "basic" feature bits returned by a cpuid in
// ecx with eax=1.
const (
	X86FeatureSSE3 Feature = iota
	X86FeaturePCLMULDQ
	X86FeatureDTES64
	X86FeatureMONITOR
	X86FeatureDSCPL
	X86FeatureVMX
	X86FeatureSMX
	X86FeatureEST
	X86FeatureTM2
	X86FeatureSSSE3 // Not a typo, "supplemental" SSE3.
	X86FeatureCNXTID
	X86FeatureSDBG
	X86FeatureFMA
	X86FeatureCX16
	X86FeatureXTPR
	X86FeaturePDCM
	_ // ecx bit 16 is reserved.
	X86FeaturePCID
	X86FeatureDCA
	X86FeatureSSE4_1
	X86FeatureSSE4_2
	X86FeatureX2APIC
	X86FeatureMOVBE
	X86FeaturePOPCNT
	X86FeatureTSCD
	X86FeatureAES
	X86FeatureXSAVE
	X86FeatureOSXSAVE
	X86FeatureAVX
	X86FeatureF16C
	X86FeatureRDRAND
	X86FeatureHypervisor
)

// Block 1 constants are all of the "basic" feature bits returned by a cpuid in
// edx with eax=1.
const (
	X86FeatureFPU Feature = 32 + iota
	X86FeatureVME
	X86FeatureDE
	X86FeaturePSE
	X86FeatureTSC
	X86FeatureMSR
	X86FeaturePAE
	X86FeatureMCE
	X86FeatureCX8
	X86FeatureAPIC
	_ // edx bit 10 is reserved.
	X86FeatureSEP
	X86FeatureMTRR
	X86FeaturePGE
	X86FeatureMCA
	X86FeatureCMOV
	X86FeaturePAT
	X86FeaturePSE36
	X86FeaturePSN
	X86FeatureCLFSH
	_ // edx bit 20 is reserved.
	X86FeatureDS
	X86FeatureACPI
	X86FeatureMMX
	X86FeatureFXSR
	X86FeatureSSE
	X86FeatureSSE2
	X86FeatureSS
	X86FeatureHTT
	X86FeatureTM
	X86FeatureIA64
	X86FeaturePBE
)

// Block 2 bits are the "structured extended" features returned in ebx for
// eax=7, ecx=0.
const (
	X86FeatureFSGSBase Feature = 2*32 + iota
	X86FeatureTSC_ADJUST
	_ // ebx bit 2 is reserved.
	X86FeatureBMI1
	X86FeatureHLE
	X86FeatureAVX2
	X86FeatureFDP_EXCPTN_ONLY
	X86FeatureSMEP
	X86FeatureBMI2
	X86FeatureERMS
	X86FeatureINVPCID
	X86FeatureRTM
	X86FeatureCQM
	X86FeatureFPCSDS
	X86FeatureMPX
	X86FeatureRDT
	X86FeatureAVX512F
	X86FeatureAVX512DQ
	X86FeatureRDSEED
	X86FeatureADX
	X86FeatureSMAP
	X86FeatureAVX512IFMA
	X86FeaturePCOMMIT
	X86FeatureCLFLUSHOPT
	X86FeatureCLWB
	X86FeatureIPT // Intel processor trace.
	X86FeatureAVX512PF
	X86FeatureAVX512ER
	X86FeatureAVX512CD
	X86FeatureSHA
	X86FeatureAVX512BW
	X86FeatureAVX512VL
)

// Block 3 bits are the "extended" features returned in ecx for eax=7, ecx=0.
const (
	X86FeaturePREFETCHWT1 Feature = 3*32 + iota
	X86FeatureAVX512VBMI
	X86FeatureUMIP
	X86FeaturePKU
	X86FeatureOSPKE
	X86FeatureWAITPKG
	X86FeatureAVX512_VBMI2
	_ // ecx bit 7 is reserved
	X86FeatureGFNI
	X86FeatureVAES
	X86FeatureVPCLMULQDQ
	X86FeatureAVX512_VNNI
	X86FeatureAVX512_BITALG
	X86FeatureTME
	X86FeatureAVX512_VPOPCNTDQ
	_ // ecx bit 15 is reserved
	X86FeatureLA57
	// ecx bits 17-21 are reserved
	_
	_
	_
	_
	_
	X86FeatureRDPID
	// ecx bits 23-24 are reserved
	_
	_
	X86FeatureCLDEMOTE
	_ // ecx bit 26 is reserved
	X86FeatureMOVDIRI
	X86FeatureMOVDIR64B
)

// Block 4 constants are for xsave capabilities in CPUID.(EAX=0DH,ECX=01H):EAX.
// The CPUID leaf is available only if 'X86FeatureXSAVE' is present.
const (
	X86FeatureXSAVEOPT Feature = 4*32 + iota
	X86FeatureXSAVEC
	X86FeatureXGETBV1
	X86FeatureXSAVES
	// EAX[31:4] are reserved.
)

// Block 5 constants are the extended feature bits in
// CPUID.(EAX=0x80000001):ECX.
const (
	X86FeatureLAHF64 Feature = 5*32 + iota
	X86FeatureCMP_LEGACY
	X86FeatureSVM
	X86FeatureEXTAPIC
	X86FeatureCR8_LEGACY
	X86FeatureLZCNT
	X86FeatureSSE4A
	X86FeatureMISALIGNSSE
	X86FeaturePREFETCHW
	X86FeatureOSVW
	X86FeatureIBS
	X86FeatureXOP
	X86FeatureSKINIT
	X86FeatureWDT
	_ // ecx bit 14 is reserved.
	X86FeatureLWP
	X86FeatureFMA4
	X86FeatureTCE
	_ // ecx bit 18 is reserved.
	_ // ecx bit 19 is reserved.
	_ // ecx bit 20 is reserved.
	X86FeatureTBM
	X86FeatureTOPOLOGY
	X86FeaturePERFCTR_CORE
	X86FeaturePERFCTR_NB
	_ // ecx bit 25 is reserved.
	X86FeatureBPEXT
	X86FeaturePERFCTR_TSC
	X86FeaturePERFCTR_LLC
	X86FeatureMWAITX
	// TODO(b/152776797): Some CPUs set this but it is not documented anywhere.
	X86FeatureBlock5Bit30
	_ // ecx bit 31 is reserved.
)

// Block 6 constants are the extended feature bits in
// CPUID.(EAX=0x80000001):EDX.
//
// These are sparse, and so the bit positions are assigned manually.
const (
	// On AMD, EDX[24:23] | EDX[17:12] | EDX[9:0] are duplicate features
	// also defined in block 1 (in identical bit positions). Those features
	// are not listed here.
	block6DuplicateMask = 0x183f3ff

	X86FeatureSYSCALL  Feature = 6*32 + 11
	X86FeatureNX       Feature = 6*32 + 20
	X86FeatureMMXEXT   Feature = 6*32 + 22
	X86FeatureFXSR_OPT Feature = 6*32 + 25
	X86FeatureGBPAGES  Feature = 6*32 + 26
	X86FeatureRDTSCP   Feature = 6*32 + 27
	X86FeatureLM       Feature = 6*32 + 29
	X86Feature3DNOWEXT Feature = 6*32 + 30
	X86Feature3DNOW    Feature = 6*32 + 31
)

// linuxBlockOrder defines the order in which linux organizes the feature
// blocks. Linux also tracks feature bits in 32-bit blocks, but in an order
// which doesn't match well here, so for the /proc/cpuinfo generation we simply
// re-map the blocks to Linux's ordering and then go through the bits in each
// block.
var linuxBlockOrder = []block{1, 6, 0, 5, 2, 4, 3}

// To make emulation of /proc/cpuinfo easy, these names match the names of the
// basic features in Linux defined in arch/x86/kernel/cpu/capflags.c.
var x86FeatureStrings = map[Feature]string{
	// Block 0.
	X86FeatureSSE3:     "pni",
	X86FeaturePCLMULDQ: "pclmulqdq",
	X86FeatureDTES64:   "dtes64",
	X86FeatureMONITOR:  "monitor",
	X86FeatureDSCPL:    "ds_cpl",
	X86FeatureVMX:      "vmx",
	X86FeatureSMX:      "smx",
	X86FeatureEST:      "est",
	X86FeatureTM2:      "tm2",
	X86FeatureSSSE3:    "ssse3",
	X86FeatureCNXTID:   "cid",
	X86FeatureSDBG:     "sdbg",
	X86FeatureFMA:      "fma",
	X86FeatureCX16:     "cx16",
	X86FeatureXTPR:     "xtpr",
	X86FeaturePDCM:     "pdcm",
	X86FeaturePCID:     "pcid",
	X86FeatureDCA:      "dca",
	X86FeatureSSE4_1:   "sse4_1",
	X86FeatureSSE4_2:   "sse4_2",
	X86FeatureX2APIC:   "x2apic",
	X86FeatureMOVBE:    "movbe",
	X86FeaturePOPCNT:   "popcnt",
	X86FeatureTSCD:     "tsc_deadline_timer",
	X86FeatureAES:      "aes",
	X86FeatureXSAVE:    "xsave",
	X86FeatureAVX:      "avx",
	X86FeatureF16C:     "f16c",
	X86FeatureRDRAND:   "rdrand",

	// Block 1.
	X86FeatureFPU:   "fpu",
	X86FeatureVME:   "vme",
	X86FeatureDE:    "de",
	X86FeaturePSE:   "pse",
	X86FeatureTSC:   "tsc",
	X86FeatureMSR:   "msr",
	X86FeaturePAE:   "pae",
	X86FeatureMCE:   "mce",
	X86FeatureCX8:   "cx8",
	X86FeatureAPIC:  "apic",
	X86FeatureSEP:   "sep",
	X86FeatureMTRR:  "mtrr",
	X86FeaturePGE:   "pge",
	X86FeatureMCA:   "mca",
	X86FeatureCMOV:  "cmov",
	X86FeaturePAT:   "pat",
	X86FeaturePSE36: "pse36",
	X86FeaturePSN:   "pn",
	X86FeatureCLFSH: "clflush",
	X86FeatureDS:    "dts",
	X86FeatureACPI:  "acpi",
	X86FeatureMMX:   "mmx",
	X86FeatureFXSR:  "fxsr",
	X86FeatureSSE:   "sse",
	X86FeatureSSE2:  "sse2",
	X86FeatureSS:    "ss",
	X86FeatureHTT:   "ht",
	X86FeatureTM:    "tm",
	X86FeatureIA64:  "ia64",
	X86FeaturePBE:   "pbe",

	// Block 2.
	X86FeatureFSGSBase:   "fsgsbase",
	X86FeatureTSC_ADJUST: "tsc_adjust",
	X86FeatureBMI1:       "bmi1",
	X86FeatureHLE:        "hle",
	X86FeatureAVX2:       "avx2",
	X86FeatureSMEP:       "smep",
	X86FeatureBMI2:       "bmi2",
	X86FeatureERMS:       "erms",
	X86FeatureINVPCID:    "invpcid",
	X86FeatureRTM:        "rtm",
	X86FeatureCQM:        "cqm",
	X86FeatureMPX:        "mpx",
	X86FeatureRDT:        "rdt_a",
	X86FeatureAVX512F:    "avx512f",
	X86FeatureAVX512DQ:   "avx512dq",
	X86FeatureRDSEED:     "rdseed",
	X86FeatureADX:        "adx",
	X86FeatureSMAP:       "smap",
	X86FeatureCLWB:       "clwb",
	X86FeatureAVX512PF:   "avx512pf",
	X86FeatureAVX512ER:   "avx512er",
	X86FeatureAVX512CD:   "avx512cd",
	X86FeatureSHA:        "sha_ni",
	X86FeatureAVX512BW:   "avx512bw",
	X86FeatureAVX512VL:   "avx512vl",

	// Block 3.
	X86FeatureAVX512VBMI:       "avx512vbmi",
	X86FeatureUMIP:             "umip",
	X86FeaturePKU:              "pku",
	X86FeatureOSPKE:            "ospke",
	X86FeatureWAITPKG:          "waitpkg",
	X86FeatureAVX512_VBMI2:     "avx512_vbmi2",
	X86FeatureGFNI:             "gfni",
	X86FeatureVAES:             "vaes",
	X86FeatureVPCLMULQDQ:       "vpclmulqdq",
	X86FeatureAVX512_VNNI:      "avx512_vnni",
	X86FeatureAVX512_BITALG:    "avx512_bitalg",
	X86FeatureTME:              "tme",
	X86FeatureAVX512_VPOPCNTDQ: "avx512_vpopcntdq",
	X86FeatureLA57:             "la57",
	X86FeatureRDPID:            "rdpid",
	X86FeatureCLDEMOTE:         "cldemote",
	X86FeatureMOVDIRI:          "movdiri",
	X86FeatureMOVDIR64B:        "movdir64b",

	// Block 4.
	X86FeatureXSAVEOPT: "xsaveopt",
	X86FeatureXSAVEC:   "xsavec",
	X86FeatureXGETBV1:  "xgetbv1",
	X86FeatureXSAVES:   "xsaves",

	// Block 5.
	X86FeatureLAHF64:       "lahf_lm", // LAHF/SAHF in long mode
	X86FeatureCMP_LEGACY:   "cmp_legacy",
	X86FeatureSVM:          "svm",
	X86FeatureEXTAPIC:      "extapic",
	X86FeatureCR8_LEGACY:   "cr8_legacy",
	X86FeatureLZCNT:        "abm", // Advanced bit manipulation
	X86FeatureSSE4A:        "sse4a",
	X86FeatureMISALIGNSSE:  "misalignsse",
	X86FeaturePREFETCHW:    "3dnowprefetch",
	X86FeatureOSVW:         "osvw",
	X86FeatureIBS:          "ibs",
	X86FeatureXOP:          "xop",
	X86FeatureSKINIT:       "skinit",
	X86FeatureWDT:          "wdt",
	X86FeatureLWP:          "lwp",
	X86FeatureFMA4:         "fma4",
	X86FeatureTCE:          "tce",
	X86FeatureTBM:          "tbm",
	X86FeatureTOPOLOGY:     "topoext",
	X86FeaturePERFCTR_CORE: "perfctr_core",
	X86FeaturePERFCTR_NB:   "perfctr_nb",
	X86FeatureBPEXT:        "bpext",
	X86FeaturePERFCTR_TSC:  "ptsc",
	X86FeaturePERFCTR_LLC:  "perfctr_llc",
	X86FeatureMWAITX:       "mwaitx",

	// Block 6.
	X86FeatureSYSCALL:  "syscall",
	X86FeatureNX:       "nx",
	X86FeatureMMXEXT:   "mmxext",
	X86FeatureFXSR_OPT: "fxsr_opt",
	X86FeatureGBPAGES:  "pdpe1gb",
	X86FeatureRDTSCP:   "rdtscp",
	X86FeatureLM:       "lm",
	X86Feature3DNOWEXT: "3dnowext",
	X86Feature3DNOW:    "3dnow",
}

// These flags are parse only---they can be used for setting / unsetting the
// flags, but will not get printed out in /proc/cpuinfo.
var x86FeatureParseOnlyStrings = map[Feature]string{
	// Block 0.
	X86FeatureOSXSAVE: "osxsave",

	// Block 2.
	X86FeatureFDP_EXCPTN_ONLY: "fdp_excptn_only",
	X86FeatureFPCSDS:          "fpcsds",
	X86FeatureIPT:             "pt",
	X86FeatureCLFLUSHOPT:      "clfushopt",

	// Block 3.
	X86FeaturePREFETCHWT1: "prefetchwt1",

	// Block 5.
	X86FeatureBlock5Bit30: "block5_bit30",
}

// intelCacheDescriptors describe the caches and TLBs on the system. They are
// returned in the registers for eax=2. Intel only.
type intelCacheDescriptor uint8

// Valid cache/TLB descriptors. All descriptors can be found in Intel SDM Vol.
// 2, Ch. 3.2, "CPUID", Table 3-12 "Encoding of CPUID Leaf 2 Descriptors".
const (
	intelNullDescriptor    intelCacheDescriptor = 0
	intelNoTLBDescriptor   intelCacheDescriptor = 0xfe
	intelNoCacheDescriptor intelCacheDescriptor = 0xff

	// Most descriptors omitted for brevity as they are currently unused.
)

// CacheType describes the type of a cache, as returned in eax[4:0] for eax=4.
type CacheType uint8

const (
	// cacheNull indicates that there are no more entries.
	cacheNull CacheType = iota

	// CacheData is a data cache.
	CacheData

	// CacheInstruction is an instruction cache.
	CacheInstruction

	// CacheUnified is a unified instruction and data cache.
	CacheUnified
)

// Cache describes the parameters of a single cache on the system.
//
// +stateify savable
type Cache struct {
	// Level is the hierarchical level of this cache (L1, L2, etc).
	Level uint32

	// Type is the type of cache.
	Type CacheType

	// FullyAssociative indicates that entries may be placed in any block.
	FullyAssociative bool

	// Partitions is the number of physical partitions in the cache.
	Partitions uint32

	// Ways is the number of ways of associativity in the cache.
	Ways uint32

	// Sets is the number of sets in the cache.
	Sets uint32

	// InvalidateHierarchical indicates that WBINVD/INVD from threads
	// sharing this cache acts upon lower level caches for threads sharing
	// this cache.
	InvalidateHierarchical bool

	// Inclusive indicates that this cache is inclusive of lower cache
	// levels.
	Inclusive bool

	// DirectMapped indicates that this cache is directly mapped from
	// address, rather than using a hash function.
	DirectMapped bool
}

// Just a way to wrap cpuid function numbers.
type cpuidFunction uint32

// The constants below are the lower or "standard" cpuid functions, ordered as
// defined by the hardware.
const (
	vendorID                      cpuidFunction = iota // Returns vendor ID and largest standard function.
	featureInfo                                        // Returns basic feature bits and processor signature.
	intelCacheDescriptors                              // Returns list of cache descriptors. Intel only.
	intelSerialNumber                                  // Returns processor serial number (obsolete on new hardware). Intel only.
	intelDeterministicCacheParams                      // Returns deterministic cache information. Intel only.
	monitorMwaitParams                                 // Returns information about monitor/mwait instructions.
	powerParams                                        // Returns information about power management and thermal sensors.
	extendedFeatureInfo                                // Returns extended feature bits.
	_                                                  // Function 0x8 is reserved.
	intelDCAParams                                     // Returns direct cache access information. Intel only.
	intelPMCInfo                                       // Returns information about performance monitoring features. Intel only.
	intelX2APICInfo                                    // Returns core/logical processor topology. Intel only.
	_                                                  // Function 0xc is reserved.
	xSaveInfo                                          // Returns information about extended state management.
)

// The "extended" functions start at 0x80000000.
const (
	extendedFunctionInfo cpuidFunction = 0x80000000 + iota // Returns highest available extended function in eax.
	extendedFeatures                                       // Returns some extended feature bits in edx and ecx.
)

// These are the extended floating point state features. They are used to
// enumerate floating point features in XCR0, XSTATE_BV, etc.
const (
	XSAVEFeatureX87         = 1 << 0
	XSAVEFeatureSSE         = 1 << 1
	XSAVEFeatureAVX         = 1 << 2
	XSAVEFeatureBNDREGS     = 1 << 3
	XSAVEFeatureBNDCSR      = 1 << 4
	XSAVEFeatureAVX512op    = 1 << 5
	XSAVEFeatureAVX512zmm0  = 1 << 6
	XSAVEFeatureAVX512zmm16 = 1 << 7
	XSAVEFeaturePKRU        = 1 << 9
)

var cpuFreqMHz float64

// x86FeaturesFromString includes features from x86FeatureStrings and
// x86FeatureParseOnlyStrings.
var x86FeaturesFromString = make(map[string]Feature)

// FeatureFromString returns the Feature associated with the given feature
// string plus a bool to indicate if it could find the feature.
func FeatureFromString(s string) (Feature, bool) {
	f, b := x86FeaturesFromString[s]
	return f, b
}

// String implements fmt.Stringer.
func (f Feature) String() string {
	if s := f.flagString(false); s != "" {
		return s
	}

	block := int(f) / 32
	bit := int(f) % 32
	return fmt.Sprintf("<cpuflag %d; block %d bit %d>", f, block, bit)
}

func (f Feature) flagString(cpuinfoOnly bool) string {
	if s, ok := x86FeatureStrings[f]; ok {
		return s
	}
	if !cpuinfoOnly {
		return x86FeatureParseOnlyStrings[f]
	}
	return ""
}

// FeatureSet is a set of Features for a CPU.
//
// +stateify savable
type FeatureSet struct {
	// Set is the set of features that are enabled in this FeatureSet.
	Set map[Feature]bool

	// VendorID is the 12-char string returned in ebx:edx:ecx for eax=0.
	VendorID string

	// ExtendedFamily is part of the processor signature.
	ExtendedFamily uint8

	// ExtendedModel is part of the processor signature.
	ExtendedModel uint8

	// ProcessorType is part of the processor signature.
	ProcessorType uint8

	// Family is part of the processor signature.
	Family uint8

	// Model is part of the processor signature.
	Model uint8

	// SteppingID is part of the processor signature.
	SteppingID uint8

	// Caches describes the caches on the CPU.
	Caches []Cache

	// CacheLine is the size of a cache line in bytes.
	//
	// All caches use the same line size. This is not enforced in the CPUID
	// encoding, but is true on all known x86 processors.
	CacheLine uint32
}

// Clone returns a copy of fs.
func (fs *FeatureSet) Clone() *FeatureSet {
	fs2 := *fs
	fs2.Set = make(map[Feature]bool)
	for f, b := range fs.Set {
		fs2.Set[f] = b
	}
	fs2.Caches = append([]Cache(nil), fs.Caches...)
	return &fs2
}

// FlagsString prints out supported CPU flags. If cpuinfoOnly is true, it is
// equivalent to the "flags" field in /proc/cpuinfo.
func (fs *FeatureSet) FlagsString(cpuinfoOnly bool) string {
	var s []string
	for _, b := range linuxBlockOrder {
		for i := 0; i < blockSize; i++ {
			if f := featureID(b, i); fs.Set[f] {
				if fstr := f.flagString(cpuinfoOnly); fstr != "" {
					s = append(s, fstr)
				}
			}
		}
	}
	return strings.Join(s, " ")
}

// WriteCPUInfoTo is to generate a section of one cpu in /proc/cpuinfo. This is
// a minimal /proc/cpuinfo, it is missing some fields like "microcode" that are
// not always printed in Linux. The bogomips field is simply made up.
func (fs FeatureSet) WriteCPUInfoTo(cpu uint, b *bytes.Buffer) {
	fmt.Fprintf(b, "processor\t: %d\n", cpu)
	fmt.Fprintf(b, "vendor_id\t: %s\n", fs.VendorID)
	fmt.Fprintf(b, "cpu family\t: %d\n", ((fs.ExtendedFamily<<4)&0xff)|fs.Family)
	fmt.Fprintf(b, "model\t\t: %d\n", ((fs.ExtendedModel<<4)&0xff)|fs.Model)
	fmt.Fprintf(b, "model name\t: %s\n", "unknown") // Unknown for now.
	fmt.Fprintf(b, "stepping\t: %s\n", "unknown")   // Unknown for now.
	fmt.Fprintf(b, "cpu MHz\t\t: %.3f\n", cpuFreqMHz)
	fmt.Fprintln(b, "fpu\t\t: yes")
	fmt.Fprintln(b, "fpu_exception\t: yes")
	fmt.Fprintf(b, "cpuid level\t: %d\n", uint32(xSaveInfo)) // Same as ax in vendorID.
	fmt.Fprintln(b, "wp\t\t: yes")
	fmt.Fprintf(b, "flags\t\t: %s\n", fs.FlagsString(true))
	fmt.Fprintf(b, "bogomips\t: %.02f\n", cpuFreqMHz) // It's bogus anyway.
	fmt.Fprintf(b, "clflush size\t: %d\n", fs.CacheLine)
	fmt.Fprintf(b, "cache_alignment\t: %d\n", fs.CacheLine)
	fmt.Fprintf(b, "address sizes\t: %d bits physical, %d bits virtual\n", 46, 48)
	fmt.Fprintln(b, "power management:") // This is always here, but can be blank.
	fmt.Fprintln(b, "")                  // The /proc/cpuinfo file ends with an extra newline.
}

const (
	amdVendorID   = "AuthenticAMD"
	intelVendorID = "GenuineIntel"
)

// AMD returns true if fs describes an AMD CPU.
func (fs *FeatureSet) AMD() bool {
	return fs.VendorID == amdVendorID
}

// Intel returns true if fs describes an Intel CPU.
func (fs *FeatureSet) Intel() bool {
	return fs.VendorID == intelVendorID
}

// CheckHostCompatible returns nil if fs is a subset of the host feature set.
func (fs *FeatureSet) CheckHostCompatible() error {
	hfs := HostFeatureSet()

	if diff := fs.Subtract(hfs); diff != nil {
		return ErrIncompatible{fmt.Sprintf("CPU feature set %v incompatible with host feature set %v (missing: %v)", fs.FlagsString(false), hfs.FlagsString(false), diff)}
	}

	// The size of a cache line must match, as it is critical to correctly
	// utilizing CLFLUSH. Other cache properties are allowed to change, as
	// they are not important to correctness.
	if fs.CacheLine != hfs.CacheLine {
		return ErrIncompatible{fmt.Sprintf("CPU cache line size %d incompatible with host cache line size %d", fs.CacheLine, hfs.CacheLine)}
	}

	return nil
}

// Helper to convert 3 regs into 12-byte vendor ID.
func vendorIDFromRegs(bx, cx, dx uint32) string {
	bytes := make([]byte, 0, 12)
	for i := uint(0); i < 4; i++ {
		b := byte(bx >> (i * 8))
		bytes = append(bytes, b)
	}

	for i := uint(0); i < 4; i++ {
		b := byte(dx >> (i * 8))
		bytes = append(bytes, b)
	}

	for i := uint(0); i < 4; i++ {
		b := byte(cx >> (i * 8))
		bytes = append(bytes, b)
	}
	return string(bytes)
}

var maxXsaveSize = func() uint32 {
	// Leaf 0 of xsaveinfo function returns the size for currently
	// enabled xsave features in ebx, the maximum size if all valid
	// features are saved with xsave in ecx, and valid XCR0 bits in
	// edx:eax.
	//
	// If xSaveInfo isn't supported, cpuid will not fault but will
	// return bogus values.
	_, _, maxXsaveSize, _ := HostID(uint32(xSaveInfo), 0)
	return maxXsaveSize
}()

// ExtendedStateSize returns the number of bytes needed to save the "extended
// state" for this processor and the boundary it must be aligned to. Extended
// state includes floating point registers, and other cpu state that's not
// associated with the normal task context.
//
// Note: We can save some space here with an optimization where we use a
// smaller chunk of memory depending on features that are actually enabled.
// Currently we just use the largest possible size for simplicity (which is
// about 2.5K worst case, with avx512).
func (fs *FeatureSet) ExtendedStateSize() (size, align uint) {
	if fs.UseXsave() {
		return uint(maxXsaveSize), 64
	}

	// If we don't support xsave, we fall back to fxsave, which requires
	// 512 bytes aligned to 16 bytes.
	return 512, 16
}

// ValidXCR0Mask returns the bits that may be set to 1 in control register
// XCR0.
func (fs *FeatureSet) ValidXCR0Mask() uint64 {
	if !fs.UseXsave() {
		return 0
	}
	eax, _, _, edx := HostID(uint32(xSaveInfo), 0)
	return uint64(edx)<<32 | uint64(eax)
}

// vendorIDRegs returns the 3 register values used to construct the 12-byte
// vendor ID string for eax=0.
func (fs *FeatureSet) vendorIDRegs() (bx, dx, cx uint32) {
	for i := uint(0); i < 4; i++ {
		bx |= uint32(fs.VendorID[i]) << (i * 8)
	}

	for i := uint(0); i < 4; i++ {
		dx |= uint32(fs.VendorID[i+4]) << (i * 8)
	}

	for i := uint(0); i < 4; i++ {
		cx |= uint32(fs.VendorID[i+8]) << (i * 8)
	}
	return
}

// signature returns the signature dword that's returned in eax when eax=1.
func (fs *FeatureSet) signature() uint32 {
	var s uint32
	s |= uint32(fs.SteppingID & 0xf)
	s |= uint32(fs.Model&0xf) << 4
	s |= uint32(fs.Family&0xf) << 8
	s |= uint32(fs.ProcessorType&0x3) << 12
	s |= uint32(fs.ExtendedModel&0xf) << 16
	s |= uint32(fs.ExtendedFamily&0xff) << 20
	return s
}

// Helper to deconstruct signature dword.
func signatureSplit(v uint32) (ef, em, pt, f, m, sid uint8) {
	sid = uint8(v & 0xf)
	m = uint8(v>>4) & 0xf
	f = uint8(v>>8) & 0xf
	pt = uint8(v>>12) & 0x3
	em = uint8(v>>16) & 0xf
	ef = uint8(v >> 20)
	return
}

// Helper to convert blockwise feature bit masks into a set of features. Masks
// must be provided in order for each block, without skipping them. If a block
// does not matter for this feature set, 0 is specified.
func setFromBlockMasks(blocks ...uint32) map[Feature]bool {
	s := make(map[Feature]bool)
	for b, blockMask := range blocks {
		for i := 0; i < blockSize; i++ {
			if blockMask&1 != 0 {
				s[featureID(block(b), i)] = true
			}
			blockMask >>= 1
		}
	}
	return s
}

// blockMask returns the 32-bit mask associated with a block of features.
func (fs *FeatureSet) blockMask(b block) uint32 {
	var mask uint32
	for i := 0; i < blockSize; i++ {
		if fs.Set[featureID(b, i)] {
			mask |= 1 << uint(i)
		}
	}
	return mask
}

// Remove removes a Feature from a FeatureSet. It ignores features
// that are not in the FeatureSet.
func (fs *FeatureSet) Remove(feature Feature) {
	delete(fs.Set, feature)
}

// Add adds a Feature to a FeatureSet. It ignores duplicate features.
func (fs *FeatureSet) Add(feature Feature) {
	fs.Set[feature] = true
}

// HasFeature tests whether or not a feature is in the given feature set.
func (fs *FeatureSet) HasFeature(feature Feature) bool {
	return fs.Set[feature]
}

// Subtract returns the features present in fs that are not present in other.
// If all features in fs are present in other, Subtract returns nil.
func (fs *FeatureSet) Subtract(other *FeatureSet) (diff map[Feature]bool) {
	for f := range fs.Set {
		if !other.Set[f] {
			if diff == nil {
				diff = make(map[Feature]bool)
			}
			diff[f] = true
		}
	}

	return
}

// EmulateID emulates a cpuid instruction based on the feature set.
func (fs *FeatureSet) EmulateID(origAx, origCx uint32) (ax, bx, cx, dx uint32) {
	switch cpuidFunction(origAx) {
	case vendorID:
		ax = uint32(xSaveInfo) // 0xd (xSaveInfo) is the highest function we support.
		bx, dx, cx = fs.vendorIDRegs()
	case featureInfo:
		// CLFLUSH line size is encoded in quadwords. Other fields in bx unsupported.
		bx = (fs.CacheLine / 8) << 8
		cx = fs.blockMask(block(0))
		dx = fs.blockMask(block(1))
		ax = fs.signature()
	case intelCacheDescriptors:
		if !fs.Intel() {
			// Reserved on non-Intel.
			return 0, 0, 0, 0
		}

		// "The least-significant byte in register EAX (register AL)
		// will always return 01H. Software should ignore this value
		// and not interpret it as an informational descriptor." - SDM
		//
		// We only support reporting cache parameters via
		// intelDeterministicCacheParams; report as much here.
		//
		// We do not support exposing TLB information at all.
		ax = 1 | (uint32(intelNoCacheDescriptor) << 8)
	case intelDeterministicCacheParams:
		if !fs.Intel() {
			// Reserved on non-Intel.
			return 0, 0, 0, 0
		}

		// cx is the index of the cache to describe.
		if int(origCx) >= len(fs.Caches) {
			return uint32(cacheNull), 0, 0, 0
		}
		c := fs.Caches[origCx]

		ax = uint32(c.Type)
		ax |= c.Level << 5
		ax |= 1 << 8 // Always claim the cache is "self-initializing".
		if c.FullyAssociative {
			ax |= 1 << 9
		}
		// Processor topology not supported.

		bx = fs.CacheLine - 1
		bx |= (c.Partitions - 1) << 12
		bx |= (c.Ways - 1) << 22

		cx = c.Sets - 1

		if !c.InvalidateHierarchical {
			dx |= 1
		}
		if c.Inclusive {
			dx |= 1 << 1
		}
		if !c.DirectMapped {
			dx |= 1 << 2
		}
	case xSaveInfo:
		if !fs.UseXsave() {
			return 0, 0, 0, 0
		}
		return HostID(uint32(xSaveInfo), origCx)
	case extendedFeatureInfo:
		if origCx != 0 {
			break // Only leaf 0 is supported.
		}
		bx = fs.blockMask(block(2))
		cx = fs.blockMask(block(3))
	case extendedFunctionInfo:
		// We only support showing the extended features.
		ax = uint32(extendedFeatures)
		cx = 0
	case extendedFeatures:
		cx = fs.blockMask(block(5))
		dx = fs.blockMask(block(6))
		if fs.AMD() {
			// AMD duplicates some block 1 features in block 6.
			dx |= fs.blockMask(block(1)) & block6DuplicateMask
		}
	}

	return
}

// UseXsave returns the choice of fp state saving instruction.
func (fs *FeatureSet) UseXsave() bool {
	return fs.HasFeature(X86FeatureXSAVE) && fs.HasFeature(X86FeatureOSXSAVE)
}

// UseXsaveopt returns true if 'fs' supports the "xsaveopt" instruction.
func (fs *FeatureSet) UseXsaveopt() bool {
	return fs.UseXsave() && fs.HasFeature(X86FeatureXSAVEOPT)
}

// HostID executes a native CPUID instruction.
func HostID(axArg, cxArg uint32) (ax, bx, cx, dx uint32)

// getHostFeatureSet uses cpuid to get host values and construct a feature set
// that matches that of the host machine. Note that there are several places
// where there appear to be some unnecessary assignments between register names
// (ax, bx, cx, or dx) and featureBlockN variables. This is to explicitly show
// where the different feature blocks come from, to make the code easier to
// inspect and read.
func getHostFeatureSet() *FeatureSet {
	// eax=0 gets max supported feature and vendor ID.
	_, bx, cx, dx := HostID(0, 0)
	vendorID := vendorIDFromRegs(bx, cx, dx)

	// eax=1 gets basic features in ecx:edx.
	ax, bx, cx, dx := HostID(1, 0)
	featureBlock0 := cx
	featureBlock1 := dx
	ef, em, pt, f, m, sid := signatureSplit(ax)
	cacheLine := 8 * (bx >> 8) & 0xff

	// eax=4, ecx=i gets details about cache index i. Only supported on Intel.
	var caches []Cache
	if vendorID == intelVendorID {
		// ecx selects the cache index until a null type is returned.
		for i := uint32(0); ; i++ {
			ax, bx, cx, dx := HostID(4, i)
			t := CacheType(ax & 0xf)
			if t == cacheNull {
				break
			}

			lineSize := (bx & 0xfff) + 1
			if lineSize != cacheLine {
				panic(fmt.Sprintf("Mismatched cache line size: %d vs %d", lineSize, cacheLine))
			}

			caches = append(caches, Cache{
				Type:                   t,
				Level:                  (ax >> 5) & 0x7,
				FullyAssociative:       ((ax >> 9) & 1) == 1,
				Partitions:             ((bx >> 12) & 0x3ff) + 1,
				Ways:                   ((bx >> 22) & 0x3ff) + 1,
				Sets:                   cx + 1,
				InvalidateHierarchical: (dx & 1) == 0,
				Inclusive:              ((dx >> 1) & 1) == 1,
				DirectMapped:           ((dx >> 2) & 1) == 0,
			})
		}
	}

	// eax=7, ecx=0 gets extended features in ecx:ebx.
	_, bx, cx, _ = HostID(7, 0)
	featureBlock2 := bx
	featureBlock3 := cx

	// Leaf 0xd is supported only if CPUID.1:ECX.XSAVE[bit 26] is set.
	var featureBlock4 uint32
	if (featureBlock0 & (1 << 26)) != 0 {
		featureBlock4, _, _, _ = HostID(uint32(xSaveInfo), 1)
	}

	// eax=0x80000000 gets supported extended levels. We use this to
	// determine if there are any non-zero block 4 or block 6 bits to find.
	var featureBlock5, featureBlock6 uint32
	if ax, _, _, _ := HostID(uint32(extendedFunctionInfo), 0); ax >= uint32(extendedFeatures) {
		// eax=0x80000001 gets AMD added feature bits.
		_, _, cx, dx = HostID(uint32(extendedFeatures), 0)
		featureBlock5 = cx
		// Ignore features duplicated from block 1 on AMD. These bits
		// are reserved on Intel.
		featureBlock6 = dx &^ block6DuplicateMask
	}

	set := setFromBlockMasks(featureBlock0, featureBlock1, featureBlock2, featureBlock3, featureBlock4, featureBlock5, featureBlock6)
	return &FeatureSet{
		Set:            set,
		VendorID:       vendorID,
		ExtendedFamily: ef,
		ExtendedModel:  em,
		ProcessorType:  pt,
		Family:         f,
		Model:          m,
		SteppingID:     sid,
		CacheLine:      cacheLine,
		Caches:         caches,
	}
}

// Reads max cpu frequency from host /proc/cpuinfo. Must run before syscall
// filter installation. This value is used to create the fake /proc/cpuinfo
// from a FeatureSet.
func initCPUFreq() {
	cpuinfob, err := ioutil.ReadFile("/proc/cpuinfo")
	if err != nil {
		// Leave it as 0... The standalone VDSO bails out in the same
		// way.
		log.Warningf("Could not read /proc/cpuinfo: %v", err)
		return
	}
	cpuinfo := string(cpuinfob)

	// We get the value straight from host /proc/cpuinfo. On machines with
	// frequency scaling enabled, this will only get the current value
	// which will likely be inaccurate. This is fine on machines with
	// frequency scaling disabled.
	for _, line := range strings.Split(cpuinfo, "\n") {
		if strings.Contains(line, "cpu MHz") {
			splitMHz := strings.Split(line, ":")
			if len(splitMHz) < 2 {
				log.Warningf("Could not read /proc/cpuinfo: malformed cpu MHz line")
				return
			}

			// If there was a problem, leave cpuFreqMHz as 0.
			var err error
			cpuFreqMHz, err = strconv.ParseFloat(strings.TrimSpace(splitMHz[1]), 64)
			if err != nil {
				log.Warningf("Could not parse cpu MHz value %v: %v", splitMHz[1], err)
				cpuFreqMHz = 0
				return
			}
			return
		}
	}
	log.Warningf("Could not parse /proc/cpuinfo, it is empty or does not contain cpu MHz")
}

func initFeaturesFromString() {
	for f, s := range x86FeatureStrings {
		x86FeaturesFromString[s] = f
	}
	for f, s := range x86FeatureParseOnlyStrings {
		x86FeaturesFromString[s] = f
	}
}

func init() {
	initCPUFreq()
	initFeaturesFromString()
	hostFeatureSet = getHostFeatureSet()
}