summaryrefslogtreecommitdiffhomepage
path: root/pkg
diff options
context:
space:
mode:
Diffstat (limited to 'pkg')
-rw-r--r--pkg/abi/linux/netfilter.go7
-rw-r--r--pkg/cpuid/BUILD9
-rw-r--r--pkg/cpuid/cpuid.go1098
-rw-r--r--pkg/cpuid/cpuid_arm64.go482
-rw-r--r--pkg/cpuid/cpuid_arm64_test.go55
-rw-r--r--pkg/cpuid/cpuid_parse_x86_test.go (renamed from pkg/cpuid/cpuid_parse_test.go)2
-rw-r--r--pkg/cpuid/cpuid_x86.go1100
-rw-r--r--pkg/cpuid/cpuid_x86_test.go (renamed from pkg/cpuid/cpuid_test.go)2
-rw-r--r--pkg/p9/client_file.go33
-rw-r--r--pkg/p9/file.go16
-rw-r--r--pkg/p9/handlers.go33
-rw-r--r--pkg/p9/messages.go199
-rw-r--r--pkg/p9/p9.go4
-rw-r--r--pkg/p9/version.go8
-rw-r--r--pkg/seccomp/BUILD2
-rw-r--r--pkg/sentry/BUILD8
-rw-r--r--pkg/sentry/arch/BUILD1
-rw-r--r--pkg/sentry/arch/arch_amd64.s7
-rw-r--r--pkg/sentry/arch/arch_state_x86.go4
-rw-r--r--pkg/sentry/arch/arch_x86.go15
-rw-r--r--pkg/sentry/arch/arch_x86_impl.go43
-rw-r--r--pkg/sentry/fs/copy_up.go2
-rw-r--r--pkg/sentry/fs/fsutil/inode.go20
-rw-r--r--pkg/sentry/fs/gofer/attr.go6
-rw-r--r--pkg/sentry/fs/gofer/context_file.go14
-rw-r--r--pkg/sentry/fs/gofer/inode.go13
-rw-r--r--pkg/sentry/fs/inode.go14
-rw-r--r--pkg/sentry/fs/inode_operations.go13
-rw-r--r--pkg/sentry/fs/inode_overlay.go18
-rw-r--r--pkg/sentry/fs/tmpfs/tmpfs.go9
-rw-r--r--pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go3
-rw-r--r--pkg/sentry/fsimpl/gofer/directory.go4
-rw-r--r--pkg/sentry/fsimpl/kernfs/filesystem.go8
-rw-r--r--pkg/sentry/socket/netfilter/extensions.go4
-rw-r--r--pkg/sentry/socket/netfilter/netfilter.go9
-rw-r--r--pkg/sentry/socket/netfilter/tcp_matcher.go2
-rw-r--r--pkg/sentry/strace/linux64_amd64.go4
-rw-r--r--pkg/sentry/strace/socket.go215
-rw-r--r--pkg/sentry/strace/strace.go22
-rw-r--r--pkg/sentry/strace/syscalls.go22
-rw-r--r--pkg/sentry/syscalls/linux/linux64_amd64.go27
-rw-r--r--pkg/sentry/syscalls/linux/linux64_arm64.go37
-rw-r--r--pkg/sentry/syscalls/linux/sys_xattr.go244
-rw-r--r--pkg/sentry/usage/memory.go4
-rw-r--r--pkg/tcpip/stack/ndp.go43
-rw-r--r--pkg/tcpip/stack/ndp_test.go348
-rw-r--r--pkg/tcpip/stack/nic.go30
-rw-r--r--pkg/tcpip/stack/stack_test.go351
48 files changed, 2951 insertions, 1663 deletions
diff --git a/pkg/abi/linux/netfilter.go b/pkg/abi/linux/netfilter.go
index 7363185b7..bbc4df74c 100644
--- a/pkg/abi/linux/netfilter.go
+++ b/pkg/abi/linux/netfilter.go
@@ -370,11 +370,12 @@ type XTTCP struct {
// Option specifies that a particular TCP option must be set.
Option uint8
- // FlagMask masks the FlagCompare byte when comparing to the TCP flag
- // fields.
+ // FlagMask masks TCP flags when comparing to the FlagCompare byte. It allows
+ // for specification of which flags are important to the matcher.
FlagMask uint8
- // FlagCompare is binary and-ed with the TCP flag fields.
+ // FlagCompare, in combination with FlagMask, is used to match only packets
+ // that have certain flags set.
FlagCompare uint8
// InverseFlags flips the meaning of certain fields. See the
diff --git a/pkg/cpuid/BUILD b/pkg/cpuid/BUILD
index 43a432190..d6cb1a549 100644
--- a/pkg/cpuid/BUILD
+++ b/pkg/cpuid/BUILD
@@ -7,6 +7,8 @@ go_library(
srcs = [
"cpu_amd64.s",
"cpuid.go",
+ "cpuid_arm64.go",
+ "cpuid_x86.go",
],
visibility = ["//:sandbox"],
deps = ["//pkg/log"],
@@ -15,7 +17,10 @@ go_library(
go_test(
name = "cpuid_test",
size = "small",
- srcs = ["cpuid_test.go"],
+ srcs = [
+ "cpuid_arm64_test.go",
+ "cpuid_x86_test.go",
+ ],
library = ":cpuid",
)
@@ -23,7 +28,7 @@ go_test(
name = "cpuid_parse_test",
size = "small",
srcs = [
- "cpuid_parse_test.go",
+ "cpuid_parse_x86_test.go",
],
library = ":cpuid",
tags = ["manual"],
diff --git a/pkg/cpuid/cpuid.go b/pkg/cpuid/cpuid.go
index cf50ee53f..f7f9dbf86 100644
--- a/pkg/cpuid/cpuid.go
+++ b/pkg/cpuid/cpuid.go
@@ -1,4 +1,4 @@
-// Copyright 2018 The gVisor Authors.
+// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -12,8 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// +build i386 amd64
-
// Package cpuid provides basic functionality for creating and adjusting CPU
// feature sets.
//
@@ -21,1100 +19,20 @@
// known platform, or HostFeatureSet()) and then add, remove, and test for
// features as desired.
//
-// For example: Test for hardware extended state saving, and if we don't have
-// it, don't expose AVX, which cannot be saved with fxsave.
+// For example: on x86, test for hardware extended state saving, and if
+// we don't have it, don't expose AVX, which cannot be saved with fxsave.
//
// if !HostFeatureSet().HasFeature(X86FeatureXSAVE) {
// exposedFeatures.Remove(X86FeatureAVX)
// }
package cpuid
-import (
- "bytes"
- "fmt"
- "io/ioutil"
- "strconv"
- "strings"
-
- "gvisor.dev/gvisor/pkg/log"
-)
-
-// Common references for CPUID leaves and bits:
-//
-// Intel:
-// * Intel SDM Volume 2, Chapter 3.2 "CPUID" (more up-to-date)
-// * Intel Application Note 485 (more detailed)
-//
-// AMD:
-// * AMD64 APM Volume 3, Appendix 3 "Obtaining Processor Information ..."
-
// Feature is a unique identifier for a particular cpu feature. We just use an
-// int as a feature number on x86.
+// int as a feature number on x86 and arm64.
//
-// Features are numbered according to "blocks". Each block is 32 bits, and
-// feature bits from the same source (cpuid leaf/level) are in the same block.
-type Feature int
-
-// block is a collection of 32 Feature bits.
-type block int
-
-const blockSize = 32
-
-// Feature bits are numbered according to "blocks". Each block is 32 bits, and
+// On x86, features are numbered according to "blocks". Each block is 32 bits, and
// feature bits from the same source (cpuid leaf/level) are in the same block.
-func featureID(b block, bit int) Feature {
- return Feature(32*int(b) + bit)
-}
-
-// Block 0 constants are all of the "basic" feature bits returned by a cpuid in
-// ecx with eax=1.
-const (
- X86FeatureSSE3 Feature = iota
- X86FeaturePCLMULDQ
- X86FeatureDTES64
- X86FeatureMONITOR
- X86FeatureDSCPL
- X86FeatureVMX
- X86FeatureSMX
- X86FeatureEST
- X86FeatureTM2
- X86FeatureSSSE3 // Not a typo, "supplemental" SSE3.
- X86FeatureCNXTID
- X86FeatureSDBG
- X86FeatureFMA
- X86FeatureCX16
- X86FeatureXTPR
- X86FeaturePDCM
- _ // ecx bit 16 is reserved.
- X86FeaturePCID
- X86FeatureDCA
- X86FeatureSSE4_1
- X86FeatureSSE4_2
- X86FeatureX2APIC
- X86FeatureMOVBE
- X86FeaturePOPCNT
- X86FeatureTSCD
- X86FeatureAES
- X86FeatureXSAVE
- X86FeatureOSXSAVE
- X86FeatureAVX
- X86FeatureF16C
- X86FeatureRDRAND
- _ // ecx bit 31 is reserved.
-)
-
-// Block 1 constants are all of the "basic" feature bits returned by a cpuid in
-// edx with eax=1.
-const (
- X86FeatureFPU Feature = 32 + iota
- X86FeatureVME
- X86FeatureDE
- X86FeaturePSE
- X86FeatureTSC
- X86FeatureMSR
- X86FeaturePAE
- X86FeatureMCE
- X86FeatureCX8
- X86FeatureAPIC
- _ // edx bit 10 is reserved.
- X86FeatureSEP
- X86FeatureMTRR
- X86FeaturePGE
- X86FeatureMCA
- X86FeatureCMOV
- X86FeaturePAT
- X86FeaturePSE36
- X86FeaturePSN
- X86FeatureCLFSH
- _ // edx bit 20 is reserved.
- X86FeatureDS
- X86FeatureACPI
- X86FeatureMMX
- X86FeatureFXSR
- X86FeatureSSE
- X86FeatureSSE2
- X86FeatureSS
- X86FeatureHTT
- X86FeatureTM
- X86FeatureIA64
- X86FeaturePBE
-)
-
-// Block 2 bits are the "structured extended" features returned in ebx for
-// eax=7, ecx=0.
-const (
- X86FeatureFSGSBase Feature = 2*32 + iota
- X86FeatureTSC_ADJUST
- _ // ebx bit 2 is reserved.
- X86FeatureBMI1
- X86FeatureHLE
- X86FeatureAVX2
- X86FeatureFDP_EXCPTN_ONLY
- X86FeatureSMEP
- X86FeatureBMI2
- X86FeatureERMS
- X86FeatureINVPCID
- X86FeatureRTM
- X86FeatureCQM
- X86FeatureFPCSDS
- X86FeatureMPX
- X86FeatureRDT
- X86FeatureAVX512F
- X86FeatureAVX512DQ
- X86FeatureRDSEED
- X86FeatureADX
- X86FeatureSMAP
- X86FeatureAVX512IFMA
- X86FeaturePCOMMIT
- X86FeatureCLFLUSHOPT
- X86FeatureCLWB
- X86FeatureIPT // Intel processor trace.
- X86FeatureAVX512PF
- X86FeatureAVX512ER
- X86FeatureAVX512CD
- X86FeatureSHA
- X86FeatureAVX512BW
- X86FeatureAVX512VL
-)
-
-// Block 3 bits are the "extended" features returned in ecx for eax=7, ecx=0.
-const (
- X86FeaturePREFETCHWT1 Feature = 3*32 + iota
- X86FeatureAVX512VBMI
- X86FeatureUMIP
- X86FeaturePKU
- X86FeatureOSPKE
- X86FeatureWAITPKG
- X86FeatureAVX512_VBMI2
- _ // ecx bit 7 is reserved
- X86FeatureGFNI
- X86FeatureVAES
- X86FeatureVPCLMULQDQ
- X86FeatureAVX512_VNNI
- X86FeatureAVX512_BITALG
- X86FeatureTME
- X86FeatureAVX512_VPOPCNTDQ
- _ // ecx bit 15 is reserved
- X86FeatureLA57
- // ecx bits 17-21 are reserved
- _
- _
- _
- _
- _
- X86FeatureRDPID
- // ecx bits 23-24 are reserved
- _
- _
- X86FeatureCLDEMOTE
- _ // ecx bit 26 is reserved
- X86FeatureMOVDIRI
- X86FeatureMOVDIR64B
-)
-
-// Block 4 constants are for xsave capabilities in CPUID.(EAX=0DH,ECX=01H):EAX.
-// The CPUID leaf is available only if 'X86FeatureXSAVE' is present.
-const (
- X86FeatureXSAVEOPT Feature = 4*32 + iota
- X86FeatureXSAVEC
- X86FeatureXGETBV1
- X86FeatureXSAVES
- // EAX[31:4] are reserved.
-)
-
-// Block 5 constants are the extended feature bits in
-// CPUID.(EAX=0x80000001):ECX.
-const (
- X86FeatureLAHF64 Feature = 5*32 + iota
- X86FeatureCMP_LEGACY
- X86FeatureSVM
- X86FeatureEXTAPIC
- X86FeatureCR8_LEGACY
- X86FeatureLZCNT
- X86FeatureSSE4A
- X86FeatureMISALIGNSSE
- X86FeaturePREFETCHW
- X86FeatureOSVW
- X86FeatureIBS
- X86FeatureXOP
- X86FeatureSKINIT
- X86FeatureWDT
- _ // ecx bit 14 is reserved.
- X86FeatureLWP
- X86FeatureFMA4
- X86FeatureTCE
- _ // ecx bit 18 is reserved.
- _ // ecx bit 19 is reserved.
- _ // ecx bit 20 is reserved.
- X86FeatureTBM
- X86FeatureTOPOLOGY
- X86FeaturePERFCTR_CORE
- X86FeaturePERFCTR_NB
- _ // ecx bit 25 is reserved.
- X86FeatureBPEXT
- X86FeaturePERFCTR_TSC
- X86FeaturePERFCTR_LLC
- X86FeatureMWAITX
- // ECX[31:30] are reserved.
-)
-
-// Block 6 constants are the extended feature bits in
-// CPUID.(EAX=0x80000001):EDX.
//
-// These are sparse, and so the bit positions are assigned manually.
-const (
- // On AMD, EDX[24:23] | EDX[17:12] | EDX[9:0] are duplicate features
- // also defined in block 1 (in identical bit positions). Those features
- // are not listed here.
- block6DuplicateMask = 0x183f3ff
-
- X86FeatureSYSCALL Feature = 6*32 + 11
- X86FeatureNX Feature = 6*32 + 20
- X86FeatureMMXEXT Feature = 6*32 + 22
- X86FeatureFXSR_OPT Feature = 6*32 + 25
- X86FeatureGBPAGES Feature = 6*32 + 26
- X86FeatureRDTSCP Feature = 6*32 + 27
- X86FeatureLM Feature = 6*32 + 29
- X86Feature3DNOWEXT Feature = 6*32 + 30
- X86Feature3DNOW Feature = 6*32 + 31
-)
-
-// linuxBlockOrder defines the order in which linux organizes the feature
-// blocks. Linux also tracks feature bits in 32-bit blocks, but in an order
-// which doesn't match well here, so for the /proc/cpuinfo generation we simply
-// re-map the blocks to Linux's ordering and then go through the bits in each
-// block.
-var linuxBlockOrder = []block{1, 6, 0, 5, 2, 4, 3}
-
-// To make emulation of /proc/cpuinfo easy, these names match the names of the
-// basic features in Linux defined in arch/x86/kernel/cpu/capflags.c.
-var x86FeatureStrings = map[Feature]string{
- // Block 0.
- X86FeatureSSE3: "pni",
- X86FeaturePCLMULDQ: "pclmulqdq",
- X86FeatureDTES64: "dtes64",
- X86FeatureMONITOR: "monitor",
- X86FeatureDSCPL: "ds_cpl",
- X86FeatureVMX: "vmx",
- X86FeatureSMX: "smx",
- X86FeatureEST: "est",
- X86FeatureTM2: "tm2",
- X86FeatureSSSE3: "ssse3",
- X86FeatureCNXTID: "cid",
- X86FeatureSDBG: "sdbg",
- X86FeatureFMA: "fma",
- X86FeatureCX16: "cx16",
- X86FeatureXTPR: "xtpr",
- X86FeaturePDCM: "pdcm",
- X86FeaturePCID: "pcid",
- X86FeatureDCA: "dca",
- X86FeatureSSE4_1: "sse4_1",
- X86FeatureSSE4_2: "sse4_2",
- X86FeatureX2APIC: "x2apic",
- X86FeatureMOVBE: "movbe",
- X86FeaturePOPCNT: "popcnt",
- X86FeatureTSCD: "tsc_deadline_timer",
- X86FeatureAES: "aes",
- X86FeatureXSAVE: "xsave",
- X86FeatureAVX: "avx",
- X86FeatureF16C: "f16c",
- X86FeatureRDRAND: "rdrand",
-
- // Block 1.
- X86FeatureFPU: "fpu",
- X86FeatureVME: "vme",
- X86FeatureDE: "de",
- X86FeaturePSE: "pse",
- X86FeatureTSC: "tsc",
- X86FeatureMSR: "msr",
- X86FeaturePAE: "pae",
- X86FeatureMCE: "mce",
- X86FeatureCX8: "cx8",
- X86FeatureAPIC: "apic",
- X86FeatureSEP: "sep",
- X86FeatureMTRR: "mtrr",
- X86FeaturePGE: "pge",
- X86FeatureMCA: "mca",
- X86FeatureCMOV: "cmov",
- X86FeaturePAT: "pat",
- X86FeaturePSE36: "pse36",
- X86FeaturePSN: "pn",
- X86FeatureCLFSH: "clflush",
- X86FeatureDS: "dts",
- X86FeatureACPI: "acpi",
- X86FeatureMMX: "mmx",
- X86FeatureFXSR: "fxsr",
- X86FeatureSSE: "sse",
- X86FeatureSSE2: "sse2",
- X86FeatureSS: "ss",
- X86FeatureHTT: "ht",
- X86FeatureTM: "tm",
- X86FeatureIA64: "ia64",
- X86FeaturePBE: "pbe",
-
- // Block 2.
- X86FeatureFSGSBase: "fsgsbase",
- X86FeatureTSC_ADJUST: "tsc_adjust",
- X86FeatureBMI1: "bmi1",
- X86FeatureHLE: "hle",
- X86FeatureAVX2: "avx2",
- X86FeatureSMEP: "smep",
- X86FeatureBMI2: "bmi2",
- X86FeatureERMS: "erms",
- X86FeatureINVPCID: "invpcid",
- X86FeatureRTM: "rtm",
- X86FeatureCQM: "cqm",
- X86FeatureMPX: "mpx",
- X86FeatureRDT: "rdt_a",
- X86FeatureAVX512F: "avx512f",
- X86FeatureAVX512DQ: "avx512dq",
- X86FeatureRDSEED: "rdseed",
- X86FeatureADX: "adx",
- X86FeatureSMAP: "smap",
- X86FeatureCLWB: "clwb",
- X86FeatureAVX512PF: "avx512pf",
- X86FeatureAVX512ER: "avx512er",
- X86FeatureAVX512CD: "avx512cd",
- X86FeatureSHA: "sha_ni",
- X86FeatureAVX512BW: "avx512bw",
- X86FeatureAVX512VL: "avx512vl",
-
- // Block 3.
- X86FeatureAVX512VBMI: "avx512vbmi",
- X86FeatureUMIP: "umip",
- X86FeaturePKU: "pku",
- X86FeatureOSPKE: "ospke",
- X86FeatureWAITPKG: "waitpkg",
- X86FeatureAVX512_VBMI2: "avx512_vbmi2",
- X86FeatureGFNI: "gfni",
- X86FeatureVAES: "vaes",
- X86FeatureVPCLMULQDQ: "vpclmulqdq",
- X86FeatureAVX512_VNNI: "avx512_vnni",
- X86FeatureAVX512_BITALG: "avx512_bitalg",
- X86FeatureTME: "tme",
- X86FeatureAVX512_VPOPCNTDQ: "avx512_vpopcntdq",
- X86FeatureLA57: "la57",
- X86FeatureRDPID: "rdpid",
- X86FeatureCLDEMOTE: "cldemote",
- X86FeatureMOVDIRI: "movdiri",
- X86FeatureMOVDIR64B: "movdir64b",
-
- // Block 4.
- X86FeatureXSAVEOPT: "xsaveopt",
- X86FeatureXSAVEC: "xsavec",
- X86FeatureXGETBV1: "xgetbv1",
- X86FeatureXSAVES: "xsaves",
-
- // Block 5.
- X86FeatureLAHF64: "lahf_lm", // LAHF/SAHF in long mode
- X86FeatureCMP_LEGACY: "cmp_legacy",
- X86FeatureSVM: "svm",
- X86FeatureEXTAPIC: "extapic",
- X86FeatureCR8_LEGACY: "cr8_legacy",
- X86FeatureLZCNT: "abm", // Advanced bit manipulation
- X86FeatureSSE4A: "sse4a",
- X86FeatureMISALIGNSSE: "misalignsse",
- X86FeaturePREFETCHW: "3dnowprefetch",
- X86FeatureOSVW: "osvw",
- X86FeatureIBS: "ibs",
- X86FeatureXOP: "xop",
- X86FeatureSKINIT: "skinit",
- X86FeatureWDT: "wdt",
- X86FeatureLWP: "lwp",
- X86FeatureFMA4: "fma4",
- X86FeatureTCE: "tce",
- X86FeatureTBM: "tbm",
- X86FeatureTOPOLOGY: "topoext",
- X86FeaturePERFCTR_CORE: "perfctr_core",
- X86FeaturePERFCTR_NB: "perfctr_nb",
- X86FeatureBPEXT: "bpext",
- X86FeaturePERFCTR_TSC: "ptsc",
- X86FeaturePERFCTR_LLC: "perfctr_llc",
- X86FeatureMWAITX: "mwaitx",
-
- // Block 6.
- X86FeatureSYSCALL: "syscall",
- X86FeatureNX: "nx",
- X86FeatureMMXEXT: "mmxext",
- X86FeatureFXSR_OPT: "fxsr_opt",
- X86FeatureGBPAGES: "pdpe1gb",
- X86FeatureRDTSCP: "rdtscp",
- X86FeatureLM: "lm",
- X86Feature3DNOWEXT: "3dnowext",
- X86Feature3DNOW: "3dnow",
-}
-
-// These flags are parse only---they can be used for setting / unsetting the
-// flags, but will not get printed out in /proc/cpuinfo.
-var x86FeatureParseOnlyStrings = map[Feature]string{
- // Block 0.
- X86FeatureOSXSAVE: "osxsave",
-
- // Block 2.
- X86FeatureFDP_EXCPTN_ONLY: "fdp_excptn_only",
- X86FeatureFPCSDS: "fpcsds",
- X86FeatureIPT: "pt",
- X86FeatureCLFLUSHOPT: "clfushopt",
-
- // Block 3.
- X86FeaturePREFETCHWT1: "prefetchwt1",
-}
-
-// intelCacheDescriptors describe the caches and TLBs on the system. They are
-// returned in the registers for eax=2. Intel only.
-type intelCacheDescriptor uint8
-
-// Valid cache/TLB descriptors. All descriptors can be found in Intel SDM Vol.
-// 2, Ch. 3.2, "CPUID", Table 3-12 "Encoding of CPUID Leaf 2 Descriptors".
-const (
- intelNullDescriptor intelCacheDescriptor = 0
- intelNoTLBDescriptor intelCacheDescriptor = 0xfe
- intelNoCacheDescriptor intelCacheDescriptor = 0xff
-
- // Most descriptors omitted for brevity as they are currently unused.
-)
-
-// CacheType describes the type of a cache, as returned in eax[4:0] for eax=4.
-type CacheType uint8
-
-const (
- // cacheNull indicates that there are no more entries.
- cacheNull CacheType = iota
-
- // CacheData is a data cache.
- CacheData
-
- // CacheInstruction is an instruction cache.
- CacheInstruction
-
- // CacheUnified is a unified instruction and data cache.
- CacheUnified
-)
-
-// Cache describes the parameters of a single cache on the system.
-//
-// +stateify savable
-type Cache struct {
- // Level is the hierarchical level of this cache (L1, L2, etc).
- Level uint32
-
- // Type is the type of cache.
- Type CacheType
-
- // FullyAssociative indicates that entries may be placed in any block.
- FullyAssociative bool
-
- // Partitions is the number of physical partitions in the cache.
- Partitions uint32
-
- // Ways is the number of ways of associativity in the cache.
- Ways uint32
-
- // Sets is the number of sets in the cache.
- Sets uint32
-
- // InvalidateHierarchical indicates that WBINVD/INVD from threads
- // sharing this cache acts upon lower level caches for threads sharing
- // this cache.
- InvalidateHierarchical bool
-
- // Inclusive indicates that this cache is inclusive of lower cache
- // levels.
- Inclusive bool
-
- // DirectMapped indicates that this cache is directly mapped from
- // address, rather than using a hash function.
- DirectMapped bool
-}
-
-// Just a way to wrap cpuid function numbers.
-type cpuidFunction uint32
-
-// The constants below are the lower or "standard" cpuid functions, ordered as
-// defined by the hardware.
-const (
- vendorID cpuidFunction = iota // Returns vendor ID and largest standard function.
- featureInfo // Returns basic feature bits and processor signature.
- intelCacheDescriptors // Returns list of cache descriptors. Intel only.
- intelSerialNumber // Returns processor serial number (obsolete on new hardware). Intel only.
- intelDeterministicCacheParams // Returns deterministic cache information. Intel only.
- monitorMwaitParams // Returns information about monitor/mwait instructions.
- powerParams // Returns information about power management and thermal sensors.
- extendedFeatureInfo // Returns extended feature bits.
- _ // Function 0x8 is reserved.
- intelDCAParams // Returns direct cache access information. Intel only.
- intelPMCInfo // Returns information about performance monitoring features. Intel only.
- intelX2APICInfo // Returns core/logical processor topology. Intel only.
- _ // Function 0xc is reserved.
- xSaveInfo // Returns information about extended state management.
-)
-
-// The "extended" functions start at 0x80000000.
-const (
- extendedFunctionInfo cpuidFunction = 0x80000000 + iota // Returns highest available extended function in eax.
- extendedFeatures // Returns some extended feature bits in edx and ecx.
-)
-
-// These are the extended floating point state features. They are used to
-// enumerate floating point features in XCR0, XSTATE_BV, etc.
-const (
- XSAVEFeatureX87 = 1 << 0
- XSAVEFeatureSSE = 1 << 1
- XSAVEFeatureAVX = 1 << 2
- XSAVEFeatureBNDREGS = 1 << 3
- XSAVEFeatureBNDCSR = 1 << 4
- XSAVEFeatureAVX512op = 1 << 5
- XSAVEFeatureAVX512zmm0 = 1 << 6
- XSAVEFeatureAVX512zmm16 = 1 << 7
- XSAVEFeaturePKRU = 1 << 9
-)
-
-var cpuFreqMHz float64
-
-// x86FeaturesFromString includes features from x86FeatureStrings and
-// x86FeatureParseOnlyStrings.
-var x86FeaturesFromString = make(map[string]Feature)
-
-// FeatureFromString returns the Feature associated with the given feature
-// string plus a bool to indicate if it could find the feature.
-func FeatureFromString(s string) (Feature, bool) {
- f, b := x86FeaturesFromString[s]
- return f, b
-}
-
-// String implements fmt.Stringer.
-func (f Feature) String() string {
- if s := f.flagString(false); s != "" {
- return s
- }
-
- block := int(f) / 32
- bit := int(f) % 32
- return fmt.Sprintf("<cpuflag %d; block %d bit %d>", f, block, bit)
-}
-
-func (f Feature) flagString(cpuinfoOnly bool) string {
- if s, ok := x86FeatureStrings[f]; ok {
- return s
- }
- if !cpuinfoOnly {
- return x86FeatureParseOnlyStrings[f]
- }
- return ""
-}
-
-// FeatureSet is a set of Features for a CPU.
-//
-// +stateify savable
-type FeatureSet struct {
- // Set is the set of features that are enabled in this FeatureSet.
- Set map[Feature]bool
-
- // VendorID is the 12-char string returned in ebx:edx:ecx for eax=0.
- VendorID string
-
- // ExtendedFamily is part of the processor signature.
- ExtendedFamily uint8
-
- // ExtendedModel is part of the processor signature.
- ExtendedModel uint8
-
- // ProcessorType is part of the processor signature.
- ProcessorType uint8
-
- // Family is part of the processor signature.
- Family uint8
-
- // Model is part of the processor signature.
- Model uint8
-
- // SteppingID is part of the processor signature.
- SteppingID uint8
-
- // Caches describes the caches on the CPU.
- Caches []Cache
-
- // CacheLine is the size of a cache line in bytes.
- //
- // All caches use the same line size. This is not enforced in the CPUID
- // encoding, but is true on all known x86 processors.
- CacheLine uint32
-}
-
-// FlagsString prints out supported CPU flags. If cpuinfoOnly is true, it is
-// equivalent to the "flags" field in /proc/cpuinfo.
-func (fs *FeatureSet) FlagsString(cpuinfoOnly bool) string {
- var s []string
- for _, b := range linuxBlockOrder {
- for i := 0; i < blockSize; i++ {
- if f := featureID(b, i); fs.Set[f] {
- if fstr := f.flagString(cpuinfoOnly); fstr != "" {
- s = append(s, fstr)
- }
- }
- }
- }
- return strings.Join(s, " ")
-}
-
-// WriteCPUInfoTo is to generate a section of one cpu in /proc/cpuinfo. This is
-// a minimal /proc/cpuinfo, it is missing some fields like "microcode" that are
-// not always printed in Linux. The bogomips field is simply made up.
-func (fs FeatureSet) WriteCPUInfoTo(cpu uint, b *bytes.Buffer) {
- fmt.Fprintf(b, "processor\t: %d\n", cpu)
- fmt.Fprintf(b, "vendor_id\t: %s\n", fs.VendorID)
- fmt.Fprintf(b, "cpu family\t: %d\n", ((fs.ExtendedFamily<<4)&0xff)|fs.Family)
- fmt.Fprintf(b, "model\t\t: %d\n", ((fs.ExtendedModel<<4)&0xff)|fs.Model)
- fmt.Fprintf(b, "model name\t: %s\n", "unknown") // Unknown for now.
- fmt.Fprintf(b, "stepping\t: %s\n", "unknown") // Unknown for now.
- fmt.Fprintf(b, "cpu MHz\t\t: %.3f\n", cpuFreqMHz)
- fmt.Fprintln(b, "fpu\t\t: yes")
- fmt.Fprintln(b, "fpu_exception\t: yes")
- fmt.Fprintf(b, "cpuid level\t: %d\n", uint32(xSaveInfo)) // Same as ax in vendorID.
- fmt.Fprintln(b, "wp\t\t: yes")
- fmt.Fprintf(b, "flags\t\t: %s\n", fs.FlagsString(true))
- fmt.Fprintf(b, "bogomips\t: %.02f\n", cpuFreqMHz) // It's bogus anyway.
- fmt.Fprintf(b, "clflush size\t: %d\n", fs.CacheLine)
- fmt.Fprintf(b, "cache_alignment\t: %d\n", fs.CacheLine)
- fmt.Fprintf(b, "address sizes\t: %d bits physical, %d bits virtual\n", 46, 48)
- fmt.Fprintln(b, "power management:") // This is always here, but can be blank.
- fmt.Fprintln(b, "") // The /proc/cpuinfo file ends with an extra newline.
-}
-
-const (
- amdVendorID = "AuthenticAMD"
- intelVendorID = "GenuineIntel"
-)
-
-// AMD returns true if fs describes an AMD CPU.
-func (fs *FeatureSet) AMD() bool {
- return fs.VendorID == amdVendorID
-}
-
-// Intel returns true if fs describes an Intel CPU.
-func (fs *FeatureSet) Intel() bool {
- return fs.VendorID == intelVendorID
-}
-
-// ErrIncompatible is returned by FeatureSet.HostCompatible if fs is not a
-// subset of the host feature set.
-type ErrIncompatible struct {
- message string
-}
-
-// Error implements error.
-func (e ErrIncompatible) Error() string {
- return e.message
-}
-
-// CheckHostCompatible returns nil if fs is a subset of the host feature set.
-func (fs *FeatureSet) CheckHostCompatible() error {
- hfs := HostFeatureSet()
-
- if diff := fs.Subtract(hfs); diff != nil {
- return ErrIncompatible{fmt.Sprintf("CPU feature set %v incompatible with host feature set %v (missing: %v)", fs.FlagsString(false), hfs.FlagsString(false), diff)}
- }
-
- // The size of a cache line must match, as it is critical to correctly
- // utilizing CLFLUSH. Other cache properties are allowed to change, as
- // they are not important to correctness.
- if fs.CacheLine != hfs.CacheLine {
- return ErrIncompatible{fmt.Sprintf("CPU cache line size %d incompatible with host cache line size %d", fs.CacheLine, hfs.CacheLine)}
- }
-
- return nil
-}
-
-// Helper to convert 3 regs into 12-byte vendor ID.
-func vendorIDFromRegs(bx, cx, dx uint32) string {
- bytes := make([]byte, 0, 12)
- for i := uint(0); i < 4; i++ {
- b := byte(bx >> (i * 8))
- bytes = append(bytes, b)
- }
-
- for i := uint(0); i < 4; i++ {
- b := byte(dx >> (i * 8))
- bytes = append(bytes, b)
- }
-
- for i := uint(0); i < 4; i++ {
- b := byte(cx >> (i * 8))
- bytes = append(bytes, b)
- }
- return string(bytes)
-}
-
-// ExtendedStateSize returns the number of bytes needed to save the "extended
-// state" for this processor and the boundary it must be aligned to. Extended
-// state includes floating point registers, and other cpu state that's not
-// associated with the normal task context.
-//
-// Note: We can save some space here with an optimization where we use a
-// smaller chunk of memory depending on features that are actually enabled.
-// Currently we just use the largest possible size for simplicity (which is
-// about 2.5K worst case, with avx512).
-func (fs *FeatureSet) ExtendedStateSize() (size, align uint) {
- if fs.UseXsave() {
- // Leaf 0 of xsaveinfo function returns the size for currently
- // enabled xsave features in ebx, the maximum size if all valid
- // features are saved with xsave in ecx, and valid XCR0 bits in
- // edx:eax.
- _, _, maxSize, _ := HostID(uint32(xSaveInfo), 0)
- return uint(maxSize), 64
- }
-
- // If we don't support xsave, we fall back to fxsave, which requires
- // 512 bytes aligned to 16 bytes.
- return 512, 16
-}
-
-// ValidXCR0Mask returns the bits that may be set to 1 in control register
-// XCR0.
-func (fs *FeatureSet) ValidXCR0Mask() uint64 {
- if !fs.UseXsave() {
- return 0
- }
- eax, _, _, edx := HostID(uint32(xSaveInfo), 0)
- return uint64(edx)<<32 | uint64(eax)
-}
-
-// vendorIDRegs returns the 3 register values used to construct the 12-byte
-// vendor ID string for eax=0.
-func (fs *FeatureSet) vendorIDRegs() (bx, dx, cx uint32) {
- for i := uint(0); i < 4; i++ {
- bx |= uint32(fs.VendorID[i]) << (i * 8)
- }
-
- for i := uint(0); i < 4; i++ {
- dx |= uint32(fs.VendorID[i+4]) << (i * 8)
- }
-
- for i := uint(0); i < 4; i++ {
- cx |= uint32(fs.VendorID[i+8]) << (i * 8)
- }
- return
-}
-
-// signature returns the signature dword that's returned in eax when eax=1.
-func (fs *FeatureSet) signature() uint32 {
- var s uint32
- s |= uint32(fs.SteppingID & 0xf)
- s |= uint32(fs.Model&0xf) << 4
- s |= uint32(fs.Family&0xf) << 8
- s |= uint32(fs.ProcessorType&0x3) << 12
- s |= uint32(fs.ExtendedModel&0xf) << 16
- s |= uint32(fs.ExtendedFamily&0xff) << 20
- return s
-}
-
-// Helper to deconstruct signature dword.
-func signatureSplit(v uint32) (ef, em, pt, f, m, sid uint8) {
- sid = uint8(v & 0xf)
- m = uint8(v>>4) & 0xf
- f = uint8(v>>8) & 0xf
- pt = uint8(v>>12) & 0x3
- em = uint8(v>>16) & 0xf
- ef = uint8(v >> 20)
- return
-}
-
-// Helper to convert blockwise feature bit masks into a set of features. Masks
-// must be provided in order for each block, without skipping them. If a block
-// does not matter for this feature set, 0 is specified.
-func setFromBlockMasks(blocks ...uint32) map[Feature]bool {
- s := make(map[Feature]bool)
- for b, blockMask := range blocks {
- for i := 0; i < blockSize; i++ {
- if blockMask&1 != 0 {
- s[featureID(block(b), i)] = true
- }
- blockMask >>= 1
- }
- }
- return s
-}
-
-// blockMask returns the 32-bit mask associated with a block of features.
-func (fs *FeatureSet) blockMask(b block) uint32 {
- var mask uint32
- for i := 0; i < blockSize; i++ {
- if fs.Set[featureID(b, i)] {
- mask |= 1 << uint(i)
- }
- }
- return mask
-}
-
-// Remove removes a Feature from a FeatureSet. It ignores features
-// that are not in the FeatureSet.
-func (fs *FeatureSet) Remove(feature Feature) {
- delete(fs.Set, feature)
-}
-
-// Add adds a Feature to a FeatureSet. It ignores duplicate features.
-func (fs *FeatureSet) Add(feature Feature) {
- fs.Set[feature] = true
-}
-
-// HasFeature tests whether or not a feature is in the given feature set.
-func (fs *FeatureSet) HasFeature(feature Feature) bool {
- return fs.Set[feature]
-}
-
-// Subtract returns the features present in fs that are not present in other.
-// If all features in fs are present in other, Subtract returns nil.
-func (fs *FeatureSet) Subtract(other *FeatureSet) (diff map[Feature]bool) {
- for f := range fs.Set {
- if !other.Set[f] {
- if diff == nil {
- diff = make(map[Feature]bool)
- }
- diff[f] = true
- }
- }
-
- return
-}
-
-// EmulateID emulates a cpuid instruction based on the feature set.
-func (fs *FeatureSet) EmulateID(origAx, origCx uint32) (ax, bx, cx, dx uint32) {
- switch cpuidFunction(origAx) {
- case vendorID:
- ax = uint32(xSaveInfo) // 0xd (xSaveInfo) is the highest function we support.
- bx, dx, cx = fs.vendorIDRegs()
- case featureInfo:
- // CLFLUSH line size is encoded in quadwords. Other fields in bx unsupported.
- bx = (fs.CacheLine / 8) << 8
- cx = fs.blockMask(block(0))
- dx = fs.blockMask(block(1))
- ax = fs.signature()
- case intelCacheDescriptors:
- if !fs.Intel() {
- // Reserved on non-Intel.
- return 0, 0, 0, 0
- }
-
- // "The least-significant byte in register EAX (register AL)
- // will always return 01H. Software should ignore this value
- // and not interpret it as an informational descriptor." - SDM
- //
- // We only support reporting cache parameters via
- // intelDeterministicCacheParams; report as much here.
- //
- // We do not support exposing TLB information at all.
- ax = 1 | (uint32(intelNoCacheDescriptor) << 8)
- case intelDeterministicCacheParams:
- if !fs.Intel() {
- // Reserved on non-Intel.
- return 0, 0, 0, 0
- }
-
- // cx is the index of the cache to describe.
- if int(origCx) >= len(fs.Caches) {
- return uint32(cacheNull), 0, 0, 0
- }
- c := fs.Caches[origCx]
-
- ax = uint32(c.Type)
- ax |= c.Level << 5
- ax |= 1 << 8 // Always claim the cache is "self-initializing".
- if c.FullyAssociative {
- ax |= 1 << 9
- }
- // Processor topology not supported.
-
- bx = fs.CacheLine - 1
- bx |= (c.Partitions - 1) << 12
- bx |= (c.Ways - 1) << 22
-
- cx = c.Sets - 1
-
- if !c.InvalidateHierarchical {
- dx |= 1
- }
- if c.Inclusive {
- dx |= 1 << 1
- }
- if !c.DirectMapped {
- dx |= 1 << 2
- }
- case xSaveInfo:
- if !fs.UseXsave() {
- return 0, 0, 0, 0
- }
- return HostID(uint32(xSaveInfo), origCx)
- case extendedFeatureInfo:
- if origCx != 0 {
- break // Only leaf 0 is supported.
- }
- bx = fs.blockMask(block(2))
- cx = fs.blockMask(block(3))
- case extendedFunctionInfo:
- // We only support showing the extended features.
- ax = uint32(extendedFeatures)
- cx = 0
- case extendedFeatures:
- cx = fs.blockMask(block(5))
- dx = fs.blockMask(block(6))
- if fs.AMD() {
- // AMD duplicates some block 1 features in block 6.
- dx |= fs.blockMask(block(1)) & block6DuplicateMask
- }
- }
-
- return
-}
-
-// UseXsave returns the choice of fp state saving instruction.
-func (fs *FeatureSet) UseXsave() bool {
- return fs.HasFeature(X86FeatureXSAVE) && fs.HasFeature(X86FeatureOSXSAVE)
-}
-
-// UseXsaveopt returns true if 'fs' supports the "xsaveopt" instruction.
-func (fs *FeatureSet) UseXsaveopt() bool {
- return fs.UseXsave() && fs.HasFeature(X86FeatureXSAVEOPT)
-}
-
-// HostID executes a native CPUID instruction.
-func HostID(axArg, cxArg uint32) (ax, bx, cx, dx uint32)
-
-// HostFeatureSet uses cpuid to get host values and construct a feature set
-// that matches that of the host machine. Note that there are several places
-// where there appear to be some unnecessary assignments between register names
-// (ax, bx, cx, or dx) and featureBlockN variables. This is to explicitly show
-// where the different feature blocks come from, to make the code easier to
-// inspect and read.
-func HostFeatureSet() *FeatureSet {
- // eax=0 gets max supported feature and vendor ID.
- _, bx, cx, dx := HostID(0, 0)
- vendorID := vendorIDFromRegs(bx, cx, dx)
-
- // eax=1 gets basic features in ecx:edx.
- ax, bx, cx, dx := HostID(1, 0)
- featureBlock0 := cx
- featureBlock1 := dx
- ef, em, pt, f, m, sid := signatureSplit(ax)
- cacheLine := 8 * (bx >> 8) & 0xff
-
- // eax=4, ecx=i gets details about cache index i. Only supported on Intel.
- var caches []Cache
- if vendorID == intelVendorID {
- // ecx selects the cache index until a null type is returned.
- for i := uint32(0); ; i++ {
- ax, bx, cx, dx := HostID(4, i)
- t := CacheType(ax & 0xf)
- if t == cacheNull {
- break
- }
-
- lineSize := (bx & 0xfff) + 1
- if lineSize != cacheLine {
- panic(fmt.Sprintf("Mismatched cache line size: %d vs %d", lineSize, cacheLine))
- }
-
- caches = append(caches, Cache{
- Type: t,
- Level: (ax >> 5) & 0x7,
- FullyAssociative: ((ax >> 9) & 1) == 1,
- Partitions: ((bx >> 12) & 0x3ff) + 1,
- Ways: ((bx >> 22) & 0x3ff) + 1,
- Sets: cx + 1,
- InvalidateHierarchical: (dx & 1) == 0,
- Inclusive: ((dx >> 1) & 1) == 1,
- DirectMapped: ((dx >> 2) & 1) == 0,
- })
- }
- }
-
- // eax=7, ecx=0 gets extended features in ecx:ebx.
- _, bx, cx, _ = HostID(7, 0)
- featureBlock2 := bx
- featureBlock3 := cx
-
- // Leaf 0xd is supported only if CPUID.1:ECX.XSAVE[bit 26] is set.
- var featureBlock4 uint32
- if (featureBlock0 & (1 << 26)) != 0 {
- featureBlock4, _, _, _ = HostID(uint32(xSaveInfo), 1)
- }
-
- // eax=0x80000000 gets supported extended levels. We use this to
- // determine if there are any non-zero block 4 or block 6 bits to find.
- var featureBlock5, featureBlock6 uint32
- if ax, _, _, _ := HostID(uint32(extendedFunctionInfo), 0); ax >= uint32(extendedFeatures) {
- // eax=0x80000001 gets AMD added feature bits.
- _, _, cx, dx = HostID(uint32(extendedFeatures), 0)
- featureBlock5 = cx
- // Ignore features duplicated from block 1 on AMD. These bits
- // are reserved on Intel.
- featureBlock6 = dx &^ block6DuplicateMask
- }
-
- set := setFromBlockMasks(featureBlock0, featureBlock1, featureBlock2, featureBlock3, featureBlock4, featureBlock5, featureBlock6)
- return &FeatureSet{
- Set: set,
- VendorID: vendorID,
- ExtendedFamily: ef,
- ExtendedModel: em,
- ProcessorType: pt,
- Family: f,
- Model: m,
- SteppingID: sid,
- CacheLine: cacheLine,
- Caches: caches,
- }
-}
-
-// Reads max cpu frequency from host /proc/cpuinfo. Must run before
-// whitelisting. This value is used to create the fake /proc/cpuinfo from a
-// FeatureSet.
-func initCPUFreq() {
- cpuinfob, err := ioutil.ReadFile("/proc/cpuinfo")
- if err != nil {
- // Leave it as 0... The standalone VDSO bails out in the same
- // way.
- log.Warningf("Could not read /proc/cpuinfo: %v", err)
- return
- }
- cpuinfo := string(cpuinfob)
-
- // We get the value straight from host /proc/cpuinfo. On machines with
- // frequency scaling enabled, this will only get the current value
- // which will likely be inaccurate. This is fine on machines with
- // frequency scaling disabled.
- for _, line := range strings.Split(cpuinfo, "\n") {
- if strings.Contains(line, "cpu MHz") {
- splitMHz := strings.Split(line, ":")
- if len(splitMHz) < 2 {
- log.Warningf("Could not read /proc/cpuinfo: malformed cpu MHz line")
- return
- }
-
- // If there was a problem, leave cpuFreqMHz as 0.
- var err error
- cpuFreqMHz, err = strconv.ParseFloat(strings.TrimSpace(splitMHz[1]), 64)
- if err != nil {
- log.Warningf("Could not parse cpu MHz value %v: %v", splitMHz[1], err)
- cpuFreqMHz = 0
- return
- }
- return
- }
- }
- log.Warningf("Could not parse /proc/cpuinfo, it is empty or does not contain cpu MHz")
-}
-
-func initFeaturesFromString() {
- for f, s := range x86FeatureStrings {
- x86FeaturesFromString[s] = f
- }
- for f, s := range x86FeatureParseOnlyStrings {
- x86FeaturesFromString[s] = f
- }
-}
-
-func init() {
- // initCpuFreq must be run before whitelists are enabled.
- initCPUFreq()
- initFeaturesFromString()
-}
+// On arm64, features are numbered according to the ELF HWCAP definition.
+// arch/arm64/include/uapi/asm/hwcap.h
+type Feature int
diff --git a/pkg/cpuid/cpuid_arm64.go b/pkg/cpuid/cpuid_arm64.go
new file mode 100644
index 000000000..08381c1c0
--- /dev/null
+++ b/pkg/cpuid/cpuid_arm64.go
@@ -0,0 +1,482 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package cpuid
+
+import (
+ "bytes"
+ "encoding/binary"
+ "fmt"
+ "io/ioutil"
+ "strconv"
+ "strings"
+
+ "gvisor.dev/gvisor/pkg/log"
+)
+
+// ARM64 doesn't have a 'cpuid' equivalent, which means it have no architected
+// discovery mechanism for hardware features available to userspace code at EL0.
+// The kernel exposes the presence of these features to userspace through a set
+// of flags(HWCAP/HWCAP2) bits, exposed in the auxilliary vector.
+// Ref Documentation/arm64/elf_hwcaps.rst for more info.
+//
+// Currently, only the HWCAP bits are supported.
+
+const (
+ // ARM64FeatureFP indicates support for single and double precision
+ // float point types.
+ ARM64FeatureFP Feature = iota
+
+ // ARM64FeatureASIMD indicates support for Advanced SIMD with single
+ // and double precision float point arithmetic.
+ ARM64FeatureASIMD
+
+ // ARM64FeatureEVTSTRM indicates support for the generic timer
+ // configured to generate events at a frequency of approximately
+ // 100KHz.
+ ARM64FeatureEVTSTRM
+
+ // ARM64FeatureAES indicates support for AES instructions
+ // (AESE/AESD/AESMC/AESIMC).
+ ARM64FeatureAES
+
+ // ARM64FeaturePMULL indicates support for AES instructions
+ // (PMULL/PMULL2).
+ ARM64FeaturePMULL
+
+ // ARM64FeatureSHA1 indicates support for SHA1 instructions
+ // (SHA1C/SHA1P/SHA1M etc).
+ ARM64FeatureSHA1
+
+ // ARM64FeatureSHA2 indicates support for SHA2 instructions
+ // (SHA256H/SHA256H2/SHA256SU0 etc).
+ ARM64FeatureSHA2
+
+ // ARM64FeatureCRC32 indicates support for CRC32 instructions
+ // (CRC32B/CRC32H/CRC32W etc).
+ ARM64FeatureCRC32
+
+ // ARM64FeatureATOMICS indicates support for atomic instructions
+ // (LDADD/LDCLR/LDEOR/LDSET etc).
+ ARM64FeatureATOMICS
+
+ // ARM64FeatureFPHP indicates support for half precision float point
+ // arithmetic.
+ ARM64FeatureFPHP
+
+ // ARM64FeatureASIMDHP indicates support for ASIMD with half precision
+ // float point arithmetic.
+ ARM64FeatureASIMDHP
+
+ // ARM64FeatureCPUID indicates support for EL0 access to certain ID
+ // registers is available.
+ ARM64FeatureCPUID
+
+ // ARM64FeatureASIMDRDM indicates support for SQRDMLAH and SQRDMLSH
+ // instructions.
+ ARM64FeatureASIMDRDM
+
+ // ARM64FeatureJSCVT indicates support for the FJCVTZS instruction.
+ ARM64FeatureJSCVT
+
+ // ARM64FeatureFCMA indicates support for the FCMLA and FCADD
+ // instructions.
+ ARM64FeatureFCMA
+
+ // ARM64FeatureLRCPC indicates support for the LDAPRB/LDAPRH/LDAPR
+ // instructions.
+ ARM64FeatureLRCPC
+
+ // ARM64FeatureDCPOP indicates support for DC instruction (DC CVAP).
+ ARM64FeatureDCPOP
+
+ // ARM64FeatureSHA3 indicates support for SHA3 instructions
+ // (EOR3/RAX1/XAR/BCAX).
+ ARM64FeatureSHA3
+
+ // ARM64FeatureSM3 indicates support for SM3 instructions
+ // (SM3SS1/SM3TT1A/SM3TT1B).
+ ARM64FeatureSM3
+
+ // ARM64FeatureSM4 indicates support for SM4 instructions
+ // (SM4E/SM4EKEY).
+ ARM64FeatureSM4
+
+ // ARM64FeatureASIMDDP indicates support for dot product instructions
+ // (UDOT/SDOT).
+ ARM64FeatureASIMDDP
+
+ // ARM64FeatureSHA512 indicates support for SHA2 instructions
+ // (SHA512H/SHA512H2/SHA512SU0).
+ ARM64FeatureSHA512
+
+ // ARM64FeatureSVE indicates support for Scalable Vector Extension.
+ ARM64FeatureSVE
+
+ // ARM64FeatureASIMDFHM indicates support for FMLAL and FMLSL
+ // instructions.
+ ARM64FeatureASIMDFHM
+)
+
+// ELF auxiliary vector tags
+const (
+ _AT_NULL = 0 // End of vector
+ _AT_HWCAP = 16 // hardware capability bit vector
+ _AT_HWCAP2 = 26 // hardware capability bit vector 2
+)
+
+// These should not be changed after they are initialized.
+var hwCap uint
+
+// To make emulation of /proc/cpuinfo easy, these names match the names of the
+// basic features in Linux defined in arch/arm64/kernel/cpuinfo.c.
+var arm64FeatureStrings = map[Feature]string{
+ ARM64FeatureFP: "fp",
+ ARM64FeatureASIMD: "asimd",
+ ARM64FeatureEVTSTRM: "evtstrm",
+ ARM64FeatureAES: "aes",
+ ARM64FeaturePMULL: "pmull",
+ ARM64FeatureSHA1: "sha1",
+ ARM64FeatureSHA2: "sha2",
+ ARM64FeatureCRC32: "crc32",
+ ARM64FeatureATOMICS: "atomics",
+ ARM64FeatureFPHP: "fphp",
+ ARM64FeatureASIMDHP: "asimdhp",
+ ARM64FeatureCPUID: "cpuid",
+ ARM64FeatureASIMDRDM: "asimdrdm",
+ ARM64FeatureJSCVT: "jscvt",
+ ARM64FeatureFCMA: "fcma",
+ ARM64FeatureLRCPC: "lrcpc",
+ ARM64FeatureDCPOP: "dcpop",
+ ARM64FeatureSHA3: "sha3",
+ ARM64FeatureSM3: "sm3",
+ ARM64FeatureSM4: "sm4",
+ ARM64FeatureASIMDDP: "asimddp",
+ ARM64FeatureSHA512: "sha512",
+ ARM64FeatureSVE: "sve",
+ ARM64FeatureASIMDFHM: "asimdfhm",
+}
+
+var (
+ cpuFreqMHz float64
+ cpuImplHex uint64
+ cpuArchDec uint64
+ cpuVarHex uint64
+ cpuPartHex uint64
+ cpuRevDec uint64
+)
+
+// arm64FeaturesFromString includes features from arm64FeatureStrings.
+var arm64FeaturesFromString = make(map[string]Feature)
+
+// FeatureFromString returns the Feature associated with the given feature
+// string plus a bool to indicate if it could find the feature.
+func FeatureFromString(s string) (Feature, bool) {
+ f, b := arm64FeaturesFromString[s]
+ return f, b
+}
+
+// String implements fmt.Stringer.
+func (f Feature) String() string {
+ if s := f.flagString(); s != "" {
+ return s
+ }
+
+ return fmt.Sprintf("<cpuflag %d>", f)
+}
+
+func (f Feature) flagString() string {
+ if s, ok := arm64FeatureStrings[f]; ok {
+ return s
+ }
+
+ return ""
+}
+
+// FeatureSet is a set of Features for a CPU.
+//
+// +stateify savable
+type FeatureSet struct {
+ // Set is the set of features that are enabled in this FeatureSet.
+ Set map[Feature]bool
+
+ // CPUImplementer is part of the processor signature.
+ CPUImplementer uint8
+
+ // CPUArchitecture is part of the processor signature.
+ CPUArchitecture uint8
+
+ // CPUVariant is part of the processor signature.
+ CPUVariant uint8
+
+ // CPUPartnum is part of the processor signature.
+ CPUPartnum uint16
+
+ // CPURevision is part of the processor signature.
+ CPURevision uint8
+}
+
+// CheckHostCompatible returns nil if fs is a subset of the host feature set.
+// Noop on arm64.
+func (fs *FeatureSet) CheckHostCompatible() error {
+ return nil
+}
+
+// ExtendedStateSize returns the number of bytes needed to save the "extended
+// state" for this processor and the boundary it must be aligned to. Extended
+// state includes floating point(NEON) registers, and other cpu state that's not
+// associated with the normal task context.
+func (fs *FeatureSet) ExtendedStateSize() (size, align uint) {
+ // ARMv8 provide 32x128bits NEON registers.
+ //
+ // Ref arch/arm64/include/uapi/asm/ptrace.h
+ // struct user_fpsimd_state {
+ // __uint128_t vregs[32];
+ // __u32 fpsr;
+ // __u32 fpcr;
+ // __u32 __reserved[2];
+ // };
+ return 528, 16
+}
+
+// HasFeature tests whether or not a feature is in the given feature set.
+func (fs *FeatureSet) HasFeature(feature Feature) bool {
+ return fs.Set[feature]
+}
+
+// UseXsave returns true if 'fs' supports the "xsave" instruction.
+//
+// Irrelevant on arm64.
+func (fs *FeatureSet) UseXsave() bool {
+ return false
+}
+
+// FlagsString prints out supported CPU "flags" field in /proc/cpuinfo.
+func (fs *FeatureSet) FlagsString() string {
+ var s []string
+ for f, _ := range arm64FeatureStrings {
+ if fs.Set[f] {
+ if fstr := f.flagString(); fstr != "" {
+ s = append(s, fstr)
+ }
+ }
+ }
+ return strings.Join(s, " ")
+}
+
+// WriteCPUInfoTo is to generate a section of one cpu in /proc/cpuinfo. This is
+// a minimal /proc/cpuinfo, and the bogomips field is simply made up.
+func (fs FeatureSet) WriteCPUInfoTo(cpu uint, b *bytes.Buffer) {
+ fmt.Fprintf(b, "processor\t: %d\n", cpu)
+ fmt.Fprintf(b, "BogoMIPS\t: %.02f\n", cpuFreqMHz) // It's bogus anyway.
+ fmt.Fprintf(b, "Features\t\t: %s\n", fs.FlagsString())
+ fmt.Fprintf(b, "CPU implementer\t: 0x%x\n", cpuImplHex)
+ fmt.Fprintf(b, "CPU architecture\t: %d\n", cpuArchDec)
+ fmt.Fprintf(b, "CPU variant\t: 0x%x\n", cpuVarHex)
+ fmt.Fprintf(b, "CPU part\t: 0x%x\n", cpuPartHex)
+ fmt.Fprintf(b, "CPU revision\t: %d\n", cpuRevDec)
+ fmt.Fprintln(b, "") // The /proc/cpuinfo file ends with an extra newline.
+}
+
+// HostFeatureSet uses hwCap to get host values and construct a feature set
+// that matches that of the host machine.
+func HostFeatureSet() *FeatureSet {
+ s := make(map[Feature]bool)
+
+ for f, _ := range arm64FeatureStrings {
+ if hwCap&(1<<f) != 0 {
+ s[f] = true
+ }
+ }
+
+ return &FeatureSet{
+ Set: s,
+ CPUImplementer: uint8(cpuImplHex),
+ CPUArchitecture: uint8(cpuArchDec),
+ CPUVariant: uint8(cpuVarHex),
+ CPUPartnum: uint16(cpuPartHex),
+ CPURevision: uint8(cpuRevDec),
+ }
+}
+
+// Reads bogomips from host /proc/cpuinfo. Must run before whitelisting.
+// This value is used to create the fake /proc/cpuinfo from a FeatureSet.
+func initCPUInfo() {
+ cpuinfob, err := ioutil.ReadFile("/proc/cpuinfo")
+ if err != nil {
+ // Leave it as 0. The standalone VDSO bails out in the same way.
+ log.Warningf("Could not read /proc/cpuinfo: %v", err)
+ return
+ }
+ cpuinfo := string(cpuinfob)
+
+ // We get the value straight from host /proc/cpuinfo.
+ for _, line := range strings.Split(cpuinfo, "\n") {
+ switch {
+ case strings.Contains(line, "BogoMIPS"):
+ {
+ splitMHz := strings.Split(line, ":")
+ if len(splitMHz) < 2 {
+ log.Warningf("Could not read /proc/cpuinfo: malformed BogoMIPS")
+ break
+ }
+
+ // If there was a problem, leave cpuFreqMHz as 0.
+ var err error
+ cpuFreqMHz, err = strconv.ParseFloat(strings.TrimSpace(splitMHz[1]), 64)
+ if err != nil {
+ log.Warningf("Could not parse BogoMIPS value %v: %v", splitMHz[1], err)
+ cpuFreqMHz = 0
+ }
+ }
+ case strings.Contains(line, "CPU implementer"):
+ {
+ splitImpl := strings.Split(line, ":")
+ if len(splitImpl) < 2 {
+ log.Warningf("Could not read /proc/cpuinfo: malformed CPU implementer")
+ break
+ }
+
+ // If there was a problem, leave cpuImplHex as 0.
+ var err error
+ cpuImplHex, err = strconv.ParseUint(strings.TrimSpace(splitImpl[1]), 0, 64)
+ if err != nil {
+ log.Warningf("Could not parse CPU implementer value %v: %v", splitImpl[1], err)
+ cpuImplHex = 0
+ }
+ }
+ case strings.Contains(line, "CPU architecture"):
+ {
+ splitArch := strings.Split(line, ":")
+ if len(splitArch) < 2 {
+ log.Warningf("Could not read /proc/cpuinfo: malformed CPU architecture")
+ break
+ }
+
+ // If there was a problem, leave cpuArchDec as 0.
+ var err error
+ cpuArchDec, err = strconv.ParseUint(strings.TrimSpace(splitArch[1]), 0, 64)
+ if err != nil {
+ log.Warningf("Could not parse CPU architecture value %v: %v", splitArch[1], err)
+ cpuArchDec = 0
+ }
+ }
+ case strings.Contains(line, "CPU variant"):
+ {
+ splitVar := strings.Split(line, ":")
+ if len(splitVar) < 2 {
+ log.Warningf("Could not read /proc/cpuinfo: malformed CPU variant")
+ break
+ }
+
+ // If there was a problem, leave cpuVarHex as 0.
+ var err error
+ cpuVarHex, err = strconv.ParseUint(strings.TrimSpace(splitVar[1]), 0, 64)
+ if err != nil {
+ log.Warningf("Could not parse CPU variant value %v: %v", splitVar[1], err)
+ cpuVarHex = 0
+ }
+ }
+ case strings.Contains(line, "CPU part"):
+ {
+ splitPart := strings.Split(line, ":")
+ if len(splitPart) < 2 {
+ log.Warningf("Could not read /proc/cpuinfo: malformed CPU part")
+ break
+ }
+
+ // If there was a problem, leave cpuPartHex as 0.
+ var err error
+ cpuPartHex, err = strconv.ParseUint(strings.TrimSpace(splitPart[1]), 0, 64)
+ if err != nil {
+ log.Warningf("Could not parse CPU part value %v: %v", splitPart[1], err)
+ cpuPartHex = 0
+ }
+ }
+ case strings.Contains(line, "CPU revision"):
+ {
+ splitRev := strings.Split(line, ":")
+ if len(splitRev) < 2 {
+ log.Warningf("Could not read /proc/cpuinfo: malformed CPU revision")
+ break
+ }
+
+ // If there was a problem, leave cpuRevDec as 0.
+ var err error
+ cpuRevDec, err = strconv.ParseUint(strings.TrimSpace(splitRev[1]), 0, 64)
+ if err != nil {
+ log.Warningf("Could not parse CPU revision value %v: %v", splitRev[1], err)
+ cpuRevDec = 0
+ }
+ }
+ }
+ }
+}
+
+// The auxiliary vector of a process on the Linux system can be read
+// from /proc/self/auxv, and tags and values are stored as 8-bytes
+// decimal key-value pairs on the 64-bit system.
+//
+// $ od -t d8 /proc/self/auxv
+// 0000000 33 140734615224320
+// 0000020 16 3219913727
+// 0000040 6 4096
+// 0000060 17 100
+// 0000100 3 94665627353152
+// 0000120 4 56
+// 0000140 5 9
+// 0000160 7 140425502162944
+// 0000200 8 0
+// 0000220 9 94665627365760
+// 0000240 11 1000
+// 0000260 12 1000
+// 0000300 13 1000
+// 0000320 14 1000
+// 0000340 23 0
+// 0000360 25 140734614619513
+// 0000400 26 0
+// 0000420 31 140734614626284
+// 0000440 15 140734614619529
+// 0000460 0 0
+func initHwCap() {
+ auxv, err := ioutil.ReadFile("/proc/self/auxv")
+ if err != nil {
+ log.Warningf("Could not read /proc/self/auxv: %v", err)
+ return
+ }
+
+ l := len(auxv) / 16
+ for i := 0; i < l; i++ {
+ tag := binary.LittleEndian.Uint64(auxv[i*16:])
+ val := binary.LittleEndian.Uint64(auxv[(i*16 + 8):])
+ if tag == _AT_HWCAP {
+ hwCap = uint(val)
+ break
+ }
+ }
+}
+
+func initFeaturesFromString() {
+ for f, s := range arm64FeatureStrings {
+ arm64FeaturesFromString[s] = f
+ }
+}
+
+func init() {
+ initCPUInfo()
+ initHwCap()
+ initFeaturesFromString()
+}
diff --git a/pkg/cpuid/cpuid_arm64_test.go b/pkg/cpuid/cpuid_arm64_test.go
new file mode 100644
index 000000000..a34f67779
--- /dev/null
+++ b/pkg/cpuid/cpuid_arm64_test.go
@@ -0,0 +1,55 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build arm64
+
+package cpuid
+
+import (
+ "testing"
+)
+
+var justFP = &FeatureSet{
+ Set: map[Feature]bool{
+ ARM64FeatureFP: true,
+ }}
+
+func TestHostFeatureSet(t *testing.T) {
+ hostFeatures := HostFeatureSet()
+ if len(hostFeatures.Set) == 0 {
+ t.Errorf("Got invalid feature set %v from HostFeatureSet()", hostFeatures)
+ }
+}
+
+func TestHasFeature(t *testing.T) {
+ if !justFP.HasFeature(ARM64FeatureFP) {
+ t.Errorf("HasFeature failed, %v should contain %v", justFP, ARM64FeatureFP)
+ }
+
+ if justFP.HasFeature(ARM64FeatureSM3) {
+ t.Errorf("HasFeature failed, %v should not contain %v", justFP, ARM64FeatureSM3)
+ }
+}
+
+func TestFeatureFromString(t *testing.T) {
+ f, ok := FeatureFromString("asimd")
+ if f != ARM64FeatureASIMD || !ok {
+ t.Errorf("got %v want asimd", f)
+ }
+
+ f, ok = FeatureFromString("bad")
+ if ok {
+ t.Errorf("got %v want nothing", f)
+ }
+}
diff --git a/pkg/cpuid/cpuid_parse_test.go b/pkg/cpuid/cpuid_parse_x86_test.go
index dd9969db4..d48418e69 100644
--- a/pkg/cpuid/cpuid_parse_test.go
+++ b/pkg/cpuid/cpuid_parse_x86_test.go
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+// +build i386 amd64
+
package cpuid
import (
diff --git a/pkg/cpuid/cpuid_x86.go b/pkg/cpuid/cpuid_x86.go
new file mode 100644
index 000000000..333ca0a04
--- /dev/null
+++ b/pkg/cpuid/cpuid_x86.go
@@ -0,0 +1,1100 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build i386 amd64
+
+package cpuid
+
+import (
+ "bytes"
+ "fmt"
+ "io/ioutil"
+ "strconv"
+ "strings"
+
+ "gvisor.dev/gvisor/pkg/log"
+)
+
+// Common references for CPUID leaves and bits:
+//
+// Intel:
+// * Intel SDM Volume 2, Chapter 3.2 "CPUID" (more up-to-date)
+// * Intel Application Note 485 (more detailed)
+//
+// AMD:
+// * AMD64 APM Volume 3, Appendix 3 "Obtaining Processor Information ..."
+
+// block is a collection of 32 Feature bits.
+type block int
+
+const blockSize = 32
+
+// Feature bits are numbered according to "blocks". Each block is 32 bits, and
+// feature bits from the same source (cpuid leaf/level) are in the same block.
+func featureID(b block, bit int) Feature {
+ return Feature(32*int(b) + bit)
+}
+
+// Block 0 constants are all of the "basic" feature bits returned by a cpuid in
+// ecx with eax=1.
+const (
+ X86FeatureSSE3 Feature = iota
+ X86FeaturePCLMULDQ
+ X86FeatureDTES64
+ X86FeatureMONITOR
+ X86FeatureDSCPL
+ X86FeatureVMX
+ X86FeatureSMX
+ X86FeatureEST
+ X86FeatureTM2
+ X86FeatureSSSE3 // Not a typo, "supplemental" SSE3.
+ X86FeatureCNXTID
+ X86FeatureSDBG
+ X86FeatureFMA
+ X86FeatureCX16
+ X86FeatureXTPR
+ X86FeaturePDCM
+ _ // ecx bit 16 is reserved.
+ X86FeaturePCID
+ X86FeatureDCA
+ X86FeatureSSE4_1
+ X86FeatureSSE4_2
+ X86FeatureX2APIC
+ X86FeatureMOVBE
+ X86FeaturePOPCNT
+ X86FeatureTSCD
+ X86FeatureAES
+ X86FeatureXSAVE
+ X86FeatureOSXSAVE
+ X86FeatureAVX
+ X86FeatureF16C
+ X86FeatureRDRAND
+ _ // ecx bit 31 is reserved.
+)
+
+// Block 1 constants are all of the "basic" feature bits returned by a cpuid in
+// edx with eax=1.
+const (
+ X86FeatureFPU Feature = 32 + iota
+ X86FeatureVME
+ X86FeatureDE
+ X86FeaturePSE
+ X86FeatureTSC
+ X86FeatureMSR
+ X86FeaturePAE
+ X86FeatureMCE
+ X86FeatureCX8
+ X86FeatureAPIC
+ _ // edx bit 10 is reserved.
+ X86FeatureSEP
+ X86FeatureMTRR
+ X86FeaturePGE
+ X86FeatureMCA
+ X86FeatureCMOV
+ X86FeaturePAT
+ X86FeaturePSE36
+ X86FeaturePSN
+ X86FeatureCLFSH
+ _ // edx bit 20 is reserved.
+ X86FeatureDS
+ X86FeatureACPI
+ X86FeatureMMX
+ X86FeatureFXSR
+ X86FeatureSSE
+ X86FeatureSSE2
+ X86FeatureSS
+ X86FeatureHTT
+ X86FeatureTM
+ X86FeatureIA64
+ X86FeaturePBE
+)
+
+// Block 2 bits are the "structured extended" features returned in ebx for
+// eax=7, ecx=0.
+const (
+ X86FeatureFSGSBase Feature = 2*32 + iota
+ X86FeatureTSC_ADJUST
+ _ // ebx bit 2 is reserved.
+ X86FeatureBMI1
+ X86FeatureHLE
+ X86FeatureAVX2
+ X86FeatureFDP_EXCPTN_ONLY
+ X86FeatureSMEP
+ X86FeatureBMI2
+ X86FeatureERMS
+ X86FeatureINVPCID
+ X86FeatureRTM
+ X86FeatureCQM
+ X86FeatureFPCSDS
+ X86FeatureMPX
+ X86FeatureRDT
+ X86FeatureAVX512F
+ X86FeatureAVX512DQ
+ X86FeatureRDSEED
+ X86FeatureADX
+ X86FeatureSMAP
+ X86FeatureAVX512IFMA
+ X86FeaturePCOMMIT
+ X86FeatureCLFLUSHOPT
+ X86FeatureCLWB
+ X86FeatureIPT // Intel processor trace.
+ X86FeatureAVX512PF
+ X86FeatureAVX512ER
+ X86FeatureAVX512CD
+ X86FeatureSHA
+ X86FeatureAVX512BW
+ X86FeatureAVX512VL
+)
+
+// Block 3 bits are the "extended" features returned in ecx for eax=7, ecx=0.
+const (
+ X86FeaturePREFETCHWT1 Feature = 3*32 + iota
+ X86FeatureAVX512VBMI
+ X86FeatureUMIP
+ X86FeaturePKU
+ X86FeatureOSPKE
+ X86FeatureWAITPKG
+ X86FeatureAVX512_VBMI2
+ _ // ecx bit 7 is reserved
+ X86FeatureGFNI
+ X86FeatureVAES
+ X86FeatureVPCLMULQDQ
+ X86FeatureAVX512_VNNI
+ X86FeatureAVX512_BITALG
+ X86FeatureTME
+ X86FeatureAVX512_VPOPCNTDQ
+ _ // ecx bit 15 is reserved
+ X86FeatureLA57
+ // ecx bits 17-21 are reserved
+ _
+ _
+ _
+ _
+ _
+ X86FeatureRDPID
+ // ecx bits 23-24 are reserved
+ _
+ _
+ X86FeatureCLDEMOTE
+ _ // ecx bit 26 is reserved
+ X86FeatureMOVDIRI
+ X86FeatureMOVDIR64B
+)
+
+// Block 4 constants are for xsave capabilities in CPUID.(EAX=0DH,ECX=01H):EAX.
+// The CPUID leaf is available only if 'X86FeatureXSAVE' is present.
+const (
+ X86FeatureXSAVEOPT Feature = 4*32 + iota
+ X86FeatureXSAVEC
+ X86FeatureXGETBV1
+ X86FeatureXSAVES
+ // EAX[31:4] are reserved.
+)
+
+// Block 5 constants are the extended feature bits in
+// CPUID.(EAX=0x80000001):ECX.
+const (
+ X86FeatureLAHF64 Feature = 5*32 + iota
+ X86FeatureCMP_LEGACY
+ X86FeatureSVM
+ X86FeatureEXTAPIC
+ X86FeatureCR8_LEGACY
+ X86FeatureLZCNT
+ X86FeatureSSE4A
+ X86FeatureMISALIGNSSE
+ X86FeaturePREFETCHW
+ X86FeatureOSVW
+ X86FeatureIBS
+ X86FeatureXOP
+ X86FeatureSKINIT
+ X86FeatureWDT
+ _ // ecx bit 14 is reserved.
+ X86FeatureLWP
+ X86FeatureFMA4
+ X86FeatureTCE
+ _ // ecx bit 18 is reserved.
+ _ // ecx bit 19 is reserved.
+ _ // ecx bit 20 is reserved.
+ X86FeatureTBM
+ X86FeatureTOPOLOGY
+ X86FeaturePERFCTR_CORE
+ X86FeaturePERFCTR_NB
+ _ // ecx bit 25 is reserved.
+ X86FeatureBPEXT
+ X86FeaturePERFCTR_TSC
+ X86FeaturePERFCTR_LLC
+ X86FeatureMWAITX
+ // ECX[31:30] are reserved.
+)
+
+// Block 6 constants are the extended feature bits in
+// CPUID.(EAX=0x80000001):EDX.
+//
+// These are sparse, and so the bit positions are assigned manually.
+const (
+ // On AMD, EDX[24:23] | EDX[17:12] | EDX[9:0] are duplicate features
+ // also defined in block 1 (in identical bit positions). Those features
+ // are not listed here.
+ block6DuplicateMask = 0x183f3ff
+
+ X86FeatureSYSCALL Feature = 6*32 + 11
+ X86FeatureNX Feature = 6*32 + 20
+ X86FeatureMMXEXT Feature = 6*32 + 22
+ X86FeatureFXSR_OPT Feature = 6*32 + 25
+ X86FeatureGBPAGES Feature = 6*32 + 26
+ X86FeatureRDTSCP Feature = 6*32 + 27
+ X86FeatureLM Feature = 6*32 + 29
+ X86Feature3DNOWEXT Feature = 6*32 + 30
+ X86Feature3DNOW Feature = 6*32 + 31
+)
+
+// linuxBlockOrder defines the order in which linux organizes the feature
+// blocks. Linux also tracks feature bits in 32-bit blocks, but in an order
+// which doesn't match well here, so for the /proc/cpuinfo generation we simply
+// re-map the blocks to Linux's ordering and then go through the bits in each
+// block.
+var linuxBlockOrder = []block{1, 6, 0, 5, 2, 4, 3}
+
+// To make emulation of /proc/cpuinfo easy, these names match the names of the
+// basic features in Linux defined in arch/x86/kernel/cpu/capflags.c.
+var x86FeatureStrings = map[Feature]string{
+ // Block 0.
+ X86FeatureSSE3: "pni",
+ X86FeaturePCLMULDQ: "pclmulqdq",
+ X86FeatureDTES64: "dtes64",
+ X86FeatureMONITOR: "monitor",
+ X86FeatureDSCPL: "ds_cpl",
+ X86FeatureVMX: "vmx",
+ X86FeatureSMX: "smx",
+ X86FeatureEST: "est",
+ X86FeatureTM2: "tm2",
+ X86FeatureSSSE3: "ssse3",
+ X86FeatureCNXTID: "cid",
+ X86FeatureSDBG: "sdbg",
+ X86FeatureFMA: "fma",
+ X86FeatureCX16: "cx16",
+ X86FeatureXTPR: "xtpr",
+ X86FeaturePDCM: "pdcm",
+ X86FeaturePCID: "pcid",
+ X86FeatureDCA: "dca",
+ X86FeatureSSE4_1: "sse4_1",
+ X86FeatureSSE4_2: "sse4_2",
+ X86FeatureX2APIC: "x2apic",
+ X86FeatureMOVBE: "movbe",
+ X86FeaturePOPCNT: "popcnt",
+ X86FeatureTSCD: "tsc_deadline_timer",
+ X86FeatureAES: "aes",
+ X86FeatureXSAVE: "xsave",
+ X86FeatureAVX: "avx",
+ X86FeatureF16C: "f16c",
+ X86FeatureRDRAND: "rdrand",
+
+ // Block 1.
+ X86FeatureFPU: "fpu",
+ X86FeatureVME: "vme",
+ X86FeatureDE: "de",
+ X86FeaturePSE: "pse",
+ X86FeatureTSC: "tsc",
+ X86FeatureMSR: "msr",
+ X86FeaturePAE: "pae",
+ X86FeatureMCE: "mce",
+ X86FeatureCX8: "cx8",
+ X86FeatureAPIC: "apic",
+ X86FeatureSEP: "sep",
+ X86FeatureMTRR: "mtrr",
+ X86FeaturePGE: "pge",
+ X86FeatureMCA: "mca",
+ X86FeatureCMOV: "cmov",
+ X86FeaturePAT: "pat",
+ X86FeaturePSE36: "pse36",
+ X86FeaturePSN: "pn",
+ X86FeatureCLFSH: "clflush",
+ X86FeatureDS: "dts",
+ X86FeatureACPI: "acpi",
+ X86FeatureMMX: "mmx",
+ X86FeatureFXSR: "fxsr",
+ X86FeatureSSE: "sse",
+ X86FeatureSSE2: "sse2",
+ X86FeatureSS: "ss",
+ X86FeatureHTT: "ht",
+ X86FeatureTM: "tm",
+ X86FeatureIA64: "ia64",
+ X86FeaturePBE: "pbe",
+
+ // Block 2.
+ X86FeatureFSGSBase: "fsgsbase",
+ X86FeatureTSC_ADJUST: "tsc_adjust",
+ X86FeatureBMI1: "bmi1",
+ X86FeatureHLE: "hle",
+ X86FeatureAVX2: "avx2",
+ X86FeatureSMEP: "smep",
+ X86FeatureBMI2: "bmi2",
+ X86FeatureERMS: "erms",
+ X86FeatureINVPCID: "invpcid",
+ X86FeatureRTM: "rtm",
+ X86FeatureCQM: "cqm",
+ X86FeatureMPX: "mpx",
+ X86FeatureRDT: "rdt_a",
+ X86FeatureAVX512F: "avx512f",
+ X86FeatureAVX512DQ: "avx512dq",
+ X86FeatureRDSEED: "rdseed",
+ X86FeatureADX: "adx",
+ X86FeatureSMAP: "smap",
+ X86FeatureCLWB: "clwb",
+ X86FeatureAVX512PF: "avx512pf",
+ X86FeatureAVX512ER: "avx512er",
+ X86FeatureAVX512CD: "avx512cd",
+ X86FeatureSHA: "sha_ni",
+ X86FeatureAVX512BW: "avx512bw",
+ X86FeatureAVX512VL: "avx512vl",
+
+ // Block 3.
+ X86FeatureAVX512VBMI: "avx512vbmi",
+ X86FeatureUMIP: "umip",
+ X86FeaturePKU: "pku",
+ X86FeatureOSPKE: "ospke",
+ X86FeatureWAITPKG: "waitpkg",
+ X86FeatureAVX512_VBMI2: "avx512_vbmi2",
+ X86FeatureGFNI: "gfni",
+ X86FeatureVAES: "vaes",
+ X86FeatureVPCLMULQDQ: "vpclmulqdq",
+ X86FeatureAVX512_VNNI: "avx512_vnni",
+ X86FeatureAVX512_BITALG: "avx512_bitalg",
+ X86FeatureTME: "tme",
+ X86FeatureAVX512_VPOPCNTDQ: "avx512_vpopcntdq",
+ X86FeatureLA57: "la57",
+ X86FeatureRDPID: "rdpid",
+ X86FeatureCLDEMOTE: "cldemote",
+ X86FeatureMOVDIRI: "movdiri",
+ X86FeatureMOVDIR64B: "movdir64b",
+
+ // Block 4.
+ X86FeatureXSAVEOPT: "xsaveopt",
+ X86FeatureXSAVEC: "xsavec",
+ X86FeatureXGETBV1: "xgetbv1",
+ X86FeatureXSAVES: "xsaves",
+
+ // Block 5.
+ X86FeatureLAHF64: "lahf_lm", // LAHF/SAHF in long mode
+ X86FeatureCMP_LEGACY: "cmp_legacy",
+ X86FeatureSVM: "svm",
+ X86FeatureEXTAPIC: "extapic",
+ X86FeatureCR8_LEGACY: "cr8_legacy",
+ X86FeatureLZCNT: "abm", // Advanced bit manipulation
+ X86FeatureSSE4A: "sse4a",
+ X86FeatureMISALIGNSSE: "misalignsse",
+ X86FeaturePREFETCHW: "3dnowprefetch",
+ X86FeatureOSVW: "osvw",
+ X86FeatureIBS: "ibs",
+ X86FeatureXOP: "xop",
+ X86FeatureSKINIT: "skinit",
+ X86FeatureWDT: "wdt",
+ X86FeatureLWP: "lwp",
+ X86FeatureFMA4: "fma4",
+ X86FeatureTCE: "tce",
+ X86FeatureTBM: "tbm",
+ X86FeatureTOPOLOGY: "topoext",
+ X86FeaturePERFCTR_CORE: "perfctr_core",
+ X86FeaturePERFCTR_NB: "perfctr_nb",
+ X86FeatureBPEXT: "bpext",
+ X86FeaturePERFCTR_TSC: "ptsc",
+ X86FeaturePERFCTR_LLC: "perfctr_llc",
+ X86FeatureMWAITX: "mwaitx",
+
+ // Block 6.
+ X86FeatureSYSCALL: "syscall",
+ X86FeatureNX: "nx",
+ X86FeatureMMXEXT: "mmxext",
+ X86FeatureFXSR_OPT: "fxsr_opt",
+ X86FeatureGBPAGES: "pdpe1gb",
+ X86FeatureRDTSCP: "rdtscp",
+ X86FeatureLM: "lm",
+ X86Feature3DNOWEXT: "3dnowext",
+ X86Feature3DNOW: "3dnow",
+}
+
+// These flags are parse only---they can be used for setting / unsetting the
+// flags, but will not get printed out in /proc/cpuinfo.
+var x86FeatureParseOnlyStrings = map[Feature]string{
+ // Block 0.
+ X86FeatureOSXSAVE: "osxsave",
+
+ // Block 2.
+ X86FeatureFDP_EXCPTN_ONLY: "fdp_excptn_only",
+ X86FeatureFPCSDS: "fpcsds",
+ X86FeatureIPT: "pt",
+ X86FeatureCLFLUSHOPT: "clfushopt",
+
+ // Block 3.
+ X86FeaturePREFETCHWT1: "prefetchwt1",
+}
+
+// intelCacheDescriptors describe the caches and TLBs on the system. They are
+// returned in the registers for eax=2. Intel only.
+type intelCacheDescriptor uint8
+
+// Valid cache/TLB descriptors. All descriptors can be found in Intel SDM Vol.
+// 2, Ch. 3.2, "CPUID", Table 3-12 "Encoding of CPUID Leaf 2 Descriptors".
+const (
+ intelNullDescriptor intelCacheDescriptor = 0
+ intelNoTLBDescriptor intelCacheDescriptor = 0xfe
+ intelNoCacheDescriptor intelCacheDescriptor = 0xff
+
+ // Most descriptors omitted for brevity as they are currently unused.
+)
+
+// CacheType describes the type of a cache, as returned in eax[4:0] for eax=4.
+type CacheType uint8
+
+const (
+ // cacheNull indicates that there are no more entries.
+ cacheNull CacheType = iota
+
+ // CacheData is a data cache.
+ CacheData
+
+ // CacheInstruction is an instruction cache.
+ CacheInstruction
+
+ // CacheUnified is a unified instruction and data cache.
+ CacheUnified
+)
+
+// Cache describes the parameters of a single cache on the system.
+//
+// +stateify savable
+type Cache struct {
+ // Level is the hierarchical level of this cache (L1, L2, etc).
+ Level uint32
+
+ // Type is the type of cache.
+ Type CacheType
+
+ // FullyAssociative indicates that entries may be placed in any block.
+ FullyAssociative bool
+
+ // Partitions is the number of physical partitions in the cache.
+ Partitions uint32
+
+ // Ways is the number of ways of associativity in the cache.
+ Ways uint32
+
+ // Sets is the number of sets in the cache.
+ Sets uint32
+
+ // InvalidateHierarchical indicates that WBINVD/INVD from threads
+ // sharing this cache acts upon lower level caches for threads sharing
+ // this cache.
+ InvalidateHierarchical bool
+
+ // Inclusive indicates that this cache is inclusive of lower cache
+ // levels.
+ Inclusive bool
+
+ // DirectMapped indicates that this cache is directly mapped from
+ // address, rather than using a hash function.
+ DirectMapped bool
+}
+
+// Just a way to wrap cpuid function numbers.
+type cpuidFunction uint32
+
+// The constants below are the lower or "standard" cpuid functions, ordered as
+// defined by the hardware.
+const (
+ vendorID cpuidFunction = iota // Returns vendor ID and largest standard function.
+ featureInfo // Returns basic feature bits and processor signature.
+ intelCacheDescriptors // Returns list of cache descriptors. Intel only.
+ intelSerialNumber // Returns processor serial number (obsolete on new hardware). Intel only.
+ intelDeterministicCacheParams // Returns deterministic cache information. Intel only.
+ monitorMwaitParams // Returns information about monitor/mwait instructions.
+ powerParams // Returns information about power management and thermal sensors.
+ extendedFeatureInfo // Returns extended feature bits.
+ _ // Function 0x8 is reserved.
+ intelDCAParams // Returns direct cache access information. Intel only.
+ intelPMCInfo // Returns information about performance monitoring features. Intel only.
+ intelX2APICInfo // Returns core/logical processor topology. Intel only.
+ _ // Function 0xc is reserved.
+ xSaveInfo // Returns information about extended state management.
+)
+
+// The "extended" functions start at 0x80000000.
+const (
+ extendedFunctionInfo cpuidFunction = 0x80000000 + iota // Returns highest available extended function in eax.
+ extendedFeatures // Returns some extended feature bits in edx and ecx.
+)
+
+// These are the extended floating point state features. They are used to
+// enumerate floating point features in XCR0, XSTATE_BV, etc.
+const (
+ XSAVEFeatureX87 = 1 << 0
+ XSAVEFeatureSSE = 1 << 1
+ XSAVEFeatureAVX = 1 << 2
+ XSAVEFeatureBNDREGS = 1 << 3
+ XSAVEFeatureBNDCSR = 1 << 4
+ XSAVEFeatureAVX512op = 1 << 5
+ XSAVEFeatureAVX512zmm0 = 1 << 6
+ XSAVEFeatureAVX512zmm16 = 1 << 7
+ XSAVEFeaturePKRU = 1 << 9
+)
+
+var cpuFreqMHz float64
+
+// x86FeaturesFromString includes features from x86FeatureStrings and
+// x86FeatureParseOnlyStrings.
+var x86FeaturesFromString = make(map[string]Feature)
+
+// FeatureFromString returns the Feature associated with the given feature
+// string plus a bool to indicate if it could find the feature.
+func FeatureFromString(s string) (Feature, bool) {
+ f, b := x86FeaturesFromString[s]
+ return f, b
+}
+
+// String implements fmt.Stringer.
+func (f Feature) String() string {
+ if s := f.flagString(false); s != "" {
+ return s
+ }
+
+ block := int(f) / 32
+ bit := int(f) % 32
+ return fmt.Sprintf("<cpuflag %d; block %d bit %d>", f, block, bit)
+}
+
+func (f Feature) flagString(cpuinfoOnly bool) string {
+ if s, ok := x86FeatureStrings[f]; ok {
+ return s
+ }
+ if !cpuinfoOnly {
+ return x86FeatureParseOnlyStrings[f]
+ }
+ return ""
+}
+
+// FeatureSet is a set of Features for a CPU.
+//
+// +stateify savable
+type FeatureSet struct {
+ // Set is the set of features that are enabled in this FeatureSet.
+ Set map[Feature]bool
+
+ // VendorID is the 12-char string returned in ebx:edx:ecx for eax=0.
+ VendorID string
+
+ // ExtendedFamily is part of the processor signature.
+ ExtendedFamily uint8
+
+ // ExtendedModel is part of the processor signature.
+ ExtendedModel uint8
+
+ // ProcessorType is part of the processor signature.
+ ProcessorType uint8
+
+ // Family is part of the processor signature.
+ Family uint8
+
+ // Model is part of the processor signature.
+ Model uint8
+
+ // SteppingID is part of the processor signature.
+ SteppingID uint8
+
+ // Caches describes the caches on the CPU.
+ Caches []Cache
+
+ // CacheLine is the size of a cache line in bytes.
+ //
+ // All caches use the same line size. This is not enforced in the CPUID
+ // encoding, but is true on all known x86 processors.
+ CacheLine uint32
+}
+
+// FlagsString prints out supported CPU flags. If cpuinfoOnly is true, it is
+// equivalent to the "flags" field in /proc/cpuinfo.
+func (fs *FeatureSet) FlagsString(cpuinfoOnly bool) string {
+ var s []string
+ for _, b := range linuxBlockOrder {
+ for i := 0; i < blockSize; i++ {
+ if f := featureID(b, i); fs.Set[f] {
+ if fstr := f.flagString(cpuinfoOnly); fstr != "" {
+ s = append(s, fstr)
+ }
+ }
+ }
+ }
+ return strings.Join(s, " ")
+}
+
+// WriteCPUInfoTo is to generate a section of one cpu in /proc/cpuinfo. This is
+// a minimal /proc/cpuinfo, it is missing some fields like "microcode" that are
+// not always printed in Linux. The bogomips field is simply made up.
+func (fs FeatureSet) WriteCPUInfoTo(cpu uint, b *bytes.Buffer) {
+ fmt.Fprintf(b, "processor\t: %d\n", cpu)
+ fmt.Fprintf(b, "vendor_id\t: %s\n", fs.VendorID)
+ fmt.Fprintf(b, "cpu family\t: %d\n", ((fs.ExtendedFamily<<4)&0xff)|fs.Family)
+ fmt.Fprintf(b, "model\t\t: %d\n", ((fs.ExtendedModel<<4)&0xff)|fs.Model)
+ fmt.Fprintf(b, "model name\t: %s\n", "unknown") // Unknown for now.
+ fmt.Fprintf(b, "stepping\t: %s\n", "unknown") // Unknown for now.
+ fmt.Fprintf(b, "cpu MHz\t\t: %.3f\n", cpuFreqMHz)
+ fmt.Fprintln(b, "fpu\t\t: yes")
+ fmt.Fprintln(b, "fpu_exception\t: yes")
+ fmt.Fprintf(b, "cpuid level\t: %d\n", uint32(xSaveInfo)) // Same as ax in vendorID.
+ fmt.Fprintln(b, "wp\t\t: yes")
+ fmt.Fprintf(b, "flags\t\t: %s\n", fs.FlagsString(true))
+ fmt.Fprintf(b, "bogomips\t: %.02f\n", cpuFreqMHz) // It's bogus anyway.
+ fmt.Fprintf(b, "clflush size\t: %d\n", fs.CacheLine)
+ fmt.Fprintf(b, "cache_alignment\t: %d\n", fs.CacheLine)
+ fmt.Fprintf(b, "address sizes\t: %d bits physical, %d bits virtual\n", 46, 48)
+ fmt.Fprintln(b, "power management:") // This is always here, but can be blank.
+ fmt.Fprintln(b, "") // The /proc/cpuinfo file ends with an extra newline.
+}
+
+const (
+ amdVendorID = "AuthenticAMD"
+ intelVendorID = "GenuineIntel"
+)
+
+// AMD returns true if fs describes an AMD CPU.
+func (fs *FeatureSet) AMD() bool {
+ return fs.VendorID == amdVendorID
+}
+
+// Intel returns true if fs describes an Intel CPU.
+func (fs *FeatureSet) Intel() bool {
+ return fs.VendorID == intelVendorID
+}
+
+// ErrIncompatible is returned by FeatureSet.HostCompatible if fs is not a
+// subset of the host feature set.
+type ErrIncompatible struct {
+ message string
+}
+
+// Error implements error.
+func (e ErrIncompatible) Error() string {
+ return e.message
+}
+
+// CheckHostCompatible returns nil if fs is a subset of the host feature set.
+func (fs *FeatureSet) CheckHostCompatible() error {
+ hfs := HostFeatureSet()
+
+ if diff := fs.Subtract(hfs); diff != nil {
+ return ErrIncompatible{fmt.Sprintf("CPU feature set %v incompatible with host feature set %v (missing: %v)", fs.FlagsString(false), hfs.FlagsString(false), diff)}
+ }
+
+ // The size of a cache line must match, as it is critical to correctly
+ // utilizing CLFLUSH. Other cache properties are allowed to change, as
+ // they are not important to correctness.
+ if fs.CacheLine != hfs.CacheLine {
+ return ErrIncompatible{fmt.Sprintf("CPU cache line size %d incompatible with host cache line size %d", fs.CacheLine, hfs.CacheLine)}
+ }
+
+ return nil
+}
+
+// Helper to convert 3 regs into 12-byte vendor ID.
+func vendorIDFromRegs(bx, cx, dx uint32) string {
+ bytes := make([]byte, 0, 12)
+ for i := uint(0); i < 4; i++ {
+ b := byte(bx >> (i * 8))
+ bytes = append(bytes, b)
+ }
+
+ for i := uint(0); i < 4; i++ {
+ b := byte(dx >> (i * 8))
+ bytes = append(bytes, b)
+ }
+
+ for i := uint(0); i < 4; i++ {
+ b := byte(cx >> (i * 8))
+ bytes = append(bytes, b)
+ }
+ return string(bytes)
+}
+
+// ExtendedStateSize returns the number of bytes needed to save the "extended
+// state" for this processor and the boundary it must be aligned to. Extended
+// state includes floating point registers, and other cpu state that's not
+// associated with the normal task context.
+//
+// Note: We can save some space here with an optimization where we use a
+// smaller chunk of memory depending on features that are actually enabled.
+// Currently we just use the largest possible size for simplicity (which is
+// about 2.5K worst case, with avx512).
+func (fs *FeatureSet) ExtendedStateSize() (size, align uint) {
+ if fs.UseXsave() {
+ // Leaf 0 of xsaveinfo function returns the size for currently
+ // enabled xsave features in ebx, the maximum size if all valid
+ // features are saved with xsave in ecx, and valid XCR0 bits in
+ // edx:eax.
+ _, _, maxSize, _ := HostID(uint32(xSaveInfo), 0)
+ return uint(maxSize), 64
+ }
+
+ // If we don't support xsave, we fall back to fxsave, which requires
+ // 512 bytes aligned to 16 bytes.
+ return 512, 16
+}
+
+// ValidXCR0Mask returns the bits that may be set to 1 in control register
+// XCR0.
+func (fs *FeatureSet) ValidXCR0Mask() uint64 {
+ if !fs.UseXsave() {
+ return 0
+ }
+ eax, _, _, edx := HostID(uint32(xSaveInfo), 0)
+ return uint64(edx)<<32 | uint64(eax)
+}
+
+// vendorIDRegs returns the 3 register values used to construct the 12-byte
+// vendor ID string for eax=0.
+func (fs *FeatureSet) vendorIDRegs() (bx, dx, cx uint32) {
+ for i := uint(0); i < 4; i++ {
+ bx |= uint32(fs.VendorID[i]) << (i * 8)
+ }
+
+ for i := uint(0); i < 4; i++ {
+ dx |= uint32(fs.VendorID[i+4]) << (i * 8)
+ }
+
+ for i := uint(0); i < 4; i++ {
+ cx |= uint32(fs.VendorID[i+8]) << (i * 8)
+ }
+ return
+}
+
+// signature returns the signature dword that's returned in eax when eax=1.
+func (fs *FeatureSet) signature() uint32 {
+ var s uint32
+ s |= uint32(fs.SteppingID & 0xf)
+ s |= uint32(fs.Model&0xf) << 4
+ s |= uint32(fs.Family&0xf) << 8
+ s |= uint32(fs.ProcessorType&0x3) << 12
+ s |= uint32(fs.ExtendedModel&0xf) << 16
+ s |= uint32(fs.ExtendedFamily&0xff) << 20
+ return s
+}
+
+// Helper to deconstruct signature dword.
+func signatureSplit(v uint32) (ef, em, pt, f, m, sid uint8) {
+ sid = uint8(v & 0xf)
+ m = uint8(v>>4) & 0xf
+ f = uint8(v>>8) & 0xf
+ pt = uint8(v>>12) & 0x3
+ em = uint8(v>>16) & 0xf
+ ef = uint8(v >> 20)
+ return
+}
+
+// Helper to convert blockwise feature bit masks into a set of features. Masks
+// must be provided in order for each block, without skipping them. If a block
+// does not matter for this feature set, 0 is specified.
+func setFromBlockMasks(blocks ...uint32) map[Feature]bool {
+ s := make(map[Feature]bool)
+ for b, blockMask := range blocks {
+ for i := 0; i < blockSize; i++ {
+ if blockMask&1 != 0 {
+ s[featureID(block(b), i)] = true
+ }
+ blockMask >>= 1
+ }
+ }
+ return s
+}
+
+// blockMask returns the 32-bit mask associated with a block of features.
+func (fs *FeatureSet) blockMask(b block) uint32 {
+ var mask uint32
+ for i := 0; i < blockSize; i++ {
+ if fs.Set[featureID(b, i)] {
+ mask |= 1 << uint(i)
+ }
+ }
+ return mask
+}
+
+// Remove removes a Feature from a FeatureSet. It ignores features
+// that are not in the FeatureSet.
+func (fs *FeatureSet) Remove(feature Feature) {
+ delete(fs.Set, feature)
+}
+
+// Add adds a Feature to a FeatureSet. It ignores duplicate features.
+func (fs *FeatureSet) Add(feature Feature) {
+ fs.Set[feature] = true
+}
+
+// HasFeature tests whether or not a feature is in the given feature set.
+func (fs *FeatureSet) HasFeature(feature Feature) bool {
+ return fs.Set[feature]
+}
+
+// Subtract returns the features present in fs that are not present in other.
+// If all features in fs are present in other, Subtract returns nil.
+func (fs *FeatureSet) Subtract(other *FeatureSet) (diff map[Feature]bool) {
+ for f := range fs.Set {
+ if !other.Set[f] {
+ if diff == nil {
+ diff = make(map[Feature]bool)
+ }
+ diff[f] = true
+ }
+ }
+
+ return
+}
+
+// EmulateID emulates a cpuid instruction based on the feature set.
+func (fs *FeatureSet) EmulateID(origAx, origCx uint32) (ax, bx, cx, dx uint32) {
+ switch cpuidFunction(origAx) {
+ case vendorID:
+ ax = uint32(xSaveInfo) // 0xd (xSaveInfo) is the highest function we support.
+ bx, dx, cx = fs.vendorIDRegs()
+ case featureInfo:
+ // CLFLUSH line size is encoded in quadwords. Other fields in bx unsupported.
+ bx = (fs.CacheLine / 8) << 8
+ cx = fs.blockMask(block(0))
+ dx = fs.blockMask(block(1))
+ ax = fs.signature()
+ case intelCacheDescriptors:
+ if !fs.Intel() {
+ // Reserved on non-Intel.
+ return 0, 0, 0, 0
+ }
+
+ // "The least-significant byte in register EAX (register AL)
+ // will always return 01H. Software should ignore this value
+ // and not interpret it as an informational descriptor." - SDM
+ //
+ // We only support reporting cache parameters via
+ // intelDeterministicCacheParams; report as much here.
+ //
+ // We do not support exposing TLB information at all.
+ ax = 1 | (uint32(intelNoCacheDescriptor) << 8)
+ case intelDeterministicCacheParams:
+ if !fs.Intel() {
+ // Reserved on non-Intel.
+ return 0, 0, 0, 0
+ }
+
+ // cx is the index of the cache to describe.
+ if int(origCx) >= len(fs.Caches) {
+ return uint32(cacheNull), 0, 0, 0
+ }
+ c := fs.Caches[origCx]
+
+ ax = uint32(c.Type)
+ ax |= c.Level << 5
+ ax |= 1 << 8 // Always claim the cache is "self-initializing".
+ if c.FullyAssociative {
+ ax |= 1 << 9
+ }
+ // Processor topology not supported.
+
+ bx = fs.CacheLine - 1
+ bx |= (c.Partitions - 1) << 12
+ bx |= (c.Ways - 1) << 22
+
+ cx = c.Sets - 1
+
+ if !c.InvalidateHierarchical {
+ dx |= 1
+ }
+ if c.Inclusive {
+ dx |= 1 << 1
+ }
+ if !c.DirectMapped {
+ dx |= 1 << 2
+ }
+ case xSaveInfo:
+ if !fs.UseXsave() {
+ return 0, 0, 0, 0
+ }
+ return HostID(uint32(xSaveInfo), origCx)
+ case extendedFeatureInfo:
+ if origCx != 0 {
+ break // Only leaf 0 is supported.
+ }
+ bx = fs.blockMask(block(2))
+ cx = fs.blockMask(block(3))
+ case extendedFunctionInfo:
+ // We only support showing the extended features.
+ ax = uint32(extendedFeatures)
+ cx = 0
+ case extendedFeatures:
+ cx = fs.blockMask(block(5))
+ dx = fs.blockMask(block(6))
+ if fs.AMD() {
+ // AMD duplicates some block 1 features in block 6.
+ dx |= fs.blockMask(block(1)) & block6DuplicateMask
+ }
+ }
+
+ return
+}
+
+// UseXsave returns the choice of fp state saving instruction.
+func (fs *FeatureSet) UseXsave() bool {
+ return fs.HasFeature(X86FeatureXSAVE) && fs.HasFeature(X86FeatureOSXSAVE)
+}
+
+// UseXsaveopt returns true if 'fs' supports the "xsaveopt" instruction.
+func (fs *FeatureSet) UseXsaveopt() bool {
+ return fs.UseXsave() && fs.HasFeature(X86FeatureXSAVEOPT)
+}
+
+// HostID executes a native CPUID instruction.
+func HostID(axArg, cxArg uint32) (ax, bx, cx, dx uint32)
+
+// HostFeatureSet uses cpuid to get host values and construct a feature set
+// that matches that of the host machine. Note that there are several places
+// where there appear to be some unnecessary assignments between register names
+// (ax, bx, cx, or dx) and featureBlockN variables. This is to explicitly show
+// where the different feature blocks come from, to make the code easier to
+// inspect and read.
+func HostFeatureSet() *FeatureSet {
+ // eax=0 gets max supported feature and vendor ID.
+ _, bx, cx, dx := HostID(0, 0)
+ vendorID := vendorIDFromRegs(bx, cx, dx)
+
+ // eax=1 gets basic features in ecx:edx.
+ ax, bx, cx, dx := HostID(1, 0)
+ featureBlock0 := cx
+ featureBlock1 := dx
+ ef, em, pt, f, m, sid := signatureSplit(ax)
+ cacheLine := 8 * (bx >> 8) & 0xff
+
+ // eax=4, ecx=i gets details about cache index i. Only supported on Intel.
+ var caches []Cache
+ if vendorID == intelVendorID {
+ // ecx selects the cache index until a null type is returned.
+ for i := uint32(0); ; i++ {
+ ax, bx, cx, dx := HostID(4, i)
+ t := CacheType(ax & 0xf)
+ if t == cacheNull {
+ break
+ }
+
+ lineSize := (bx & 0xfff) + 1
+ if lineSize != cacheLine {
+ panic(fmt.Sprintf("Mismatched cache line size: %d vs %d", lineSize, cacheLine))
+ }
+
+ caches = append(caches, Cache{
+ Type: t,
+ Level: (ax >> 5) & 0x7,
+ FullyAssociative: ((ax >> 9) & 1) == 1,
+ Partitions: ((bx >> 12) & 0x3ff) + 1,
+ Ways: ((bx >> 22) & 0x3ff) + 1,
+ Sets: cx + 1,
+ InvalidateHierarchical: (dx & 1) == 0,
+ Inclusive: ((dx >> 1) & 1) == 1,
+ DirectMapped: ((dx >> 2) & 1) == 0,
+ })
+ }
+ }
+
+ // eax=7, ecx=0 gets extended features in ecx:ebx.
+ _, bx, cx, _ = HostID(7, 0)
+ featureBlock2 := bx
+ featureBlock3 := cx
+
+ // Leaf 0xd is supported only if CPUID.1:ECX.XSAVE[bit 26] is set.
+ var featureBlock4 uint32
+ if (featureBlock0 & (1 << 26)) != 0 {
+ featureBlock4, _, _, _ = HostID(uint32(xSaveInfo), 1)
+ }
+
+ // eax=0x80000000 gets supported extended levels. We use this to
+ // determine if there are any non-zero block 4 or block 6 bits to find.
+ var featureBlock5, featureBlock6 uint32
+ if ax, _, _, _ := HostID(uint32(extendedFunctionInfo), 0); ax >= uint32(extendedFeatures) {
+ // eax=0x80000001 gets AMD added feature bits.
+ _, _, cx, dx = HostID(uint32(extendedFeatures), 0)
+ featureBlock5 = cx
+ // Ignore features duplicated from block 1 on AMD. These bits
+ // are reserved on Intel.
+ featureBlock6 = dx &^ block6DuplicateMask
+ }
+
+ set := setFromBlockMasks(featureBlock0, featureBlock1, featureBlock2, featureBlock3, featureBlock4, featureBlock5, featureBlock6)
+ return &FeatureSet{
+ Set: set,
+ VendorID: vendorID,
+ ExtendedFamily: ef,
+ ExtendedModel: em,
+ ProcessorType: pt,
+ Family: f,
+ Model: m,
+ SteppingID: sid,
+ CacheLine: cacheLine,
+ Caches: caches,
+ }
+}
+
+// Reads max cpu frequency from host /proc/cpuinfo. Must run before
+// whitelisting. This value is used to create the fake /proc/cpuinfo from a
+// FeatureSet.
+func initCPUFreq() {
+ cpuinfob, err := ioutil.ReadFile("/proc/cpuinfo")
+ if err != nil {
+ // Leave it as 0... The standalone VDSO bails out in the same
+ // way.
+ log.Warningf("Could not read /proc/cpuinfo: %v", err)
+ return
+ }
+ cpuinfo := string(cpuinfob)
+
+ // We get the value straight from host /proc/cpuinfo. On machines with
+ // frequency scaling enabled, this will only get the current value
+ // which will likely be inaccurate. This is fine on machines with
+ // frequency scaling disabled.
+ for _, line := range strings.Split(cpuinfo, "\n") {
+ if strings.Contains(line, "cpu MHz") {
+ splitMHz := strings.Split(line, ":")
+ if len(splitMHz) < 2 {
+ log.Warningf("Could not read /proc/cpuinfo: malformed cpu MHz line")
+ return
+ }
+
+ // If there was a problem, leave cpuFreqMHz as 0.
+ var err error
+ cpuFreqMHz, err = strconv.ParseFloat(strings.TrimSpace(splitMHz[1]), 64)
+ if err != nil {
+ log.Warningf("Could not parse cpu MHz value %v: %v", splitMHz[1], err)
+ cpuFreqMHz = 0
+ return
+ }
+ return
+ }
+ }
+ log.Warningf("Could not parse /proc/cpuinfo, it is empty or does not contain cpu MHz")
+}
+
+func initFeaturesFromString() {
+ for f, s := range x86FeatureStrings {
+ x86FeaturesFromString[s] = f
+ }
+ for f, s := range x86FeatureParseOnlyStrings {
+ x86FeaturesFromString[s] = f
+ }
+}
+
+func init() {
+ // initCpuFreq must be run before whitelists are enabled.
+ initCPUFreq()
+ initFeaturesFromString()
+}
diff --git a/pkg/cpuid/cpuid_test.go b/pkg/cpuid/cpuid_x86_test.go
index a707ebb55..0fe20c213 100644
--- a/pkg/cpuid/cpuid_test.go
+++ b/pkg/cpuid/cpuid_x86_test.go
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+// +build i386 amd64
+
package cpuid
import (
diff --git a/pkg/p9/client_file.go b/pkg/p9/client_file.go
index 0254e4ccc..2ee07b664 100644
--- a/pkg/p9/client_file.go
+++ b/pkg/p9/client_file.go
@@ -194,6 +194,39 @@ func (c *clientFile) SetXattr(name, value string, flags uint32) error {
return c.client.sendRecv(&Tsetxattr{FID: c.fid, Name: name, Value: value, Flags: flags}, &Rsetxattr{})
}
+// ListXattr implements File.ListXattr.
+func (c *clientFile) ListXattr(size uint64) (map[string]struct{}, error) {
+ if atomic.LoadUint32(&c.closed) != 0 {
+ return nil, syscall.EBADF
+ }
+ if !versionSupportsListRemoveXattr(c.client.version) {
+ return nil, syscall.EOPNOTSUPP
+ }
+
+ rlistxattr := Rlistxattr{}
+ if err := c.client.sendRecv(&Tlistxattr{FID: c.fid, Size: size}, &rlistxattr); err != nil {
+ return nil, err
+ }
+
+ xattrs := make(map[string]struct{}, len(rlistxattr.Xattrs))
+ for _, x := range rlistxattr.Xattrs {
+ xattrs[x] = struct{}{}
+ }
+ return xattrs, nil
+}
+
+// RemoveXattr implements File.RemoveXattr.
+func (c *clientFile) RemoveXattr(name string) error {
+ if atomic.LoadUint32(&c.closed) != 0 {
+ return syscall.EBADF
+ }
+ if !versionSupportsListRemoveXattr(c.client.version) {
+ return syscall.EOPNOTSUPP
+ }
+
+ return c.client.sendRecv(&Tremovexattr{FID: c.fid, Name: name}, &Rremovexattr{})
+}
+
// Allocate implements File.Allocate.
func (c *clientFile) Allocate(mode AllocateMode, offset, length uint64) error {
if atomic.LoadUint32(&c.closed) != 0 {
diff --git a/pkg/p9/file.go b/pkg/p9/file.go
index 4607cfcdf..d4ffbc8e3 100644
--- a/pkg/p9/file.go
+++ b/pkg/p9/file.go
@@ -105,6 +105,22 @@ type File interface {
// TODO(b/127675828): Determine concurrency guarantees once implemented.
SetXattr(name, value string, flags uint32) error
+ // ListXattr lists the names of the extended attributes on this node.
+ //
+ // Size indicates the size of the buffer that has been allocated to hold the
+ // attribute list. If the list would be larger than size, implementations may
+ // return ERANGE to indicate that the buffer is too small, but they are also
+ // free to ignore the hint entirely (i.e. the value returned may be larger
+ // than size). All size checking is done independently at the syscall layer.
+ //
+ // TODO(b/148303075): Determine concurrency guarantees once implemented.
+ ListXattr(size uint64) (map[string]struct{}, error)
+
+ // RemoveXattr removes extended attributes on this node.
+ //
+ // TODO(b/148303075): Determine concurrency guarantees once implemented.
+ RemoveXattr(name string) error
+
// Allocate allows the caller to directly manipulate the allocated disk space
// for the file. See fallocate(2) for more details.
Allocate(mode AllocateMode, offset, length uint64) error
diff --git a/pkg/p9/handlers.go b/pkg/p9/handlers.go
index 7d6653a07..2ac45eb80 100644
--- a/pkg/p9/handlers.go
+++ b/pkg/p9/handlers.go
@@ -942,6 +942,39 @@ func (t *Tsetxattr) handle(cs *connState) message {
}
// handle implements handler.handle.
+func (t *Tlistxattr) handle(cs *connState) message {
+ ref, ok := cs.LookupFID(t.FID)
+ if !ok {
+ return newErr(syscall.EBADF)
+ }
+ defer ref.DecRef()
+
+ xattrs, err := ref.file.ListXattr(t.Size)
+ if err != nil {
+ return newErr(err)
+ }
+ xattrList := make([]string, 0, len(xattrs))
+ for x := range xattrs {
+ xattrList = append(xattrList, x)
+ }
+ return &Rlistxattr{Xattrs: xattrList}
+}
+
+// handle implements handler.handle.
+func (t *Tremovexattr) handle(cs *connState) message {
+ ref, ok := cs.LookupFID(t.FID)
+ if !ok {
+ return newErr(syscall.EBADF)
+ }
+ defer ref.DecRef()
+
+ if err := ref.file.RemoveXattr(t.Name); err != nil {
+ return newErr(err)
+ }
+ return &Rremovexattr{}
+}
+
+// handle implements handler.handle.
func (t *Treaddir) handle(cs *connState) message {
ref, ok := cs.LookupFID(t.Directory)
if !ok {
diff --git a/pkg/p9/messages.go b/pkg/p9/messages.go
index ceb723d86..b1cede5f5 100644
--- a/pkg/p9/messages.go
+++ b/pkg/p9/messages.go
@@ -174,11 +174,11 @@ type Rflush struct {
}
// Decode implements encoder.Decode.
-func (*Rflush) Decode(b *buffer) {
+func (*Rflush) Decode(*buffer) {
}
// Encode implements encoder.Encode.
-func (*Rflush) Encode(b *buffer) {
+func (*Rflush) Encode(*buffer) {
}
// Type implements message.Type.
@@ -188,7 +188,7 @@ func (*Rflush) Type() MsgType {
// String implements fmt.Stringer.
func (r *Rflush) String() string {
- return fmt.Sprintf("RFlush{}")
+ return "RFlush{}"
}
// Twalk is a walk request.
@@ -300,11 +300,11 @@ type Rclunk struct {
}
// Decode implements encoder.Decode.
-func (*Rclunk) Decode(b *buffer) {
+func (*Rclunk) Decode(*buffer) {
}
// Encode implements encoder.Encode.
-func (*Rclunk) Encode(b *buffer) {
+func (*Rclunk) Encode(*buffer) {
}
// Type implements message.Type.
@@ -314,7 +314,7 @@ func (*Rclunk) Type() MsgType {
// String implements fmt.Stringer.
func (r *Rclunk) String() string {
- return fmt.Sprintf("Rclunk{}")
+ return "Rclunk{}"
}
// Tremove is a remove request.
@@ -350,11 +350,11 @@ type Rremove struct {
}
// Decode implements encoder.Decode.
-func (*Rremove) Decode(b *buffer) {
+func (*Rremove) Decode(*buffer) {
}
// Encode implements encoder.Encode.
-func (*Rremove) Encode(b *buffer) {
+func (*Rremove) Encode(*buffer) {
}
// Type implements message.Type.
@@ -364,7 +364,7 @@ func (*Rremove) Type() MsgType {
// String implements fmt.Stringer.
func (r *Rremove) String() string {
- return fmt.Sprintf("Rremove{}")
+ return "Rremove{}"
}
// Rlerror is an error response.
@@ -745,16 +745,16 @@ func (*Rlink) Type() MsgType {
}
// Decode implements encoder.Decode.
-func (*Rlink) Decode(b *buffer) {
+func (*Rlink) Decode(*buffer) {
}
// Encode implements encoder.Encode.
-func (*Rlink) Encode(b *buffer) {
+func (*Rlink) Encode(*buffer) {
}
// String implements fmt.Stringer.
func (r *Rlink) String() string {
- return fmt.Sprintf("Rlink{}")
+ return "Rlink{}"
}
// Trenameat is a rename request.
@@ -803,11 +803,11 @@ type Rrenameat struct {
}
// Decode implements encoder.Decode.
-func (*Rrenameat) Decode(b *buffer) {
+func (*Rrenameat) Decode(*buffer) {
}
// Encode implements encoder.Encode.
-func (*Rrenameat) Encode(b *buffer) {
+func (*Rrenameat) Encode(*buffer) {
}
// Type implements message.Type.
@@ -817,7 +817,7 @@ func (*Rrenameat) Type() MsgType {
// String implements fmt.Stringer.
func (r *Rrenameat) String() string {
- return fmt.Sprintf("Rrenameat{}")
+ return "Rrenameat{}"
}
// Tunlinkat is an unlink request.
@@ -861,11 +861,11 @@ type Runlinkat struct {
}
// Decode implements encoder.Decode.
-func (*Runlinkat) Decode(b *buffer) {
+func (*Runlinkat) Decode(*buffer) {
}
// Encode implements encoder.Encode.
-func (*Runlinkat) Encode(b *buffer) {
+func (*Runlinkat) Encode(*buffer) {
}
// Type implements message.Type.
@@ -875,7 +875,7 @@ func (*Runlinkat) Type() MsgType {
// String implements fmt.Stringer.
func (r *Runlinkat) String() string {
- return fmt.Sprintf("Runlinkat{}")
+ return "Runlinkat{}"
}
// Trename is a rename request.
@@ -922,11 +922,11 @@ type Rrename struct {
}
// Decode implements encoder.Decode.
-func (*Rrename) Decode(b *buffer) {
+func (*Rrename) Decode(*buffer) {
}
// Encode implements encoder.Encode.
-func (*Rrename) Encode(b *buffer) {
+func (*Rrename) Encode(*buffer) {
}
// Type implements message.Type.
@@ -936,7 +936,7 @@ func (*Rrename) Type() MsgType {
// String implements fmt.Stringer.
func (r *Rrename) String() string {
- return fmt.Sprintf("Rrename{}")
+ return "Rrename{}"
}
// Treadlink is a readlink request.
@@ -1409,11 +1409,11 @@ type Rsetattr struct {
}
// Decode implements encoder.Decode.
-func (*Rsetattr) Decode(b *buffer) {
+func (*Rsetattr) Decode(*buffer) {
}
// Encode implements encoder.Encode.
-func (*Rsetattr) Encode(b *buffer) {
+func (*Rsetattr) Encode(*buffer) {
}
// Type implements message.Type.
@@ -1423,7 +1423,7 @@ func (*Rsetattr) Type() MsgType {
// String implements fmt.Stringer.
func (r *Rsetattr) String() string {
- return fmt.Sprintf("Rsetattr{}")
+ return "Rsetattr{}"
}
// Tallocate is an allocate request. This is an extension to 9P protocol, not
@@ -1466,11 +1466,11 @@ type Rallocate struct {
}
// Decode implements encoder.Decode.
-func (*Rallocate) Decode(b *buffer) {
+func (*Rallocate) Decode(*buffer) {
}
// Encode implements encoder.Encode.
-func (*Rallocate) Encode(b *buffer) {
+func (*Rallocate) Encode(*buffer) {
}
// Type implements message.Type.
@@ -1480,7 +1480,71 @@ func (*Rallocate) Type() MsgType {
// String implements fmt.Stringer.
func (r *Rallocate) String() string {
- return fmt.Sprintf("Rallocate{}")
+ return "Rallocate{}"
+}
+
+// Tlistxattr is a listxattr request.
+type Tlistxattr struct {
+ // FID refers to the file on which to list xattrs.
+ FID FID
+
+ // Size is the buffer size for the xattr list.
+ Size uint64
+}
+
+// Decode implements encoder.Decode.
+func (t *Tlistxattr) Decode(b *buffer) {
+ t.FID = b.ReadFID()
+ t.Size = b.Read64()
+}
+
+// Encode implements encoder.Encode.
+func (t *Tlistxattr) Encode(b *buffer) {
+ b.WriteFID(t.FID)
+ b.Write64(t.Size)
+}
+
+// Type implements message.Type.
+func (*Tlistxattr) Type() MsgType {
+ return MsgTlistxattr
+}
+
+// String implements fmt.Stringer.
+func (t *Tlistxattr) String() string {
+ return fmt.Sprintf("Tlistxattr{FID: %d, Size: %d}", t.FID, t.Size)
+}
+
+// Rlistxattr is a listxattr response.
+type Rlistxattr struct {
+ // Xattrs is a list of extended attribute names.
+ Xattrs []string
+}
+
+// Decode implements encoder.Decode.
+func (r *Rlistxattr) Decode(b *buffer) {
+ n := b.Read16()
+ r.Xattrs = r.Xattrs[:0]
+ for i := 0; i < int(n); i++ {
+ r.Xattrs = append(r.Xattrs, b.ReadString())
+ }
+}
+
+// Encode implements encoder.Encode.
+func (r *Rlistxattr) Encode(b *buffer) {
+ b.Write16(uint16(len(r.Xattrs)))
+ for _, x := range r.Xattrs {
+ b.WriteString(x)
+ }
+}
+
+// Type implements message.Type.
+func (*Rlistxattr) Type() MsgType {
+ return MsgRlistxattr
+}
+
+// String implements fmt.Stringer.
+func (r *Rlistxattr) String() string {
+ return fmt.Sprintf("Rlistxattr{Xattrs: %v}", r.Xattrs)
}
// Txattrwalk walks extended attributes.
@@ -1594,11 +1658,11 @@ type Rxattrcreate struct {
}
// Decode implements encoder.Decode.
-func (r *Rxattrcreate) Decode(b *buffer) {
+func (r *Rxattrcreate) Decode(*buffer) {
}
// Encode implements encoder.Encode.
-func (r *Rxattrcreate) Encode(b *buffer) {
+func (r *Rxattrcreate) Encode(*buffer) {
}
// Type implements message.Type.
@@ -1608,7 +1672,7 @@ func (*Rxattrcreate) Type() MsgType {
// String implements fmt.Stringer.
func (r *Rxattrcreate) String() string {
- return fmt.Sprintf("Rxattrcreate{}")
+ return "Rxattrcreate{}"
}
// Tgetxattr is a getxattr request.
@@ -1719,11 +1783,11 @@ type Rsetxattr struct {
}
// Decode implements encoder.Decode.
-func (r *Rsetxattr) Decode(b *buffer) {
+func (r *Rsetxattr) Decode(*buffer) {
}
// Encode implements encoder.Encode.
-func (r *Rsetxattr) Encode(b *buffer) {
+func (r *Rsetxattr) Encode(*buffer) {
}
// Type implements message.Type.
@@ -1733,7 +1797,60 @@ func (*Rsetxattr) Type() MsgType {
// String implements fmt.Stringer.
func (r *Rsetxattr) String() string {
- return fmt.Sprintf("Rsetxattr{}")
+ return "Rsetxattr{}"
+}
+
+// Tremovexattr is a removexattr request.
+type Tremovexattr struct {
+ // FID refers to the file on which to set xattrs.
+ FID FID
+
+ // Name is the attribute name.
+ Name string
+}
+
+// Decode implements encoder.Decode.
+func (t *Tremovexattr) Decode(b *buffer) {
+ t.FID = b.ReadFID()
+ t.Name = b.ReadString()
+}
+
+// Encode implements encoder.Encode.
+func (t *Tremovexattr) Encode(b *buffer) {
+ b.WriteFID(t.FID)
+ b.WriteString(t.Name)
+}
+
+// Type implements message.Type.
+func (*Tremovexattr) Type() MsgType {
+ return MsgTremovexattr
+}
+
+// String implements fmt.Stringer.
+func (t *Tremovexattr) String() string {
+ return fmt.Sprintf("Tremovexattr{FID: %d, Name: %s}", t.FID, t.Name)
+}
+
+// Rremovexattr is a removexattr response.
+type Rremovexattr struct {
+}
+
+// Decode implements encoder.Decode.
+func (r *Rremovexattr) Decode(*buffer) {
+}
+
+// Encode implements encoder.Encode.
+func (r *Rremovexattr) Encode(*buffer) {
+}
+
+// Type implements message.Type.
+func (*Rremovexattr) Type() MsgType {
+ return MsgRremovexattr
+}
+
+// String implements fmt.Stringer.
+func (r *Rremovexattr) String() string {
+ return "Rremovexattr{}"
}
// Treaddir is a readdir request.
@@ -1880,11 +1997,11 @@ type Rfsync struct {
}
// Decode implements encoder.Decode.
-func (*Rfsync) Decode(b *buffer) {
+func (*Rfsync) Decode(*buffer) {
}
// Encode implements encoder.Encode.
-func (*Rfsync) Encode(b *buffer) {
+func (*Rfsync) Encode(*buffer) {
}
// Type implements message.Type.
@@ -1894,7 +2011,7 @@ func (*Rfsync) Type() MsgType {
// String implements fmt.Stringer.
func (r *Rfsync) String() string {
- return fmt.Sprintf("Rfsync{}")
+ return "Rfsync{}"
}
// Tstatfs is a stat request.
@@ -1980,11 +2097,11 @@ type Rflushf struct {
}
// Decode implements encoder.Decode.
-func (*Rflushf) Decode(b *buffer) {
+func (*Rflushf) Decode(*buffer) {
}
// Encode implements encoder.Encode.
-func (*Rflushf) Encode(b *buffer) {
+func (*Rflushf) Encode(*buffer) {
}
// Type implements message.Type.
@@ -1994,7 +2111,7 @@ func (*Rflushf) Type() MsgType {
// String implements fmt.Stringer.
func (*Rflushf) String() string {
- return fmt.Sprintf("Rflushf{}")
+ return "Rflushf{}"
}
// Twalkgetattr is a walk request.
@@ -2484,6 +2601,8 @@ func init() {
msgRegistry.register(MsgRgetattr, func() message { return &Rgetattr{} })
msgRegistry.register(MsgTsetattr, func() message { return &Tsetattr{} })
msgRegistry.register(MsgRsetattr, func() message { return &Rsetattr{} })
+ msgRegistry.register(MsgTlistxattr, func() message { return &Tlistxattr{} })
+ msgRegistry.register(MsgRlistxattr, func() message { return &Rlistxattr{} })
msgRegistry.register(MsgTxattrwalk, func() message { return &Txattrwalk{} })
msgRegistry.register(MsgRxattrwalk, func() message { return &Rxattrwalk{} })
msgRegistry.register(MsgTxattrcreate, func() message { return &Txattrcreate{} })
@@ -2492,6 +2611,8 @@ func init() {
msgRegistry.register(MsgRgetxattr, func() message { return &Rgetxattr{} })
msgRegistry.register(MsgTsetxattr, func() message { return &Tsetxattr{} })
msgRegistry.register(MsgRsetxattr, func() message { return &Rsetxattr{} })
+ msgRegistry.register(MsgTremovexattr, func() message { return &Tremovexattr{} })
+ msgRegistry.register(MsgRremovexattr, func() message { return &Rremovexattr{} })
msgRegistry.register(MsgTreaddir, func() message { return &Treaddir{} })
msgRegistry.register(MsgRreaddir, func() message { return &Rreaddir{} })
msgRegistry.register(MsgTfsync, func() message { return &Tfsync{} })
diff --git a/pkg/p9/p9.go b/pkg/p9/p9.go
index 5ab00d625..20ab31f7a 100644
--- a/pkg/p9/p9.go
+++ b/pkg/p9/p9.go
@@ -335,6 +335,8 @@ const (
MsgRgetattr = 25
MsgTsetattr = 26
MsgRsetattr = 27
+ MsgTlistxattr = 28
+ MsgRlistxattr = 29
MsgTxattrwalk = 30
MsgRxattrwalk = 31
MsgTxattrcreate = 32
@@ -343,6 +345,8 @@ const (
MsgRgetxattr = 35
MsgTsetxattr = 36
MsgRsetxattr = 37
+ MsgTremovexattr = 38
+ MsgRremovexattr = 39
MsgTreaddir = 40
MsgRreaddir = 41
MsgTfsync = 50
diff --git a/pkg/p9/version.go b/pkg/p9/version.go
index 34a15eb55..09cde9f5a 100644
--- a/pkg/p9/version.go
+++ b/pkg/p9/version.go
@@ -26,7 +26,7 @@ const (
//
// Clients are expected to start requesting this version number and
// to continuously decrement it until a Tversion request succeeds.
- highestSupportedVersion uint32 = 10
+ highestSupportedVersion uint32 = 11
// lowestSupportedVersion is the lowest supported version X in a
// version string of the format 9P2000.L.Google.X.
@@ -167,3 +167,9 @@ func VersionSupportsOpenTruncateFlag(v uint32) bool {
func versionSupportsGetSetXattr(v uint32) bool {
return v >= 10
}
+
+// versionSupportsListRemoveXattr returns true if version v supports
+// the Tlistxattr and Tremovexattr messages.
+func versionSupportsListRemoveXattr(v uint32) bool {
+ return v >= 11
+}
diff --git a/pkg/seccomp/BUILD b/pkg/seccomp/BUILD
index 742c8b79b..c5fca2ba3 100644
--- a/pkg/seccomp/BUILD
+++ b/pkg/seccomp/BUILD
@@ -26,7 +26,7 @@ go_library(
"seccomp_rules.go",
"seccomp_unsafe.go",
],
- visibility = ["//visibility:public"],
+ visibility = ["//:sandbox"],
deps = [
"//pkg/abi/linux",
"//pkg/bpf",
diff --git a/pkg/sentry/BUILD b/pkg/sentry/BUILD
index e8b794179..e759dc36f 100644
--- a/pkg/sentry/BUILD
+++ b/pkg/sentry/BUILD
@@ -1,13 +1,11 @@
-# This BUILD file defines a package_group that allows for interdependencies for
-# sentry-internal packages.
-
package(licenses = ["notice"])
+# The "internal" package_group should be used as much as possible by packages
+# that should remain Sentry-internal (i.e. not be exposed directly to command
+# line tooling or APIs).
package_group(
name = "internal",
packages = [
- "//cloud/gvisor/gopkg/sentry/...",
- "//cloud/gvisor/sentry/...",
"//pkg/sentry/...",
"//runsc/...",
# Code generated by go_marshal relies on go_marshal libraries.
diff --git a/pkg/sentry/arch/BUILD b/pkg/sentry/arch/BUILD
index 34c0a867d..e27f21e5e 100644
--- a/pkg/sentry/arch/BUILD
+++ b/pkg/sentry/arch/BUILD
@@ -14,6 +14,7 @@ go_library(
"arch_state_aarch64.go",
"arch_state_x86.go",
"arch_x86.go",
+ "arch_x86_impl.go",
"auxv.go",
"signal.go",
"signal_act.go",
diff --git a/pkg/sentry/arch/arch_amd64.s b/pkg/sentry/arch/arch_amd64.s
index bd61402cf..6c10336e7 100644
--- a/pkg/sentry/arch/arch_amd64.s
+++ b/pkg/sentry/arch/arch_amd64.s
@@ -26,10 +26,11 @@
//
// func initX86FPState(data *FloatingPointData, useXsave bool)
//
-// We need to clear out and initialize an empty fp state area since the sentry
-// may have left sensitive information in the floating point registers.
+// We need to clear out and initialize an empty fp state area since the sentry,
+// or any previous loader, may have left sensitive information in the floating
+// point registers.
//
-// Preconditions: data is zeroed
+// Preconditions: data is zeroed.
TEXT ·initX86FPState(SB), $24-16
// Save MXCSR (callee-save)
STMXCSR mxcsr-8(SP)
diff --git a/pkg/sentry/arch/arch_state_x86.go b/pkg/sentry/arch/arch_state_x86.go
index d388ee9cf..e35c9214a 100644
--- a/pkg/sentry/arch/arch_state_x86.go
+++ b/pkg/sentry/arch/arch_state_x86.go
@@ -43,8 +43,8 @@ func (e ErrFloatingPoint) Error() string {
// and SSE state, so this is the equivalent XSTATE_BV value.
const fxsaveBV uint64 = cpuid.XSAVEFeatureX87 | cpuid.XSAVEFeatureSSE
-// afterLoad is invoked by stateify.
-func (s *State) afterLoad() {
+// afterLoadFPState is invoked by afterLoad.
+func (s *State) afterLoadFPState() {
old := s.x86FPState
// Recreate the slice. This is done to ensure that it is aligned
diff --git a/pkg/sentry/arch/arch_x86.go b/pkg/sentry/arch/arch_x86.go
index 3db8bd34b..88b40a9d1 100644
--- a/pkg/sentry/arch/arch_x86.go
+++ b/pkg/sentry/arch/arch_x86.go
@@ -155,21 +155,6 @@ func NewFloatingPointData() *FloatingPointData {
return (*FloatingPointData)(&(newX86FPState()[0]))
}
-// State contains the common architecture bits for X86 (the build tag of this
-// file ensures it's only built on x86).
-//
-// +stateify savable
-type State struct {
- // The system registers.
- Regs syscall.PtraceRegs `state:".(syscallPtraceRegs)"`
-
- // Our floating point state.
- x86FPState `state:"wait"`
-
- // FeatureSet is a pointer to the currently active feature set.
- FeatureSet *cpuid.FeatureSet
-}
-
// Proto returns a protobuf representation of the system registers in State.
func (s State) Proto() *rpb.Registers {
regs := &rpb.AMD64Registers{
diff --git a/pkg/sentry/arch/arch_x86_impl.go b/pkg/sentry/arch/arch_x86_impl.go
new file mode 100644
index 000000000..04ac283c6
--- /dev/null
+++ b/pkg/sentry/arch/arch_x86_impl.go
@@ -0,0 +1,43 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build amd64 i386
+
+package arch
+
+import (
+ "syscall"
+
+ "gvisor.dev/gvisor/pkg/cpuid"
+)
+
+// State contains the common architecture bits for X86 (the build tag of this
+// file ensures it's only built on x86).
+//
+// +stateify savable
+type State struct {
+ // The system registers.
+ Regs syscall.PtraceRegs `state:".(syscallPtraceRegs)"`
+
+ // Our floating point state.
+ x86FPState `state:"wait"`
+
+ // FeatureSet is a pointer to the currently active feature set.
+ FeatureSet *cpuid.FeatureSet
+}
+
+// afterLoad is invoked by stateify.
+func (s *State) afterLoad() {
+ s.afterLoadFPState()
+}
diff --git a/pkg/sentry/fs/copy_up.go b/pkg/sentry/fs/copy_up.go
index f6c79e51b..b060a12ff 100644
--- a/pkg/sentry/fs/copy_up.go
+++ b/pkg/sentry/fs/copy_up.go
@@ -401,7 +401,7 @@ func copyAttributesLocked(ctx context.Context, upper *Inode, lower *Inode) error
if err != nil {
return err
}
- lowerXattr, err := lower.ListXattr(ctx)
+ lowerXattr, err := lower.ListXattr(ctx, linux.XATTR_SIZE_MAX)
if err != nil && err != syserror.EOPNOTSUPP {
return err
}
diff --git a/pkg/sentry/fs/fsutil/inode.go b/pkg/sentry/fs/fsutil/inode.go
index 252830572..daecc4ffe 100644
--- a/pkg/sentry/fs/fsutil/inode.go
+++ b/pkg/sentry/fs/fsutil/inode.go
@@ -247,7 +247,7 @@ func (i *InodeSimpleExtendedAttributes) SetXattr(_ context.Context, _ *fs.Inode,
}
// ListXattr implements fs.InodeOperations.ListXattr.
-func (i *InodeSimpleExtendedAttributes) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) {
+func (i *InodeSimpleExtendedAttributes) ListXattr(context.Context, *fs.Inode, uint64) (map[string]struct{}, error) {
i.mu.RLock()
names := make(map[string]struct{}, len(i.xattrs))
for name := range i.xattrs {
@@ -257,6 +257,17 @@ func (i *InodeSimpleExtendedAttributes) ListXattr(context.Context, *fs.Inode) (m
return names, nil
}
+// RemoveXattr implements fs.InodeOperations.RemoveXattr.
+func (i *InodeSimpleExtendedAttributes) RemoveXattr(_ context.Context, _ *fs.Inode, name string) error {
+ i.mu.RLock()
+ defer i.mu.RUnlock()
+ if _, ok := i.xattrs[name]; ok {
+ delete(i.xattrs, name)
+ return nil
+ }
+ return syserror.ENOATTR
+}
+
// staticFile is a file with static contents. It is returned by
// InodeStaticFileGetter.GetFile.
//
@@ -460,10 +471,15 @@ func (InodeNoExtendedAttributes) SetXattr(context.Context, *fs.Inode, string, st
}
// ListXattr implements fs.InodeOperations.ListXattr.
-func (InodeNoExtendedAttributes) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) {
+func (InodeNoExtendedAttributes) ListXattr(context.Context, *fs.Inode, uint64) (map[string]struct{}, error) {
return nil, syserror.EOPNOTSUPP
}
+// RemoveXattr implements fs.InodeOperations.RemoveXattr.
+func (InodeNoExtendedAttributes) RemoveXattr(context.Context, *fs.Inode, string) error {
+ return syserror.EOPNOTSUPP
+}
+
// InodeNoopRelease implements fs.InodeOperations.Release as a noop.
type InodeNoopRelease struct{}
diff --git a/pkg/sentry/fs/gofer/attr.go b/pkg/sentry/fs/gofer/attr.go
index 71cccdc34..6db4b762d 100644
--- a/pkg/sentry/fs/gofer/attr.go
+++ b/pkg/sentry/fs/gofer/attr.go
@@ -88,8 +88,9 @@ func bsize(pattr p9.Attr) int64 {
if pattr.BlockSize > 0 {
return int64(pattr.BlockSize)
}
- // Some files may have no clue of their block size. Better not to report
- // something misleading or buggy and have a safe default.
+ // Some files, particularly those that are not on a local file system,
+ // may have no clue of their block size. Better not to report something
+ // misleading or buggy and have a safe default.
return usermem.PageSize
}
@@ -149,6 +150,7 @@ func links(valid p9.AttrMask, pattr p9.Attr) uint64 {
}
// This node is likely backed by a file system that doesn't support links.
+ //
// We could readdir() and count children directories to provide an accurate
// link count. However this may be expensive since the gofer may be backed by remote
// storage. Instead, simply return 2 links for directories and 1 for everything else
diff --git a/pkg/sentry/fs/gofer/context_file.go b/pkg/sentry/fs/gofer/context_file.go
index 3da818aed..125907d70 100644
--- a/pkg/sentry/fs/gofer/context_file.go
+++ b/pkg/sentry/fs/gofer/context_file.go
@@ -73,6 +73,20 @@ func (c *contextFile) setXattr(ctx context.Context, name, value string, flags ui
return err
}
+func (c *contextFile) listXattr(ctx context.Context, size uint64) (map[string]struct{}, error) {
+ ctx.UninterruptibleSleepStart(false)
+ xattrs, err := c.file.ListXattr(size)
+ ctx.UninterruptibleSleepFinish(false)
+ return xattrs, err
+}
+
+func (c *contextFile) removeXattr(ctx context.Context, name string) error {
+ ctx.UninterruptibleSleepStart(false)
+ err := c.file.RemoveXattr(name)
+ ctx.UninterruptibleSleepFinish(false)
+ return err
+}
+
func (c *contextFile) allocate(ctx context.Context, mode p9.AllocateMode, offset, length uint64) error {
ctx.UninterruptibleSleepStart(false)
err := c.file.Allocate(mode, offset, length)
diff --git a/pkg/sentry/fs/gofer/inode.go b/pkg/sentry/fs/gofer/inode.go
index ac28174d2..1c934981b 100644
--- a/pkg/sentry/fs/gofer/inode.go
+++ b/pkg/sentry/fs/gofer/inode.go
@@ -604,18 +604,23 @@ func (i *inodeOperations) Truncate(ctx context.Context, inode *fs.Inode, length
}
// GetXattr implements fs.InodeOperations.GetXattr.
-func (i *inodeOperations) GetXattr(ctx context.Context, inode *fs.Inode, name string, size uint64) (string, error) {
+func (i *inodeOperations) GetXattr(ctx context.Context, _ *fs.Inode, name string, size uint64) (string, error) {
return i.fileState.file.getXattr(ctx, name, size)
}
// SetXattr implements fs.InodeOperations.SetXattr.
-func (i *inodeOperations) SetXattr(ctx context.Context, inode *fs.Inode, name string, value string, flags uint32) error {
+func (i *inodeOperations) SetXattr(ctx context.Context, _ *fs.Inode, name string, value string, flags uint32) error {
return i.fileState.file.setXattr(ctx, name, value, flags)
}
// ListXattr implements fs.InodeOperations.ListXattr.
-func (i *inodeOperations) ListXattr(context.Context, *fs.Inode) (map[string]struct{}, error) {
- return nil, syscall.EOPNOTSUPP
+func (i *inodeOperations) ListXattr(ctx context.Context, _ *fs.Inode, size uint64) (map[string]struct{}, error) {
+ return i.fileState.file.listXattr(ctx, size)
+}
+
+// RemoveXattr implements fs.InodeOperations.RemoveXattr.
+func (i *inodeOperations) RemoveXattr(ctx context.Context, _ *fs.Inode, name string) error {
+ return i.fileState.file.removeXattr(ctx, name)
}
// Allocate implements fs.InodeOperations.Allocate.
diff --git a/pkg/sentry/fs/inode.go b/pkg/sentry/fs/inode.go
index b66c091ab..55fb71c16 100644
--- a/pkg/sentry/fs/inode.go
+++ b/pkg/sentry/fs/inode.go
@@ -278,11 +278,19 @@ func (i *Inode) SetXattr(ctx context.Context, d *Dirent, name, value string, fla
}
// ListXattr calls i.InodeOperations.ListXattr with i as the Inode.
-func (i *Inode) ListXattr(ctx context.Context) (map[string]struct{}, error) {
+func (i *Inode) ListXattr(ctx context.Context, size uint64) (map[string]struct{}, error) {
if i.overlay != nil {
- return overlayListXattr(ctx, i.overlay)
+ return overlayListXattr(ctx, i.overlay, size)
}
- return i.InodeOperations.ListXattr(ctx, i)
+ return i.InodeOperations.ListXattr(ctx, i, size)
+}
+
+// RemoveXattr calls i.InodeOperations.RemoveXattr with i as the Inode.
+func (i *Inode) RemoveXattr(ctx context.Context, d *Dirent, name string) error {
+ if i.overlay != nil {
+ return overlayRemoveXattr(ctx, i.overlay, d, name)
+ }
+ return i.InodeOperations.RemoveXattr(ctx, i, name)
}
// CheckPermission will check if the caller may access this file in the
diff --git a/pkg/sentry/fs/inode_operations.go b/pkg/sentry/fs/inode_operations.go
index 70f2eae96..2bbfb72ef 100644
--- a/pkg/sentry/fs/inode_operations.go
+++ b/pkg/sentry/fs/inode_operations.go
@@ -190,7 +190,18 @@ type InodeOperations interface {
// ListXattr returns the set of all extended attributes names that
// have values. Inodes that do not support extended attributes return
// EOPNOTSUPP.
- ListXattr(ctx context.Context, inode *Inode) (map[string]struct{}, error)
+ //
+ // If this is called through the listxattr(2) syscall, size indicates the
+ // size of the buffer that the application has allocated to hold the
+ // attribute list. If the list would be larger than size, implementations may
+ // return ERANGE to indicate that the buffer is too small, but they are also
+ // free to ignore the hint entirely. All size checking is done independently
+ // at the syscall layer.
+ ListXattr(ctx context.Context, inode *Inode, size uint64) (map[string]struct{}, error)
+
+ // RemoveXattr removes an extended attribute specified by name. Inodes that
+ // do not support extended attributes return EOPNOTSUPP.
+ RemoveXattr(ctx context.Context, inode *Inode, name string) error
// Check determines whether an Inode can be accessed with the
// requested permission mask using the context (which gives access
diff --git a/pkg/sentry/fs/inode_overlay.go b/pkg/sentry/fs/inode_overlay.go
index 4729b4aac..5ada33a32 100644
--- a/pkg/sentry/fs/inode_overlay.go
+++ b/pkg/sentry/fs/inode_overlay.go
@@ -564,15 +564,15 @@ func overlaySetxattr(ctx context.Context, o *overlayEntry, d *Dirent, name, valu
return o.upper.SetXattr(ctx, d, name, value, flags)
}
-func overlayListXattr(ctx context.Context, o *overlayEntry) (map[string]struct{}, error) {
+func overlayListXattr(ctx context.Context, o *overlayEntry, size uint64) (map[string]struct{}, error) {
o.copyMu.RLock()
defer o.copyMu.RUnlock()
var names map[string]struct{}
var err error
if o.upper != nil {
- names, err = o.upper.ListXattr(ctx)
+ names, err = o.upper.ListXattr(ctx, size)
} else {
- names, err = o.lower.ListXattr(ctx)
+ names, err = o.lower.ListXattr(ctx, size)
}
for name := range names {
// Same as overlayGetXattr, we shouldn't forward along
@@ -584,6 +584,18 @@ func overlayListXattr(ctx context.Context, o *overlayEntry) (map[string]struct{}
return names, err
}
+func overlayRemoveXattr(ctx context.Context, o *overlayEntry, d *Dirent, name string) error {
+ // Don't allow changes to overlay xattrs through a removexattr syscall.
+ if strings.HasPrefix(XattrOverlayPrefix, name) {
+ return syserror.EPERM
+ }
+
+ if err := copyUp(ctx, d); err != nil {
+ return err
+ }
+ return o.upper.RemoveXattr(ctx, d, name)
+}
+
func overlayCheck(ctx context.Context, o *overlayEntry, p PermMask) error {
o.copyMu.RLock()
// Hot path. Avoid defers.
diff --git a/pkg/sentry/fs/tmpfs/tmpfs.go b/pkg/sentry/fs/tmpfs/tmpfs.go
index c00cef0a5..3c2b583ae 100644
--- a/pkg/sentry/fs/tmpfs/tmpfs.go
+++ b/pkg/sentry/fs/tmpfs/tmpfs.go
@@ -159,8 +159,13 @@ func (d *Dir) SetXattr(ctx context.Context, i *fs.Inode, name, value string, fla
}
// ListXattr implements fs.InodeOperations.ListXattr.
-func (d *Dir) ListXattr(ctx context.Context, i *fs.Inode) (map[string]struct{}, error) {
- return d.ramfsDir.ListXattr(ctx, i)
+func (d *Dir) ListXattr(ctx context.Context, i *fs.Inode, size uint64) (map[string]struct{}, error) {
+ return d.ramfsDir.ListXattr(ctx, i, size)
+}
+
+// RemoveXattr implements fs.InodeOperations.RemoveXattr.
+func (d *Dir) RemoveXattr(ctx context.Context, i *fs.Inode, name string) error {
+ return d.ramfsDir.RemoveXattr(ctx, i, name)
}
// Lookup implements fs.InodeOperations.Lookup.
diff --git a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
index d1436b943..2015a8871 100644
--- a/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
+++ b/pkg/sentry/fsimpl/ext/benchmark/benchmark_test.go
@@ -15,6 +15,9 @@
// These benchmarks emulate memfs benchmarks. Ext4 images must be created
// before this benchmark is run using the `make_deep_ext4.sh` script at
// /tmp/image-{depth}.ext4 for all the depths tested below.
+//
+// The benchmark itself cannot run the script because the script requires
+// sudo privileges to create the file system images.
package benchmark_test
import (
diff --git a/pkg/sentry/fsimpl/gofer/directory.go b/pkg/sentry/fsimpl/gofer/directory.go
index baa2cdd8e..6d4ebc2bf 100644
--- a/pkg/sentry/fsimpl/gofer/directory.go
+++ b/pkg/sentry/fsimpl/gofer/directory.go
@@ -87,6 +87,10 @@ func (d *dentry) getDirents(ctx context.Context) ([]vfs.Dirent, error) {
// to assume that directory fids have the correct semantics, and translates
// struct file_operations::readdir calls directly to readdir RPCs), but is
// consistent with VFS1.
+ //
+ // NOTE(b/135560623): In particular, some gofer implementations may not
+ // retain state between calls to Readdir, so may not provide a coherent
+ // directory stream across in the presence of mutation.
d.fs.renameMu.RLock()
defer d.fs.renameMu.RUnlock()
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index 9d65d0179..e49303c26 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -111,10 +111,10 @@ func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.Vir
// Dentry isn't cached; it either doesn't exist or failed
// revalidation. Attempt to resolve it via Lookup.
//
- // FIXME(b/144498111): Inode.Lookup() should return *(kernfs.)Dentry,
- // not *vfs.Dentry, since (kernfs.)Filesystem assumes that all dentries
- // in the filesystem are (kernfs.)Dentry and performs vfs.DentryImpl
- // casts accordingly.
+ // FIXME(gvisor.dev/issue/1193): Inode.Lookup() should return
+ // *(kernfs.)Dentry, not *vfs.Dentry, since (kernfs.)Filesystem assumes
+ // that all dentries in the filesystem are (kernfs.)Dentry and performs
+ // vfs.DentryImpl casts accordingly.
var err error
childVFSD, err = parent.inode.Lookup(ctx, name)
if err != nil {
diff --git a/pkg/sentry/socket/netfilter/extensions.go b/pkg/sentry/socket/netfilter/extensions.go
index 3082976cd..22fd0ebe7 100644
--- a/pkg/sentry/socket/netfilter/extensions.go
+++ b/pkg/sentry/socket/netfilter/extensions.go
@@ -45,6 +45,8 @@ type matchMaker interface {
unmarshal(buf []byte, filter iptables.IPHeaderFilter) (iptables.Matcher, error)
}
+// matchMakers maps the name of supported matchers to the matchMaker that
+// marshals and unmarshals it. It is immutable after package initialization.
var matchMakers = map[string]matchMaker{}
// registermatchMaker should be called by match extensions to register them
@@ -59,7 +61,7 @@ func registerMatchMaker(mm matchMaker) {
func marshalMatcher(matcher iptables.Matcher) []byte {
matchMaker, ok := matchMakers[matcher.Name()]
if !ok {
- panic(fmt.Errorf("Unknown matcher of type %T.", matcher))
+ panic(fmt.Sprintf("Unknown matcher of type %T.", matcher))
}
return matchMaker.marshal(matcher)
}
diff --git a/pkg/sentry/socket/netfilter/netfilter.go b/pkg/sentry/socket/netfilter/netfilter.go
index ea43a0ce3..ea02627de 100644
--- a/pkg/sentry/socket/netfilter/netfilter.go
+++ b/pkg/sentry/socket/netfilter/netfilter.go
@@ -149,7 +149,6 @@ func FillDefaultIPTables(stack *stack.Stack) {
stack.SetIPTables(ipt)
}
-// TODO: Return proto.
// convertNetstackToBinary converts the iptables as stored in netstack to the
// format expected by the iptables tool. Linux stores each table as a binary
// blob that can only be traversed by parsing a bit, reading some offsets,
@@ -161,7 +160,7 @@ func convertNetstackToBinary(tablename string, table iptables.Table) (linux.Kern
// The table name has to fit in the struct.
if linux.XT_TABLE_MAXNAMELEN < len(tablename) {
- return linux.KernelIPTGetEntries{}, metadata{}, fmt.Errorf("Table name %q too long.", tablename)
+ return linux.KernelIPTGetEntries{}, metadata{}, fmt.Errorf("table name %q too long.", tablename)
}
copy(entries.Name[:], tablename)
@@ -302,7 +301,7 @@ func translateToStandardVerdict(val int32) (iptables.Verdict, error) {
case linux.NF_RETURN:
return iptables.Invalid, errors.New("unsupported iptables verdict RETURN")
default:
- return iptables.Invalid, fmt.Errorf("unknown iptables verdict %d.", val)
+ return iptables.Invalid, fmt.Errorf("unknown iptables verdict %d", val)
}
}
@@ -553,12 +552,12 @@ func parseTarget(optVal []byte) (iptables.Target, error) {
case errorTargetName:
return iptables.ErrorTarget{}, nil
default:
- return nil, fmt.Errorf("Unknown error target %q doesn't exist or isn't supported yet.", errorTarget.Name.String())
+ return nil, fmt.Errorf("unknown error target %q doesn't exist or isn't supported yet.", errorTarget.Name.String())
}
}
// Unknown target.
- return nil, fmt.Errorf("Unknown target %q doesn't exist or isn't supported yet.", target.Name.String())
+ return nil, fmt.Errorf("unknown target %q doesn't exist or isn't supported yet.", target.Name.String())
}
func filterFromIPTIP(iptip linux.IPTIP) (iptables.IPHeaderFilter, error) {
diff --git a/pkg/sentry/socket/netfilter/tcp_matcher.go b/pkg/sentry/socket/netfilter/tcp_matcher.go
index 6b2f4c31a..f9945e214 100644
--- a/pkg/sentry/socket/netfilter/tcp_matcher.go
+++ b/pkg/sentry/socket/netfilter/tcp_matcher.go
@@ -48,7 +48,7 @@ func (tcpMarshaler) marshal(mr iptables.Matcher) []byte {
DestinationPortStart: matcher.destinationPortStart,
DestinationPortEnd: matcher.destinationPortEnd,
}
- buf := make([]byte, 0, linux.SizeOfXTUDP)
+ buf := make([]byte, 0, linux.SizeOfXTTCP)
return marshalEntryMatch(matcherNameTCP, binary.Marshal(buf, usermem.ByteOrder, xttcp))
}
diff --git a/pkg/sentry/strace/linux64_amd64.go b/pkg/sentry/strace/linux64_amd64.go
index 85ec66fd3..a4de545e9 100644
--- a/pkg/sentry/strace/linux64_amd64.go
+++ b/pkg/sentry/strace/linux64_amd64.go
@@ -78,8 +78,8 @@ var linuxAMD64 = SyscallMap{
51: makeSyscallInfo("getsockname", FD, PostSockAddr, SockLen),
52: makeSyscallInfo("getpeername", FD, PostSockAddr, SockLen),
53: makeSyscallInfo("socketpair", SockFamily, SockType, SockProtocol, Hex),
- 54: makeSyscallInfo("setsockopt", FD, Hex, Hex, Hex, Hex),
- 55: makeSyscallInfo("getsockopt", FD, Hex, Hex, Hex, Hex),
+ 54: makeSyscallInfo("setsockopt", FD, SockOptLevel, SockOptName, SetSockOptVal, Hex /* length by value, not a pointer */),
+ 55: makeSyscallInfo("getsockopt", FD, SockOptLevel, SockOptName, GetSockOptVal, SockLen),
56: makeSyscallInfo("clone", CloneFlags, Hex, Hex, Hex, Hex),
57: makeSyscallInfo("fork"),
58: makeSyscallInfo("vfork"),
diff --git a/pkg/sentry/strace/socket.go b/pkg/sentry/strace/socket.go
index d2079c85f..f7ff4573e 100644
--- a/pkg/sentry/strace/socket.go
+++ b/pkg/sentry/strace/socket.go
@@ -419,3 +419,218 @@ func sockFlags(flags int32) string {
}
return SocketFlagSet.Parse(uint64(flags))
}
+
+func getSockOptVal(t *kernel.Task, level, optname uint64, optVal usermem.Addr, optLen usermem.Addr, maximumBlobSize uint, rval uintptr) string {
+ if int(rval) < 0 {
+ return hexNum(uint64(optVal))
+ }
+ if optVal == 0 {
+ return "null"
+ }
+ l, err := copySockLen(t, optLen)
+ if err != nil {
+ return fmt.Sprintf("%#x {error reading length: %v}", optLen, err)
+ }
+ return sockOptVal(t, level, optname, optVal, uint64(l), maximumBlobSize)
+}
+
+func sockOptVal(t *kernel.Task, level, optname uint64, optVal usermem.Addr, optLen uint64, maximumBlobSize uint) string {
+ switch optLen {
+ case 1:
+ var v uint8
+ _, err := t.CopyIn(optVal, &v)
+ if err != nil {
+ return fmt.Sprintf("%#x {error reading optval: %v}", optVal, err)
+ }
+ return fmt.Sprintf("%#x {value=%v}", optVal, v)
+ case 2:
+ var v uint16
+ _, err := t.CopyIn(optVal, &v)
+ if err != nil {
+ return fmt.Sprintf("%#x {error reading optval: %v}", optVal, err)
+ }
+ return fmt.Sprintf("%#x {value=%v}", optVal, v)
+ case 4:
+ var v uint32
+ _, err := t.CopyIn(optVal, &v)
+ if err != nil {
+ return fmt.Sprintf("%#x {error reading optval: %v}", optVal, err)
+ }
+ return fmt.Sprintf("%#x {value=%v}", optVal, v)
+ default:
+ return dump(t, optVal, uint(optLen), maximumBlobSize)
+ }
+}
+
+var sockOptLevels = abi.ValueSet{
+ linux.SOL_IP: "SOL_IP",
+ linux.SOL_SOCKET: "SOL_SOCKET",
+ linux.SOL_TCP: "SOL_TCP",
+ linux.SOL_UDP: "SOL_UDP",
+ linux.SOL_IPV6: "SOL_IPV6",
+ linux.SOL_ICMPV6: "SOL_ICMPV6",
+ linux.SOL_RAW: "SOL_RAW",
+ linux.SOL_PACKET: "SOL_PACKET",
+ linux.SOL_NETLINK: "SOL_NETLINK",
+}
+
+var sockOptNames = map[uint64]abi.ValueSet{
+ linux.SOL_IP: {
+ linux.IP_TTL: "IP_TTL",
+ linux.IP_MULTICAST_TTL: "IP_MULTICAST_TTL",
+ linux.IP_MULTICAST_IF: "IP_MULTICAST_IF",
+ linux.IP_MULTICAST_LOOP: "IP_MULTICAST_LOOP",
+ linux.IP_TOS: "IP_TOS",
+ linux.IP_RECVTOS: "IP_RECVTOS",
+ linux.IPT_SO_GET_INFO: "IPT_SO_GET_INFO",
+ linux.IPT_SO_GET_ENTRIES: "IPT_SO_GET_ENTRIES",
+ linux.IP_ADD_MEMBERSHIP: "IP_ADD_MEMBERSHIP",
+ linux.IP_DROP_MEMBERSHIP: "IP_DROP_MEMBERSHIP",
+ linux.MCAST_JOIN_GROUP: "MCAST_JOIN_GROUP",
+ linux.IP_ADD_SOURCE_MEMBERSHIP: "IP_ADD_SOURCE_MEMBERSHIP",
+ linux.IP_BIND_ADDRESS_NO_PORT: "IP_BIND_ADDRESS_NO_PORT",
+ linux.IP_BLOCK_SOURCE: "IP_BLOCK_SOURCE",
+ linux.IP_CHECKSUM: "IP_CHECKSUM",
+ linux.IP_DROP_SOURCE_MEMBERSHIP: "IP_DROP_SOURCE_MEMBERSHIP",
+ linux.IP_FREEBIND: "IP_FREEBIND",
+ linux.IP_HDRINCL: "IP_HDRINCL",
+ linux.IP_IPSEC_POLICY: "IP_IPSEC_POLICY",
+ linux.IP_MINTTL: "IP_MINTTL",
+ linux.IP_MSFILTER: "IP_MSFILTER",
+ linux.IP_MTU_DISCOVER: "IP_MTU_DISCOVER",
+ linux.IP_MULTICAST_ALL: "IP_MULTICAST_ALL",
+ linux.IP_NODEFRAG: "IP_NODEFRAG",
+ linux.IP_OPTIONS: "IP_OPTIONS",
+ linux.IP_PASSSEC: "IP_PASSSEC",
+ linux.IP_PKTINFO: "IP_PKTINFO",
+ linux.IP_RECVERR: "IP_RECVERR",
+ linux.IP_RECVFRAGSIZE: "IP_RECVFRAGSIZE",
+ linux.IP_RECVOPTS: "IP_RECVOPTS",
+ linux.IP_RECVORIGDSTADDR: "IP_RECVORIGDSTADDR",
+ linux.IP_RECVTTL: "IP_RECVTTL",
+ linux.IP_RETOPTS: "IP_RETOPTS",
+ linux.IP_TRANSPARENT: "IP_TRANSPARENT",
+ linux.IP_UNBLOCK_SOURCE: "IP_UNBLOCK_SOURCE",
+ linux.IP_UNICAST_IF: "IP_UNICAST_IF",
+ linux.IP_XFRM_POLICY: "IP_XFRM_POLICY",
+ linux.MCAST_BLOCK_SOURCE: "MCAST_BLOCK_SOURCE",
+ linux.MCAST_JOIN_SOURCE_GROUP: "MCAST_JOIN_SOURCE_GROUP",
+ linux.MCAST_LEAVE_GROUP: "MCAST_LEAVE_GROUP",
+ linux.MCAST_LEAVE_SOURCE_GROUP: "MCAST_LEAVE_SOURCE_GROUP",
+ linux.MCAST_MSFILTER: "MCAST_MSFILTER",
+ linux.MCAST_UNBLOCK_SOURCE: "MCAST_UNBLOCK_SOURCE",
+ linux.IP_ROUTER_ALERT: "IP_ROUTER_ALERT",
+ linux.IP_PKTOPTIONS: "IP_PKTOPTIONS",
+ linux.IP_MTU: "IP_MTU",
+ },
+ linux.SOL_SOCKET: {
+ linux.SO_ERROR: "SO_ERROR",
+ linux.SO_PEERCRED: "SO_PEERCRED",
+ linux.SO_PASSCRED: "SO_PASSCRED",
+ linux.SO_SNDBUF: "SO_SNDBUF",
+ linux.SO_RCVBUF: "SO_RCVBUF",
+ linux.SO_REUSEADDR: "SO_REUSEADDR",
+ linux.SO_REUSEPORT: "SO_REUSEPORT",
+ linux.SO_BINDTODEVICE: "SO_BINDTODEVICE",
+ linux.SO_BROADCAST: "SO_BROADCAST",
+ linux.SO_KEEPALIVE: "SO_KEEPALIVE",
+ linux.SO_LINGER: "SO_LINGER",
+ linux.SO_SNDTIMEO: "SO_SNDTIMEO",
+ linux.SO_RCVTIMEO: "SO_RCVTIMEO",
+ linux.SO_OOBINLINE: "SO_OOBINLINE",
+ linux.SO_TIMESTAMP: "SO_TIMESTAMP",
+ },
+ linux.SOL_TCP: {
+ linux.TCP_NODELAY: "TCP_NODELAY",
+ linux.TCP_CORK: "TCP_CORK",
+ linux.TCP_QUICKACK: "TCP_QUICKACK",
+ linux.TCP_MAXSEG: "TCP_MAXSEG",
+ linux.TCP_KEEPIDLE: "TCP_KEEPIDLE",
+ linux.TCP_KEEPINTVL: "TCP_KEEPINTVL",
+ linux.TCP_USER_TIMEOUT: "TCP_USER_TIMEOUT",
+ linux.TCP_INFO: "TCP_INFO",
+ linux.TCP_CC_INFO: "TCP_CC_INFO",
+ linux.TCP_NOTSENT_LOWAT: "TCP_NOTSENT_LOWAT",
+ linux.TCP_ZEROCOPY_RECEIVE: "TCP_ZEROCOPY_RECEIVE",
+ linux.TCP_CONGESTION: "TCP_CONGESTION",
+ linux.TCP_LINGER2: "TCP_LINGER2",
+ linux.TCP_DEFER_ACCEPT: "TCP_DEFER_ACCEPT",
+ linux.TCP_REPAIR_OPTIONS: "TCP_REPAIR_OPTIONS",
+ linux.TCP_INQ: "TCP_INQ",
+ linux.TCP_FASTOPEN: "TCP_FASTOPEN",
+ linux.TCP_FASTOPEN_CONNECT: "TCP_FASTOPEN_CONNECT",
+ linux.TCP_FASTOPEN_KEY: "TCP_FASTOPEN_KEY",
+ linux.TCP_FASTOPEN_NO_COOKIE: "TCP_FASTOPEN_NO_COOKIE",
+ linux.TCP_KEEPCNT: "TCP_KEEPCNT",
+ linux.TCP_QUEUE_SEQ: "TCP_QUEUE_SEQ",
+ linux.TCP_REPAIR: "TCP_REPAIR",
+ linux.TCP_REPAIR_QUEUE: "TCP_REPAIR_QUEUE",
+ linux.TCP_REPAIR_WINDOW: "TCP_REPAIR_WINDOW",
+ linux.TCP_SAVED_SYN: "TCP_SAVED_SYN",
+ linux.TCP_SAVE_SYN: "TCP_SAVE_SYN",
+ linux.TCP_SYNCNT: "TCP_SYNCNT",
+ linux.TCP_THIN_DUPACK: "TCP_THIN_DUPACK",
+ linux.TCP_THIN_LINEAR_TIMEOUTS: "TCP_THIN_LINEAR_TIMEOUTS",
+ linux.TCP_TIMESTAMP: "TCP_TIMESTAMP",
+ linux.TCP_ULP: "TCP_ULP",
+ linux.TCP_WINDOW_CLAMP: "TCP_WINDOW_CLAMP",
+ },
+ linux.SOL_IPV6: {
+ linux.IPV6_V6ONLY: "IPV6_V6ONLY",
+ linux.IPV6_PATHMTU: "IPV6_PATHMTU",
+ linux.IPV6_TCLASS: "IPV6_TCLASS",
+ linux.IPV6_ADD_MEMBERSHIP: "IPV6_ADD_MEMBERSHIP",
+ linux.IPV6_DROP_MEMBERSHIP: "IPV6_DROP_MEMBERSHIP",
+ linux.IPV6_IPSEC_POLICY: "IPV6_IPSEC_POLICY",
+ linux.IPV6_JOIN_ANYCAST: "IPV6_JOIN_ANYCAST",
+ linux.IPV6_LEAVE_ANYCAST: "IPV6_LEAVE_ANYCAST",
+ linux.IPV6_PKTINFO: "IPV6_PKTINFO",
+ linux.IPV6_ROUTER_ALERT: "IPV6_ROUTER_ALERT",
+ linux.IPV6_XFRM_POLICY: "IPV6_XFRM_POLICY",
+ linux.MCAST_BLOCK_SOURCE: "MCAST_BLOCK_SOURCE",
+ linux.MCAST_JOIN_GROUP: "MCAST_JOIN_GROUP",
+ linux.MCAST_JOIN_SOURCE_GROUP: "MCAST_JOIN_SOURCE_GROUP",
+ linux.MCAST_LEAVE_GROUP: "MCAST_LEAVE_GROUP",
+ linux.MCAST_LEAVE_SOURCE_GROUP: "MCAST_LEAVE_SOURCE_GROUP",
+ linux.MCAST_UNBLOCK_SOURCE: "MCAST_UNBLOCK_SOURCE",
+ linux.IPV6_2292DSTOPTS: "IPV6_2292DSTOPTS",
+ linux.IPV6_2292HOPLIMIT: "IPV6_2292HOPLIMIT",
+ linux.IPV6_2292HOPOPTS: "IPV6_2292HOPOPTS",
+ linux.IPV6_2292PKTINFO: "IPV6_2292PKTINFO",
+ linux.IPV6_2292PKTOPTIONS: "IPV6_2292PKTOPTIONS",
+ linux.IPV6_2292RTHDR: "IPV6_2292RTHDR",
+ linux.IPV6_ADDR_PREFERENCES: "IPV6_ADDR_PREFERENCES",
+ linux.IPV6_AUTOFLOWLABEL: "IPV6_AUTOFLOWLABEL",
+ linux.IPV6_DONTFRAG: "IPV6_DONTFRAG",
+ linux.IPV6_DSTOPTS: "IPV6_DSTOPTS",
+ linux.IPV6_FLOWINFO: "IPV6_FLOWINFO",
+ linux.IPV6_FLOWINFO_SEND: "IPV6_FLOWINFO_SEND",
+ linux.IPV6_FLOWLABEL_MGR: "IPV6_FLOWLABEL_MGR",
+ linux.IPV6_FREEBIND: "IPV6_FREEBIND",
+ linux.IPV6_HOPOPTS: "IPV6_HOPOPTS",
+ linux.IPV6_MINHOPCOUNT: "IPV6_MINHOPCOUNT",
+ linux.IPV6_MTU: "IPV6_MTU",
+ linux.IPV6_MTU_DISCOVER: "IPV6_MTU_DISCOVER",
+ linux.IPV6_MULTICAST_ALL: "IPV6_MULTICAST_ALL",
+ linux.IPV6_MULTICAST_HOPS: "IPV6_MULTICAST_HOPS",
+ linux.IPV6_MULTICAST_IF: "IPV6_MULTICAST_IF",
+ linux.IPV6_MULTICAST_LOOP: "IPV6_MULTICAST_LOOP",
+ linux.IPV6_RECVDSTOPTS: "IPV6_RECVDSTOPTS",
+ linux.IPV6_RECVERR: "IPV6_RECVERR",
+ linux.IPV6_RECVFRAGSIZE: "IPV6_RECVFRAGSIZE",
+ linux.IPV6_RECVHOPLIMIT: "IPV6_RECVHOPLIMIT",
+ linux.IPV6_RECVHOPOPTS: "IPV6_RECVHOPOPTS",
+ linux.IPV6_RECVORIGDSTADDR: "IPV6_RECVORIGDSTADDR",
+ linux.IPV6_RECVPATHMTU: "IPV6_RECVPATHMTU",
+ linux.IPV6_RECVPKTINFO: "IPV6_RECVPKTINFO",
+ linux.IPV6_RECVRTHDR: "IPV6_RECVRTHDR",
+ linux.IPV6_RECVTCLASS: "IPV6_RECVTCLASS",
+ linux.IPV6_RTHDR: "IPV6_RTHDR",
+ linux.IPV6_RTHDRDSTOPTS: "IPV6_RTHDRDSTOPTS",
+ linux.IPV6_TRANSPARENT: "IPV6_TRANSPARENT",
+ linux.IPV6_UNICAST_HOPS: "IPV6_UNICAST_HOPS",
+ linux.IPV6_UNICAST_IF: "IPV6_UNICAST_IF",
+ linux.MCAST_MSFILTER: "MCAST_MSFILTER",
+ linux.IPV6_ADDRFORM: "IPV6_ADDRFORM",
+ },
+}
diff --git a/pkg/sentry/strace/strace.go b/pkg/sentry/strace/strace.go
index 3fc4a47fc..a796b2396 100644
--- a/pkg/sentry/strace/strace.go
+++ b/pkg/sentry/strace/strace.go
@@ -55,6 +55,14 @@ var ItimerTypes = abi.ValueSet{
linux.ITIMER_PROF: "ITIMER_PROF",
}
+func hexNum(num uint64) string {
+ return "0x" + strconv.FormatUint(num, 16)
+}
+
+func hexArg(arg arch.SyscallArgument) string {
+ return hexNum(arg.Uint64())
+}
+
func iovecs(t *kernel.Task, addr usermem.Addr, iovcnt int, printContent bool, maxBytes uint64) string {
if iovcnt < 0 || iovcnt > linux.UIO_MAXIOV {
return fmt.Sprintf("%#x (error decoding iovecs: invalid iovcnt)", addr)
@@ -389,6 +397,12 @@ func (i *SyscallInfo) pre(t *kernel.Task, args arch.SyscallArguments, maximumBlo
output = append(output, path(t, args[arg].Pointer()))
case ExecveStringVector:
output = append(output, stringVector(t, args[arg].Pointer()))
+ case SetSockOptVal:
+ output = append(output, sockOptVal(t, args[arg-2].Uint64() /* level */, args[arg-1].Uint64() /* optName */, args[arg].Pointer() /* optVal */, args[arg+1].Uint64() /* optLen */, maximumBlobSize))
+ case SockOptLevel:
+ output = append(output, sockOptLevels.Parse(args[arg].Uint64()))
+ case SockOptName:
+ output = append(output, sockOptNames[args[arg-1].Uint64() /* level */].Parse(args[arg].Uint64()))
case SockAddr:
output = append(output, sockAddr(t, args[arg].Pointer(), uint32(args[arg+1].Uint64())))
case SockLen:
@@ -446,7 +460,7 @@ func (i *SyscallInfo) pre(t *kernel.Task, args arch.SyscallArguments, maximumBlo
case Hex:
fallthrough
default:
- output = append(output, "0x"+strconv.FormatUint(args[arg].Uint64(), 16))
+ output = append(output, hexArg(args[arg]))
}
}
@@ -507,6 +521,12 @@ func (i *SyscallInfo) post(t *kernel.Task, args arch.SyscallArguments, rval uint
output[arg] = capData(t, args[arg-1].Pointer(), args[arg].Pointer())
case PollFDs:
output[arg] = pollFDs(t, args[arg].Pointer(), uint(args[arg+1].Uint()), true)
+ case GetSockOptVal:
+ output[arg] = getSockOptVal(t, args[arg-2].Uint64() /* level */, args[arg-1].Uint64() /* optName */, args[arg].Pointer() /* optVal */, args[arg+1].Pointer() /* optLen */, maximumBlobSize, rval)
+ case SetSockOptVal:
+ // No need to print the value again. While it usually
+ // isn't, the string version of this arg can be long.
+ output[arg] = hexArg(args[arg])
}
}
}
diff --git a/pkg/sentry/strace/syscalls.go b/pkg/sentry/strace/syscalls.go
index 24e29a2ba..446d1e0f6 100644
--- a/pkg/sentry/strace/syscalls.go
+++ b/pkg/sentry/strace/syscalls.go
@@ -207,9 +207,27 @@ const (
// array is in the next argument.
PollFDs
- // SelectFDSet is an fd_set argument in select(2)/pselect(2). The number of
- // fds represented must be the first argument.
+ // SelectFDSet is an fd_set argument in select(2)/pselect(2). The
+ // number of FDs represented must be the first argument.
SelectFDSet
+
+ // GetSockOptVal is the optval argument in getsockopt(2).
+ //
+ // Formatted after syscall execution.
+ GetSockOptVal
+
+ // SetSockOptVal is the optval argument in setsockopt(2).
+ //
+ // Contents omitted after syscall execution.
+ SetSockOptVal
+
+ // SockOptLevel is the level argument in getsockopt(2) and
+ // setsockopt(2).
+ SockOptLevel
+
+ // SockOptLevel is the optname argument in getsockopt(2) and
+ // setsockopt(2).
+ SockOptName
)
// defaultFormat is the syscall argument format to use if the actual format is
diff --git a/pkg/sentry/syscalls/linux/linux64_amd64.go b/pkg/sentry/syscalls/linux/linux64_amd64.go
index 588f8b087..79066ad2a 100644
--- a/pkg/sentry/syscalls/linux/linux64_amd64.go
+++ b/pkg/sentry/syscalls/linux/linux64_amd64.go
@@ -228,21 +228,18 @@ var AMD64 = &kernel.SyscallTable{
185: syscalls.Error("security", syserror.ENOSYS, "Not implemented in Linux.", nil),
186: syscalls.Supported("gettid", Gettid),
187: syscalls.Supported("readahead", Readahead),
- // TODO(b/148303075): Enable set/getxattr (in their various
- // forms) once we also have list and removexattr. The JVM
- // assumes that if get/set exist, then list and remove do too.
- 188: syscalls.ErrorWithEvent("setxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 189: syscalls.ErrorWithEvent("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 190: syscalls.ErrorWithEvent("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 191: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 192: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 193: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 194: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 195: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 196: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 197: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 198: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 199: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 188: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil),
+ 189: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil),
+ 190: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil),
+ 191: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil),
+ 192: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil),
+ 193: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil),
+ 194: syscalls.PartiallySupported("listxattr", ListXattr, "Only supported for tmpfs", nil),
+ 195: syscalls.PartiallySupported("llistxattr", LListXattr, "Only supported for tmpfs", nil),
+ 196: syscalls.PartiallySupported("flistxattr", FListXattr, "Only supported for tmpfs", nil),
+ 197: syscalls.PartiallySupported("removexattr", RemoveXattr, "Only supported for tmpfs", nil),
+ 198: syscalls.PartiallySupported("lremovexattr", LRemoveXattr, "Only supported for tmpfs", nil),
+ 199: syscalls.PartiallySupported("fremovexattr", FRemoveXattr, "Only supported for tmpfs", nil),
200: syscalls.Supported("tkill", Tkill),
201: syscalls.Supported("time", Time),
202: syscalls.PartiallySupported("futex", Futex, "Robust futexes not supported.", nil),
diff --git a/pkg/sentry/syscalls/linux/linux64_arm64.go b/pkg/sentry/syscalls/linux/linux64_arm64.go
index 06e5ee401..7421619de 100644
--- a/pkg/sentry/syscalls/linux/linux64_arm64.go
+++ b/pkg/sentry/syscalls/linux/linux64_arm64.go
@@ -36,26 +36,23 @@ var ARM64 = &kernel.SyscallTable{
},
AuditNumber: linux.AUDIT_ARCH_AARCH64,
Table: map[uintptr]kernel.Syscall{
- 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
- // TODO(b/148303075): Enable set/getxattr (in their various
- // forms) once we also have list and removexattr. The JVM
- // assumes that if get/set exist, then list and remove do too.
- 5: syscalls.ErrorWithEvent("setxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 6: syscalls.ErrorWithEvent("lsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 7: syscalls.ErrorWithEvent("fsetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 8: syscalls.ErrorWithEvent("getxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 9: syscalls.ErrorWithEvent("lgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 10: syscalls.ErrorWithEvent("fgetxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 11: syscalls.ErrorWithEvent("listxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 13: syscalls.ErrorWithEvent("llistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 13: syscalls.ErrorWithEvent("flistxattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 14: syscalls.ErrorWithEvent("removexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 15: syscalls.ErrorWithEvent("lremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
- 16: syscalls.ErrorWithEvent("fremovexattr", syserror.ENOTSUP, "Requires filesystem support.", []string{"gvisor.dev/issue/1636"}),
+ 0: syscalls.PartiallySupported("io_setup", IoSetup, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 1: syscalls.PartiallySupported("io_destroy", IoDestroy, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 2: syscalls.PartiallySupported("io_submit", IoSubmit, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 3: syscalls.PartiallySupported("io_cancel", IoCancel, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 4: syscalls.PartiallySupported("io_getevents", IoGetevents, "Generally supported with exceptions. User ring optimizations are not implemented.", []string{"gvisor.dev/issue/204"}),
+ 5: syscalls.PartiallySupported("setxattr", SetXattr, "Only supported for tmpfs.", nil),
+ 6: syscalls.PartiallySupported("lsetxattr", LSetXattr, "Only supported for tmpfs.", nil),
+ 7: syscalls.PartiallySupported("fsetxattr", FSetXattr, "Only supported for tmpfs.", nil),
+ 8: syscalls.PartiallySupported("getxattr", GetXattr, "Only supported for tmpfs.", nil),
+ 9: syscalls.PartiallySupported("lgetxattr", LGetXattr, "Only supported for tmpfs.", nil),
+ 10: syscalls.PartiallySupported("fgetxattr", FGetXattr, "Only supported for tmpfs.", nil),
+ 11: syscalls.PartiallySupported("listxattr", ListXattr, "Only supported for tmpfs", nil),
+ 12: syscalls.PartiallySupported("llistxattr", LListXattr, "Only supported for tmpfs", nil),
+ 13: syscalls.PartiallySupported("flistxattr", FListXattr, "Only supported for tmpfs", nil),
+ 14: syscalls.PartiallySupported("removexattr", RemoveXattr, "Only supported for tmpfs", nil),
+ 15: syscalls.PartiallySupported("lremovexattr", LRemoveXattr, "Only supported for tmpfs", nil),
+ 16: syscalls.PartiallySupported("fremovexattr", FRemoveXattr, "Only supported for tmpfs", nil),
17: syscalls.Supported("getcwd", Getcwd),
18: syscalls.CapError("lookup_dcookie", linux.CAP_SYS_ADMIN, "", nil),
19: syscalls.Supported("eventfd2", Eventfd2),
diff --git a/pkg/sentry/syscalls/linux/sys_xattr.go b/pkg/sentry/syscalls/linux/sys_xattr.go
index efb95555c..9d8140b8a 100644
--- a/pkg/sentry/syscalls/linux/sys_xattr.go
+++ b/pkg/sentry/syscalls/linux/sys_xattr.go
@@ -49,14 +49,11 @@ func FGetXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.Sys
}
defer f.DecRef()
- n, value, err := getXattr(t, f.Dirent, nameAddr, size)
+ n, err := getXattr(t, f.Dirent, nameAddr, valueAddr, size)
if err != nil {
return 0, nil, err
}
- if _, err := t.CopyOutBytes(valueAddr, []byte(value)); err != nil {
- return 0, nil, err
- }
return uintptr(n), nil, nil
}
@@ -71,41 +68,36 @@ func getXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink
return 0, nil, err
}
- valueLen := 0
- err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
+ n := 0
+ err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error {
if dirPath && !fs.IsDir(d.Inode.StableAttr) {
return syserror.ENOTDIR
}
- n, value, err := getXattr(t, d, nameAddr, size)
- valueLen = n
- if err != nil {
- return err
- }
-
- _, err = t.CopyOutBytes(valueAddr, []byte(value))
+ n, err = getXattr(t, d, nameAddr, valueAddr, size)
return err
})
if err != nil {
return 0, nil, err
}
- return uintptr(valueLen), nil, nil
+
+ return uintptr(n), nil, nil
}
// getXattr implements getxattr(2) from the given *fs.Dirent.
-func getXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr, size uint64) (int, string, error) {
- if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Read: true}); err != nil {
- return 0, "", err
- }
-
+func getXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, size uint64) (int, error) {
name, err := copyInXattrName(t, nameAddr)
if err != nil {
- return 0, "", err
+ return 0, err
+ }
+
+ if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Read: true}); err != nil {
+ return 0, err
}
// TODO(b/148380782): Support xattrs in namespaces other than "user".
if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) {
- return 0, "", syserror.EOPNOTSUPP
+ return 0, syserror.EOPNOTSUPP
}
// If getxattr(2) is called with size 0, the size of the value will be
@@ -118,18 +110,22 @@ func getXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr, size uint64)
value, err := d.Inode.GetXattr(t, name, requestedSize)
if err != nil {
- return 0, "", err
+ return 0, err
}
n := len(value)
if uint64(n) > requestedSize {
- return 0, "", syserror.ERANGE
+ return 0, syserror.ERANGE
}
// Don't copy out the attribute value if size is 0.
if size == 0 {
- return n, "", nil
+ return n, nil
+ }
+
+ if _, err = t.CopyOutBytes(valueAddr, []byte(value)); err != nil {
+ return 0, err
}
- return n, value, nil
+ return n, nil
}
// SetXattr implements linux syscall setxattr(2).
@@ -172,7 +168,7 @@ func setXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink
return 0, nil, err
}
- return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(root *fs.Dirent, d *fs.Dirent, _ uint) error {
+ return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error {
if dirPath && !fs.IsDir(d.Inode.StableAttr) {
return syserror.ENOTDIR
}
@@ -187,12 +183,12 @@ func setXattr(t *kernel.Task, d *fs.Dirent, nameAddr, valueAddr usermem.Addr, si
return syserror.EINVAL
}
- if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil {
+ name, err := copyInXattrName(t, nameAddr)
+ if err != nil {
return err
}
- name, err := copyInXattrName(t, nameAddr)
- if err != nil {
+ if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil {
return err
}
@@ -226,12 +222,18 @@ func copyInXattrName(t *kernel.Task, nameAddr usermem.Addr) (string, error) {
return name, nil
}
+// Restrict xattrs to regular files and directories.
+//
+// TODO(b/148380782): In Linux, this restriction technically only applies to
+// xattrs in the "user.*" namespace. Make file type checks specific to the
+// namespace once we allow other xattr prefixes.
+func xattrFileTypeOk(i *fs.Inode) bool {
+ return fs.IsRegular(i.StableAttr) || fs.IsDir(i.StableAttr)
+}
+
func checkXattrPermissions(t *kernel.Task, i *fs.Inode, perms fs.PermMask) error {
// Restrict xattrs to regular files and directories.
- //
- // In Linux, this restriction technically only applies to xattrs in the
- // "user.*" namespace, but we don't allow any other xattr prefixes anyway.
- if !fs.IsRegular(i.StableAttr) && !fs.IsDir(i.StableAttr) {
+ if !xattrFileTypeOk(i) {
if perms.Write {
return syserror.EPERM
}
@@ -240,3 +242,179 @@ func checkXattrPermissions(t *kernel.Task, i *fs.Inode, perms fs.PermMask) error
return i.CheckPermission(t, perms)
}
+
+// ListXattr implements linux syscall listxattr(2).
+func ListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ return listXattrFromPath(t, args, true)
+}
+
+// LListXattr implements linux syscall llistxattr(2).
+func LListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ return listXattrFromPath(t, args, false)
+}
+
+// FListXattr implements linux syscall flistxattr(2).
+func FListXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ fd := args[0].Int()
+ listAddr := args[1].Pointer()
+ size := uint64(args[2].SizeT())
+
+ // TODO(b/113957122): Return EBADF if the fd was opened with O_PATH.
+ f := t.GetFile(fd)
+ if f == nil {
+ return 0, nil, syserror.EBADF
+ }
+ defer f.DecRef()
+
+ n, err := listXattr(t, f.Dirent, listAddr, size)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ return uintptr(n), nil, nil
+}
+
+func listXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink bool) (uintptr, *kernel.SyscallControl, error) {
+ pathAddr := args[0].Pointer()
+ listAddr := args[1].Pointer()
+ size := uint64(args[2].SizeT())
+
+ path, dirPath, err := copyInPath(t, pathAddr, false /* allowEmpty */)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ n := 0
+ err = fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error {
+ if dirPath && !fs.IsDir(d.Inode.StableAttr) {
+ return syserror.ENOTDIR
+ }
+
+ n, err = listXattr(t, d, listAddr, size)
+ return err
+ })
+ if err != nil {
+ return 0, nil, err
+ }
+
+ return uintptr(n), nil, nil
+}
+
+func listXattr(t *kernel.Task, d *fs.Dirent, addr usermem.Addr, size uint64) (int, error) {
+ if !xattrFileTypeOk(d.Inode) {
+ return 0, nil
+ }
+
+ // If listxattr(2) is called with size 0, the buffer size needed to contain
+ // the xattr list will be returned successfully even if it is nonzero. In
+ // that case, we need to retrieve the entire list so we can compute and
+ // return the correct size.
+ requestedSize := size
+ if size == 0 || size > linux.XATTR_SIZE_MAX {
+ requestedSize = linux.XATTR_SIZE_MAX
+ }
+ xattrs, err := d.Inode.ListXattr(t, requestedSize)
+ if err != nil {
+ return 0, err
+ }
+
+ // TODO(b/148380782): support namespaces other than "user".
+ for x := range xattrs {
+ if !strings.HasPrefix(x, linux.XATTR_USER_PREFIX) {
+ delete(xattrs, x)
+ }
+ }
+
+ listSize := xattrListSize(xattrs)
+ if listSize > linux.XATTR_SIZE_MAX {
+ return 0, syserror.E2BIG
+ }
+ if uint64(listSize) > requestedSize {
+ return 0, syserror.ERANGE
+ }
+
+ // Don't copy out the attributes if size is 0.
+ if size == 0 {
+ return listSize, nil
+ }
+
+ buf := make([]byte, 0, listSize)
+ for x := range xattrs {
+ buf = append(buf, []byte(x)...)
+ buf = append(buf, 0)
+ }
+ if _, err := t.CopyOutBytes(addr, buf); err != nil {
+ return 0, err
+ }
+
+ return len(buf), nil
+}
+
+func xattrListSize(xattrs map[string]struct{}) int {
+ size := 0
+ for x := range xattrs {
+ size += len(x) + 1
+ }
+ return size
+}
+
+// RemoveXattr implements linux syscall removexattr(2).
+func RemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ return removeXattrFromPath(t, args, true)
+}
+
+// LRemoveXattr implements linux syscall lremovexattr(2).
+func LRemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ return removeXattrFromPath(t, args, false)
+}
+
+// FRemoveXattr implements linux syscall fremovexattr(2).
+func FRemoveXattr(t *kernel.Task, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
+ fd := args[0].Int()
+ nameAddr := args[1].Pointer()
+
+ // TODO(b/113957122): Return EBADF if the fd was opened with O_PATH.
+ f := t.GetFile(fd)
+ if f == nil {
+ return 0, nil, syserror.EBADF
+ }
+ defer f.DecRef()
+
+ return 0, nil, removeXattr(t, f.Dirent, nameAddr)
+}
+
+func removeXattrFromPath(t *kernel.Task, args arch.SyscallArguments, resolveSymlink bool) (uintptr, *kernel.SyscallControl, error) {
+ pathAddr := args[0].Pointer()
+ nameAddr := args[1].Pointer()
+
+ path, dirPath, err := copyInPath(t, pathAddr, false /* allowEmpty */)
+ if err != nil {
+ return 0, nil, err
+ }
+
+ return 0, nil, fileOpOn(t, linux.AT_FDCWD, path, resolveSymlink, func(_ *fs.Dirent, d *fs.Dirent, _ uint) error {
+ if dirPath && !fs.IsDir(d.Inode.StableAttr) {
+ return syserror.ENOTDIR
+ }
+
+ return removeXattr(t, d, nameAddr)
+ })
+}
+
+// removeXattr implements removexattr(2) from the given *fs.Dirent.
+func removeXattr(t *kernel.Task, d *fs.Dirent, nameAddr usermem.Addr) error {
+ name, err := copyInXattrName(t, nameAddr)
+ if err != nil {
+ return err
+ }
+
+ if err := checkXattrPermissions(t, d.Inode, fs.PermMask{Write: true}); err != nil {
+ return err
+ }
+
+ if !strings.HasPrefix(name, linux.XATTR_USER_PREFIX) {
+ return syserror.EOPNOTSUPP
+ }
+
+ return d.Inode.RemoveXattr(t, d, name)
+}
diff --git a/pkg/sentry/usage/memory.go b/pkg/sentry/usage/memory.go
index 538c645eb..4320ad17f 100644
--- a/pkg/sentry/usage/memory.go
+++ b/pkg/sentry/usage/memory.go
@@ -253,6 +253,10 @@ func (m *MemoryLocked) Copy() (MemoryStats, uint64) {
}
// MinimumTotalMemoryBytes is the minimum reported total system memory.
+//
+// This can be configured through options provided to the Sentry at start.
+// This number is purely synthetic. This is only set before the application
+// starts executing, and must not be modified.
var MinimumTotalMemoryBytes uint64 = 2 << 30 // 2 GB
// TotalMemory returns the "total usable memory" available.
diff --git a/pkg/tcpip/stack/ndp.go b/pkg/tcpip/stack/ndp.go
index 6123fda33..045409bda 100644
--- a/pkg/tcpip/stack/ndp.go
+++ b/pkg/tcpip/stack/ndp.go
@@ -448,6 +448,13 @@ func (ndp *ndpState) startDuplicateAddressDetection(addr tcpip.Address, ref *ref
remaining := ndp.configs.DupAddrDetectTransmits
if remaining == 0 {
ref.setKind(permanent)
+
+ // Consider DAD to have resolved even if no DAD messages were actually
+ // transmitted.
+ if ndpDisp := ndp.nic.stack.ndpDisp; ndpDisp != nil {
+ ndpDisp.OnDuplicateAddressDetectionStatus(ndp.nic.ID(), addr, true, nil)
+ }
+
return nil
}
@@ -906,22 +913,21 @@ func (ndp *ndpState) handleAutonomousPrefixInformation(pi header.NDPPrefixInform
return
}
- // We do not already have an address within the prefix, prefix. Do the
+ // We do not already have an address with the prefix prefix. Do the
// work as outlined by RFC 4862 section 5.5.3.d if n is configured
- // to auto-generated global addresses by SLAAC.
- ndp.newAutoGenAddress(prefix, pl, vl)
+ // to auto-generate global addresses by SLAAC.
+ if !ndp.configs.AutoGenGlobalAddresses {
+ return
+ }
+
+ ndp.doSLAAC(prefix, pl, vl)
}
-// newAutoGenAddress generates a new SLAAC address with the provided lifetimes
+// doSLAAC generates a new SLAAC address with the provided lifetimes
// for prefix.
//
// pl is the new preferred lifetime. vl is the new valid lifetime.
-func (ndp *ndpState) newAutoGenAddress(prefix tcpip.Subnet, pl, vl time.Duration) {
- // Are we configured to auto-generate new global addresses?
- if !ndp.configs.AutoGenGlobalAddresses {
- return
- }
-
+func (ndp *ndpState) doSLAAC(prefix tcpip.Subnet, pl, vl time.Duration) {
// If we do not already have an address for this prefix and the valid
// lifetime is 0, no need to do anything further, as per RFC 4862
// section 5.5.3.d.
@@ -1152,12 +1158,21 @@ func (ndp *ndpState) cleanupAutoGenAddrResourcesAndNotify(addr tcpip.Address) bo
//
// The NIC that ndp belongs to MUST be locked.
func (ndp *ndpState) cleanupHostOnlyState() {
+ linkLocalSubnet := header.IPv6LinkLocalPrefix.Subnet()
+ linkLocalAddrs := 0
for addr := range ndp.autoGenAddresses {
+ // RFC 4862 section 5 states that routers are also expected to generate a
+ // link-local address so we do not invalidate them.
+ if linkLocalSubnet.Contains(addr) {
+ linkLocalAddrs++
+ continue
+ }
+
ndp.invalidateAutoGenAddress(addr)
}
- if got := len(ndp.autoGenAddresses); got != 0 {
- log.Fatalf("ndp: still have auto-generated addresses after cleaning up, found = %d", got)
+ if got := len(ndp.autoGenAddresses); got != linkLocalAddrs {
+ log.Fatalf("ndp: still have non-linklocal auto-generated addresses after cleaning up; found = %d prefixes, of which %d are link-local", got, linkLocalAddrs)
}
for prefix := range ndp.onLinkPrefixes {
@@ -1165,7 +1180,7 @@ func (ndp *ndpState) cleanupHostOnlyState() {
}
if got := len(ndp.onLinkPrefixes); got != 0 {
- log.Fatalf("ndp: still have discovered on-link prefixes after cleaning up, found = %d", got)
+ log.Fatalf("ndp: still have discovered on-link prefixes after cleaning up; found = %d", got)
}
for router := range ndp.defaultRouters {
@@ -1173,7 +1188,7 @@ func (ndp *ndpState) cleanupHostOnlyState() {
}
if got := len(ndp.defaultRouters); got != 0 {
- log.Fatalf("ndp: still have discovered default routers after cleaning up, found = %d", got)
+ log.Fatalf("ndp: still have discovered default routers after cleaning up; found = %d", got)
}
}
diff --git a/pkg/tcpip/stack/ndp_test.go b/pkg/tcpip/stack/ndp_test.go
index 1e575bdaf..1f6f77439 100644
--- a/pkg/tcpip/stack/ndp_test.go
+++ b/pkg/tcpip/stack/ndp_test.go
@@ -42,6 +42,7 @@ const (
linkAddr1 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x06")
linkAddr2 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x07")
linkAddr3 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x08")
+ linkAddr4 = tcpip.LinkAddress("\x02\x02\x03\x04\x05\x09")
defaultTimeout = 100 * time.Millisecond
defaultAsyncEventTimeout = time.Second
)
@@ -50,6 +51,7 @@ var (
llAddr1 = header.LinkLocalAddr(linkAddr1)
llAddr2 = header.LinkLocalAddr(linkAddr2)
llAddr3 = header.LinkLocalAddr(linkAddr3)
+ llAddr4 = header.LinkLocalAddr(linkAddr4)
dstAddr = tcpip.FullAddress{
Addr: "\x0a\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01",
Port: 25,
@@ -84,39 +86,6 @@ func prefixSubnetAddr(offset uint8, linkAddr tcpip.LinkAddress) (tcpip.AddressWi
return prefix, subnet, addrForSubnet(subnet, linkAddr)
}
-// TestDADDisabled tests that an address successfully resolves immediately
-// when DAD is not enabled (the default for an empty stack.Options).
-func TestDADDisabled(t *testing.T) {
- opts := stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- }
-
- e := channel.New(0, 1280, linkAddr1)
- s := stack.New(opts)
- if err := s.CreateNIC(1, e); err != nil {
- t.Fatalf("CreateNIC(_) = %s", err)
- }
-
- if err := s.AddAddress(1, header.IPv6ProtocolNumber, addr1); err != nil {
- t.Fatalf("AddAddress(_, %d, %s) = %s", header.IPv6ProtocolNumber, addr1, err)
- }
-
- // Should get the address immediately since we should not have performed
- // DAD on it.
- addr, err := s.GetMainNICAddress(1, header.IPv6ProtocolNumber)
- if err != nil {
- t.Fatalf("stack.GetMainNICAddress(_, _) err = %s", err)
- }
- if addr.Address != addr1 {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = %s, want = %s", addr, addr1)
- }
-
- // We should not have sent any NDP NS messages.
- if got := s.Stats().ICMP.V6PacketsSent.NeighborSolicit.Value(); got != 0 {
- t.Fatalf("got NeighborSolicit = %d, want = 0", got)
- }
-}
-
// ndpDADEvent is a set of parameters that was passed to
// ndpDispatcher.OnDuplicateAddressDetectionStatus.
type ndpDADEvent struct {
@@ -298,6 +267,58 @@ func (n *ndpDispatcher) OnDHCPv6Configuration(nicID tcpip.NICID, configuration s
}
}
+// Check e to make sure that the event is for addr on nic with ID 1, and the
+// resolved flag set to resolved with the specified err.
+func checkDADEvent(e ndpDADEvent, nicID tcpip.NICID, addr tcpip.Address, resolved bool, err *tcpip.Error) string {
+ return cmp.Diff(ndpDADEvent{nicID: nicID, addr: addr, resolved: resolved, err: err}, e, cmp.AllowUnexported(e))
+}
+
+// TestDADDisabled tests that an address successfully resolves immediately
+// when DAD is not enabled (the default for an empty stack.Options).
+func TestDADDisabled(t *testing.T) {
+ const nicID = 1
+ ndpDisp := ndpDispatcher{
+ dadC: make(chan ndpDADEvent, 1),
+ }
+ opts := stack.Options{
+ NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
+ NDPDisp: &ndpDisp,
+ }
+
+ e := channel.New(0, 1280, linkAddr1)
+ s := stack.New(opts)
+ if err := s.CreateNIC(nicID, e); err != nil {
+ t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
+ }
+
+ if err := s.AddAddress(nicID, header.IPv6ProtocolNumber, addr1); err != nil {
+ t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, header.IPv6ProtocolNumber, addr1, err)
+ }
+
+ // Should get the address immediately since we should not have performed
+ // DAD on it.
+ select {
+ case e := <-ndpDisp.dadC:
+ if diff := checkDADEvent(e, nicID, addr1, true, nil); diff != "" {
+ t.Errorf("dad event mismatch (-want +got):\n%s", diff)
+ }
+ default:
+ t.Fatal("expected DAD event")
+ }
+ addr, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber)
+ if err != nil {
+ t.Fatalf("stack.GetMainNICAddress(%d, %d) err = %s", nicID, header.IPv6ProtocolNumber, err)
+ }
+ if addr.Address != addr1 {
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = %s, want = %s", nicID, header.IPv6ProtocolNumber, addr, addr1)
+ }
+
+ // We should not have sent any NDP NS messages.
+ if got := s.Stats().ICMP.V6PacketsSent.NeighborSolicit.Value(); got != 0 {
+ t.Fatalf("got NeighborSolicit = %d, want = 0", got)
+ }
+}
+
// TestDADResolve tests that an address successfully resolves after performing
// DAD for various values of DupAddrDetectTransmits and RetransmitTimer.
// Included in the subtests is a test to make sure that an invalid
@@ -379,17 +400,8 @@ func TestDADResolve(t *testing.T) {
// means something is wrong.
t.Fatal("timed out waiting for DAD resolution")
case e := <-ndpDisp.dadC:
- if e.err != nil {
- t.Fatal("got DAD error: ", e.err)
- }
- if e.nicID != nicID {
- t.Fatalf("got DAD event w/ nicID = %d, want = %d", e.nicID, nicID)
- }
- if e.addr != addr1 {
- t.Fatalf("got DAD event w/ addr = %s, want = %s", addr, addr1)
- }
- if !e.resolved {
- t.Fatal("got DAD event w/ resolved = false, want = true")
+ if diff := checkDADEvent(e, nicID, addr1, true, nil); diff != "" {
+ t.Errorf("dad event mismatch (-want +got):\n%s", diff)
}
}
addr, err = s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber)
@@ -443,6 +455,8 @@ func TestDADResolve(t *testing.T) {
// a node doing DAD for the same address), or if another node is detected to own
// the address already (receive an NA message for the tentative address).
func TestDADFail(t *testing.T) {
+ const nicID = 1
+
tests := []struct {
name string
makeBuf func(tgt tcpip.Address) buffer.Prependable
@@ -524,22 +538,22 @@ func TestDADFail(t *testing.T) {
e := channel.New(0, 1280, linkAddr1)
s := stack.New(opts)
- if err := s.CreateNIC(1, e); err != nil {
- t.Fatalf("CreateNIC(_) = %s", err)
+ if err := s.CreateNIC(nicID, e); err != nil {
+ t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
}
- if err := s.AddAddress(1, header.IPv6ProtocolNumber, addr1); err != nil {
- t.Fatalf("AddAddress(_, %d, %s) = %s", header.IPv6ProtocolNumber, addr1, err)
+ if err := s.AddAddress(nicID, header.IPv6ProtocolNumber, addr1); err != nil {
+ t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, header.IPv6ProtocolNumber, addr1, err)
}
// Address should not be considered bound to the NIC yet
// (DAD ongoing).
- addr, err := s.GetMainNICAddress(1, header.IPv6ProtocolNumber)
+ addr, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber)
if err != nil {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = (_, %v), want = (_, nil)", err)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (_, %v), want = (_, nil)", nicID, header.IPv6ProtocolNumber, err)
}
if want := (tcpip.AddressWithPrefix{}); addr != want {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = (%s, nil), want = (%s, nil)", addr, want)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (%s, nil), want = (%s, nil)", nicID, header.IPv6ProtocolNumber, addr, want)
}
// Receive a packet to simulate multiple nodes owning or
@@ -563,25 +577,16 @@ func TestDADFail(t *testing.T) {
// something is wrong.
t.Fatal("timed out waiting for DAD failure")
case e := <-ndpDisp.dadC:
- if e.err != nil {
- t.Fatal("got DAD error: ", e.err)
- }
- if e.nicID != 1 {
- t.Fatalf("got DAD event w/ nicID = %d, want = 1", e.nicID)
- }
- if e.addr != addr1 {
- t.Fatalf("got DAD event w/ addr = %s, want = %s", addr, addr1)
- }
- if e.resolved {
- t.Fatal("got DAD event w/ resolved = true, want = false")
+ if diff := checkDADEvent(e, nicID, addr1, false, nil); diff != "" {
+ t.Errorf("dad event mismatch (-want +got):\n%s", diff)
}
}
- addr, err = s.GetMainNICAddress(1, header.IPv6ProtocolNumber)
+ addr, err = s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber)
if err != nil {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = (_, %v), want = (_, nil)", err)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (_, %v), want = (_, nil)", nicID, header.IPv6ProtocolNumber, err)
}
if want := (tcpip.AddressWithPrefix{}); addr != want {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = (%s, nil), want = (%s, nil)", addr, want)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (%s, nil), want = (%s, nil)", nicID, header.IPv6ProtocolNumber, addr, want)
}
})
}
@@ -590,6 +595,8 @@ func TestDADFail(t *testing.T) {
// TestDADStop tests to make sure that the DAD process stops when an address is
// removed.
func TestDADStop(t *testing.T) {
+ const nicID = 1
+
ndpDisp := ndpDispatcher{
dadC: make(chan ndpDADEvent, 1),
}
@@ -605,26 +612,26 @@ func TestDADStop(t *testing.T) {
e := channel.New(0, 1280, linkAddr1)
s := stack.New(opts)
- if err := s.CreateNIC(1, e); err != nil {
- t.Fatalf("CreateNIC(_) = %s", err)
+ if err := s.CreateNIC(nicID, e); err != nil {
+ t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
}
- if err := s.AddAddress(1, header.IPv6ProtocolNumber, addr1); err != nil {
- t.Fatalf("AddAddress(_, %d, %s) = %s", header.IPv6ProtocolNumber, addr1, err)
+ if err := s.AddAddress(nicID, header.IPv6ProtocolNumber, addr1); err != nil {
+ t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID, header.IPv6ProtocolNumber, addr1, err)
}
// Address should not be considered bound to the NIC yet (DAD ongoing).
- addr, err := s.GetMainNICAddress(1, header.IPv6ProtocolNumber)
+ addr, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber)
if err != nil {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = (_, %v), want = (_, nil)", err)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (_, %v), want = (_, nil)", nicID, header.IPv6ProtocolNumber, err)
}
if want := (tcpip.AddressWithPrefix{}); addr != want {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = (%s, nil), want = (%s, nil)", addr, want)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (%s, nil), want = (%s, nil)", nicID, header.IPv6ProtocolNumber, addr, want)
}
// Remove the address. This should stop DAD.
- if err := s.RemoveAddress(1, addr1); err != nil {
- t.Fatalf("RemoveAddress(_, %s) = %s", addr1, err)
+ if err := s.RemoveAddress(nicID, addr1); err != nil {
+ t.Fatalf("RemoveAddress(%d, %s) = %s", nicID, addr1, err)
}
// Wait for DAD to fail (since the address was removed during DAD).
@@ -634,26 +641,16 @@ func TestDADStop(t *testing.T) {
// time + extra 1s buffer, something is wrong.
t.Fatal("timed out waiting for DAD failure")
case e := <-ndpDisp.dadC:
- if e.err != nil {
- t.Fatal("got DAD error: ", e.err)
- }
- if e.nicID != 1 {
- t.Fatalf("got DAD event w/ nicID = %d, want = 1", e.nicID)
- }
- if e.addr != addr1 {
- t.Fatalf("got DAD event w/ addr = %s, want = %s", addr, addr1)
- }
- if e.resolved {
- t.Fatal("got DAD event w/ resolved = true, want = false")
+ if diff := checkDADEvent(e, nicID, addr1, false, nil); diff != "" {
+ t.Errorf("dad event mismatch (-want +got):\n%s", diff)
}
-
}
- addr, err = s.GetMainNICAddress(1, header.IPv6ProtocolNumber)
+ addr, err = s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber)
if err != nil {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = (_, %v), want = (_, nil)", err)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (_, %v), want = (_, nil)", nicID, header.IPv6ProtocolNumber, err)
}
if want := (tcpip.AddressWithPrefix{}); addr != want {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = (%s, nil), want = (%s, nil)", addr, want)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (%s, nil), want = (%s, nil)", nicID, header.IPv6ProtocolNumber, addr, want)
}
// Should not have sent more than 1 NS message.
@@ -679,6 +676,10 @@ func TestSetNDPConfigurationFailsForBadNICID(t *testing.T) {
// configurations without affecting the default NDP configurations or other
// interfaces' configurations.
func TestSetNDPConfigurations(t *testing.T) {
+ const nicID1 = 1
+ const nicID2 = 2
+ const nicID3 = 3
+
tests := []struct {
name string
dupAddrDetectTransmits uint8
@@ -702,7 +703,7 @@ func TestSetNDPConfigurations(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
ndpDisp := ndpDispatcher{
- dadC: make(chan ndpDADEvent),
+ dadC: make(chan ndpDADEvent, 1),
}
e := channel.New(0, 1280, linkAddr1)
s := stack.New(stack.Options{
@@ -710,17 +711,28 @@ func TestSetNDPConfigurations(t *testing.T) {
NDPDisp: &ndpDisp,
})
+ expectDADEvent := func(nicID tcpip.NICID, addr tcpip.Address) {
+ select {
+ case e := <-ndpDisp.dadC:
+ if diff := checkDADEvent(e, nicID, addr, true, nil); diff != "" {
+ t.Errorf("dad event mismatch (-want +got):\n%s", diff)
+ }
+ default:
+ t.Fatalf("expected DAD event for %s", addr)
+ }
+ }
+
// This NIC(1)'s NDP configurations will be updated to
// be different from the default.
- if err := s.CreateNIC(1, e); err != nil {
- t.Fatalf("CreateNIC(1) = %s", err)
+ if err := s.CreateNIC(nicID1, e); err != nil {
+ t.Fatalf("CreateNIC(%d, _) = %s", nicID1, err)
}
// Created before updating NIC(1)'s NDP configurations
// but updating NIC(1)'s NDP configurations should not
// affect other existing NICs.
- if err := s.CreateNIC(2, e); err != nil {
- t.Fatalf("CreateNIC(2) = %s", err)
+ if err := s.CreateNIC(nicID2, e); err != nil {
+ t.Fatalf("CreateNIC(%d, _) = %s", nicID2, err)
}
// Update the NDP configurations on NIC(1) to use DAD.
@@ -728,36 +740,38 @@ func TestSetNDPConfigurations(t *testing.T) {
DupAddrDetectTransmits: test.dupAddrDetectTransmits,
RetransmitTimer: test.retransmitTimer,
}
- if err := s.SetNDPConfigurations(1, configs); err != nil {
- t.Fatalf("got SetNDPConfigurations(1, _) = %s", err)
+ if err := s.SetNDPConfigurations(nicID1, configs); err != nil {
+ t.Fatalf("got SetNDPConfigurations(%d, _) = %s", nicID1, err)
}
// Created after updating NIC(1)'s NDP configurations
// but the stack's default NDP configurations should not
// have been updated.
- if err := s.CreateNIC(3, e); err != nil {
- t.Fatalf("CreateNIC(3) = %s", err)
+ if err := s.CreateNIC(nicID3, e); err != nil {
+ t.Fatalf("CreateNIC(%d, _) = %s", nicID3, err)
}
// Add addresses for each NIC.
- if err := s.AddAddress(1, header.IPv6ProtocolNumber, addr1); err != nil {
- t.Fatalf("AddAddress(1, %d, %s) = %s", header.IPv6ProtocolNumber, addr1, err)
+ if err := s.AddAddress(nicID1, header.IPv6ProtocolNumber, addr1); err != nil {
+ t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID1, header.IPv6ProtocolNumber, addr1, err)
}
- if err := s.AddAddress(2, header.IPv6ProtocolNumber, addr2); err != nil {
- t.Fatalf("AddAddress(2, %d, %s) = %s", header.IPv6ProtocolNumber, addr2, err)
+ if err := s.AddAddress(nicID2, header.IPv6ProtocolNumber, addr2); err != nil {
+ t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID2, header.IPv6ProtocolNumber, addr2, err)
}
- if err := s.AddAddress(3, header.IPv6ProtocolNumber, addr3); err != nil {
- t.Fatalf("AddAddress(3, %d, %s) = %s", header.IPv6ProtocolNumber, addr3, err)
+ expectDADEvent(nicID2, addr2)
+ if err := s.AddAddress(nicID3, header.IPv6ProtocolNumber, addr3); err != nil {
+ t.Fatalf("AddAddress(%d, %d, %s) = %s", nicID3, header.IPv6ProtocolNumber, addr3, err)
}
+ expectDADEvent(nicID3, addr3)
// Address should not be considered bound to NIC(1) yet
// (DAD ongoing).
- addr, err := s.GetMainNICAddress(1, header.IPv6ProtocolNumber)
+ addr, err := s.GetMainNICAddress(nicID1, header.IPv6ProtocolNumber)
if err != nil {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = (_, %v), want = (_, nil)", err)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (_, %v), want = (_, nil)", nicID1, header.IPv6ProtocolNumber, err)
}
if want := (tcpip.AddressWithPrefix{}); addr != want {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = (%s, nil), want = (%s, nil)", addr, want)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (%s, nil), want = (%s, nil)", nicID1, header.IPv6ProtocolNumber, addr, want)
}
// Should get the address on NIC(2) and NIC(3)
@@ -765,31 +779,31 @@ func TestSetNDPConfigurations(t *testing.T) {
// it as the stack was configured to not do DAD by
// default and we only updated the NDP configurations on
// NIC(1).
- addr, err = s.GetMainNICAddress(2, header.IPv6ProtocolNumber)
+ addr, err = s.GetMainNICAddress(nicID2, header.IPv6ProtocolNumber)
if err != nil {
- t.Fatalf("stack.GetMainNICAddress(2, _) err = %s", err)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (_, %v), want = (_, nil)", nicID2, header.IPv6ProtocolNumber, err)
}
if addr.Address != addr2 {
- t.Fatalf("got stack.GetMainNICAddress(2, _) = %s, want = %s", addr, addr2)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = %s, want = %s", nicID2, header.IPv6ProtocolNumber, addr, addr2)
}
- addr, err = s.GetMainNICAddress(3, header.IPv6ProtocolNumber)
+ addr, err = s.GetMainNICAddress(nicID3, header.IPv6ProtocolNumber)
if err != nil {
- t.Fatalf("stack.GetMainNICAddress(3, _) err = %s", err)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (_, %v), want = (_, nil)", nicID3, header.IPv6ProtocolNumber, err)
}
if addr.Address != addr3 {
- t.Fatalf("got stack.GetMainNICAddress(3, _) = %s, want = %s", addr, addr3)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = %s, want = %s", nicID3, header.IPv6ProtocolNumber, addr, addr3)
}
// Sleep until right (500ms before) before resolution to
// make sure the address didn't resolve on NIC(1) yet.
const delta = 500 * time.Millisecond
time.Sleep(time.Duration(test.dupAddrDetectTransmits)*test.expectedRetransmitTimer - delta)
- addr, err = s.GetMainNICAddress(1, header.IPv6ProtocolNumber)
+ addr, err = s.GetMainNICAddress(nicID1, header.IPv6ProtocolNumber)
if err != nil {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = (_, %v), want = (_, nil)", err)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (_, %v), want = (_, nil)", nicID1, header.IPv6ProtocolNumber, err)
}
if want := (tcpip.AddressWithPrefix{}); addr != want {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = (%s, nil), want = (%s, nil)", addr, want)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (%s, nil), want = (%s, nil)", nicID1, header.IPv6ProtocolNumber, addr, want)
}
// Wait for DAD to resolve.
@@ -803,25 +817,16 @@ func TestSetNDPConfigurations(t *testing.T) {
// means something is wrong.
t.Fatal("timed out waiting for DAD resolution")
case e := <-ndpDisp.dadC:
- if e.err != nil {
- t.Fatal("got DAD error: ", e.err)
- }
- if e.nicID != 1 {
- t.Fatalf("got DAD event w/ nicID = %d, want = 1", e.nicID)
- }
- if e.addr != addr1 {
- t.Fatalf("got DAD event w/ addr = %s, want = %s", addr, addr1)
- }
- if !e.resolved {
- t.Fatal("got DAD event w/ resolved = false, want = true")
+ if diff := checkDADEvent(e, nicID1, addr1, true, nil); diff != "" {
+ t.Errorf("dad event mismatch (-want +got):\n%s", diff)
}
}
- addr, err = s.GetMainNICAddress(1, header.IPv6ProtocolNumber)
+ addr, err = s.GetMainNICAddress(nicID1, header.IPv6ProtocolNumber)
if err != nil {
- t.Fatalf("stack.GetMainNICAddress(1, _) err = %s", err)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (_, %v), want = (_, nil)", nicID1, header.IPv6ProtocolNumber, err)
}
if addr.Address != addr1 {
- t.Fatalf("got stack.GetMainNICAddress(1, _) = %s, want = %s", addr, addr1)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = %s, want = %s", nicID1, header.IPv6ProtocolNumber, addr, addr1)
}
})
}
@@ -2882,8 +2887,8 @@ func TestNDPRecursiveDNSServerDispatch(t *testing.T) {
}
// TestCleanupHostOnlyStateOnBecomingRouter tests that all discovered routers
-// and prefixes, and auto-generated addresses get invalidated when a NIC
-// becomes a router.
+// and prefixes, and non-linklocal auto-generated addresses are invalidated when
+// a NIC becomes a router.
func TestCleanupHostOnlyStateOnBecomingRouter(t *testing.T) {
t.Parallel()
@@ -2898,6 +2903,14 @@ func TestCleanupHostOnlyStateOnBecomingRouter(t *testing.T) {
prefix2, subnet2, e1Addr2 := prefixSubnetAddr(1, linkAddr1)
e2Addr1 := addrForSubnet(subnet1, linkAddr2)
e2Addr2 := addrForSubnet(subnet2, linkAddr2)
+ llAddrWithPrefix1 := tcpip.AddressWithPrefix{
+ Address: llAddr1,
+ PrefixLen: 64,
+ }
+ llAddrWithPrefix2 := tcpip.AddressWithPrefix{
+ Address: llAddr2,
+ PrefixLen: 64,
+ }
ndpDisp := ndpDispatcher{
routerC: make(chan ndpRouterEvent, maxEvents),
@@ -2907,7 +2920,8 @@ func TestCleanupHostOnlyStateOnBecomingRouter(t *testing.T) {
autoGenAddrC: make(chan ndpAutoGenAddrEvent, maxEvents),
}
s := stack.New(stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
+ NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
+ AutoGenIPv6LinkLocal: true,
NDPConfigs: stack.NDPConfigurations{
HandleRAs: true,
DiscoverDefaultRouters: true,
@@ -2917,16 +2931,6 @@ func TestCleanupHostOnlyStateOnBecomingRouter(t *testing.T) {
NDPDisp: &ndpDisp,
})
- e1 := channel.New(0, 1280, linkAddr1)
- if err := s.CreateNIC(nicID1, e1); err != nil {
- t.Fatalf("CreateNIC(%d, _) = %s", nicID1, err)
- }
-
- e2 := channel.New(0, 1280, linkAddr2)
- if err := s.CreateNIC(nicID2, e2); err != nil {
- t.Fatalf("CreateNIC(%d, _) = %s", nicID2, err)
- }
-
expectRouterEvent := func() (bool, ndpRouterEvent) {
select {
case e := <-ndpDisp.routerC:
@@ -2957,18 +2961,30 @@ func TestCleanupHostOnlyStateOnBecomingRouter(t *testing.T) {
return false, ndpAutoGenAddrEvent{}
}
- // Receive RAs on NIC(1) and NIC(2) from default routers (llAddr1 and
- // llAddr2) w/ PI (for prefix1 in RA from llAddr1 and prefix2 in RA from
- // llAddr2) to discover multiple routers and prefixes, and auto-gen
- // multiple addresses.
-
- e1.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr1, lifetimeSeconds, prefix1, true, true, lifetimeSeconds, lifetimeSeconds))
+ e1 := channel.New(0, 1280, linkAddr1)
+ if err := s.CreateNIC(nicID1, e1); err != nil {
+ t.Fatalf("CreateNIC(%d, _) = %s", nicID1, err)
+ }
// We have other tests that make sure we receive the *correct* events
// on normal discovery of routers/prefixes, and auto-generated
// addresses. Here we just make sure we get an event and let other tests
// handle the correctness check.
+ expectAutoGenAddrEvent()
+
+ e2 := channel.New(0, 1280, linkAddr2)
+ if err := s.CreateNIC(nicID2, e2); err != nil {
+ t.Fatalf("CreateNIC(%d, _) = %s", nicID2, err)
+ }
+ expectAutoGenAddrEvent()
+
+ // Receive RAs on NIC(1) and NIC(2) from default routers (llAddr3 and
+ // llAddr4) w/ PI (for prefix1 in RA from llAddr3 and prefix2 in RA from
+ // llAddr4) to discover multiple routers and prefixes, and auto-gen
+ // multiple addresses.
+
+ e1.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, lifetimeSeconds, prefix1, true, true, lifetimeSeconds, lifetimeSeconds))
if ok, _ := expectRouterEvent(); !ok {
- t.Errorf("expected router event for %s on NIC(%d)", llAddr1, nicID1)
+ t.Errorf("expected router event for %s on NIC(%d)", llAddr3, nicID1)
}
if ok, _ := expectPrefixEvent(); !ok {
t.Errorf("expected prefix event for %s on NIC(%d)", prefix1, nicID1)
@@ -2977,9 +2993,9 @@ func TestCleanupHostOnlyStateOnBecomingRouter(t *testing.T) {
t.Errorf("expected auto-gen addr event for %s on NIC(%d)", e1Addr1, nicID1)
}
- e1.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, lifetimeSeconds, prefix2, true, true, lifetimeSeconds, lifetimeSeconds))
+ e1.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr4, lifetimeSeconds, prefix2, true, true, lifetimeSeconds, lifetimeSeconds))
if ok, _ := expectRouterEvent(); !ok {
- t.Errorf("expected router event for %s on NIC(%d)", llAddr2, nicID1)
+ t.Errorf("expected router event for %s on NIC(%d)", llAddr4, nicID1)
}
if ok, _ := expectPrefixEvent(); !ok {
t.Errorf("expected prefix event for %s on NIC(%d)", prefix2, nicID1)
@@ -2988,9 +3004,9 @@ func TestCleanupHostOnlyStateOnBecomingRouter(t *testing.T) {
t.Errorf("expected auto-gen addr event for %s on NIC(%d)", e1Addr2, nicID1)
}
- e2.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr1, lifetimeSeconds, prefix1, true, true, lifetimeSeconds, lifetimeSeconds))
+ e2.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr3, lifetimeSeconds, prefix1, true, true, lifetimeSeconds, lifetimeSeconds))
if ok, _ := expectRouterEvent(); !ok {
- t.Errorf("expected router event for %s on NIC(%d)", llAddr1, nicID2)
+ t.Errorf("expected router event for %s on NIC(%d)", llAddr3, nicID2)
}
if ok, _ := expectPrefixEvent(); !ok {
t.Errorf("expected prefix event for %s on NIC(%d)", prefix1, nicID2)
@@ -2999,9 +3015,9 @@ func TestCleanupHostOnlyStateOnBecomingRouter(t *testing.T) {
t.Errorf("expected auto-gen addr event for %s on NIC(%d)", e1Addr2, nicID2)
}
- e2.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr2, lifetimeSeconds, prefix2, true, true, lifetimeSeconds, lifetimeSeconds))
+ e2.InjectInbound(header.IPv6ProtocolNumber, raBufWithPI(llAddr4, lifetimeSeconds, prefix2, true, true, lifetimeSeconds, lifetimeSeconds))
if ok, _ := expectRouterEvent(); !ok {
- t.Errorf("expected router event for %s on NIC(%d)", llAddr2, nicID2)
+ t.Errorf("expected router event for %s on NIC(%d)", llAddr4, nicID2)
}
if ok, _ := expectPrefixEvent(); !ok {
t.Errorf("expected prefix event for %s on NIC(%d)", prefix2, nicID2)
@@ -3014,12 +3030,18 @@ func TestCleanupHostOnlyStateOnBecomingRouter(t *testing.T) {
nicinfo := s.NICInfo()
nic1Addrs := nicinfo[nicID1].ProtocolAddresses
nic2Addrs := nicinfo[nicID2].ProtocolAddresses
+ if !containsV6Addr(nic1Addrs, llAddrWithPrefix1) {
+ t.Errorf("missing %s from the list of addresses for NIC(%d): %+v", llAddrWithPrefix1, nicID1, nic1Addrs)
+ }
if !containsV6Addr(nic1Addrs, e1Addr1) {
t.Errorf("missing %s from the list of addresses for NIC(%d): %+v", e1Addr1, nicID1, nic1Addrs)
}
if !containsV6Addr(nic1Addrs, e1Addr2) {
t.Errorf("missing %s from the list of addresses for NIC(%d): %+v", e1Addr2, nicID1, nic1Addrs)
}
+ if !containsV6Addr(nic2Addrs, llAddrWithPrefix2) {
+ t.Errorf("missing %s from the list of addresses for NIC(%d): %+v", llAddrWithPrefix2, nicID2, nic2Addrs)
+ }
if !containsV6Addr(nic2Addrs, e2Addr1) {
t.Errorf("missing %s from the list of addresses for NIC(%d): %+v", e2Addr1, nicID2, nic2Addrs)
}
@@ -3071,10 +3093,10 @@ func TestCleanupHostOnlyStateOnBecomingRouter(t *testing.T) {
}
expectedRouterEvents := map[ndpRouterEvent]int{
- {nicID: nicID1, addr: llAddr1, discovered: false}: 1,
- {nicID: nicID1, addr: llAddr2, discovered: false}: 1,
- {nicID: nicID2, addr: llAddr1, discovered: false}: 1,
- {nicID: nicID2, addr: llAddr2, discovered: false}: 1,
+ {nicID: nicID1, addr: llAddr3, discovered: false}: 1,
+ {nicID: nicID1, addr: llAddr4, discovered: false}: 1,
+ {nicID: nicID2, addr: llAddr3, discovered: false}: 1,
+ {nicID: nicID2, addr: llAddr4, discovered: false}: 1,
}
if diff := cmp.Diff(expectedRouterEvents, gotRouterEvents); diff != "" {
t.Errorf("router events mismatch (-want +got):\n%s", diff)
@@ -3102,12 +3124,18 @@ func TestCleanupHostOnlyStateOnBecomingRouter(t *testing.T) {
nicinfo = s.NICInfo()
nic1Addrs = nicinfo[nicID1].ProtocolAddresses
nic2Addrs = nicinfo[nicID2].ProtocolAddresses
+ if !containsV6Addr(nic1Addrs, llAddrWithPrefix1) {
+ t.Errorf("missing %s from the list of addresses for NIC(%d): %+v", llAddrWithPrefix1, nicID1, nic1Addrs)
+ }
if containsV6Addr(nic1Addrs, e1Addr1) {
t.Errorf("still have %s in the list of addresses for NIC(%d): %+v", e1Addr1, nicID1, nic1Addrs)
}
if containsV6Addr(nic1Addrs, e1Addr2) {
t.Errorf("still have %s in the list of addresses for NIC(%d): %+v", e1Addr2, nicID1, nic1Addrs)
}
+ if !containsV6Addr(nic2Addrs, llAddrWithPrefix2) {
+ t.Errorf("missing %s from the list of addresses for NIC(%d): %+v", llAddrWithPrefix2, nicID2, nic2Addrs)
+ }
if containsV6Addr(nic2Addrs, e2Addr1) {
t.Errorf("still have %s in the list of addresses for NIC(%d): %+v", e2Addr1, nicID2, nic2Addrs)
}
diff --git a/pkg/tcpip/stack/nic.go b/pkg/tcpip/stack/nic.go
index 682e9c416..78d451cca 100644
--- a/pkg/tcpip/stack/nic.go
+++ b/pkg/tcpip/stack/nic.go
@@ -206,33 +206,9 @@ func (n *NIC) enable() *tcpip.Error {
// Do not auto-generate an IPv6 link-local address for loopback devices.
if n.stack.autoGenIPv6LinkLocal && !n.isLoopback() {
- var addr tcpip.Address
- if oIID := n.stack.opaqueIIDOpts; oIID.NICNameFromID != nil {
- addr = header.LinkLocalAddrWithOpaqueIID(oIID.NICNameFromID(n.ID(), n.name), 0, oIID.SecretKey)
- } else {
- l2addr := n.linkEP.LinkAddress()
-
- // Only attempt to generate the link-local address if we have a valid MAC
- // address.
- //
- // TODO(b/141011931): Validate a LinkEndpoint's link address (provided by
- // LinkEndpoint.LinkAddress) before reaching this point.
- if !header.IsValidUnicastEthernetAddress(l2addr) {
- return nil
- }
-
- addr = header.LinkLocalAddr(l2addr)
- }
-
- if _, err := n.addAddressLocked(tcpip.ProtocolAddress{
- Protocol: header.IPv6ProtocolNumber,
- AddressWithPrefix: tcpip.AddressWithPrefix{
- Address: addr,
- PrefixLen: header.IPv6LinkLocalPrefix.PrefixLen,
- },
- }, CanBePrimaryEndpoint, permanent, static, false /* deprecated */); err != nil {
- return err
- }
+ // The valid and preferred lifetime is infinite for the auto-generated
+ // link-local address.
+ n.mu.ndp.doSLAAC(header.IPv6LinkLocalPrefix.Subnet(), header.NDPInfiniteLifetime, header.NDPInfiniteLifetime)
}
// If we are operating as a router, then do not solicit routers since we
diff --git a/pkg/tcpip/stack/stack_test.go b/pkg/tcpip/stack/stack_test.go
index 243868f3a..24133e6f2 100644
--- a/pkg/tcpip/stack/stack_test.go
+++ b/pkg/tcpip/stack/stack_test.go
@@ -1894,112 +1894,6 @@ func TestNICForwarding(t *testing.T) {
}
}
-// TestNICAutoGenAddr tests the auto-generation of IPv6 link-local addresses
-// using the modified EUI-64 of the NIC's MAC address (or lack there-of if
-// disabled (default)). Note, DAD will be disabled in these tests.
-func TestNICAutoGenAddr(t *testing.T) {
- tests := []struct {
- name string
- autoGen bool
- linkAddr tcpip.LinkAddress
- iidOpts stack.OpaqueInterfaceIdentifierOptions
- shouldGen bool
- }{
- {
- "Disabled",
- false,
- linkAddr1,
- stack.OpaqueInterfaceIdentifierOptions{
- NICNameFromID: func(nicID tcpip.NICID, _ string) string {
- return fmt.Sprintf("nic%d", nicID)
- },
- },
- false,
- },
- {
- "Enabled",
- true,
- linkAddr1,
- stack.OpaqueInterfaceIdentifierOptions{},
- true,
- },
- {
- "Nil MAC",
- true,
- tcpip.LinkAddress([]byte(nil)),
- stack.OpaqueInterfaceIdentifierOptions{},
- false,
- },
- {
- "Empty MAC",
- true,
- tcpip.LinkAddress(""),
- stack.OpaqueInterfaceIdentifierOptions{},
- false,
- },
- {
- "Invalid MAC",
- true,
- tcpip.LinkAddress("\x01\x02\x03"),
- stack.OpaqueInterfaceIdentifierOptions{},
- false,
- },
- {
- "Multicast MAC",
- true,
- tcpip.LinkAddress("\x01\x02\x03\x04\x05\x06"),
- stack.OpaqueInterfaceIdentifierOptions{},
- false,
- },
- {
- "Unspecified MAC",
- true,
- tcpip.LinkAddress("\x00\x00\x00\x00\x00\x00"),
- stack.OpaqueInterfaceIdentifierOptions{},
- false,
- },
- }
-
- for _, test := range tests {
- t.Run(test.name, func(t *testing.T) {
- opts := stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- OpaqueIIDOpts: test.iidOpts,
- }
-
- if test.autoGen {
- // Only set opts.AutoGenIPv6LinkLocal when test.autoGen is true because
- // opts.AutoGenIPv6LinkLocal should be false by default.
- opts.AutoGenIPv6LinkLocal = true
- }
-
- e := channel.New(10, 1280, test.linkAddr)
- s := stack.New(opts)
- if err := s.CreateNIC(1, e); err != nil {
- t.Fatalf("CreateNIC(_) = %s", err)
- }
-
- addr, err := s.GetMainNICAddress(1, header.IPv6ProtocolNumber)
- if err != nil {
- t.Fatalf("stack.GetMainNICAddress(_, _) err = %s", err)
- }
-
- if test.shouldGen {
- // Should have auto-generated an address and resolved immediately (DAD
- // is disabled).
- if want := (tcpip.AddressWithPrefix{Address: header.LinkLocalAddr(test.linkAddr), PrefixLen: header.IPv6LinkLocalPrefix.PrefixLen}); addr != want {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = %s, want = %s", addr, want)
- }
- } else {
- // Should not have auto-generated an address.
- if want := (tcpip.AddressWithPrefix{}); addr != want {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = (%s, nil), want = (%s, nil)", addr, want)
- }
- }
- })
- }
-}
-
// TestNICContextPreservation tests that you can read out via stack.NICInfo the
// Context data you pass via NICContext.Context in stack.CreateNICWithOptions.
func TestNICContextPreservation(t *testing.T) {
@@ -2040,11 +1934,9 @@ func TestNICContextPreservation(t *testing.T) {
}
}
-// TestNICAutoGenAddrWithOpaque tests the auto-generation of IPv6 link-local
-// addresses with opaque interface identifiers. Link Local addresses should
-// always be generated with opaque IIDs if configured to use them, even if the
-// NIC has an invalid MAC address.
-func TestNICAutoGenAddrWithOpaque(t *testing.T) {
+// TestNICAutoGenLinkLocalAddr tests the auto-generation of IPv6 link-local
+// addresses.
+func TestNICAutoGenLinkLocalAddr(t *testing.T) {
const nicID = 1
var secretKey [header.OpaqueIIDSecretKeyMinBytes]byte
@@ -2056,108 +1948,185 @@ func TestNICAutoGenAddrWithOpaque(t *testing.T) {
t.Fatalf("expected rand.Read to read %d bytes, read %d bytes", header.OpaqueIIDSecretKeyMinBytes, n)
}
+ nicNameFunc := func(_ tcpip.NICID, name string) string {
+ return name
+ }
+
tests := []struct {
- name string
- nicName string
- autoGen bool
- linkAddr tcpip.LinkAddress
- secretKey []byte
+ name string
+ nicName string
+ autoGen bool
+ linkAddr tcpip.LinkAddress
+ iidOpts stack.OpaqueInterfaceIdentifierOptions
+ shouldGen bool
+ expectedAddr tcpip.Address
}{
{
name: "Disabled",
nicName: "nic1",
autoGen: false,
linkAddr: linkAddr1,
- secretKey: secretKey[:],
+ shouldGen: false,
},
{
- name: "Enabled",
- nicName: "nic1",
- autoGen: true,
- linkAddr: linkAddr1,
- secretKey: secretKey[:],
+ name: "Disabled without OIID options",
+ nicName: "nic1",
+ autoGen: false,
+ linkAddr: linkAddr1,
+ iidOpts: stack.OpaqueInterfaceIdentifierOptions{
+ NICNameFromID: nicNameFunc,
+ SecretKey: secretKey[:],
+ },
+ shouldGen: false,
+ },
+
+ // Tests for EUI64 based addresses.
+ {
+ name: "EUI64 Enabled",
+ autoGen: true,
+ linkAddr: linkAddr1,
+ shouldGen: true,
+ expectedAddr: header.LinkLocalAddr(linkAddr1),
},
- // These are all cases where we would not have generated a
- // link-local address if opaque IIDs were disabled.
{
- name: "Nil MAC and empty nicName",
- nicName: "",
+ name: "EUI64 Empty MAC",
autoGen: true,
- linkAddr: tcpip.LinkAddress([]byte(nil)),
- secretKey: secretKey[:1],
+ shouldGen: false,
},
{
- name: "Empty MAC and empty nicName",
+ name: "EUI64 Invalid MAC",
autoGen: true,
- linkAddr: tcpip.LinkAddress(""),
- secretKey: secretKey[:2],
+ linkAddr: "\x01\x02\x03",
+ shouldGen: false,
},
{
- name: "Invalid MAC",
- nicName: "test",
+ name: "EUI64 Multicast MAC",
autoGen: true,
- linkAddr: tcpip.LinkAddress("\x01\x02\x03"),
- secretKey: secretKey[:3],
+ linkAddr: "\x01\x02\x03\x04\x05\x06",
+ shouldGen: false,
},
{
- name: "Multicast MAC",
- nicName: "test2",
+ name: "EUI64 Unspecified MAC",
autoGen: true,
- linkAddr: tcpip.LinkAddress("\x01\x02\x03\x04\x05\x06"),
- secretKey: secretKey[:4],
+ linkAddr: "\x00\x00\x00\x00\x00\x00",
+ shouldGen: false,
+ },
+
+ // Tests for Opaque IID based addresses.
+ {
+ name: "OIID Enabled",
+ nicName: "nic1",
+ autoGen: true,
+ linkAddr: linkAddr1,
+ iidOpts: stack.OpaqueInterfaceIdentifierOptions{
+ NICNameFromID: nicNameFunc,
+ SecretKey: secretKey[:],
+ },
+ shouldGen: true,
+ expectedAddr: header.LinkLocalAddrWithOpaqueIID("nic1", 0, secretKey[:]),
+ },
+ // These are all cases where we would not have generated a
+ // link-local address if opaque IIDs were disabled.
+ {
+ name: "OIID Empty MAC and empty nicName",
+ autoGen: true,
+ iidOpts: stack.OpaqueInterfaceIdentifierOptions{
+ NICNameFromID: nicNameFunc,
+ SecretKey: secretKey[:1],
+ },
+ shouldGen: true,
+ expectedAddr: header.LinkLocalAddrWithOpaqueIID("", 0, secretKey[:1]),
},
{
- name: "Unspecified MAC and nil SecretKey",
+ name: "OIID Invalid MAC",
+ nicName: "test",
+ autoGen: true,
+ linkAddr: "\x01\x02\x03",
+ iidOpts: stack.OpaqueInterfaceIdentifierOptions{
+ NICNameFromID: nicNameFunc,
+ SecretKey: secretKey[:2],
+ },
+ shouldGen: true,
+ expectedAddr: header.LinkLocalAddrWithOpaqueIID("test", 0, secretKey[:2]),
+ },
+ {
+ name: "OIID Multicast MAC",
+ nicName: "test2",
+ autoGen: true,
+ linkAddr: "\x01\x02\x03\x04\x05\x06",
+ iidOpts: stack.OpaqueInterfaceIdentifierOptions{
+ NICNameFromID: nicNameFunc,
+ SecretKey: secretKey[:3],
+ },
+ shouldGen: true,
+ expectedAddr: header.LinkLocalAddrWithOpaqueIID("test2", 0, secretKey[:3]),
+ },
+ {
+ name: "OIID Unspecified MAC and nil SecretKey",
nicName: "test3",
autoGen: true,
- linkAddr: tcpip.LinkAddress("\x00\x00\x00\x00\x00\x00"),
+ linkAddr: "\x00\x00\x00\x00\x00\x00",
+ iidOpts: stack.OpaqueInterfaceIdentifierOptions{
+ NICNameFromID: nicNameFunc,
+ },
+ shouldGen: true,
+ expectedAddr: header.LinkLocalAddrWithOpaqueIID("test3", 0, nil),
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
- opts := stack.Options{
- NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
- OpaqueIIDOpts: stack.OpaqueInterfaceIdentifierOptions{
- NICNameFromID: func(_ tcpip.NICID, nicName string) string {
- return nicName
- },
- SecretKey: test.secretKey,
- },
+ ndpDisp := ndpDispatcher{
+ autoGenAddrC: make(chan ndpAutoGenAddrEvent, 1),
}
-
- if test.autoGen {
- // Only set opts.AutoGenIPv6LinkLocal when
- // test.autoGen is true because
- // opts.AutoGenIPv6LinkLocal should be false by
- // default.
- opts.AutoGenIPv6LinkLocal = true
+ opts := stack.Options{
+ NetworkProtocols: []stack.NetworkProtocol{ipv6.NewProtocol()},
+ AutoGenIPv6LinkLocal: test.autoGen,
+ NDPDisp: &ndpDisp,
+ OpaqueIIDOpts: test.iidOpts,
}
- e := channel.New(10, 1280, test.linkAddr)
+ e := channel.New(0, 1280, test.linkAddr)
s := stack.New(opts)
nicOpts := stack.NICOptions{Name: test.nicName}
if err := s.CreateNICWithOptions(nicID, e, nicOpts); err != nil {
t.Fatalf("CreateNICWithOptions(%d, _, %+v) = %s", nicID, opts, err)
}
- addr, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber)
- if err != nil {
- t.Fatalf("stack.GetMainNICAddress(%d, _) err = %s", nicID, err)
- }
+ var expectedMainAddr tcpip.AddressWithPrefix
+
+ if test.shouldGen {
+ expectedMainAddr = tcpip.AddressWithPrefix{
+ Address: test.expectedAddr,
+ PrefixLen: header.IPv6LinkLocalPrefix.PrefixLen,
+ }
- if test.autoGen {
- // Should have auto-generated an address and
- // resolved immediately (DAD is disabled).
- if want := (tcpip.AddressWithPrefix{Address: header.LinkLocalAddrWithOpaqueIID(test.nicName, 0, test.secretKey), PrefixLen: header.IPv6LinkLocalPrefix.PrefixLen}); addr != want {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = %s, want = %s", addr, want)
+ // Should have auto-generated an address and resolved immediately (DAD
+ // is disabled).
+ select {
+ case e := <-ndpDisp.autoGenAddrC:
+ if diff := checkAutoGenAddrEvent(e, expectedMainAddr, newAddr); diff != "" {
+ t.Errorf("auto-gen addr event mismatch (-want +got):\n%s", diff)
+ }
+ default:
+ t.Fatal("expected addr auto gen event")
}
} else {
// Should not have auto-generated an address.
- if want := (tcpip.AddressWithPrefix{}); addr != want {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = (%s, nil), want = (%s, nil)", addr, want)
+ select {
+ case <-ndpDisp.autoGenAddrC:
+ t.Fatal("unexpectedly auto-generated an address")
+ default:
}
}
+
+ gotMainAddr, err := s.GetMainNICAddress(1, header.IPv6ProtocolNumber)
+ if err != nil {
+ t.Fatalf("stack.GetMainNICAddress(_, _) err = %s", err)
+ }
+ if gotMainAddr != expectedMainAddr {
+ t.Fatalf("got stack.GetMainNICAddress(_, _) = %s, want = %s", gotMainAddr, expectedMainAddr)
+ }
})
}
}
@@ -2215,6 +2184,8 @@ func TestNoLinkLocalAutoGenForLoopbackNIC(t *testing.T) {
// TestNICAutoGenAddrDoesDAD tests that the successful auto-generation of IPv6
// link-local addresses will only be assigned after the DAD process resolves.
func TestNICAutoGenAddrDoesDAD(t *testing.T) {
+ const nicID = 1
+
ndpDisp := ndpDispatcher{
dadC: make(chan ndpDADEvent),
}
@@ -2226,20 +2197,20 @@ func TestNICAutoGenAddrDoesDAD(t *testing.T) {
NDPDisp: &ndpDisp,
}
- e := channel.New(10, 1280, linkAddr1)
+ e := channel.New(int(ndpConfigs.DupAddrDetectTransmits), 1280, linkAddr1)
s := stack.New(opts)
- if err := s.CreateNIC(1, e); err != nil {
- t.Fatalf("CreateNIC(_) = %s", err)
+ if err := s.CreateNIC(nicID, e); err != nil {
+ t.Fatalf("CreateNIC(%d, _) = %s", nicID, err)
}
// Address should not be considered bound to the
// NIC yet (DAD ongoing).
- addr, err := s.GetMainNICAddress(1, header.IPv6ProtocolNumber)
+ addr, err := s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber)
if err != nil {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = (_, %v), want = (_, nil)", err)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (_, %v), want = (_, nil)", nicID, header.IPv6ProtocolNumber, err)
}
if want := (tcpip.AddressWithPrefix{}); addr != want {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = (%s, nil), want = (%s, nil)", addr, want)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (%s, nil), want = (%s, nil)", nicID, header.IPv6ProtocolNumber, addr, want)
}
linkLocalAddr := header.LinkLocalAddr(linkAddr1)
@@ -2253,25 +2224,16 @@ func TestNICAutoGenAddrDoesDAD(t *testing.T) {
// means something is wrong.
t.Fatal("timed out waiting for DAD resolution")
case e := <-ndpDisp.dadC:
- if e.err != nil {
- t.Fatal("got DAD error: ", e.err)
- }
- if e.nicID != 1 {
- t.Fatalf("got DAD event w/ nicID = %d, want = 1", e.nicID)
- }
- if e.addr != linkLocalAddr {
- t.Fatalf("got DAD event w/ addr = %s, want = %s", addr, linkLocalAddr)
- }
- if !e.resolved {
- t.Fatal("got DAD event w/ resolved = false, want = true")
+ if diff := checkDADEvent(e, nicID, linkLocalAddr, true, nil); diff != "" {
+ t.Errorf("dad event mismatch (-want +got):\n%s", diff)
}
}
- addr, err = s.GetMainNICAddress(1, header.IPv6ProtocolNumber)
+ addr, err = s.GetMainNICAddress(nicID, header.IPv6ProtocolNumber)
if err != nil {
- t.Fatalf("stack.GetMainNICAddress(_, _) err = %s", err)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (_, %v), want = (_, nil)", nicID, header.IPv6ProtocolNumber, err)
}
if want := (tcpip.AddressWithPrefix{Address: linkLocalAddr, PrefixLen: header.IPv6LinkLocalPrefix.PrefixLen}); addr != want {
- t.Fatalf("got stack.GetMainNICAddress(_, _) = %s, want = %s", addr, want)
+ t.Fatalf("got stack.GetMainNICAddress(%d, %d) = (%s, nil), want = (%s, nil)", nicID, header.IPv6ProtocolNumber, addr, want)
}
}
@@ -2637,17 +2599,8 @@ func TestDoDADWhenNICEnabled(t *testing.T) {
case <-time.After(dadTransmits*retransmitTimer + defaultAsyncEventTimeout):
t.Fatal("timed out waiting for DAD resolution")
case e := <-ndpDisp.dadC:
- if e.err != nil {
- t.Fatal("got DAD error: ", e.err)
- }
- if e.nicID != nicID {
- t.Fatalf("got DAD event w/ nicID = %d, want = %d", e.nicID, nicID)
- }
- if e.addr != addr.AddressWithPrefix.Address {
- t.Fatalf("got DAD event w/ addr = %s, want = %s", e.addr, addr.AddressWithPrefix.Address)
- }
- if !e.resolved {
- t.Fatal("got DAD event w/ resolved = false, want = true")
+ if diff := checkDADEvent(e, nicID, addr.AddressWithPrefix.Address, true, nil); diff != "" {
+ t.Errorf("dad event mismatch (-want +got):\n%s", diff)
}
}
if addrs := s.AllAddresses()[nicID]; !containsV6Addr(addrs, addr.AddressWithPrefix) {