From e09e7bf72f3e0208c7f557d9931407ee8729ebb2 Mon Sep 17 00:00:00 2001 From: Brad Burlage Date: Mon, 11 Nov 2019 14:41:44 -0800 Subject: Add more extended features. PiperOrigin-RevId: 279820435 --- pkg/cpuid/cpuid.go | 48 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) (limited to 'pkg/cpuid/cpuid.go') diff --git a/pkg/cpuid/cpuid.go b/pkg/cpuid/cpuid.go index 5d61dc2ff..d37047368 100644 --- a/pkg/cpuid/cpuid.go +++ b/pkg/cpuid/cpuid.go @@ -183,6 +183,33 @@ const ( X86FeatureAVX512VBMI X86FeatureUMIP X86FeaturePKU + X86FeatureOSPKE + X86FeatureWAITPKG + X86FeatureAVX512_VBMI2 + _ // ecx bit 7 is reserved + X86FeatureGFNI + X86FeatureVAES + X86FeatureVPCLMULQDQ + X86FeatureAVX512_VNNI + X86FeatureAVX512_BITALG + X86FeatureTME + X86FeatureAVX512_VPOPCNTDQ + _ // ecx bit 15 is reserved + X86FeatureLA57 + // ecx bits 17-21 are reserved + _ + _ + _ + _ + _ + X86FeatureRDPID + // ecx bits 23-24 are reserved + _ + _ + X86FeatureCLDEMOTE + _ // ecx bit 26 is reserved + X86FeatureMOVDIRI + X86FeatureMOVDIR64B ) // Block 4 constants are for xsave capabilities in CPUID.(EAX=0DH,ECX=01H):EAX. @@ -353,9 +380,24 @@ var x86FeatureStrings = map[Feature]string{ X86FeatureAVX512VL: "avx512vl", // Block 3. - X86FeatureAVX512VBMI: "avx512vbmi", - X86FeatureUMIP: "umip", - X86FeaturePKU: "pku", + X86FeatureAVX512VBMI: "avx512vbmi", + X86FeatureUMIP: "umip", + X86FeaturePKU: "pku", + X86FeatureOSPKE: "ospke", + X86FeatureWAITPKG: "waitpkg", + X86FeatureAVX512_VBMI2: "avx512_vbmi2", + X86FeatureGFNI: "gfni", + X86FeatureVAES: "vaes", + X86FeatureVPCLMULQDQ: "vpclmulqdq", + X86FeatureAVX512_VNNI: "avx512_vnni", + X86FeatureAVX512_BITALG: "avx512_bitalg", + X86FeatureTME: "tme", + X86FeatureAVX512_VPOPCNTDQ: "avx512_vpopcntdq", + X86FeatureLA57: "la57", + X86FeatureRDPID: "rdpid", + X86FeatureCLDEMOTE: "cldemote", + X86FeatureMOVDIRI: "movdiri", + X86FeatureMOVDIR64B: "movdir64b", // Block 4. X86FeatureXSAVEOPT: "xsaveopt", -- cgit v1.2.3 From 7b7c31820b83abcfe43f7170eff1f7953f3f27e2 Mon Sep 17 00:00:00 2001 From: Fabricio Voznika Date: Thu, 16 Jan 2020 12:28:44 -0800 Subject: Add remaining /proc/* and /proc/sys/* files Except for one under /proc/sys/net/ipv4/tcp_sack. /proc/pid/* is still incomplete. Updates #1195 PiperOrigin-RevId: 290120438 --- pkg/cpuid/cpuid.go | 44 ++-- pkg/sentry/fs/proc/cpuinfo.go | 8 +- pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 40 ++++ pkg/sentry/fsimpl/proc/BUILD | 11 +- pkg/sentry/fsimpl/proc/filesystem.go | 11 + pkg/sentry/fsimpl/proc/loadavg.go | 42 ---- pkg/sentry/fsimpl/proc/meminfo.go | 79 ------- pkg/sentry/fsimpl/proc/net.go | 338 ---------------------------- pkg/sentry/fsimpl/proc/net_test.go | 78 ------- pkg/sentry/fsimpl/proc/stat.go | 129 ----------- pkg/sentry/fsimpl/proc/sys.go | 51 ----- pkg/sentry/fsimpl/proc/task.go | 12 +- pkg/sentry/fsimpl/proc/tasks.go | 41 +++- pkg/sentry/fsimpl/proc/tasks_files.go | 245 ++++++++++++++++++++ pkg/sentry/fsimpl/proc/tasks_net.go | 337 +++++++++++++++++++++++++++ pkg/sentry/fsimpl/proc/tasks_sys.go | 143 ++++++++++++ pkg/sentry/fsimpl/proc/tasks_sys_test.go | 78 +++++++ pkg/sentry/fsimpl/proc/tasks_test.go | 3 + pkg/sentry/fsimpl/proc/version.go | 70 ------ 19 files changed, 922 insertions(+), 838 deletions(-) delete mode 100644 pkg/sentry/fsimpl/proc/loadavg.go delete mode 100644 pkg/sentry/fsimpl/proc/meminfo.go delete mode 100644 pkg/sentry/fsimpl/proc/net.go delete mode 100644 pkg/sentry/fsimpl/proc/net_test.go delete mode 100644 pkg/sentry/fsimpl/proc/stat.go delete mode 100644 pkg/sentry/fsimpl/proc/sys.go create mode 100644 pkg/sentry/fsimpl/proc/tasks_net.go create mode 100644 pkg/sentry/fsimpl/proc/tasks_sys.go create mode 100644 pkg/sentry/fsimpl/proc/tasks_sys_test.go delete mode 100644 pkg/sentry/fsimpl/proc/version.go (limited to 'pkg/cpuid/cpuid.go') diff --git a/pkg/cpuid/cpuid.go b/pkg/cpuid/cpuid.go index d37047368..cf50ee53f 100644 --- a/pkg/cpuid/cpuid.go +++ b/pkg/cpuid/cpuid.go @@ -657,30 +657,28 @@ func (fs *FeatureSet) FlagsString(cpuinfoOnly bool) string { return strings.Join(s, " ") } -// CPUInfo is to generate a section of one cpu in /proc/cpuinfo. This is a -// minimal /proc/cpuinfo, it is missing some fields like "microcode" that are +// WriteCPUInfoTo is to generate a section of one cpu in /proc/cpuinfo. This is +// a minimal /proc/cpuinfo, it is missing some fields like "microcode" that are // not always printed in Linux. The bogomips field is simply made up. -func (fs FeatureSet) CPUInfo(cpu uint) string { - var b bytes.Buffer - fmt.Fprintf(&b, "processor\t: %d\n", cpu) - fmt.Fprintf(&b, "vendor_id\t: %s\n", fs.VendorID) - fmt.Fprintf(&b, "cpu family\t: %d\n", ((fs.ExtendedFamily<<4)&0xff)|fs.Family) - fmt.Fprintf(&b, "model\t\t: %d\n", ((fs.ExtendedModel<<4)&0xff)|fs.Model) - fmt.Fprintf(&b, "model name\t: %s\n", "unknown") // Unknown for now. - fmt.Fprintf(&b, "stepping\t: %s\n", "unknown") // Unknown for now. - fmt.Fprintf(&b, "cpu MHz\t\t: %.3f\n", cpuFreqMHz) - fmt.Fprintln(&b, "fpu\t\t: yes") - fmt.Fprintln(&b, "fpu_exception\t: yes") - fmt.Fprintf(&b, "cpuid level\t: %d\n", uint32(xSaveInfo)) // Same as ax in vendorID. - fmt.Fprintln(&b, "wp\t\t: yes") - fmt.Fprintf(&b, "flags\t\t: %s\n", fs.FlagsString(true)) - fmt.Fprintf(&b, "bogomips\t: %.02f\n", cpuFreqMHz) // It's bogus anyway. - fmt.Fprintf(&b, "clflush size\t: %d\n", fs.CacheLine) - fmt.Fprintf(&b, "cache_alignment\t: %d\n", fs.CacheLine) - fmt.Fprintf(&b, "address sizes\t: %d bits physical, %d bits virtual\n", 46, 48) - fmt.Fprintln(&b, "power management:") // This is always here, but can be blank. - fmt.Fprintln(&b, "") // The /proc/cpuinfo file ends with an extra newline. - return b.String() +func (fs FeatureSet) WriteCPUInfoTo(cpu uint, b *bytes.Buffer) { + fmt.Fprintf(b, "processor\t: %d\n", cpu) + fmt.Fprintf(b, "vendor_id\t: %s\n", fs.VendorID) + fmt.Fprintf(b, "cpu family\t: %d\n", ((fs.ExtendedFamily<<4)&0xff)|fs.Family) + fmt.Fprintf(b, "model\t\t: %d\n", ((fs.ExtendedModel<<4)&0xff)|fs.Model) + fmt.Fprintf(b, "model name\t: %s\n", "unknown") // Unknown for now. + fmt.Fprintf(b, "stepping\t: %s\n", "unknown") // Unknown for now. + fmt.Fprintf(b, "cpu MHz\t\t: %.3f\n", cpuFreqMHz) + fmt.Fprintln(b, "fpu\t\t: yes") + fmt.Fprintln(b, "fpu_exception\t: yes") + fmt.Fprintf(b, "cpuid level\t: %d\n", uint32(xSaveInfo)) // Same as ax in vendorID. + fmt.Fprintln(b, "wp\t\t: yes") + fmt.Fprintf(b, "flags\t\t: %s\n", fs.FlagsString(true)) + fmt.Fprintf(b, "bogomips\t: %.02f\n", cpuFreqMHz) // It's bogus anyway. + fmt.Fprintf(b, "clflush size\t: %d\n", fs.CacheLine) + fmt.Fprintf(b, "cache_alignment\t: %d\n", fs.CacheLine) + fmt.Fprintf(b, "address sizes\t: %d bits physical, %d bits virtual\n", 46, 48) + fmt.Fprintln(b, "power management:") // This is always here, but can be blank. + fmt.Fprintln(b, "") // The /proc/cpuinfo file ends with an extra newline. } const ( diff --git a/pkg/sentry/fs/proc/cpuinfo.go b/pkg/sentry/fs/proc/cpuinfo.go index 3edf36780..6330337eb 100644 --- a/pkg/sentry/fs/proc/cpuinfo.go +++ b/pkg/sentry/fs/proc/cpuinfo.go @@ -15,6 +15,8 @@ package proc import ( + "bytes" + "gvisor.dev/gvisor/pkg/sentry/context" "gvisor.dev/gvisor/pkg/sentry/fs" "gvisor.dev/gvisor/pkg/sentry/kernel" @@ -27,9 +29,9 @@ func newCPUInfo(ctx context.Context, msrc *fs.MountSource) *fs.Inode { // Kernel is always initialized with a FeatureSet. panic("cpuinfo read with nil FeatureSet") } - contents := make([]byte, 0, 1024) + var buf bytes.Buffer for i, max := uint(0), k.ApplicationCores(); i < max; i++ { - contents = append(contents, []byte(features.CPUInfo(i))...) + features.WriteCPUInfoTo(i, &buf) } - return newStaticProcInode(ctx, msrc, contents) + return newStaticProcInode(ctx, msrc, buf.Bytes()) } diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 1d469a0db..6aff3d39a 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -510,3 +510,43 @@ type InodeSymlink struct { func (InodeSymlink) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { return nil, syserror.ELOOP } + +// StaticDirectory is a standard implementation of a directory with static +// contents. +// +// +stateify savable +type StaticDirectory struct { + InodeNotSymlink + InodeDirectoryNoNewChildren + InodeAttrs + InodeNoDynamicLookup + OrderedChildren +} + +var _ Inode = (*StaticDirectory)(nil) + +// NewStaticDir creates a new static directory and returns its dentry. +func NewStaticDir(creds *auth.Credentials, ino uint64, perm linux.FileMode, children map[string]*Dentry) *Dentry { + if perm&^linux.PermissionsMask != 0 { + panic(fmt.Sprintf("Only permission mask must be set: %x", perm&linux.PermissionsMask)) + } + + inode := &StaticDirectory{} + inode.InodeAttrs.Init(creds, ino, linux.ModeDirectory|perm) + + dentry := &Dentry{} + dentry.Init(inode) + + inode.OrderedChildren.Init(OrderedChildrenOptions{}) + links := inode.OrderedChildren.Populate(dentry, children) + inode.IncLinks(links) + + return dentry +} + +// Open implements kernfs.Inode. +func (s *StaticDirectory) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, flags uint32) (*vfs.FileDescription, error) { + fd := &GenericDirectoryFD{} + fd.Init(rp.Mount(), vfsd, &s.OrderedChildren, flags) + return fd.VFSFileDescription(), nil +} diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD index 1f44b3217..6cd18cec8 100644 --- a/pkg/sentry/fsimpl/proc/BUILD +++ b/pkg/sentry/fsimpl/proc/BUILD @@ -7,17 +7,13 @@ go_library( name = "proc", srcs = [ "filesystem.go", - "loadavg.go", - "meminfo.go", "mounts.go", - "net.go", - "stat.go", - "sys.go", "task.go", "task_files.go", "tasks.go", "tasks_files.go", - "version.go", + "tasks_net.go", + "tasks_sys.go", ], importpath = "gvisor.dev/gvisor/pkg/sentry/fsimpl/proc", deps = [ @@ -30,6 +26,7 @@ go_library( "//pkg/sentry/inet", "//pkg/sentry/kernel", "//pkg/sentry/kernel/auth", + "//pkg/sentry/kernel/time", "//pkg/sentry/limits", "//pkg/sentry/mm", "//pkg/sentry/socket", @@ -47,7 +44,7 @@ go_test( size = "small", srcs = [ "boot_test.go", - "net_test.go", + "tasks_sys_test.go", "tasks_test.go", ], embed = [":proc"], diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go index d09182c77..e9cb7895f 100644 --- a/pkg/sentry/fsimpl/proc/filesystem.go +++ b/pkg/sentry/fsimpl/proc/filesystem.go @@ -67,3 +67,14 @@ func newDentry(creds *auth.Credentials, ino uint64, perm linux.FileMode, inode d d.Init(inode) return d } + +type staticFile struct { + kernfs.DynamicBytesFile + vfs.StaticData +} + +var _ dynamicInode = (*staticFile)(nil) + +func newStaticFile(data string) *staticFile { + return &staticFile{StaticData: vfs.StaticData{Data: data}} +} diff --git a/pkg/sentry/fsimpl/proc/loadavg.go b/pkg/sentry/fsimpl/proc/loadavg.go deleted file mode 100644 index 5351d86e8..000000000 --- a/pkg/sentry/fsimpl/proc/loadavg.go +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" -) - -// loadavgData backs /proc/loadavg. -// -// +stateify savable -type loadavgData struct { - kernfs.DynamicBytesFile -} - -var _ dynamicInode = (*loadavgData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *loadavgData) Generate(ctx context.Context, buf *bytes.Buffer) error { - // TODO(b/62345059): Include real data in fields. - // Column 1-3: CPU and IO utilization of the last 1, 5, and 10 minute periods. - // Column 4-5: currently running processes and the total number of processes. - // Column 6: the last process ID used. - fmt.Fprintf(buf, "%.2f %.2f %.2f %d/%d %d\n", 0.00, 0.00, 0.00, 0, 0, 0) - return nil -} diff --git a/pkg/sentry/fsimpl/proc/meminfo.go b/pkg/sentry/fsimpl/proc/meminfo.go deleted file mode 100644 index cbdd4f3fc..000000000 --- a/pkg/sentry/fsimpl/proc/meminfo.go +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/usermem" -) - -// meminfoData implements vfs.DynamicBytesSource for /proc/meminfo. -// -// +stateify savable -type meminfoData struct { - kernfs.DynamicBytesFile - - // k is the owning Kernel. - k *kernel.Kernel -} - -var _ dynamicInode = (*meminfoData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { - mf := d.k.MemoryFile() - mf.UpdateUsage() - snapshot, totalUsage := usage.MemoryAccounting.Copy() - totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage) - anon := snapshot.Anonymous + snapshot.Tmpfs - file := snapshot.PageCache + snapshot.Mapped - // We don't actually have active/inactive LRUs, so just make up numbers. - activeFile := (file / 2) &^ (usermem.PageSize - 1) - inactiveFile := file - activeFile - - fmt.Fprintf(buf, "MemTotal: %8d kB\n", totalSize/1024) - memFree := (totalSize - totalUsage) / 1024 - // We use MemFree as MemAvailable because we don't swap. - // TODO(rahat): When reclaim is implemented the value of MemAvailable - // should change. - fmt.Fprintf(buf, "MemFree: %8d kB\n", memFree) - fmt.Fprintf(buf, "MemAvailable: %8d kB\n", memFree) - fmt.Fprintf(buf, "Buffers: 0 kB\n") // memory usage by block devices - fmt.Fprintf(buf, "Cached: %8d kB\n", (file+snapshot.Tmpfs)/1024) - // Emulate a system with no swap, which disables inactivation of anon pages. - fmt.Fprintf(buf, "SwapCache: 0 kB\n") - fmt.Fprintf(buf, "Active: %8d kB\n", (anon+activeFile)/1024) - fmt.Fprintf(buf, "Inactive: %8d kB\n", inactiveFile/1024) - fmt.Fprintf(buf, "Active(anon): %8d kB\n", anon/1024) - fmt.Fprintf(buf, "Inactive(anon): 0 kB\n") - fmt.Fprintf(buf, "Active(file): %8d kB\n", activeFile/1024) - fmt.Fprintf(buf, "Inactive(file): %8d kB\n", inactiveFile/1024) - fmt.Fprintf(buf, "Unevictable: 0 kB\n") // TODO(b/31823263) - fmt.Fprintf(buf, "Mlocked: 0 kB\n") // TODO(b/31823263) - fmt.Fprintf(buf, "SwapTotal: 0 kB\n") - fmt.Fprintf(buf, "SwapFree: 0 kB\n") - fmt.Fprintf(buf, "Dirty: 0 kB\n") - fmt.Fprintf(buf, "Writeback: 0 kB\n") - fmt.Fprintf(buf, "AnonPages: %8d kB\n", anon/1024) - fmt.Fprintf(buf, "Mapped: %8d kB\n", file/1024) // doesn't count mapped tmpfs, which we don't know - fmt.Fprintf(buf, "Shmem: %8d kB\n", snapshot.Tmpfs/1024) - return nil -} diff --git a/pkg/sentry/fsimpl/proc/net.go b/pkg/sentry/fsimpl/proc/net.go deleted file mode 100644 index fd46eebf8..000000000 --- a/pkg/sentry/fsimpl/proc/net.go +++ /dev/null @@ -1,338 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/binary" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fs" - "gvisor.dev/gvisor/pkg/sentry/inet" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/socket" - "gvisor.dev/gvisor/pkg/sentry/socket/unix" - "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -// ifinet6 implements vfs.DynamicBytesSource for /proc/net/if_inet6. -// -// +stateify savable -type ifinet6 struct { - s inet.Stack -} - -var _ vfs.DynamicBytesSource = (*ifinet6)(nil) - -func (n *ifinet6) contents() []string { - var lines []string - nics := n.s.Interfaces() - for id, naddrs := range n.s.InterfaceAddrs() { - nic, ok := nics[id] - if !ok { - // NIC was added after NICNames was called. We'll just - // ignore it. - continue - } - - for _, a := range naddrs { - // IPv6 only. - if a.Family != linux.AF_INET6 { - continue - } - - // Fields: - // IPv6 address displayed in 32 hexadecimal chars without colons - // Netlink device number (interface index) in hexadecimal (use nic id) - // Prefix length in hexadecimal - // Scope value (use 0) - // Interface flags - // Device name - lines = append(lines, fmt.Sprintf("%032x %02x %02x %02x %02x %8s\n", a.Addr, id, a.PrefixLen, 0, a.Flags, nic.Name)) - } - } - return lines -} - -// Generate implements vfs.DynamicBytesSource.Generate. -func (n *ifinet6) Generate(ctx context.Context, buf *bytes.Buffer) error { - for _, l := range n.contents() { - buf.WriteString(l) - } - return nil -} - -// netDev implements vfs.DynamicBytesSource for /proc/net/dev. -// -// +stateify savable -type netDev struct { - s inet.Stack -} - -var _ vfs.DynamicBytesSource = (*netDev)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (n *netDev) Generate(ctx context.Context, buf *bytes.Buffer) error { - interfaces := n.s.Interfaces() - buf.WriteString("Inter-| Receive | Transmit\n") - buf.WriteString(" face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n") - - for _, i := range interfaces { - // Implements the same format as - // net/core/net-procfs.c:dev_seq_printf_stats. - var stats inet.StatDev - if err := n.s.Statistics(&stats, i.Name); err != nil { - log.Warningf("Failed to retrieve interface statistics for %v: %v", i.Name, err) - continue - } - fmt.Fprintf( - buf, - "%6s: %7d %7d %4d %4d %4d %5d %10d %9d %8d %7d %4d %4d %4d %5d %7d %10d\n", - i.Name, - // Received - stats[0], // bytes - stats[1], // packets - stats[2], // errors - stats[3], // dropped - stats[4], // fifo - stats[5], // frame - stats[6], // compressed - stats[7], // multicast - // Transmitted - stats[8], // bytes - stats[9], // packets - stats[10], // errors - stats[11], // dropped - stats[12], // fifo - stats[13], // frame - stats[14], // compressed - stats[15], // multicast - ) - } - - return nil -} - -// netUnix implements vfs.DynamicBytesSource for /proc/net/unix. -// -// +stateify savable -type netUnix struct { - k *kernel.Kernel -} - -var _ vfs.DynamicBytesSource = (*netUnix)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (n *netUnix) Generate(ctx context.Context, buf *bytes.Buffer) error { - buf.WriteString("Num RefCount Protocol Flags Type St Inode Path\n") - for _, se := range n.k.ListSockets() { - s := se.Sock.Get() - if s == nil { - log.Debugf("Couldn't resolve weakref %v in socket table, racing with destruction?", se.Sock) - continue - } - sfile := s.(*fs.File) - if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX { - s.DecRef() - // Not a unix socket. - continue - } - sops := sfile.FileOperations.(*unix.SocketOperations) - - addr, err := sops.Endpoint().GetLocalAddress() - if err != nil { - log.Warningf("Failed to retrieve socket name from %+v: %v", sfile, err) - addr.Addr = "" - } - - sockFlags := 0 - if ce, ok := sops.Endpoint().(transport.ConnectingEndpoint); ok { - if ce.Listening() { - // For unix domain sockets, linux reports a single flag - // value if the socket is listening, of __SO_ACCEPTCON. - sockFlags = linux.SO_ACCEPTCON - } - } - - // In the socket entry below, the value for the 'Num' field requires - // some consideration. Linux prints the address to the struct - // unix_sock representing a socket in the kernel, but may redact the - // value for unprivileged users depending on the kptr_restrict - // sysctl. - // - // One use for this field is to allow a privileged user to - // introspect into the kernel memory to determine information about - // a socket not available through procfs, such as the socket's peer. - // - // In gvisor, returning a pointer to our internal structures would - // be pointless, as it wouldn't match the memory layout for struct - // unix_sock, making introspection difficult. We could populate a - // struct unix_sock with the appropriate data, but even that - // requires consideration for which kernel version to emulate, as - // the definition of this struct changes over time. - // - // For now, we always redact this pointer. - fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %5d", - (*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct. - sfile.ReadRefs()-1, // RefCount, don't count our own ref. - 0, // Protocol, always 0 for UDS. - sockFlags, // Flags. - sops.Endpoint().Type(), // Type. - sops.State(), // State. - sfile.InodeID(), // Inode. - ) - - // Path - if len(addr.Addr) != 0 { - if addr.Addr[0] == 0 { - // Abstract path. - fmt.Fprintf(buf, " @%s", string(addr.Addr[1:])) - } else { - fmt.Fprintf(buf, " %s", string(addr.Addr)) - } - } - fmt.Fprintf(buf, "\n") - - s.DecRef() - } - return nil -} - -// netTCP implements vfs.DynamicBytesSource for /proc/net/tcp. -// -// +stateify savable -type netTCP struct { - k *kernel.Kernel -} - -var _ vfs.DynamicBytesSource = (*netTCP)(nil) - -func (n *netTCP) Generate(ctx context.Context, buf *bytes.Buffer) error { - t := kernel.TaskFromContext(ctx) - buf.WriteString(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n") - for _, se := range n.k.ListSockets() { - s := se.Sock.Get() - if s == nil { - log.Debugf("Couldn't resolve weakref %+v in socket table, racing with destruction?", se.Sock) - continue - } - sfile := s.(*fs.File) - sops, ok := sfile.FileOperations.(socket.Socket) - if !ok { - panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile)) - } - if family, stype, _ := sops.Type(); !(family == linux.AF_INET && stype == linux.SOCK_STREAM) { - s.DecRef() - // Not tcp4 sockets. - continue - } - - // Linux's documentation for the fields below can be found at - // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt. - // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock(). - // Note that the header doesn't contain labels for all the fields. - - // Field: sl; entry number. - fmt.Fprintf(buf, "%4d: ", se.ID) - - portBuf := make([]byte, 2) - - // Field: local_adddress. - var localAddr linux.SockAddrInet - if local, _, err := sops.GetSockName(t); err == nil { - localAddr = *local.(*linux.SockAddrInet) - } - binary.LittleEndian.PutUint16(portBuf, localAddr.Port) - fmt.Fprintf(buf, "%08X:%04X ", - binary.LittleEndian.Uint32(localAddr.Addr[:]), - portBuf) - - // Field: rem_address. - var remoteAddr linux.SockAddrInet - if remote, _, err := sops.GetPeerName(t); err == nil { - remoteAddr = *remote.(*linux.SockAddrInet) - } - binary.LittleEndian.PutUint16(portBuf, remoteAddr.Port) - fmt.Fprintf(buf, "%08X:%04X ", - binary.LittleEndian.Uint32(remoteAddr.Addr[:]), - portBuf) - - // Field: state; socket state. - fmt.Fprintf(buf, "%02X ", sops.State()) - - // Field: tx_queue, rx_queue; number of packets in the transmit and - // receive queue. Unimplemented. - fmt.Fprintf(buf, "%08X:%08X ", 0, 0) - - // Field: tr, tm->when; timer active state and number of jiffies - // until timer expires. Unimplemented. - fmt.Fprintf(buf, "%02X:%08X ", 0, 0) - - // Field: retrnsmt; number of unrecovered RTO timeouts. - // Unimplemented. - fmt.Fprintf(buf, "%08X ", 0) - - // Field: uid. - uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx) - if err != nil { - log.Warningf("Failed to retrieve unstable attr for socket file: %v", err) - fmt.Fprintf(buf, "%5d ", 0) - } else { - fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow())) - } - - // Field: timeout; number of unanswered 0-window probes. - // Unimplemented. - fmt.Fprintf(buf, "%8d ", 0) - - // Field: inode. - fmt.Fprintf(buf, "%8d ", sfile.InodeID()) - - // Field: refcount. Don't count the ref we obtain while deferencing - // the weakref to this socket. - fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1) - - // Field: Socket struct address. Redacted due to the same reason as - // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. - fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil)) - - // Field: retransmit timeout. Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: predicted tick of soft clock (delayed ACK control data). - // Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: (ack.quick<<1)|ack.pingpong, Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: sending congestion window, Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: Slow start size threshold, -1 if threshold >= 0xFFFF. - // Unimplemented, report as large threshold. - fmt.Fprintf(buf, "%d", -1) - - fmt.Fprintf(buf, "\n") - - s.DecRef() - } - - return nil -} diff --git a/pkg/sentry/fsimpl/proc/net_test.go b/pkg/sentry/fsimpl/proc/net_test.go deleted file mode 100644 index 20a77a8ca..000000000 --- a/pkg/sentry/fsimpl/proc/net_test.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "reflect" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context/contexttest" - "gvisor.dev/gvisor/pkg/sentry/inet" -) - -func newIPv6TestStack() *inet.TestStack { - s := inet.NewTestStack() - s.SupportsIPv6Flag = true - return s -} - -func TestIfinet6NoAddresses(t *testing.T) { - n := &ifinet6{s: newIPv6TestStack()} - var buf bytes.Buffer - n.Generate(contexttest.Context(t), &buf) - if buf.Len() > 0 { - t.Errorf("n.Generate() generated = %v, want = %v", buf.Bytes(), []byte{}) - } -} - -func TestIfinet6(t *testing.T) { - s := newIPv6TestStack() - s.InterfacesMap[1] = inet.Interface{Name: "eth0"} - s.InterfaceAddrsMap[1] = []inet.InterfaceAddr{ - { - Family: linux.AF_INET6, - PrefixLen: 128, - Addr: []byte("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"), - }, - } - s.InterfacesMap[2] = inet.Interface{Name: "eth1"} - s.InterfaceAddrsMap[2] = []inet.InterfaceAddr{ - { - Family: linux.AF_INET6, - PrefixLen: 128, - Addr: []byte("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"), - }, - } - want := map[string]struct{}{ - "000102030405060708090a0b0c0d0e0f 01 80 00 00 eth0\n": {}, - "101112131415161718191a1b1c1d1e1f 02 80 00 00 eth1\n": {}, - } - - n := &ifinet6{s: s} - contents := n.contents() - if len(contents) != len(want) { - t.Errorf("Got len(n.contents()) = %d, want = %d", len(contents), len(want)) - } - got := map[string]struct{}{} - for _, l := range contents { - got[l] = struct{}{} - } - - if !reflect.DeepEqual(got, want) { - t.Errorf("Got n.contents() = %v, want = %v", got, want) - } -} diff --git a/pkg/sentry/fsimpl/proc/stat.go b/pkg/sentry/fsimpl/proc/stat.go deleted file mode 100644 index 50894a534..000000000 --- a/pkg/sentry/fsimpl/proc/stat.go +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" -) - -// cpuStats contains the breakdown of CPU time for /proc/stat. -type cpuStats struct { - // user is time spent in userspace tasks with non-positive niceness. - user uint64 - - // nice is time spent in userspace tasks with positive niceness. - nice uint64 - - // system is time spent in non-interrupt kernel context. - system uint64 - - // idle is time spent idle. - idle uint64 - - // ioWait is time spent waiting for IO. - ioWait uint64 - - // irq is time spent in interrupt context. - irq uint64 - - // softirq is time spent in software interrupt context. - softirq uint64 - - // steal is involuntary wait time. - steal uint64 - - // guest is time spent in guests with non-positive niceness. - guest uint64 - - // guestNice is time spent in guests with positive niceness. - guestNice uint64 -} - -// String implements fmt.Stringer. -func (c cpuStats) String() string { - return fmt.Sprintf("%d %d %d %d %d %d %d %d %d %d", c.user, c.nice, c.system, c.idle, c.ioWait, c.irq, c.softirq, c.steal, c.guest, c.guestNice) -} - -// statData implements vfs.DynamicBytesSource for /proc/stat. -// -// +stateify savable -type statData struct { - kernfs.DynamicBytesFile - - // k is the owning Kernel. - k *kernel.Kernel -} - -var _ dynamicInode = (*statData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (s *statData) Generate(ctx context.Context, buf *bytes.Buffer) error { - // TODO(b/37226836): We currently export only zero CPU stats. We could - // at least provide some aggregate stats. - var cpu cpuStats - fmt.Fprintf(buf, "cpu %s\n", cpu) - - for c, max := uint(0), s.k.ApplicationCores(); c < max; c++ { - fmt.Fprintf(buf, "cpu%d %s\n", c, cpu) - } - - // The total number of interrupts is dependent on the CPUs and PCI - // devices on the system. See arch_probe_nr_irqs. - // - // Since we don't report real interrupt stats, just choose an arbitrary - // value from a representative VM. - const numInterrupts = 256 - - // The Kernel doesn't handle real interrupts, so report all zeroes. - // TODO(b/37226836): We could count page faults as #PF. - fmt.Fprintf(buf, "intr 0") // total - for i := 0; i < numInterrupts; i++ { - fmt.Fprintf(buf, " 0") - } - fmt.Fprintf(buf, "\n") - - // Total number of context switches. - // TODO(b/37226836): Count this. - fmt.Fprintf(buf, "ctxt 0\n") - - // CLOCK_REALTIME timestamp from boot, in seconds. - fmt.Fprintf(buf, "btime %d\n", s.k.Timekeeper().BootTime().Seconds()) - - // Total number of clones. - // TODO(b/37226836): Count this. - fmt.Fprintf(buf, "processes 0\n") - - // Number of runnable tasks. - // TODO(b/37226836): Count this. - fmt.Fprintf(buf, "procs_running 0\n") - - // Number of tasks waiting on IO. - // TODO(b/37226836): Count this. - fmt.Fprintf(buf, "procs_blocked 0\n") - - // Number of each softirq handled. - fmt.Fprintf(buf, "softirq 0") // total - for i := 0; i < linux.NumSoftIRQ; i++ { - fmt.Fprintf(buf, " 0") - } - fmt.Fprintf(buf, "\n") - return nil -} diff --git a/pkg/sentry/fsimpl/proc/sys.go b/pkg/sentry/fsimpl/proc/sys.go deleted file mode 100644 index b88256e12..000000000 --- a/pkg/sentry/fsimpl/proc/sys.go +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -// mmapMinAddrData implements vfs.DynamicBytesSource for -// /proc/sys/vm/mmap_min_addr. -// -// +stateify savable -type mmapMinAddrData struct { - k *kernel.Kernel -} - -var _ vfs.DynamicBytesSource = (*mmapMinAddrData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *mmapMinAddrData) Generate(ctx context.Context, buf *bytes.Buffer) error { - fmt.Fprintf(buf, "%d\n", d.k.Platform.MinUserAddress()) - return nil -} - -// +stateify savable -type overcommitMemory struct{} - -var _ vfs.DynamicBytesSource = (*overcommitMemory)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *overcommitMemory) Generate(ctx context.Context, buf *bytes.Buffer) error { - fmt.Fprintf(buf, "0\n") - return nil -} diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index 11a64c777..5a384817f 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -50,15 +50,15 @@ func newTaskInode(inoGen InoGenerator, task *kernel.Task, pidns *kernel.PIDNames //"fd": newFdDir(t, msrc), //"fdinfo": newFdInfoDir(t, msrc), //"gid_map": newGIDMap(t, msrc), - "io": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, newIO(task, isThreadGroup)), - "maps": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, &mapsData{task: task}), + "io": newTaskOwnedFile(task, inoGen.NextIno(), 0400, newIO(task, isThreadGroup)), + "maps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &mapsData{task: task}), //"mountinfo": seqfile.NewSeqFileInode(t, &mountInfoFile{t: t}, msrc), //"mounts": seqfile.NewSeqFileInode(t, &mountsFile{t: t}, msrc), //"ns": newNamespaceDir(t, msrc), - "smaps": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, &smapsData{task: task}), - "stat": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, &taskStatData{t: task, pidns: pidns, tgstats: isThreadGroup}), - "statm": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, &statmData{t: task}), - "status": newTaskOwnedFile(task, inoGen.NextIno(), defaultPermission, &statusData{t: task, pidns: pidns}), + "smaps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &smapsData{task: task}), + "stat": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &taskStatData{t: task, pidns: pidns, tgstats: isThreadGroup}), + "statm": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statmData{t: task}), + "status": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statusData{t: task, pidns: pidns}), //"uid_map": newUIDMap(t, msrc), } if isThreadGroup { diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go index d8f92d52f..72315d25c 100644 --- a/pkg/sentry/fsimpl/proc/tasks.go +++ b/pkg/sentry/fsimpl/proc/tasks.go @@ -15,6 +15,7 @@ package proc import ( + "bytes" "sort" "strconv" @@ -28,9 +29,8 @@ import ( ) const ( - defaultPermission = 0444 - selfName = "self" - threadSelfName = "thread-self" + selfName = "self" + threadSelfName = "thread-self" ) // InoGenerator generates unique inode numbers for a given filesystem. @@ -61,15 +61,15 @@ var _ kernfs.Inode = (*tasksInode)(nil) func newTasksInode(inoGen InoGenerator, k *kernel.Kernel, pidns *kernel.PIDNamespace) (*tasksInode, *kernfs.Dentry) { root := auth.NewRootCredentials(pidns.UserNamespace()) contents := map[string]*kernfs.Dentry{ - //"cpuinfo": newCPUInfo(ctx, msrc), - //"filesystems": seqfile.NewSeqFileInode(ctx, &filesystemsData{}, msrc), - "loadavg": newDentry(root, inoGen.NextIno(), defaultPermission, &loadavgData{}), - "meminfo": newDentry(root, inoGen.NextIno(), defaultPermission, &meminfoData{k: k}), - "mounts": kernfs.NewStaticSymlink(root, inoGen.NextIno(), defaultPermission, "self/mounts"), - "stat": newDentry(root, inoGen.NextIno(), defaultPermission, &statData{k: k}), - //"uptime": newUptime(ctx, msrc), - //"version": newVersionData(root, inoGen.NextIno(), k), - "version": newDentry(root, inoGen.NextIno(), defaultPermission, &versionData{k: k}), + "cpuinfo": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(cpuInfoData(k))), + //"filesystems": newDentry(root, inoGen.NextIno(), 0444, &filesystemsData{}), + "loadavg": newDentry(root, inoGen.NextIno(), 0444, &loadavgData{}), + "sys": newSysDir(root, inoGen), + "meminfo": newDentry(root, inoGen.NextIno(), 0444, &meminfoData{}), + "mounts": kernfs.NewStaticSymlink(root, inoGen.NextIno(), 0777, "self/mounts"), + "stat": newDentry(root, inoGen.NextIno(), 0444, &statData{}), + "uptime": newDentry(root, inoGen.NextIno(), 0444, &uptimeData{}), + "version": newDentry(root, inoGen.NextIno(), 0444, &versionData{}), } inode := &tasksInode{ @@ -216,3 +216,20 @@ func (i *tasksInode) Stat(vsfs *vfs.Filesystem) linux.Statx { return stat } + +func cpuInfoData(k *kernel.Kernel) string { + features := k.FeatureSet() + if features == nil { + // Kernel is always initialized with a FeatureSet. + panic("cpuinfo read with nil FeatureSet") + } + var buf bytes.Buffer + for i, max := uint(0), k.ApplicationCores(); i < max; i++ { + features.WriteCPUInfoTo(i, &buf) + } + return buf.String() +} + +func shmData(v uint64) dynamicInode { + return newStaticFile(strconv.FormatUint(v, 10)) +} diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go index 91f30a798..ad3760e39 100644 --- a/pkg/sentry/fsimpl/proc/tasks_files.go +++ b/pkg/sentry/fsimpl/proc/tasks_files.go @@ -15,6 +15,7 @@ package proc import ( + "bytes" "fmt" "strconv" @@ -23,6 +24,9 @@ import ( "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" + "gvisor.dev/gvisor/pkg/sentry/kernel/time" + "gvisor.dev/gvisor/pkg/sentry/usage" + "gvisor.dev/gvisor/pkg/sentry/usermem" "gvisor.dev/gvisor/pkg/syserror" ) @@ -90,3 +94,244 @@ func (s *threadSelfSymlink) Readlink(ctx context.Context) (string, error) { } return fmt.Sprintf("%d/task/%d", tgid, tid), nil } + +// cpuStats contains the breakdown of CPU time for /proc/stat. +type cpuStats struct { + // user is time spent in userspace tasks with non-positive niceness. + user uint64 + + // nice is time spent in userspace tasks with positive niceness. + nice uint64 + + // system is time spent in non-interrupt kernel context. + system uint64 + + // idle is time spent idle. + idle uint64 + + // ioWait is time spent waiting for IO. + ioWait uint64 + + // irq is time spent in interrupt context. + irq uint64 + + // softirq is time spent in software interrupt context. + softirq uint64 + + // steal is involuntary wait time. + steal uint64 + + // guest is time spent in guests with non-positive niceness. + guest uint64 + + // guestNice is time spent in guests with positive niceness. + guestNice uint64 +} + +// String implements fmt.Stringer. +func (c cpuStats) String() string { + return fmt.Sprintf("%d %d %d %d %d %d %d %d %d %d", c.user, c.nice, c.system, c.idle, c.ioWait, c.irq, c.softirq, c.steal, c.guest, c.guestNice) +} + +// statData implements vfs.DynamicBytesSource for /proc/stat. +// +// +stateify savable +type statData struct { + kernfs.DynamicBytesFile + + // k is the owning Kernel. + k *kernel.Kernel +} + +var _ dynamicInode = (*statData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (s *statData) Generate(ctx context.Context, buf *bytes.Buffer) error { + // TODO(b/37226836): We currently export only zero CPU stats. We could + // at least provide some aggregate stats. + var cpu cpuStats + fmt.Fprintf(buf, "cpu %s\n", cpu) + + for c, max := uint(0), s.k.ApplicationCores(); c < max; c++ { + fmt.Fprintf(buf, "cpu%d %s\n", c, cpu) + } + + // The total number of interrupts is dependent on the CPUs and PCI + // devices on the system. See arch_probe_nr_irqs. + // + // Since we don't report real interrupt stats, just choose an arbitrary + // value from a representative VM. + const numInterrupts = 256 + + // The Kernel doesn't handle real interrupts, so report all zeroes. + // TODO(b/37226836): We could count page faults as #PF. + fmt.Fprintf(buf, "intr 0") // total + for i := 0; i < numInterrupts; i++ { + fmt.Fprintf(buf, " 0") + } + fmt.Fprintf(buf, "\n") + + // Total number of context switches. + // TODO(b/37226836): Count this. + fmt.Fprintf(buf, "ctxt 0\n") + + // CLOCK_REALTIME timestamp from boot, in seconds. + fmt.Fprintf(buf, "btime %d\n", s.k.Timekeeper().BootTime().Seconds()) + + // Total number of clones. + // TODO(b/37226836): Count this. + fmt.Fprintf(buf, "processes 0\n") + + // Number of runnable tasks. + // TODO(b/37226836): Count this. + fmt.Fprintf(buf, "procs_running 0\n") + + // Number of tasks waiting on IO. + // TODO(b/37226836): Count this. + fmt.Fprintf(buf, "procs_blocked 0\n") + + // Number of each softirq handled. + fmt.Fprintf(buf, "softirq 0") // total + for i := 0; i < linux.NumSoftIRQ; i++ { + fmt.Fprintf(buf, " 0") + } + fmt.Fprintf(buf, "\n") + return nil +} + +// loadavgData backs /proc/loadavg. +// +// +stateify savable +type loadavgData struct { + kernfs.DynamicBytesFile +} + +var _ dynamicInode = (*loadavgData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (d *loadavgData) Generate(ctx context.Context, buf *bytes.Buffer) error { + // TODO(b/62345059): Include real data in fields. + // Column 1-3: CPU and IO utilization of the last 1, 5, and 10 minute periods. + // Column 4-5: currently running processes and the total number of processes. + // Column 6: the last process ID used. + fmt.Fprintf(buf, "%.2f %.2f %.2f %d/%d %d\n", 0.00, 0.00, 0.00, 0, 0, 0) + return nil +} + +// meminfoData implements vfs.DynamicBytesSource for /proc/meminfo. +// +// +stateify savable +type meminfoData struct { + kernfs.DynamicBytesFile + + // k is the owning Kernel. + k *kernel.Kernel +} + +var _ dynamicInode = (*meminfoData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (d *meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { + mf := d.k.MemoryFile() + mf.UpdateUsage() + snapshot, totalUsage := usage.MemoryAccounting.Copy() + totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage) + anon := snapshot.Anonymous + snapshot.Tmpfs + file := snapshot.PageCache + snapshot.Mapped + // We don't actually have active/inactive LRUs, so just make up numbers. + activeFile := (file / 2) &^ (usermem.PageSize - 1) + inactiveFile := file - activeFile + + fmt.Fprintf(buf, "MemTotal: %8d kB\n", totalSize/1024) + memFree := (totalSize - totalUsage) / 1024 + // We use MemFree as MemAvailable because we don't swap. + // TODO(rahat): When reclaim is implemented the value of MemAvailable + // should change. + fmt.Fprintf(buf, "MemFree: %8d kB\n", memFree) + fmt.Fprintf(buf, "MemAvailable: %8d kB\n", memFree) + fmt.Fprintf(buf, "Buffers: 0 kB\n") // memory usage by block devices + fmt.Fprintf(buf, "Cached: %8d kB\n", (file+snapshot.Tmpfs)/1024) + // Emulate a system with no swap, which disables inactivation of anon pages. + fmt.Fprintf(buf, "SwapCache: 0 kB\n") + fmt.Fprintf(buf, "Active: %8d kB\n", (anon+activeFile)/1024) + fmt.Fprintf(buf, "Inactive: %8d kB\n", inactiveFile/1024) + fmt.Fprintf(buf, "Active(anon): %8d kB\n", anon/1024) + fmt.Fprintf(buf, "Inactive(anon): 0 kB\n") + fmt.Fprintf(buf, "Active(file): %8d kB\n", activeFile/1024) + fmt.Fprintf(buf, "Inactive(file): %8d kB\n", inactiveFile/1024) + fmt.Fprintf(buf, "Unevictable: 0 kB\n") // TODO(b/31823263) + fmt.Fprintf(buf, "Mlocked: 0 kB\n") // TODO(b/31823263) + fmt.Fprintf(buf, "SwapTotal: 0 kB\n") + fmt.Fprintf(buf, "SwapFree: 0 kB\n") + fmt.Fprintf(buf, "Dirty: 0 kB\n") + fmt.Fprintf(buf, "Writeback: 0 kB\n") + fmt.Fprintf(buf, "AnonPages: %8d kB\n", anon/1024) + fmt.Fprintf(buf, "Mapped: %8d kB\n", file/1024) // doesn't count mapped tmpfs, which we don't know + fmt.Fprintf(buf, "Shmem: %8d kB\n", snapshot.Tmpfs/1024) + return nil +} + +// uptimeData implements vfs.DynamicBytesSource for /proc/uptime. +// +// +stateify savable +type uptimeData struct { + kernfs.DynamicBytesFile +} + +var _ dynamicInode = (*uptimeData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (*uptimeData) Generate(ctx context.Context, buf *bytes.Buffer) error { + k := kernel.KernelFromContext(ctx) + now := time.NowFromContext(ctx) + + // Pretend that we've spent zero time sleeping (second number). + fmt.Fprintf(buf, "%.2f 0.00\n", now.Sub(k.Timekeeper().BootTime()).Seconds()) + return nil +} + +// versionData implements vfs.DynamicBytesSource for /proc/version. +// +// +stateify savable +type versionData struct { + kernfs.DynamicBytesFile + + // k is the owning Kernel. + k *kernel.Kernel +} + +var _ dynamicInode = (*versionData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (v *versionData) Generate(ctx context.Context, buf *bytes.Buffer) error { + init := v.k.GlobalInit() + if init == nil { + // Attempted to read before the init Task is created. This can + // only occur during startup, which should never need to read + // this file. + panic("Attempted to read version before initial Task is available") + } + + // /proc/version takes the form: + // + // "SYSNAME version RELEASE (COMPILE_USER@COMPILE_HOST) + // (COMPILER_VERSION) VERSION" + // + // where: + // - SYSNAME, RELEASE, and VERSION are the same as returned by + // sys_utsname + // - COMPILE_USER is the user that build the kernel + // - COMPILE_HOST is the hostname of the machine on which the kernel + // was built + // - COMPILER_VERSION is the version reported by the building compiler + // + // Since we don't really want to expose build information to + // applications, those fields are omitted. + // + // FIXME(mpratt): Using Version from the init task SyscallTable + // disregards the different version a task may have (e.g., in a uts + // namespace). + ver := init.Leader().SyscallTable().Version + fmt.Fprintf(buf, "%s version %s %s\n", ver.Sysname, ver.Release, ver.Version) + return nil +} diff --git a/pkg/sentry/fsimpl/proc/tasks_net.go b/pkg/sentry/fsimpl/proc/tasks_net.go new file mode 100644 index 000000000..06dc43c26 --- /dev/null +++ b/pkg/sentry/fsimpl/proc/tasks_net.go @@ -0,0 +1,337 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "bytes" + "fmt" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/binary" + "gvisor.dev/gvisor/pkg/log" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fs" + "gvisor.dev/gvisor/pkg/sentry/inet" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/socket" + "gvisor.dev/gvisor/pkg/sentry/socket/unix" + "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" + "gvisor.dev/gvisor/pkg/sentry/vfs" +) + +// ifinet6 implements vfs.DynamicBytesSource for /proc/net/if_inet6. +// +// +stateify savable +type ifinet6 struct { + s inet.Stack +} + +var _ vfs.DynamicBytesSource = (*ifinet6)(nil) + +func (n *ifinet6) contents() []string { + var lines []string + nics := n.s.Interfaces() + for id, naddrs := range n.s.InterfaceAddrs() { + nic, ok := nics[id] + if !ok { + // NIC was added after NICNames was called. We'll just ignore it. + continue + } + + for _, a := range naddrs { + // IPv6 only. + if a.Family != linux.AF_INET6 { + continue + } + + // Fields: + // IPv6 address displayed in 32 hexadecimal chars without colons + // Netlink device number (interface index) in hexadecimal (use nic id) + // Prefix length in hexadecimal + // Scope value (use 0) + // Interface flags + // Device name + lines = append(lines, fmt.Sprintf("%032x %02x %02x %02x %02x %8s\n", a.Addr, id, a.PrefixLen, 0, a.Flags, nic.Name)) + } + } + return lines +} + +// Generate implements vfs.DynamicBytesSource.Generate. +func (n *ifinet6) Generate(ctx context.Context, buf *bytes.Buffer) error { + for _, l := range n.contents() { + buf.WriteString(l) + } + return nil +} + +// netDev implements vfs.DynamicBytesSource for /proc/net/dev. +// +// +stateify savable +type netDev struct { + s inet.Stack +} + +var _ vfs.DynamicBytesSource = (*netDev)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (n *netDev) Generate(ctx context.Context, buf *bytes.Buffer) error { + interfaces := n.s.Interfaces() + buf.WriteString("Inter-| Receive | Transmit\n") + buf.WriteString(" face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n") + + for _, i := range interfaces { + // Implements the same format as + // net/core/net-procfs.c:dev_seq_printf_stats. + var stats inet.StatDev + if err := n.s.Statistics(&stats, i.Name); err != nil { + log.Warningf("Failed to retrieve interface statistics for %v: %v", i.Name, err) + continue + } + fmt.Fprintf( + buf, + "%6s: %7d %7d %4d %4d %4d %5d %10d %9d %8d %7d %4d %4d %4d %5d %7d %10d\n", + i.Name, + // Received + stats[0], // bytes + stats[1], // packets + stats[2], // errors + stats[3], // dropped + stats[4], // fifo + stats[5], // frame + stats[6], // compressed + stats[7], // multicast + // Transmitted + stats[8], // bytes + stats[9], // packets + stats[10], // errors + stats[11], // dropped + stats[12], // fifo + stats[13], // frame + stats[14], // compressed + stats[15], // multicast + ) + } + + return nil +} + +// netUnix implements vfs.DynamicBytesSource for /proc/net/unix. +// +// +stateify savable +type netUnix struct { + k *kernel.Kernel +} + +var _ vfs.DynamicBytesSource = (*netUnix)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (n *netUnix) Generate(ctx context.Context, buf *bytes.Buffer) error { + buf.WriteString("Num RefCount Protocol Flags Type St Inode Path\n") + for _, se := range n.k.ListSockets() { + s := se.Sock.Get() + if s == nil { + log.Debugf("Couldn't resolve weakref %v in socket table, racing with destruction?", se.Sock) + continue + } + sfile := s.(*fs.File) + if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX { + s.DecRef() + // Not a unix socket. + continue + } + sops := sfile.FileOperations.(*unix.SocketOperations) + + addr, err := sops.Endpoint().GetLocalAddress() + if err != nil { + log.Warningf("Failed to retrieve socket name from %+v: %v", sfile, err) + addr.Addr = "" + } + + sockFlags := 0 + if ce, ok := sops.Endpoint().(transport.ConnectingEndpoint); ok { + if ce.Listening() { + // For unix domain sockets, linux reports a single flag + // value if the socket is listening, of __SO_ACCEPTCON. + sockFlags = linux.SO_ACCEPTCON + } + } + + // In the socket entry below, the value for the 'Num' field requires + // some consideration. Linux prints the address to the struct + // unix_sock representing a socket in the kernel, but may redact the + // value for unprivileged users depending on the kptr_restrict + // sysctl. + // + // One use for this field is to allow a privileged user to + // introspect into the kernel memory to determine information about + // a socket not available through procfs, such as the socket's peer. + // + // In gvisor, returning a pointer to our internal structures would + // be pointless, as it wouldn't match the memory layout for struct + // unix_sock, making introspection difficult. We could populate a + // struct unix_sock with the appropriate data, but even that + // requires consideration for which kernel version to emulate, as + // the definition of this struct changes over time. + // + // For now, we always redact this pointer. + fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %5d", + (*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct. + sfile.ReadRefs()-1, // RefCount, don't count our own ref. + 0, // Protocol, always 0 for UDS. + sockFlags, // Flags. + sops.Endpoint().Type(), // Type. + sops.State(), // State. + sfile.InodeID(), // Inode. + ) + + // Path + if len(addr.Addr) != 0 { + if addr.Addr[0] == 0 { + // Abstract path. + fmt.Fprintf(buf, " @%s", string(addr.Addr[1:])) + } else { + fmt.Fprintf(buf, " %s", string(addr.Addr)) + } + } + fmt.Fprintf(buf, "\n") + + s.DecRef() + } + return nil +} + +// netTCP implements vfs.DynamicBytesSource for /proc/net/tcp. +// +// +stateify savable +type netTCP struct { + k *kernel.Kernel +} + +var _ vfs.DynamicBytesSource = (*netTCP)(nil) + +func (n *netTCP) Generate(ctx context.Context, buf *bytes.Buffer) error { + t := kernel.TaskFromContext(ctx) + buf.WriteString(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n") + for _, se := range n.k.ListSockets() { + s := se.Sock.Get() + if s == nil { + log.Debugf("Couldn't resolve weakref %+v in socket table, racing with destruction?", se.Sock) + continue + } + sfile := s.(*fs.File) + sops, ok := sfile.FileOperations.(socket.Socket) + if !ok { + panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile)) + } + if family, stype, _ := sops.Type(); !(family == linux.AF_INET && stype == linux.SOCK_STREAM) { + s.DecRef() + // Not tcp4 sockets. + continue + } + + // Linux's documentation for the fields below can be found at + // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt. + // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock(). + // Note that the header doesn't contain labels for all the fields. + + // Field: sl; entry number. + fmt.Fprintf(buf, "%4d: ", se.ID) + + portBuf := make([]byte, 2) + + // Field: local_adddress. + var localAddr linux.SockAddrInet + if local, _, err := sops.GetSockName(t); err == nil { + localAddr = *local.(*linux.SockAddrInet) + } + binary.LittleEndian.PutUint16(portBuf, localAddr.Port) + fmt.Fprintf(buf, "%08X:%04X ", + binary.LittleEndian.Uint32(localAddr.Addr[:]), + portBuf) + + // Field: rem_address. + var remoteAddr linux.SockAddrInet + if remote, _, err := sops.GetPeerName(t); err == nil { + remoteAddr = *remote.(*linux.SockAddrInet) + } + binary.LittleEndian.PutUint16(portBuf, remoteAddr.Port) + fmt.Fprintf(buf, "%08X:%04X ", + binary.LittleEndian.Uint32(remoteAddr.Addr[:]), + portBuf) + + // Field: state; socket state. + fmt.Fprintf(buf, "%02X ", sops.State()) + + // Field: tx_queue, rx_queue; number of packets in the transmit and + // receive queue. Unimplemented. + fmt.Fprintf(buf, "%08X:%08X ", 0, 0) + + // Field: tr, tm->when; timer active state and number of jiffies + // until timer expires. Unimplemented. + fmt.Fprintf(buf, "%02X:%08X ", 0, 0) + + // Field: retrnsmt; number of unrecovered RTO timeouts. + // Unimplemented. + fmt.Fprintf(buf, "%08X ", 0) + + // Field: uid. + uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx) + if err != nil { + log.Warningf("Failed to retrieve unstable attr for socket file: %v", err) + fmt.Fprintf(buf, "%5d ", 0) + } else { + fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(t.UserNamespace()).OrOverflow())) + } + + // Field: timeout; number of unanswered 0-window probes. + // Unimplemented. + fmt.Fprintf(buf, "%8d ", 0) + + // Field: inode. + fmt.Fprintf(buf, "%8d ", sfile.InodeID()) + + // Field: refcount. Don't count the ref we obtain while deferencing + // the weakref to this socket. + fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1) + + // Field: Socket struct address. Redacted due to the same reason as + // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. + fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil)) + + // Field: retransmit timeout. Unimplemented. + fmt.Fprintf(buf, "%d ", 0) + + // Field: predicted tick of soft clock (delayed ACK control data). + // Unimplemented. + fmt.Fprintf(buf, "%d ", 0) + + // Field: (ack.quick<<1)|ack.pingpong, Unimplemented. + fmt.Fprintf(buf, "%d ", 0) + + // Field: sending congestion window, Unimplemented. + fmt.Fprintf(buf, "%d ", 0) + + // Field: Slow start size threshold, -1 if threshold >= 0xFFFF. + // Unimplemented, report as large threshold. + fmt.Fprintf(buf, "%d", -1) + + fmt.Fprintf(buf, "\n") + + s.DecRef() + } + + return nil +} diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go new file mode 100644 index 000000000..aabf2bf0c --- /dev/null +++ b/pkg/sentry/fsimpl/proc/tasks_sys.go @@ -0,0 +1,143 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "bytes" + "fmt" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context" + "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" + "gvisor.dev/gvisor/pkg/sentry/kernel" + "gvisor.dev/gvisor/pkg/sentry/kernel/auth" +) + +// newSysDir returns the dentry corresponding to /proc/sys directory. +func newSysDir(root *auth.Credentials, inoGen InoGenerator) *kernfs.Dentry { + return kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ + "kernel": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ + "hostname": newDentry(root, inoGen.NextIno(), 0444, &hostnameData{}), + "shmall": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMALL)), + "shmmax": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMMAX)), + "shmmni": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMMNI)), + }), + "vm": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ + "mmap_min_addr": newDentry(root, inoGen.NextIno(), 0444, &mmapMinAddrData{}), + "overcommit_memory": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0\n")), + }), + "net": newSysNetDir(root, inoGen), + }) +} + +// newSysNetDir returns the dentry corresponding to /proc/sys/net directory. +func newSysNetDir(root *auth.Credentials, inoGen InoGenerator) *kernfs.Dentry { + return kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ + "net": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ + "ipv4": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ + // Add tcp_sack. + // TODO(gvisor.dev/issue/1195): tcp_sack allows write(2) + // "tcp_sack": newTCPSackInode(ctx, msrc, s), + + // The following files are simple stubs until they are implemented in + // netstack, most of these files are configuration related. We use the + // value closest to the actual netstack behavior or any empty file, all + // of these files will have mode 0444 (read-only for all users). + "ip_local_port_range": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("16000 65535")), + "ip_local_reserved_ports": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")), + "ipfrag_time": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("30")), + "ip_nonlocal_bind": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "ip_no_pmtu_disc": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")), + + // tcp_allowed_congestion_control tell the user what they are able to + // do as an unprivledged process so we leave it empty. + "tcp_allowed_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")), + "tcp_available_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("reno")), + "tcp_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("reno")), + + // Many of the following stub files are features netstack doesn't + // support. The unsupported features return "0" to indicate they are + // disabled. + "tcp_base_mss": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1280")), + "tcp_dsack": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_early_retrans": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_fack": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_fastopen": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_fastopen_key": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")), + "tcp_invalid_ratelimit": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_keepalive_intvl": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_keepalive_probes": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_keepalive_time": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("7200")), + "tcp_mtu_probing": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_no_metrics_save": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")), + "tcp_probe_interval": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_probe_threshold": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "tcp_retries1": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("3")), + "tcp_retries2": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("15")), + "tcp_rfc1337": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")), + "tcp_slow_start_after_idle": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")), + "tcp_synack_retries": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("5")), + "tcp_syn_retries": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("3")), + "tcp_timestamps": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")), + }), + "core": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ + "default_qdisc": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("pfifo_fast")), + "message_burst": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("10")), + "message_cost": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("5")), + "optmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), + "rmem_default": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")), + "rmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")), + "somaxconn": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("128")), + "wmem_default": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")), + "wmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")), + }), + }), + }) +} + +// mmapMinAddrData implements vfs.DynamicBytesSource for +// /proc/sys/vm/mmap_min_addr. +// +// +stateify savable +type mmapMinAddrData struct { + kernfs.DynamicBytesFile + + k *kernel.Kernel +} + +var _ dynamicInode = (*mmapMinAddrData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (d *mmapMinAddrData) Generate(ctx context.Context, buf *bytes.Buffer) error { + fmt.Fprintf(buf, "%d\n", d.k.Platform.MinUserAddress()) + return nil +} + +// hostnameData implements vfs.DynamicBytesSource for /proc/sys/kernel/hostname. +// +// +stateify savable +type hostnameData struct { + kernfs.DynamicBytesFile +} + +var _ dynamicInode = (*hostnameData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (*hostnameData) Generate(ctx context.Context, buf *bytes.Buffer) error { + utsns := kernel.UTSNamespaceFromContext(ctx) + buf.WriteString(utsns.HostName()) + buf.WriteString("\n") + return nil +} diff --git a/pkg/sentry/fsimpl/proc/tasks_sys_test.go b/pkg/sentry/fsimpl/proc/tasks_sys_test.go new file mode 100644 index 000000000..20a77a8ca --- /dev/null +++ b/pkg/sentry/fsimpl/proc/tasks_sys_test.go @@ -0,0 +1,78 @@ +// Copyright 2019 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package proc + +import ( + "bytes" + "reflect" + "testing" + + "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/sentry/context/contexttest" + "gvisor.dev/gvisor/pkg/sentry/inet" +) + +func newIPv6TestStack() *inet.TestStack { + s := inet.NewTestStack() + s.SupportsIPv6Flag = true + return s +} + +func TestIfinet6NoAddresses(t *testing.T) { + n := &ifinet6{s: newIPv6TestStack()} + var buf bytes.Buffer + n.Generate(contexttest.Context(t), &buf) + if buf.Len() > 0 { + t.Errorf("n.Generate() generated = %v, want = %v", buf.Bytes(), []byte{}) + } +} + +func TestIfinet6(t *testing.T) { + s := newIPv6TestStack() + s.InterfacesMap[1] = inet.Interface{Name: "eth0"} + s.InterfaceAddrsMap[1] = []inet.InterfaceAddr{ + { + Family: linux.AF_INET6, + PrefixLen: 128, + Addr: []byte("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"), + }, + } + s.InterfacesMap[2] = inet.Interface{Name: "eth1"} + s.InterfaceAddrsMap[2] = []inet.InterfaceAddr{ + { + Family: linux.AF_INET6, + PrefixLen: 128, + Addr: []byte("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"), + }, + } + want := map[string]struct{}{ + "000102030405060708090a0b0c0d0e0f 01 80 00 00 eth0\n": {}, + "101112131415161718191a1b1c1d1e1f 02 80 00 00 eth1\n": {}, + } + + n := &ifinet6{s: s} + contents := n.contents() + if len(contents) != len(want) { + t.Errorf("Got len(n.contents()) = %d, want = %d", len(contents), len(want)) + } + got := map[string]struct{}{} + for _, l := range contents { + got[l] = struct{}{} + } + + if !reflect.DeepEqual(got, want) { + t.Errorf("Got n.contents() = %v, want = %v", got, want) + } +} diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go index ca8c87ec2..76eafe593 100644 --- a/pkg/sentry/fsimpl/proc/tasks_test.go +++ b/pkg/sentry/fsimpl/proc/tasks_test.go @@ -69,12 +69,15 @@ func checkDots(dirs []vfs.Dirent) ([]vfs.Dirent, error) { func checkTasksStaticFiles(gots []vfs.Dirent) ([]vfs.Dirent, error) { wants := map[string]vfs.Dirent{ + "cpuinfo": {Type: linux.DT_REG}, "loadavg": {Type: linux.DT_REG}, "meminfo": {Type: linux.DT_REG}, "mounts": {Type: linux.DT_LNK}, "self": selfLink, "stat": {Type: linux.DT_REG}, + "sys": {Type: linux.DT_DIR}, "thread-self": threadSelfLink, + "uptime": {Type: linux.DT_REG}, "version": {Type: linux.DT_REG}, } return checkFiles(gots, wants) diff --git a/pkg/sentry/fsimpl/proc/version.go b/pkg/sentry/fsimpl/proc/version.go deleted file mode 100644 index 367f2396b..000000000 --- a/pkg/sentry/fsimpl/proc/version.go +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - - "gvisor.dev/gvisor/pkg/sentry/context" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" -) - -// versionData implements vfs.DynamicBytesSource for /proc/version. -// -// +stateify savable -type versionData struct { - kernfs.DynamicBytesFile - - // k is the owning Kernel. - k *kernel.Kernel -} - -var _ dynamicInode = (*versionData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (v *versionData) Generate(ctx context.Context, buf *bytes.Buffer) error { - init := v.k.GlobalInit() - if init == nil { - // Attempted to read before the init Task is created. This can - // only occur during startup, which should never need to read - // this file. - panic("Attempted to read version before initial Task is available") - } - - // /proc/version takes the form: - // - // "SYSNAME version RELEASE (COMPILE_USER@COMPILE_HOST) - // (COMPILER_VERSION) VERSION" - // - // where: - // - SYSNAME, RELEASE, and VERSION are the same as returned by - // sys_utsname - // - COMPILE_USER is the user that build the kernel - // - COMPILE_HOST is the hostname of the machine on which the kernel - // was built - // - COMPILER_VERSION is the version reported by the building compiler - // - // Since we don't really want to expose build information to - // applications, those fields are omitted. - // - // FIXME(mpratt): Using Version from the init task SyscallTable - // disregards the different version a task may have (e.g., in a uts - // namespace). - ver := init.Leader().SyscallTable().Version - fmt.Fprintf(buf, "%s version %s %s\n", ver.Sysname, ver.Release, ver.Version) - return nil -} -- cgit v1.2.3 From 9cbf5a3dcc71d85cd857f117d4b7189a101be9c1 Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Fri, 25 Oct 2019 03:09:59 +0000 Subject: Enable pkg/cpuid support on arm64. Fixes #1255 Signed-off-by: Haibo Xu Change-Id: I8614e6f3ee321c2989567e4e712aa8f28cc9db14 --- pkg/cpuid/BUILD | 9 +- pkg/cpuid/cpuid.go | 1098 +----------------------------------- pkg/cpuid/cpuid_arm64.go | 461 ++++++++++++++++ pkg/cpuid/cpuid_arm64_test.go | 55 ++ pkg/cpuid/cpuid_parse_test.go | 142 ----- pkg/cpuid/cpuid_parse_x86_test.go | 144 +++++ pkg/cpuid/cpuid_test.go | 241 -------- pkg/cpuid/cpuid_x86.go | 1100 +++++++++++++++++++++++++++++++++++++ pkg/cpuid/cpuid_x86_test.go | 243 ++++++++ 9 files changed, 2018 insertions(+), 1475 deletions(-) create mode 100644 pkg/cpuid/cpuid_arm64.go create mode 100644 pkg/cpuid/cpuid_arm64_test.go delete mode 100644 pkg/cpuid/cpuid_parse_test.go create mode 100644 pkg/cpuid/cpuid_parse_x86_test.go delete mode 100644 pkg/cpuid/cpuid_test.go create mode 100644 pkg/cpuid/cpuid_x86.go create mode 100644 pkg/cpuid/cpuid_x86_test.go (limited to 'pkg/cpuid/cpuid.go') diff --git a/pkg/cpuid/BUILD b/pkg/cpuid/BUILD index 43a432190..d6cb1a549 100644 --- a/pkg/cpuid/BUILD +++ b/pkg/cpuid/BUILD @@ -7,6 +7,8 @@ go_library( srcs = [ "cpu_amd64.s", "cpuid.go", + "cpuid_arm64.go", + "cpuid_x86.go", ], visibility = ["//:sandbox"], deps = ["//pkg/log"], @@ -15,7 +17,10 @@ go_library( go_test( name = "cpuid_test", size = "small", - srcs = ["cpuid_test.go"], + srcs = [ + "cpuid_arm64_test.go", + "cpuid_x86_test.go", + ], library = ":cpuid", ) @@ -23,7 +28,7 @@ go_test( name = "cpuid_parse_test", size = "small", srcs = [ - "cpuid_parse_test.go", + "cpuid_parse_x86_test.go", ], library = ":cpuid", tags = ["manual"], diff --git a/pkg/cpuid/cpuid.go b/pkg/cpuid/cpuid.go index cf50ee53f..f7f9dbf86 100644 --- a/pkg/cpuid/cpuid.go +++ b/pkg/cpuid/cpuid.go @@ -1,4 +1,4 @@ -// Copyright 2018 The gVisor Authors. +// Copyright 2019 The gVisor Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build i386 amd64 - // Package cpuid provides basic functionality for creating and adjusting CPU // feature sets. // @@ -21,1100 +19,20 @@ // known platform, or HostFeatureSet()) and then add, remove, and test for // features as desired. // -// For example: Test for hardware extended state saving, and if we don't have -// it, don't expose AVX, which cannot be saved with fxsave. +// For example: on x86, test for hardware extended state saving, and if +// we don't have it, don't expose AVX, which cannot be saved with fxsave. // // if !HostFeatureSet().HasFeature(X86FeatureXSAVE) { // exposedFeatures.Remove(X86FeatureAVX) // } package cpuid -import ( - "bytes" - "fmt" - "io/ioutil" - "strconv" - "strings" - - "gvisor.dev/gvisor/pkg/log" -) - -// Common references for CPUID leaves and bits: -// -// Intel: -// * Intel SDM Volume 2, Chapter 3.2 "CPUID" (more up-to-date) -// * Intel Application Note 485 (more detailed) -// -// AMD: -// * AMD64 APM Volume 3, Appendix 3 "Obtaining Processor Information ..." - // Feature is a unique identifier for a particular cpu feature. We just use an -// int as a feature number on x86. +// int as a feature number on x86 and arm64. // -// Features are numbered according to "blocks". Each block is 32 bits, and -// feature bits from the same source (cpuid leaf/level) are in the same block. -type Feature int - -// block is a collection of 32 Feature bits. -type block int - -const blockSize = 32 - -// Feature bits are numbered according to "blocks". Each block is 32 bits, and +// On x86, features are numbered according to "blocks". Each block is 32 bits, and // feature bits from the same source (cpuid leaf/level) are in the same block. -func featureID(b block, bit int) Feature { - return Feature(32*int(b) + bit) -} - -// Block 0 constants are all of the "basic" feature bits returned by a cpuid in -// ecx with eax=1. -const ( - X86FeatureSSE3 Feature = iota - X86FeaturePCLMULDQ - X86FeatureDTES64 - X86FeatureMONITOR - X86FeatureDSCPL - X86FeatureVMX - X86FeatureSMX - X86FeatureEST - X86FeatureTM2 - X86FeatureSSSE3 // Not a typo, "supplemental" SSE3. - X86FeatureCNXTID - X86FeatureSDBG - X86FeatureFMA - X86FeatureCX16 - X86FeatureXTPR - X86FeaturePDCM - _ // ecx bit 16 is reserved. - X86FeaturePCID - X86FeatureDCA - X86FeatureSSE4_1 - X86FeatureSSE4_2 - X86FeatureX2APIC - X86FeatureMOVBE - X86FeaturePOPCNT - X86FeatureTSCD - X86FeatureAES - X86FeatureXSAVE - X86FeatureOSXSAVE - X86FeatureAVX - X86FeatureF16C - X86FeatureRDRAND - _ // ecx bit 31 is reserved. -) - -// Block 1 constants are all of the "basic" feature bits returned by a cpuid in -// edx with eax=1. -const ( - X86FeatureFPU Feature = 32 + iota - X86FeatureVME - X86FeatureDE - X86FeaturePSE - X86FeatureTSC - X86FeatureMSR - X86FeaturePAE - X86FeatureMCE - X86FeatureCX8 - X86FeatureAPIC - _ // edx bit 10 is reserved. - X86FeatureSEP - X86FeatureMTRR - X86FeaturePGE - X86FeatureMCA - X86FeatureCMOV - X86FeaturePAT - X86FeaturePSE36 - X86FeaturePSN - X86FeatureCLFSH - _ // edx bit 20 is reserved. - X86FeatureDS - X86FeatureACPI - X86FeatureMMX - X86FeatureFXSR - X86FeatureSSE - X86FeatureSSE2 - X86FeatureSS - X86FeatureHTT - X86FeatureTM - X86FeatureIA64 - X86FeaturePBE -) - -// Block 2 bits are the "structured extended" features returned in ebx for -// eax=7, ecx=0. -const ( - X86FeatureFSGSBase Feature = 2*32 + iota - X86FeatureTSC_ADJUST - _ // ebx bit 2 is reserved. - X86FeatureBMI1 - X86FeatureHLE - X86FeatureAVX2 - X86FeatureFDP_EXCPTN_ONLY - X86FeatureSMEP - X86FeatureBMI2 - X86FeatureERMS - X86FeatureINVPCID - X86FeatureRTM - X86FeatureCQM - X86FeatureFPCSDS - X86FeatureMPX - X86FeatureRDT - X86FeatureAVX512F - X86FeatureAVX512DQ - X86FeatureRDSEED - X86FeatureADX - X86FeatureSMAP - X86FeatureAVX512IFMA - X86FeaturePCOMMIT - X86FeatureCLFLUSHOPT - X86FeatureCLWB - X86FeatureIPT // Intel processor trace. - X86FeatureAVX512PF - X86FeatureAVX512ER - X86FeatureAVX512CD - X86FeatureSHA - X86FeatureAVX512BW - X86FeatureAVX512VL -) - -// Block 3 bits are the "extended" features returned in ecx for eax=7, ecx=0. -const ( - X86FeaturePREFETCHWT1 Feature = 3*32 + iota - X86FeatureAVX512VBMI - X86FeatureUMIP - X86FeaturePKU - X86FeatureOSPKE - X86FeatureWAITPKG - X86FeatureAVX512_VBMI2 - _ // ecx bit 7 is reserved - X86FeatureGFNI - X86FeatureVAES - X86FeatureVPCLMULQDQ - X86FeatureAVX512_VNNI - X86FeatureAVX512_BITALG - X86FeatureTME - X86FeatureAVX512_VPOPCNTDQ - _ // ecx bit 15 is reserved - X86FeatureLA57 - // ecx bits 17-21 are reserved - _ - _ - _ - _ - _ - X86FeatureRDPID - // ecx bits 23-24 are reserved - _ - _ - X86FeatureCLDEMOTE - _ // ecx bit 26 is reserved - X86FeatureMOVDIRI - X86FeatureMOVDIR64B -) - -// Block 4 constants are for xsave capabilities in CPUID.(EAX=0DH,ECX=01H):EAX. -// The CPUID leaf is available only if 'X86FeatureXSAVE' is present. -const ( - X86FeatureXSAVEOPT Feature = 4*32 + iota - X86FeatureXSAVEC - X86FeatureXGETBV1 - X86FeatureXSAVES - // EAX[31:4] are reserved. -) - -// Block 5 constants are the extended feature bits in -// CPUID.(EAX=0x80000001):ECX. -const ( - X86FeatureLAHF64 Feature = 5*32 + iota - X86FeatureCMP_LEGACY - X86FeatureSVM - X86FeatureEXTAPIC - X86FeatureCR8_LEGACY - X86FeatureLZCNT - X86FeatureSSE4A - X86FeatureMISALIGNSSE - X86FeaturePREFETCHW - X86FeatureOSVW - X86FeatureIBS - X86FeatureXOP - X86FeatureSKINIT - X86FeatureWDT - _ // ecx bit 14 is reserved. - X86FeatureLWP - X86FeatureFMA4 - X86FeatureTCE - _ // ecx bit 18 is reserved. - _ // ecx bit 19 is reserved. - _ // ecx bit 20 is reserved. - X86FeatureTBM - X86FeatureTOPOLOGY - X86FeaturePERFCTR_CORE - X86FeaturePERFCTR_NB - _ // ecx bit 25 is reserved. - X86FeatureBPEXT - X86FeaturePERFCTR_TSC - X86FeaturePERFCTR_LLC - X86FeatureMWAITX - // ECX[31:30] are reserved. -) - -// Block 6 constants are the extended feature bits in -// CPUID.(EAX=0x80000001):EDX. // -// These are sparse, and so the bit positions are assigned manually. -const ( - // On AMD, EDX[24:23] | EDX[17:12] | EDX[9:0] are duplicate features - // also defined in block 1 (in identical bit positions). Those features - // are not listed here. - block6DuplicateMask = 0x183f3ff - - X86FeatureSYSCALL Feature = 6*32 + 11 - X86FeatureNX Feature = 6*32 + 20 - X86FeatureMMXEXT Feature = 6*32 + 22 - X86FeatureFXSR_OPT Feature = 6*32 + 25 - X86FeatureGBPAGES Feature = 6*32 + 26 - X86FeatureRDTSCP Feature = 6*32 + 27 - X86FeatureLM Feature = 6*32 + 29 - X86Feature3DNOWEXT Feature = 6*32 + 30 - X86Feature3DNOW Feature = 6*32 + 31 -) - -// linuxBlockOrder defines the order in which linux organizes the feature -// blocks. Linux also tracks feature bits in 32-bit blocks, but in an order -// which doesn't match well here, so for the /proc/cpuinfo generation we simply -// re-map the blocks to Linux's ordering and then go through the bits in each -// block. -var linuxBlockOrder = []block{1, 6, 0, 5, 2, 4, 3} - -// To make emulation of /proc/cpuinfo easy, these names match the names of the -// basic features in Linux defined in arch/x86/kernel/cpu/capflags.c. -var x86FeatureStrings = map[Feature]string{ - // Block 0. - X86FeatureSSE3: "pni", - X86FeaturePCLMULDQ: "pclmulqdq", - X86FeatureDTES64: "dtes64", - X86FeatureMONITOR: "monitor", - X86FeatureDSCPL: "ds_cpl", - X86FeatureVMX: "vmx", - X86FeatureSMX: "smx", - X86FeatureEST: "est", - X86FeatureTM2: "tm2", - X86FeatureSSSE3: "ssse3", - X86FeatureCNXTID: "cid", - X86FeatureSDBG: "sdbg", - X86FeatureFMA: "fma", - X86FeatureCX16: "cx16", - X86FeatureXTPR: "xtpr", - X86FeaturePDCM: "pdcm", - X86FeaturePCID: "pcid", - X86FeatureDCA: "dca", - X86FeatureSSE4_1: "sse4_1", - X86FeatureSSE4_2: "sse4_2", - X86FeatureX2APIC: "x2apic", - X86FeatureMOVBE: "movbe", - X86FeaturePOPCNT: "popcnt", - X86FeatureTSCD: "tsc_deadline_timer", - X86FeatureAES: "aes", - X86FeatureXSAVE: "xsave", - X86FeatureAVX: "avx", - X86FeatureF16C: "f16c", - X86FeatureRDRAND: "rdrand", - - // Block 1. - X86FeatureFPU: "fpu", - X86FeatureVME: "vme", - X86FeatureDE: "de", - X86FeaturePSE: "pse", - X86FeatureTSC: "tsc", - X86FeatureMSR: "msr", - X86FeaturePAE: "pae", - X86FeatureMCE: "mce", - X86FeatureCX8: "cx8", - X86FeatureAPIC: "apic", - X86FeatureSEP: "sep", - X86FeatureMTRR: "mtrr", - X86FeaturePGE: "pge", - X86FeatureMCA: "mca", - X86FeatureCMOV: "cmov", - X86FeaturePAT: "pat", - X86FeaturePSE36: "pse36", - X86FeaturePSN: "pn", - X86FeatureCLFSH: "clflush", - X86FeatureDS: "dts", - X86FeatureACPI: "acpi", - X86FeatureMMX: "mmx", - X86FeatureFXSR: "fxsr", - X86FeatureSSE: "sse", - X86FeatureSSE2: "sse2", - X86FeatureSS: "ss", - X86FeatureHTT: "ht", - X86FeatureTM: "tm", - X86FeatureIA64: "ia64", - X86FeaturePBE: "pbe", - - // Block 2. - X86FeatureFSGSBase: "fsgsbase", - X86FeatureTSC_ADJUST: "tsc_adjust", - X86FeatureBMI1: "bmi1", - X86FeatureHLE: "hle", - X86FeatureAVX2: "avx2", - X86FeatureSMEP: "smep", - X86FeatureBMI2: "bmi2", - X86FeatureERMS: "erms", - X86FeatureINVPCID: "invpcid", - X86FeatureRTM: "rtm", - X86FeatureCQM: "cqm", - X86FeatureMPX: "mpx", - X86FeatureRDT: "rdt_a", - X86FeatureAVX512F: "avx512f", - X86FeatureAVX512DQ: "avx512dq", - X86FeatureRDSEED: "rdseed", - X86FeatureADX: "adx", - X86FeatureSMAP: "smap", - X86FeatureCLWB: "clwb", - X86FeatureAVX512PF: "avx512pf", - X86FeatureAVX512ER: "avx512er", - X86FeatureAVX512CD: "avx512cd", - X86FeatureSHA: "sha_ni", - X86FeatureAVX512BW: "avx512bw", - X86FeatureAVX512VL: "avx512vl", - - // Block 3. - X86FeatureAVX512VBMI: "avx512vbmi", - X86FeatureUMIP: "umip", - X86FeaturePKU: "pku", - X86FeatureOSPKE: "ospke", - X86FeatureWAITPKG: "waitpkg", - X86FeatureAVX512_VBMI2: "avx512_vbmi2", - X86FeatureGFNI: "gfni", - X86FeatureVAES: "vaes", - X86FeatureVPCLMULQDQ: "vpclmulqdq", - X86FeatureAVX512_VNNI: "avx512_vnni", - X86FeatureAVX512_BITALG: "avx512_bitalg", - X86FeatureTME: "tme", - X86FeatureAVX512_VPOPCNTDQ: "avx512_vpopcntdq", - X86FeatureLA57: "la57", - X86FeatureRDPID: "rdpid", - X86FeatureCLDEMOTE: "cldemote", - X86FeatureMOVDIRI: "movdiri", - X86FeatureMOVDIR64B: "movdir64b", - - // Block 4. - X86FeatureXSAVEOPT: "xsaveopt", - X86FeatureXSAVEC: "xsavec", - X86FeatureXGETBV1: "xgetbv1", - X86FeatureXSAVES: "xsaves", - - // Block 5. - X86FeatureLAHF64: "lahf_lm", // LAHF/SAHF in long mode - X86FeatureCMP_LEGACY: "cmp_legacy", - X86FeatureSVM: "svm", - X86FeatureEXTAPIC: "extapic", - X86FeatureCR8_LEGACY: "cr8_legacy", - X86FeatureLZCNT: "abm", // Advanced bit manipulation - X86FeatureSSE4A: "sse4a", - X86FeatureMISALIGNSSE: "misalignsse", - X86FeaturePREFETCHW: "3dnowprefetch", - X86FeatureOSVW: "osvw", - X86FeatureIBS: "ibs", - X86FeatureXOP: "xop", - X86FeatureSKINIT: "skinit", - X86FeatureWDT: "wdt", - X86FeatureLWP: "lwp", - X86FeatureFMA4: "fma4", - X86FeatureTCE: "tce", - X86FeatureTBM: "tbm", - X86FeatureTOPOLOGY: "topoext", - X86FeaturePERFCTR_CORE: "perfctr_core", - X86FeaturePERFCTR_NB: "perfctr_nb", - X86FeatureBPEXT: "bpext", - X86FeaturePERFCTR_TSC: "ptsc", - X86FeaturePERFCTR_LLC: "perfctr_llc", - X86FeatureMWAITX: "mwaitx", - - // Block 6. - X86FeatureSYSCALL: "syscall", - X86FeatureNX: "nx", - X86FeatureMMXEXT: "mmxext", - X86FeatureFXSR_OPT: "fxsr_opt", - X86FeatureGBPAGES: "pdpe1gb", - X86FeatureRDTSCP: "rdtscp", - X86FeatureLM: "lm", - X86Feature3DNOWEXT: "3dnowext", - X86Feature3DNOW: "3dnow", -} - -// These flags are parse only---they can be used for setting / unsetting the -// flags, but will not get printed out in /proc/cpuinfo. -var x86FeatureParseOnlyStrings = map[Feature]string{ - // Block 0. - X86FeatureOSXSAVE: "osxsave", - - // Block 2. - X86FeatureFDP_EXCPTN_ONLY: "fdp_excptn_only", - X86FeatureFPCSDS: "fpcsds", - X86FeatureIPT: "pt", - X86FeatureCLFLUSHOPT: "clfushopt", - - // Block 3. - X86FeaturePREFETCHWT1: "prefetchwt1", -} - -// intelCacheDescriptors describe the caches and TLBs on the system. They are -// returned in the registers for eax=2. Intel only. -type intelCacheDescriptor uint8 - -// Valid cache/TLB descriptors. All descriptors can be found in Intel SDM Vol. -// 2, Ch. 3.2, "CPUID", Table 3-12 "Encoding of CPUID Leaf 2 Descriptors". -const ( - intelNullDescriptor intelCacheDescriptor = 0 - intelNoTLBDescriptor intelCacheDescriptor = 0xfe - intelNoCacheDescriptor intelCacheDescriptor = 0xff - - // Most descriptors omitted for brevity as they are currently unused. -) - -// CacheType describes the type of a cache, as returned in eax[4:0] for eax=4. -type CacheType uint8 - -const ( - // cacheNull indicates that there are no more entries. - cacheNull CacheType = iota - - // CacheData is a data cache. - CacheData - - // CacheInstruction is an instruction cache. - CacheInstruction - - // CacheUnified is a unified instruction and data cache. - CacheUnified -) - -// Cache describes the parameters of a single cache on the system. -// -// +stateify savable -type Cache struct { - // Level is the hierarchical level of this cache (L1, L2, etc). - Level uint32 - - // Type is the type of cache. - Type CacheType - - // FullyAssociative indicates that entries may be placed in any block. - FullyAssociative bool - - // Partitions is the number of physical partitions in the cache. - Partitions uint32 - - // Ways is the number of ways of associativity in the cache. - Ways uint32 - - // Sets is the number of sets in the cache. - Sets uint32 - - // InvalidateHierarchical indicates that WBINVD/INVD from threads - // sharing this cache acts upon lower level caches for threads sharing - // this cache. - InvalidateHierarchical bool - - // Inclusive indicates that this cache is inclusive of lower cache - // levels. - Inclusive bool - - // DirectMapped indicates that this cache is directly mapped from - // address, rather than using a hash function. - DirectMapped bool -} - -// Just a way to wrap cpuid function numbers. -type cpuidFunction uint32 - -// The constants below are the lower or "standard" cpuid functions, ordered as -// defined by the hardware. -const ( - vendorID cpuidFunction = iota // Returns vendor ID and largest standard function. - featureInfo // Returns basic feature bits and processor signature. - intelCacheDescriptors // Returns list of cache descriptors. Intel only. - intelSerialNumber // Returns processor serial number (obsolete on new hardware). Intel only. - intelDeterministicCacheParams // Returns deterministic cache information. Intel only. - monitorMwaitParams // Returns information about monitor/mwait instructions. - powerParams // Returns information about power management and thermal sensors. - extendedFeatureInfo // Returns extended feature bits. - _ // Function 0x8 is reserved. - intelDCAParams // Returns direct cache access information. Intel only. - intelPMCInfo // Returns information about performance monitoring features. Intel only. - intelX2APICInfo // Returns core/logical processor topology. Intel only. - _ // Function 0xc is reserved. - xSaveInfo // Returns information about extended state management. -) - -// The "extended" functions start at 0x80000000. -const ( - extendedFunctionInfo cpuidFunction = 0x80000000 + iota // Returns highest available extended function in eax. - extendedFeatures // Returns some extended feature bits in edx and ecx. -) - -// These are the extended floating point state features. They are used to -// enumerate floating point features in XCR0, XSTATE_BV, etc. -const ( - XSAVEFeatureX87 = 1 << 0 - XSAVEFeatureSSE = 1 << 1 - XSAVEFeatureAVX = 1 << 2 - XSAVEFeatureBNDREGS = 1 << 3 - XSAVEFeatureBNDCSR = 1 << 4 - XSAVEFeatureAVX512op = 1 << 5 - XSAVEFeatureAVX512zmm0 = 1 << 6 - XSAVEFeatureAVX512zmm16 = 1 << 7 - XSAVEFeaturePKRU = 1 << 9 -) - -var cpuFreqMHz float64 - -// x86FeaturesFromString includes features from x86FeatureStrings and -// x86FeatureParseOnlyStrings. -var x86FeaturesFromString = make(map[string]Feature) - -// FeatureFromString returns the Feature associated with the given feature -// string plus a bool to indicate if it could find the feature. -func FeatureFromString(s string) (Feature, bool) { - f, b := x86FeaturesFromString[s] - return f, b -} - -// String implements fmt.Stringer. -func (f Feature) String() string { - if s := f.flagString(false); s != "" { - return s - } - - block := int(f) / 32 - bit := int(f) % 32 - return fmt.Sprintf("", f, block, bit) -} - -func (f Feature) flagString(cpuinfoOnly bool) string { - if s, ok := x86FeatureStrings[f]; ok { - return s - } - if !cpuinfoOnly { - return x86FeatureParseOnlyStrings[f] - } - return "" -} - -// FeatureSet is a set of Features for a CPU. -// -// +stateify savable -type FeatureSet struct { - // Set is the set of features that are enabled in this FeatureSet. - Set map[Feature]bool - - // VendorID is the 12-char string returned in ebx:edx:ecx for eax=0. - VendorID string - - // ExtendedFamily is part of the processor signature. - ExtendedFamily uint8 - - // ExtendedModel is part of the processor signature. - ExtendedModel uint8 - - // ProcessorType is part of the processor signature. - ProcessorType uint8 - - // Family is part of the processor signature. - Family uint8 - - // Model is part of the processor signature. - Model uint8 - - // SteppingID is part of the processor signature. - SteppingID uint8 - - // Caches describes the caches on the CPU. - Caches []Cache - - // CacheLine is the size of a cache line in bytes. - // - // All caches use the same line size. This is not enforced in the CPUID - // encoding, but is true on all known x86 processors. - CacheLine uint32 -} - -// FlagsString prints out supported CPU flags. If cpuinfoOnly is true, it is -// equivalent to the "flags" field in /proc/cpuinfo. -func (fs *FeatureSet) FlagsString(cpuinfoOnly bool) string { - var s []string - for _, b := range linuxBlockOrder { - for i := 0; i < blockSize; i++ { - if f := featureID(b, i); fs.Set[f] { - if fstr := f.flagString(cpuinfoOnly); fstr != "" { - s = append(s, fstr) - } - } - } - } - return strings.Join(s, " ") -} - -// WriteCPUInfoTo is to generate a section of one cpu in /proc/cpuinfo. This is -// a minimal /proc/cpuinfo, it is missing some fields like "microcode" that are -// not always printed in Linux. The bogomips field is simply made up. -func (fs FeatureSet) WriteCPUInfoTo(cpu uint, b *bytes.Buffer) { - fmt.Fprintf(b, "processor\t: %d\n", cpu) - fmt.Fprintf(b, "vendor_id\t: %s\n", fs.VendorID) - fmt.Fprintf(b, "cpu family\t: %d\n", ((fs.ExtendedFamily<<4)&0xff)|fs.Family) - fmt.Fprintf(b, "model\t\t: %d\n", ((fs.ExtendedModel<<4)&0xff)|fs.Model) - fmt.Fprintf(b, "model name\t: %s\n", "unknown") // Unknown for now. - fmt.Fprintf(b, "stepping\t: %s\n", "unknown") // Unknown for now. - fmt.Fprintf(b, "cpu MHz\t\t: %.3f\n", cpuFreqMHz) - fmt.Fprintln(b, "fpu\t\t: yes") - fmt.Fprintln(b, "fpu_exception\t: yes") - fmt.Fprintf(b, "cpuid level\t: %d\n", uint32(xSaveInfo)) // Same as ax in vendorID. - fmt.Fprintln(b, "wp\t\t: yes") - fmt.Fprintf(b, "flags\t\t: %s\n", fs.FlagsString(true)) - fmt.Fprintf(b, "bogomips\t: %.02f\n", cpuFreqMHz) // It's bogus anyway. - fmt.Fprintf(b, "clflush size\t: %d\n", fs.CacheLine) - fmt.Fprintf(b, "cache_alignment\t: %d\n", fs.CacheLine) - fmt.Fprintf(b, "address sizes\t: %d bits physical, %d bits virtual\n", 46, 48) - fmt.Fprintln(b, "power management:") // This is always here, but can be blank. - fmt.Fprintln(b, "") // The /proc/cpuinfo file ends with an extra newline. -} - -const ( - amdVendorID = "AuthenticAMD" - intelVendorID = "GenuineIntel" -) - -// AMD returns true if fs describes an AMD CPU. -func (fs *FeatureSet) AMD() bool { - return fs.VendorID == amdVendorID -} - -// Intel returns true if fs describes an Intel CPU. -func (fs *FeatureSet) Intel() bool { - return fs.VendorID == intelVendorID -} - -// ErrIncompatible is returned by FeatureSet.HostCompatible if fs is not a -// subset of the host feature set. -type ErrIncompatible struct { - message string -} - -// Error implements error. -func (e ErrIncompatible) Error() string { - return e.message -} - -// CheckHostCompatible returns nil if fs is a subset of the host feature set. -func (fs *FeatureSet) CheckHostCompatible() error { - hfs := HostFeatureSet() - - if diff := fs.Subtract(hfs); diff != nil { - return ErrIncompatible{fmt.Sprintf("CPU feature set %v incompatible with host feature set %v (missing: %v)", fs.FlagsString(false), hfs.FlagsString(false), diff)} - } - - // The size of a cache line must match, as it is critical to correctly - // utilizing CLFLUSH. Other cache properties are allowed to change, as - // they are not important to correctness. - if fs.CacheLine != hfs.CacheLine { - return ErrIncompatible{fmt.Sprintf("CPU cache line size %d incompatible with host cache line size %d", fs.CacheLine, hfs.CacheLine)} - } - - return nil -} - -// Helper to convert 3 regs into 12-byte vendor ID. -func vendorIDFromRegs(bx, cx, dx uint32) string { - bytes := make([]byte, 0, 12) - for i := uint(0); i < 4; i++ { - b := byte(bx >> (i * 8)) - bytes = append(bytes, b) - } - - for i := uint(0); i < 4; i++ { - b := byte(dx >> (i * 8)) - bytes = append(bytes, b) - } - - for i := uint(0); i < 4; i++ { - b := byte(cx >> (i * 8)) - bytes = append(bytes, b) - } - return string(bytes) -} - -// ExtendedStateSize returns the number of bytes needed to save the "extended -// state" for this processor and the boundary it must be aligned to. Extended -// state includes floating point registers, and other cpu state that's not -// associated with the normal task context. -// -// Note: We can save some space here with an optimization where we use a -// smaller chunk of memory depending on features that are actually enabled. -// Currently we just use the largest possible size for simplicity (which is -// about 2.5K worst case, with avx512). -func (fs *FeatureSet) ExtendedStateSize() (size, align uint) { - if fs.UseXsave() { - // Leaf 0 of xsaveinfo function returns the size for currently - // enabled xsave features in ebx, the maximum size if all valid - // features are saved with xsave in ecx, and valid XCR0 bits in - // edx:eax. - _, _, maxSize, _ := HostID(uint32(xSaveInfo), 0) - return uint(maxSize), 64 - } - - // If we don't support xsave, we fall back to fxsave, which requires - // 512 bytes aligned to 16 bytes. - return 512, 16 -} - -// ValidXCR0Mask returns the bits that may be set to 1 in control register -// XCR0. -func (fs *FeatureSet) ValidXCR0Mask() uint64 { - if !fs.UseXsave() { - return 0 - } - eax, _, _, edx := HostID(uint32(xSaveInfo), 0) - return uint64(edx)<<32 | uint64(eax) -} - -// vendorIDRegs returns the 3 register values used to construct the 12-byte -// vendor ID string for eax=0. -func (fs *FeatureSet) vendorIDRegs() (bx, dx, cx uint32) { - for i := uint(0); i < 4; i++ { - bx |= uint32(fs.VendorID[i]) << (i * 8) - } - - for i := uint(0); i < 4; i++ { - dx |= uint32(fs.VendorID[i+4]) << (i * 8) - } - - for i := uint(0); i < 4; i++ { - cx |= uint32(fs.VendorID[i+8]) << (i * 8) - } - return -} - -// signature returns the signature dword that's returned in eax when eax=1. -func (fs *FeatureSet) signature() uint32 { - var s uint32 - s |= uint32(fs.SteppingID & 0xf) - s |= uint32(fs.Model&0xf) << 4 - s |= uint32(fs.Family&0xf) << 8 - s |= uint32(fs.ProcessorType&0x3) << 12 - s |= uint32(fs.ExtendedModel&0xf) << 16 - s |= uint32(fs.ExtendedFamily&0xff) << 20 - return s -} - -// Helper to deconstruct signature dword. -func signatureSplit(v uint32) (ef, em, pt, f, m, sid uint8) { - sid = uint8(v & 0xf) - m = uint8(v>>4) & 0xf - f = uint8(v>>8) & 0xf - pt = uint8(v>>12) & 0x3 - em = uint8(v>>16) & 0xf - ef = uint8(v >> 20) - return -} - -// Helper to convert blockwise feature bit masks into a set of features. Masks -// must be provided in order for each block, without skipping them. If a block -// does not matter for this feature set, 0 is specified. -func setFromBlockMasks(blocks ...uint32) map[Feature]bool { - s := make(map[Feature]bool) - for b, blockMask := range blocks { - for i := 0; i < blockSize; i++ { - if blockMask&1 != 0 { - s[featureID(block(b), i)] = true - } - blockMask >>= 1 - } - } - return s -} - -// blockMask returns the 32-bit mask associated with a block of features. -func (fs *FeatureSet) blockMask(b block) uint32 { - var mask uint32 - for i := 0; i < blockSize; i++ { - if fs.Set[featureID(b, i)] { - mask |= 1 << uint(i) - } - } - return mask -} - -// Remove removes a Feature from a FeatureSet. It ignores features -// that are not in the FeatureSet. -func (fs *FeatureSet) Remove(feature Feature) { - delete(fs.Set, feature) -} - -// Add adds a Feature to a FeatureSet. It ignores duplicate features. -func (fs *FeatureSet) Add(feature Feature) { - fs.Set[feature] = true -} - -// HasFeature tests whether or not a feature is in the given feature set. -func (fs *FeatureSet) HasFeature(feature Feature) bool { - return fs.Set[feature] -} - -// Subtract returns the features present in fs that are not present in other. -// If all features in fs are present in other, Subtract returns nil. -func (fs *FeatureSet) Subtract(other *FeatureSet) (diff map[Feature]bool) { - for f := range fs.Set { - if !other.Set[f] { - if diff == nil { - diff = make(map[Feature]bool) - } - diff[f] = true - } - } - - return -} - -// EmulateID emulates a cpuid instruction based on the feature set. -func (fs *FeatureSet) EmulateID(origAx, origCx uint32) (ax, bx, cx, dx uint32) { - switch cpuidFunction(origAx) { - case vendorID: - ax = uint32(xSaveInfo) // 0xd (xSaveInfo) is the highest function we support. - bx, dx, cx = fs.vendorIDRegs() - case featureInfo: - // CLFLUSH line size is encoded in quadwords. Other fields in bx unsupported. - bx = (fs.CacheLine / 8) << 8 - cx = fs.blockMask(block(0)) - dx = fs.blockMask(block(1)) - ax = fs.signature() - case intelCacheDescriptors: - if !fs.Intel() { - // Reserved on non-Intel. - return 0, 0, 0, 0 - } - - // "The least-significant byte in register EAX (register AL) - // will always return 01H. Software should ignore this value - // and not interpret it as an informational descriptor." - SDM - // - // We only support reporting cache parameters via - // intelDeterministicCacheParams; report as much here. - // - // We do not support exposing TLB information at all. - ax = 1 | (uint32(intelNoCacheDescriptor) << 8) - case intelDeterministicCacheParams: - if !fs.Intel() { - // Reserved on non-Intel. - return 0, 0, 0, 0 - } - - // cx is the index of the cache to describe. - if int(origCx) >= len(fs.Caches) { - return uint32(cacheNull), 0, 0, 0 - } - c := fs.Caches[origCx] - - ax = uint32(c.Type) - ax |= c.Level << 5 - ax |= 1 << 8 // Always claim the cache is "self-initializing". - if c.FullyAssociative { - ax |= 1 << 9 - } - // Processor topology not supported. - - bx = fs.CacheLine - 1 - bx |= (c.Partitions - 1) << 12 - bx |= (c.Ways - 1) << 22 - - cx = c.Sets - 1 - - if !c.InvalidateHierarchical { - dx |= 1 - } - if c.Inclusive { - dx |= 1 << 1 - } - if !c.DirectMapped { - dx |= 1 << 2 - } - case xSaveInfo: - if !fs.UseXsave() { - return 0, 0, 0, 0 - } - return HostID(uint32(xSaveInfo), origCx) - case extendedFeatureInfo: - if origCx != 0 { - break // Only leaf 0 is supported. - } - bx = fs.blockMask(block(2)) - cx = fs.blockMask(block(3)) - case extendedFunctionInfo: - // We only support showing the extended features. - ax = uint32(extendedFeatures) - cx = 0 - case extendedFeatures: - cx = fs.blockMask(block(5)) - dx = fs.blockMask(block(6)) - if fs.AMD() { - // AMD duplicates some block 1 features in block 6. - dx |= fs.blockMask(block(1)) & block6DuplicateMask - } - } - - return -} - -// UseXsave returns the choice of fp state saving instruction. -func (fs *FeatureSet) UseXsave() bool { - return fs.HasFeature(X86FeatureXSAVE) && fs.HasFeature(X86FeatureOSXSAVE) -} - -// UseXsaveopt returns true if 'fs' supports the "xsaveopt" instruction. -func (fs *FeatureSet) UseXsaveopt() bool { - return fs.UseXsave() && fs.HasFeature(X86FeatureXSAVEOPT) -} - -// HostID executes a native CPUID instruction. -func HostID(axArg, cxArg uint32) (ax, bx, cx, dx uint32) - -// HostFeatureSet uses cpuid to get host values and construct a feature set -// that matches that of the host machine. Note that there are several places -// where there appear to be some unnecessary assignments between register names -// (ax, bx, cx, or dx) and featureBlockN variables. This is to explicitly show -// where the different feature blocks come from, to make the code easier to -// inspect and read. -func HostFeatureSet() *FeatureSet { - // eax=0 gets max supported feature and vendor ID. - _, bx, cx, dx := HostID(0, 0) - vendorID := vendorIDFromRegs(bx, cx, dx) - - // eax=1 gets basic features in ecx:edx. - ax, bx, cx, dx := HostID(1, 0) - featureBlock0 := cx - featureBlock1 := dx - ef, em, pt, f, m, sid := signatureSplit(ax) - cacheLine := 8 * (bx >> 8) & 0xff - - // eax=4, ecx=i gets details about cache index i. Only supported on Intel. - var caches []Cache - if vendorID == intelVendorID { - // ecx selects the cache index until a null type is returned. - for i := uint32(0); ; i++ { - ax, bx, cx, dx := HostID(4, i) - t := CacheType(ax & 0xf) - if t == cacheNull { - break - } - - lineSize := (bx & 0xfff) + 1 - if lineSize != cacheLine { - panic(fmt.Sprintf("Mismatched cache line size: %d vs %d", lineSize, cacheLine)) - } - - caches = append(caches, Cache{ - Type: t, - Level: (ax >> 5) & 0x7, - FullyAssociative: ((ax >> 9) & 1) == 1, - Partitions: ((bx >> 12) & 0x3ff) + 1, - Ways: ((bx >> 22) & 0x3ff) + 1, - Sets: cx + 1, - InvalidateHierarchical: (dx & 1) == 0, - Inclusive: ((dx >> 1) & 1) == 1, - DirectMapped: ((dx >> 2) & 1) == 0, - }) - } - } - - // eax=7, ecx=0 gets extended features in ecx:ebx. - _, bx, cx, _ = HostID(7, 0) - featureBlock2 := bx - featureBlock3 := cx - - // Leaf 0xd is supported only if CPUID.1:ECX.XSAVE[bit 26] is set. - var featureBlock4 uint32 - if (featureBlock0 & (1 << 26)) != 0 { - featureBlock4, _, _, _ = HostID(uint32(xSaveInfo), 1) - } - - // eax=0x80000000 gets supported extended levels. We use this to - // determine if there are any non-zero block 4 or block 6 bits to find. - var featureBlock5, featureBlock6 uint32 - if ax, _, _, _ := HostID(uint32(extendedFunctionInfo), 0); ax >= uint32(extendedFeatures) { - // eax=0x80000001 gets AMD added feature bits. - _, _, cx, dx = HostID(uint32(extendedFeatures), 0) - featureBlock5 = cx - // Ignore features duplicated from block 1 on AMD. These bits - // are reserved on Intel. - featureBlock6 = dx &^ block6DuplicateMask - } - - set := setFromBlockMasks(featureBlock0, featureBlock1, featureBlock2, featureBlock3, featureBlock4, featureBlock5, featureBlock6) - return &FeatureSet{ - Set: set, - VendorID: vendorID, - ExtendedFamily: ef, - ExtendedModel: em, - ProcessorType: pt, - Family: f, - Model: m, - SteppingID: sid, - CacheLine: cacheLine, - Caches: caches, - } -} - -// Reads max cpu frequency from host /proc/cpuinfo. Must run before -// whitelisting. This value is used to create the fake /proc/cpuinfo from a -// FeatureSet. -func initCPUFreq() { - cpuinfob, err := ioutil.ReadFile("/proc/cpuinfo") - if err != nil { - // Leave it as 0... The standalone VDSO bails out in the same - // way. - log.Warningf("Could not read /proc/cpuinfo: %v", err) - return - } - cpuinfo := string(cpuinfob) - - // We get the value straight from host /proc/cpuinfo. On machines with - // frequency scaling enabled, this will only get the current value - // which will likely be inaccurate. This is fine on machines with - // frequency scaling disabled. - for _, line := range strings.Split(cpuinfo, "\n") { - if strings.Contains(line, "cpu MHz") { - splitMHz := strings.Split(line, ":") - if len(splitMHz) < 2 { - log.Warningf("Could not read /proc/cpuinfo: malformed cpu MHz line") - return - } - - // If there was a problem, leave cpuFreqMHz as 0. - var err error - cpuFreqMHz, err = strconv.ParseFloat(strings.TrimSpace(splitMHz[1]), 64) - if err != nil { - log.Warningf("Could not parse cpu MHz value %v: %v", splitMHz[1], err) - cpuFreqMHz = 0 - return - } - return - } - } - log.Warningf("Could not parse /proc/cpuinfo, it is empty or does not contain cpu MHz") -} - -func initFeaturesFromString() { - for f, s := range x86FeatureStrings { - x86FeaturesFromString[s] = f - } - for f, s := range x86FeatureParseOnlyStrings { - x86FeaturesFromString[s] = f - } -} - -func init() { - // initCpuFreq must be run before whitelists are enabled. - initCPUFreq() - initFeaturesFromString() -} +// On arm64, features are numbered according to the ELF HWCAP definition. +// arch/arm64/include/uapi/asm/hwcap.h +type Feature int diff --git a/pkg/cpuid/cpuid_arm64.go b/pkg/cpuid/cpuid_arm64.go new file mode 100644 index 000000000..6d71290c9 --- /dev/null +++ b/pkg/cpuid/cpuid_arm64.go @@ -0,0 +1,461 @@ +// Copyright 2020 The gVisor Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build arm64 + +package cpuid + +import ( + "bytes" + "encoding/binary" + "fmt" + "io/ioutil" + "strconv" + "strings" + + "gvisor.dev/gvisor/pkg/log" +) + +// ARM64 doesn't have a 'cpuid' equivalent, which means it have no architected +// discovery mechanism for hardware features available to userspace code at EL0. +// The kernel exposes the presence of these features to userspace through a set +// of flags(HWCAP/HWCAP2) bits, exposed in the auxilliary vector. +// Ref Documentation/arm64/elf_hwcaps.rst for more info. +// +// Currently, only the HWCAP bits are supported. + +const ( + // Single and double precision float point types. + ARM64FeatureFP Feature = iota + + // Advanced SIMD with single and double precision + // float point arithmetic. + ARM64FeatureASIMD + + // The generic timer is configured to generate + // events at a frequency of approximately 100KHz. + ARM64FeatureEVTSTRM + + // AES instructions(AESE/AESD/AESMC/AESIMC). + ARM64FeatureAES + + // AES instructions(PMULL/PMULL2). + ARM64FeaturePMULL + + // SHA1 instructions(SHA1C/SHA1P/SHA1M etc). + ARM64FeatureSHA1 + + // SHA2 instructions(SHA256H/SHA256H2/SHA256SU0 etc). + ARM64FeatureSHA2 + + // CRC32 instructions(CRC32B/CRC32H/CRC32W etc). + ARM64FeatureCRC32 + + // Atomic instructions(LDADD/LDCLR/LDEOR/LDSET etc). + ARM64FeatureATOMICS + + // Half precision float point arithmetic. + ARM64FeatureFPHP + + // ASIMD with half precision float point arithmetic. + ARM64FeatureASIMDHP + + // EL0 access to certain ID registers is available. + ARM64FeatureCPUID + + // SQRDMLAH and SQRDMLSH instructions implemented. + ARM64FeatureASIMDRDM + + // The FJCVTZS instruction is implemented. + ARM64FeatureJSCVT + + // The FCMLA and FCADD instructions are implemented. + ARM64FeatureFCMA + + // The LDAPRB/LDAPRH/LDAPR instructions are implemented. + ARM64FeatureLRCPC + + // DC instruction(DC CVAP) supported. + ARM64FeatureDCPOP + + // SHA3 instructions(EOR3/RAX1/XAR/BCAX) implemented. + ARM64FeatureSHA3 + + // SM3 instructions(SM3SS1/SM3TT1A/SM3TT1B) implemented. + ARM64FeatureSM3 + + // SM4 instructions(SM4E/SM4EKEY) implemented. + ARM64FeatureSM4 + + // Dot Product instructions(UDOT/SDOT) implemented. + ARM64FeatureASIMDDP + + // SHA2 instructions(SHA512H/SHA512H2/SHA512SU0) implemented. + ARM64FeatureSHA512 + + // Scalable Vector Extension implemented. + ARM64FeatureSVE + + // FMLAL and FMLSL instructions are implemented. + ARM64FeatureASIMDFHM +) + +// ELF auxiliary vector tags +const ( + _AT_NULL = 0 // End of vector + _AT_HWCAP = 16 // hardware capability bit vector + _AT_HWCAP2 = 26 // hardware capability bit vector 2 +) + +// These should not be changed after they are initialized. +var hwCap uint + +// To make emulation of /proc/cpuinfo easy, these names match the names of the +// basic features in Linux defined in arch/arm64/kernel/cpuinfo.c. +var arm64FeatureStrings = map[Feature]string{ + ARM64FeatureFP: "fp", + ARM64FeatureASIMD: "asimd", + ARM64FeatureEVTSTRM: "evtstrm", + ARM64FeatureAES: "aes", + ARM64FeaturePMULL: "pmull", + ARM64FeatureSHA1: "sha1", + ARM64FeatureSHA2: "sha2", + ARM64FeatureCRC32: "crc32", + ARM64FeatureATOMICS: "atomics", + ARM64FeatureFPHP: "fphp", + ARM64FeatureASIMDHP: "asimdhp", + ARM64FeatureCPUID: "cpuid", + ARM64FeatureASIMDRDM: "asimdrdm", + ARM64FeatureJSCVT: "jscvt", + ARM64FeatureFCMA: "fcma", + ARM64FeatureLRCPC: "lrcpc", + ARM64FeatureDCPOP: "dcpop", + ARM64FeatureSHA3: "sha3", + ARM64FeatureSM3: "sm3", + ARM64FeatureSM4: "sm4", + ARM64FeatureASIMDDP: "asimddp", + ARM64FeatureSHA512: "sha512", + ARM64FeatureSVE: "sve", + ARM64FeatureASIMDFHM: "asimdfhm", +} + +var ( + cpuFreqMHz float64 + cpuImplHex uint64 + cpuArchDec uint64 + cpuVarHex uint64 + cpuPartHex uint64 + cpuRevDec uint64 +) + +// arm64FeaturesFromString includes features from arm64FeatureStrings. +var arm64FeaturesFromString = make(map[string]Feature) + +// FeatureFromString returns the Feature associated with the given feature +// string plus a bool to indicate if it could find the feature. +func FeatureFromString(s string) (Feature, bool) { + f, b := arm64FeaturesFromString[s] + return f, b +} + +// String implements fmt.Stringer. +func (f Feature) String() string { + if s := f.flagString(); s != "" { + return s + } + + return fmt.Sprintf("", f) +} + +func (f Feature) flagString() string { + if s, ok := arm64FeatureStrings[f]; ok { + return s + } + + return "" +} + +// FeatureSet is a set of Features for a CPU. +// +// +stateify savable +type FeatureSet struct { + // Set is the set of features that are enabled in this FeatureSet. + Set map[Feature]bool + + // CPUImplementer is part of the processor signature. + CPUImplementer uint8 + + // CPUArchitecture is part of the processor signature. + CPUArchitecture uint8 + + // CPUVariant is part of the processor signature. + CPUVariant uint8 + + // CPUPartnum is part of the processor signature. + CPUPartnum uint16 + + // CPURevision is part of the processor signature. + CPURevision uint8 +} + +// CheckHostCompatible returns nil if fs is a subset of the host feature set. +// Noop on arm64. +func (fs *FeatureSet) CheckHostCompatible() error { + return nil +} + +// ExtendedStateSize returns the number of bytes needed to save the "extended +// state" for this processor and the boundary it must be aligned to. Extended +// state includes floating point(NEON) registers, and other cpu state that's not +// associated with the normal task context. +func (fs *FeatureSet) ExtendedStateSize() (size, align uint) { + // ARMv8 provide 32x128bits NEON registers. + // + // Ref arch/arm64/include/uapi/asm/ptrace.h + // struct user_fpsimd_state { + // __uint128_t vregs[32]; + // __u32 fpsr; + // __u32 fpcr; + // __u32 __reserved[2]; + // }; + return 528, 16 +} + +// HasFeature tests whether or not a feature is in the given feature set. +func (fs *FeatureSet) HasFeature(feature Feature) bool { + return fs.Set[feature] +} + +// UseXsaveopt returns true if 'fs' supports the "xsaveopt" instruction. +// Noop on arm64. +func (fs *FeatureSet) UseXsave() bool { + return false +} + +// FlagsString prints out supported CPU "flags" field in /proc/cpuinfo. +func (fs *FeatureSet) FlagsString() string { + var s []string + for f, _ := range arm64FeatureStrings { + if fs.Set[f] { + if fstr := f.flagString(); fstr != "" { + s = append(s, fstr) + } + } + } + return strings.Join(s, " ") +} + +// WriteCPUInfoTo is to generate a section of one cpu in /proc/cpuinfo. This is +// a minimal /proc/cpuinfo, and the bogomips field is simply made up. +func (fs FeatureSet) WriteCPUInfoTo(cpu uint, b *bytes.Buffer) { + fmt.Fprintf(b, "processor\t: %d\n", cpu) + fmt.Fprintf(b, "BogoMIPS\t: %.02f\n", cpuFreqMHz) // It's bogus anyway. + fmt.Fprintf(b, "Features\t\t: %s\n", fs.FlagsString()) + fmt.Fprintf(b, "CPU implementer\t: 0x%x\n", cpuImplHex) + fmt.Fprintf(b, "CPU architecture\t: %d\n", cpuArchDec) + fmt.Fprintf(b, "CPU variant\t: 0x%x\n", cpuVarHex) + fmt.Fprintf(b, "CPU part\t: 0x%x\n", cpuPartHex) + fmt.Fprintf(b, "CPU revision\t: %d\n", cpuRevDec) + fmt.Fprintln(b, "") // The /proc/cpuinfo file ends with an extra newline. +} + +// HostFeatureSet uses hwCap to get host values and construct a feature set +// that matches that of the host machine. +func HostFeatureSet() *FeatureSet { + s := make(map[Feature]bool) + + for f, _ := range arm64FeatureStrings { + if hwCap&(1<