summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fsimpl/proc
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fsimpl/proc')
-rwxr-xr-xpkg/sentry/fsimpl/proc/filesystem.go100
-rwxr-xr-xpkg/sentry/fsimpl/proc/proc_state_autogen.go646
-rwxr-xr-xpkg/sentry/fsimpl/proc/subtasks.go135
-rwxr-xr-xpkg/sentry/fsimpl/proc/task.go234
-rwxr-xr-xpkg/sentry/fsimpl/proc/task_fds.go302
-rwxr-xr-xpkg/sentry/fsimpl/proc/task_files.go761
-rwxr-xr-xpkg/sentry/fsimpl/proc/task_net.go808
-rwxr-xr-xpkg/sentry/fsimpl/proc/tasks.go257
-rwxr-xr-xpkg/sentry/fsimpl/proc/tasks_files.go380
-rwxr-xr-xpkg/sentry/fsimpl/proc/tasks_sys.go211
10 files changed, 3834 insertions, 0 deletions
diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go
new file mode 100755
index 000000000..104fc9030
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/filesystem.go
@@ -0,0 +1,100 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package proc implements a partial in-memory file system for procfs.
+package proc
+
+import (
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+)
+
+// Name is the default filesystem name.
+const Name = "proc"
+
+// FilesystemType is the factory class for procfs.
+//
+// +stateify savable
+type FilesystemType struct{}
+
+var _ vfs.FilesystemType = (*FilesystemType)(nil)
+
+// Name implements vfs.FilesystemType.Name.
+func (FilesystemType) Name() string {
+ return Name
+}
+
+// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
+func (ft FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
+ k := kernel.KernelFromContext(ctx)
+ if k == nil {
+ return nil, nil, fmt.Errorf("procfs requires a kernel")
+ }
+ pidns := kernel.PIDNamespaceFromContext(ctx)
+ if pidns == nil {
+ return nil, nil, fmt.Errorf("procfs requires a PID namespace")
+ }
+
+ procfs := &kernfs.Filesystem{}
+ procfs.VFSFilesystem().Init(vfsObj, &ft, procfs)
+
+ var cgroups map[string]string
+ if opts.InternalData != nil {
+ data := opts.InternalData.(*InternalData)
+ cgroups = data.Cgroups
+ }
+
+ _, dentry := newTasksInode(procfs, k, pidns, cgroups)
+ return procfs.VFSFilesystem(), dentry.VFSDentry(), nil
+}
+
+// dynamicInode is an overfitted interface for common Inodes with
+// dynamicByteSource types used in procfs.
+type dynamicInode interface {
+ kernfs.Inode
+ vfs.DynamicBytesSource
+
+ Init(creds *auth.Credentials, ino uint64, data vfs.DynamicBytesSource, perm linux.FileMode)
+}
+
+func newDentry(creds *auth.Credentials, ino uint64, perm linux.FileMode, inode dynamicInode) *kernfs.Dentry {
+ inode.Init(creds, ino, inode, perm)
+
+ d := &kernfs.Dentry{}
+ d.Init(inode)
+ return d
+}
+
+type staticFile struct {
+ kernfs.DynamicBytesFile
+ vfs.StaticData
+}
+
+var _ dynamicInode = (*staticFile)(nil)
+
+func newStaticFile(data string) *staticFile {
+ return &staticFile{StaticData: vfs.StaticData{Data: data}}
+}
+
+// InternalData contains internal data passed in to the procfs mount via
+// vfs.GetFilesystemOptions.InternalData.
+type InternalData struct {
+ Cgroups map[string]string
+}
diff --git a/pkg/sentry/fsimpl/proc/proc_state_autogen.go b/pkg/sentry/fsimpl/proc/proc_state_autogen.go
new file mode 100755
index 000000000..26c7165d3
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/proc_state_autogen.go
@@ -0,0 +1,646 @@
+// automatically generated by stateify.
+
+package proc
+
+import (
+ "gvisor.dev/gvisor/pkg/state"
+)
+
+func (x *FilesystemType) beforeSave() {}
+func (x *FilesystemType) save(m state.Map) {
+ x.beforeSave()
+}
+
+func (x *FilesystemType) afterLoad() {}
+func (x *FilesystemType) load(m state.Map) {
+}
+
+func (x *subtasksInode) beforeSave() {}
+func (x *subtasksInode) save(m state.Map) {
+ x.beforeSave()
+ m.Save("InodeNotSymlink", &x.InodeNotSymlink)
+ m.Save("InodeDirectoryNoNewChildren", &x.InodeDirectoryNoNewChildren)
+ m.Save("InodeAttrs", &x.InodeAttrs)
+ m.Save("OrderedChildren", &x.OrderedChildren)
+ m.Save("AlwaysValid", &x.AlwaysValid)
+ m.Save("task", &x.task)
+ m.Save("pidns", &x.pidns)
+ m.Save("inoGen", &x.inoGen)
+ m.Save("cgroupControllers", &x.cgroupControllers)
+}
+
+func (x *subtasksInode) afterLoad() {}
+func (x *subtasksInode) load(m state.Map) {
+ m.Load("InodeNotSymlink", &x.InodeNotSymlink)
+ m.Load("InodeDirectoryNoNewChildren", &x.InodeDirectoryNoNewChildren)
+ m.Load("InodeAttrs", &x.InodeAttrs)
+ m.Load("OrderedChildren", &x.OrderedChildren)
+ m.Load("AlwaysValid", &x.AlwaysValid)
+ m.Load("task", &x.task)
+ m.Load("pidns", &x.pidns)
+ m.Load("inoGen", &x.inoGen)
+ m.Load("cgroupControllers", &x.cgroupControllers)
+}
+
+func (x *taskInode) beforeSave() {}
+func (x *taskInode) save(m state.Map) {
+ x.beforeSave()
+ m.Save("InodeNotSymlink", &x.InodeNotSymlink)
+ m.Save("InodeDirectoryNoNewChildren", &x.InodeDirectoryNoNewChildren)
+ m.Save("InodeNoDynamicLookup", &x.InodeNoDynamicLookup)
+ m.Save("InodeAttrs", &x.InodeAttrs)
+ m.Save("OrderedChildren", &x.OrderedChildren)
+ m.Save("task", &x.task)
+}
+
+func (x *taskInode) afterLoad() {}
+func (x *taskInode) load(m state.Map) {
+ m.Load("InodeNotSymlink", &x.InodeNotSymlink)
+ m.Load("InodeDirectoryNoNewChildren", &x.InodeDirectoryNoNewChildren)
+ m.Load("InodeNoDynamicLookup", &x.InodeNoDynamicLookup)
+ m.Load("InodeAttrs", &x.InodeAttrs)
+ m.Load("OrderedChildren", &x.OrderedChildren)
+ m.Load("task", &x.task)
+}
+
+func (x *fdDirInode) beforeSave() {}
+func (x *fdDirInode) save(m state.Map) {
+ x.beforeSave()
+ m.Save("InodeNotSymlink", &x.InodeNotSymlink)
+ m.Save("InodeDirectoryNoNewChildren", &x.InodeDirectoryNoNewChildren)
+ m.Save("InodeAttrs", &x.InodeAttrs)
+ m.Save("OrderedChildren", &x.OrderedChildren)
+ m.Save("AlwaysValid", &x.AlwaysValid)
+ m.Save("fdDir", &x.fdDir)
+}
+
+func (x *fdDirInode) afterLoad() {}
+func (x *fdDirInode) load(m state.Map) {
+ m.Load("InodeNotSymlink", &x.InodeNotSymlink)
+ m.Load("InodeDirectoryNoNewChildren", &x.InodeDirectoryNoNewChildren)
+ m.Load("InodeAttrs", &x.InodeAttrs)
+ m.Load("OrderedChildren", &x.OrderedChildren)
+ m.Load("AlwaysValid", &x.AlwaysValid)
+ m.Load("fdDir", &x.fdDir)
+}
+
+func (x *fdSymlink) beforeSave() {}
+func (x *fdSymlink) save(m state.Map) {
+ x.beforeSave()
+ m.Save("InodeAttrs", &x.InodeAttrs)
+ m.Save("InodeNoopRefCount", &x.InodeNoopRefCount)
+ m.Save("InodeSymlink", &x.InodeSymlink)
+ m.Save("task", &x.task)
+ m.Save("fd", &x.fd)
+}
+
+func (x *fdSymlink) afterLoad() {}
+func (x *fdSymlink) load(m state.Map) {
+ m.Load("InodeAttrs", &x.InodeAttrs)
+ m.Load("InodeNoopRefCount", &x.InodeNoopRefCount)
+ m.Load("InodeSymlink", &x.InodeSymlink)
+ m.Load("task", &x.task)
+ m.Load("fd", &x.fd)
+}
+
+func (x *fdInfoDirInode) beforeSave() {}
+func (x *fdInfoDirInode) save(m state.Map) {
+ x.beforeSave()
+ m.Save("InodeNotSymlink", &x.InodeNotSymlink)
+ m.Save("InodeDirectoryNoNewChildren", &x.InodeDirectoryNoNewChildren)
+ m.Save("InodeAttrs", &x.InodeAttrs)
+ m.Save("OrderedChildren", &x.OrderedChildren)
+ m.Save("AlwaysValid", &x.AlwaysValid)
+ m.Save("fdDir", &x.fdDir)
+}
+
+func (x *fdInfoDirInode) afterLoad() {}
+func (x *fdInfoDirInode) load(m state.Map) {
+ m.Load("InodeNotSymlink", &x.InodeNotSymlink)
+ m.Load("InodeDirectoryNoNewChildren", &x.InodeDirectoryNoNewChildren)
+ m.Load("InodeAttrs", &x.InodeAttrs)
+ m.Load("OrderedChildren", &x.OrderedChildren)
+ m.Load("AlwaysValid", &x.AlwaysValid)
+ m.Load("fdDir", &x.fdDir)
+}
+
+func (x *fdInfoData) beforeSave() {}
+func (x *fdInfoData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("AtomicRefCount", &x.AtomicRefCount)
+ m.Save("task", &x.task)
+ m.Save("fd", &x.fd)
+}
+
+func (x *fdInfoData) afterLoad() {}
+func (x *fdInfoData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("AtomicRefCount", &x.AtomicRefCount)
+ m.Load("task", &x.task)
+ m.Load("fd", &x.fd)
+}
+
+func (x *auxvData) beforeSave() {}
+func (x *auxvData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("task", &x.task)
+}
+
+func (x *auxvData) afterLoad() {}
+func (x *auxvData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("task", &x.task)
+}
+
+func (x *cmdlineData) beforeSave() {}
+func (x *cmdlineData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("task", &x.task)
+ m.Save("arg", &x.arg)
+}
+
+func (x *cmdlineData) afterLoad() {}
+func (x *cmdlineData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("task", &x.task)
+ m.Load("arg", &x.arg)
+}
+
+func (x *commInode) beforeSave() {}
+func (x *commInode) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("task", &x.task)
+}
+
+func (x *commInode) afterLoad() {}
+func (x *commInode) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("task", &x.task)
+}
+
+func (x *commData) beforeSave() {}
+func (x *commData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("task", &x.task)
+}
+
+func (x *commData) afterLoad() {}
+func (x *commData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("task", &x.task)
+}
+
+func (x *idMapData) beforeSave() {}
+func (x *idMapData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("task", &x.task)
+ m.Save("gids", &x.gids)
+}
+
+func (x *idMapData) afterLoad() {}
+func (x *idMapData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("task", &x.task)
+ m.Load("gids", &x.gids)
+}
+
+func (x *mapsData) beforeSave() {}
+func (x *mapsData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("task", &x.task)
+}
+
+func (x *mapsData) afterLoad() {}
+func (x *mapsData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("task", &x.task)
+}
+
+func (x *smapsData) beforeSave() {}
+func (x *smapsData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("task", &x.task)
+}
+
+func (x *smapsData) afterLoad() {}
+func (x *smapsData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("task", &x.task)
+}
+
+func (x *taskStatData) beforeSave() {}
+func (x *taskStatData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("task", &x.task)
+ m.Save("tgstats", &x.tgstats)
+ m.Save("pidns", &x.pidns)
+}
+
+func (x *taskStatData) afterLoad() {}
+func (x *taskStatData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("task", &x.task)
+ m.Load("tgstats", &x.tgstats)
+ m.Load("pidns", &x.pidns)
+}
+
+func (x *statmData) beforeSave() {}
+func (x *statmData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("task", &x.task)
+}
+
+func (x *statmData) afterLoad() {}
+func (x *statmData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("task", &x.task)
+}
+
+func (x *statusData) beforeSave() {}
+func (x *statusData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("task", &x.task)
+ m.Save("pidns", &x.pidns)
+}
+
+func (x *statusData) afterLoad() {}
+func (x *statusData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("task", &x.task)
+ m.Load("pidns", &x.pidns)
+}
+
+func (x *ioData) beforeSave() {}
+func (x *ioData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("ioUsage", &x.ioUsage)
+}
+
+func (x *ioData) afterLoad() {}
+func (x *ioData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("ioUsage", &x.ioUsage)
+}
+
+func (x *oomScoreAdj) beforeSave() {}
+func (x *oomScoreAdj) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("task", &x.task)
+}
+
+func (x *oomScoreAdj) afterLoad() {}
+func (x *oomScoreAdj) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("task", &x.task)
+}
+
+func (x *exeSymlink) beforeSave() {}
+func (x *exeSymlink) save(m state.Map) {
+ x.beforeSave()
+ m.Save("InodeAttrs", &x.InodeAttrs)
+ m.Save("InodeNoopRefCount", &x.InodeNoopRefCount)
+ m.Save("InodeSymlink", &x.InodeSymlink)
+ m.Save("task", &x.task)
+}
+
+func (x *exeSymlink) afterLoad() {}
+func (x *exeSymlink) load(m state.Map) {
+ m.Load("InodeAttrs", &x.InodeAttrs)
+ m.Load("InodeNoopRefCount", &x.InodeNoopRefCount)
+ m.Load("InodeSymlink", &x.InodeSymlink)
+ m.Load("task", &x.task)
+}
+
+func (x *mountInfoData) beforeSave() {}
+func (x *mountInfoData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("task", &x.task)
+}
+
+func (x *mountInfoData) afterLoad() {}
+func (x *mountInfoData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("task", &x.task)
+}
+
+func (x *mountsData) beforeSave() {}
+func (x *mountsData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("task", &x.task)
+}
+
+func (x *mountsData) afterLoad() {}
+func (x *mountsData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("task", &x.task)
+}
+
+func (x *ifinet6) beforeSave() {}
+func (x *ifinet6) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("stack", &x.stack)
+}
+
+func (x *ifinet6) afterLoad() {}
+func (x *ifinet6) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("stack", &x.stack)
+}
+
+func (x *netDevData) beforeSave() {}
+func (x *netDevData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("stack", &x.stack)
+}
+
+func (x *netDevData) afterLoad() {}
+func (x *netDevData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("stack", &x.stack)
+}
+
+func (x *netUnixData) beforeSave() {}
+func (x *netUnixData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("kernel", &x.kernel)
+}
+
+func (x *netUnixData) afterLoad() {}
+func (x *netUnixData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("kernel", &x.kernel)
+}
+
+func (x *netTCPData) beforeSave() {}
+func (x *netTCPData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("kernel", &x.kernel)
+}
+
+func (x *netTCPData) afterLoad() {}
+func (x *netTCPData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("kernel", &x.kernel)
+}
+
+func (x *netTCP6Data) beforeSave() {}
+func (x *netTCP6Data) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("kernel", &x.kernel)
+}
+
+func (x *netTCP6Data) afterLoad() {}
+func (x *netTCP6Data) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("kernel", &x.kernel)
+}
+
+func (x *netUDPData) beforeSave() {}
+func (x *netUDPData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("kernel", &x.kernel)
+}
+
+func (x *netUDPData) afterLoad() {}
+func (x *netUDPData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("kernel", &x.kernel)
+}
+
+func (x *netSnmpData) beforeSave() {}
+func (x *netSnmpData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("stack", &x.stack)
+}
+
+func (x *netSnmpData) afterLoad() {}
+func (x *netSnmpData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("stack", &x.stack)
+}
+
+func (x *netRouteData) beforeSave() {}
+func (x *netRouteData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("stack", &x.stack)
+}
+
+func (x *netRouteData) afterLoad() {}
+func (x *netRouteData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("stack", &x.stack)
+}
+
+func (x *netStatData) beforeSave() {}
+func (x *netStatData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("stack", &x.stack)
+}
+
+func (x *netStatData) afterLoad() {}
+func (x *netStatData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("stack", &x.stack)
+}
+
+func (x *tasksInode) beforeSave() {}
+func (x *tasksInode) save(m state.Map) {
+ x.beforeSave()
+ m.Save("InodeNotSymlink", &x.InodeNotSymlink)
+ m.Save("InodeDirectoryNoNewChildren", &x.InodeDirectoryNoNewChildren)
+ m.Save("InodeAttrs", &x.InodeAttrs)
+ m.Save("OrderedChildren", &x.OrderedChildren)
+ m.Save("AlwaysValid", &x.AlwaysValid)
+ m.Save("inoGen", &x.inoGen)
+ m.Save("pidns", &x.pidns)
+ m.Save("selfSymlink", &x.selfSymlink)
+ m.Save("threadSelfSymlink", &x.threadSelfSymlink)
+ m.Save("cgroupControllers", &x.cgroupControllers)
+}
+
+func (x *tasksInode) afterLoad() {}
+func (x *tasksInode) load(m state.Map) {
+ m.Load("InodeNotSymlink", &x.InodeNotSymlink)
+ m.Load("InodeDirectoryNoNewChildren", &x.InodeDirectoryNoNewChildren)
+ m.Load("InodeAttrs", &x.InodeAttrs)
+ m.Load("OrderedChildren", &x.OrderedChildren)
+ m.Load("AlwaysValid", &x.AlwaysValid)
+ m.Load("inoGen", &x.inoGen)
+ m.Load("pidns", &x.pidns)
+ m.Load("selfSymlink", &x.selfSymlink)
+ m.Load("threadSelfSymlink", &x.threadSelfSymlink)
+ m.Load("cgroupControllers", &x.cgroupControllers)
+}
+
+func (x *statData) beforeSave() {}
+func (x *statData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("dynamicBytesFileSetAttr", &x.dynamicBytesFileSetAttr)
+}
+
+func (x *statData) afterLoad() {}
+func (x *statData) load(m state.Map) {
+ m.Load("dynamicBytesFileSetAttr", &x.dynamicBytesFileSetAttr)
+}
+
+func (x *loadavgData) beforeSave() {}
+func (x *loadavgData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("dynamicBytesFileSetAttr", &x.dynamicBytesFileSetAttr)
+}
+
+func (x *loadavgData) afterLoad() {}
+func (x *loadavgData) load(m state.Map) {
+ m.Load("dynamicBytesFileSetAttr", &x.dynamicBytesFileSetAttr)
+}
+
+func (x *meminfoData) beforeSave() {}
+func (x *meminfoData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("dynamicBytesFileSetAttr", &x.dynamicBytesFileSetAttr)
+}
+
+func (x *meminfoData) afterLoad() {}
+func (x *meminfoData) load(m state.Map) {
+ m.Load("dynamicBytesFileSetAttr", &x.dynamicBytesFileSetAttr)
+}
+
+func (x *uptimeData) beforeSave() {}
+func (x *uptimeData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("dynamicBytesFileSetAttr", &x.dynamicBytesFileSetAttr)
+}
+
+func (x *uptimeData) afterLoad() {}
+func (x *uptimeData) load(m state.Map) {
+ m.Load("dynamicBytesFileSetAttr", &x.dynamicBytesFileSetAttr)
+}
+
+func (x *versionData) beforeSave() {}
+func (x *versionData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("dynamicBytesFileSetAttr", &x.dynamicBytesFileSetAttr)
+}
+
+func (x *versionData) afterLoad() {}
+func (x *versionData) load(m state.Map) {
+ m.Load("dynamicBytesFileSetAttr", &x.dynamicBytesFileSetAttr)
+}
+
+func (x *filesystemsData) beforeSave() {}
+func (x *filesystemsData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+}
+
+func (x *filesystemsData) afterLoad() {}
+func (x *filesystemsData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+}
+
+func (x *mmapMinAddrData) beforeSave() {}
+func (x *mmapMinAddrData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("k", &x.k)
+}
+
+func (x *mmapMinAddrData) afterLoad() {}
+func (x *mmapMinAddrData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Load("k", &x.k)
+}
+
+func (x *hostnameData) beforeSave() {}
+func (x *hostnameData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+}
+
+func (x *hostnameData) afterLoad() {}
+func (x *hostnameData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+}
+
+func (x *tcpSackData) beforeSave() {}
+func (x *tcpSackData) save(m state.Map) {
+ x.beforeSave()
+ m.Save("DynamicBytesFile", &x.DynamicBytesFile)
+ m.Save("stack", &x.stack)
+ m.Save("enabled", &x.enabled)
+}
+
+func (x *tcpSackData) afterLoad() {}
+func (x *tcpSackData) load(m state.Map) {
+ m.Load("DynamicBytesFile", &x.DynamicBytesFile)
+ m.LoadWait("stack", &x.stack)
+ m.Load("enabled", &x.enabled)
+}
+
+func init() {
+ state.Register("pkg/sentry/fsimpl/proc.FilesystemType", (*FilesystemType)(nil), state.Fns{Save: (*FilesystemType).save, Load: (*FilesystemType).load})
+ state.Register("pkg/sentry/fsimpl/proc.subtasksInode", (*subtasksInode)(nil), state.Fns{Save: (*subtasksInode).save, Load: (*subtasksInode).load})
+ state.Register("pkg/sentry/fsimpl/proc.taskInode", (*taskInode)(nil), state.Fns{Save: (*taskInode).save, Load: (*taskInode).load})
+ state.Register("pkg/sentry/fsimpl/proc.fdDirInode", (*fdDirInode)(nil), state.Fns{Save: (*fdDirInode).save, Load: (*fdDirInode).load})
+ state.Register("pkg/sentry/fsimpl/proc.fdSymlink", (*fdSymlink)(nil), state.Fns{Save: (*fdSymlink).save, Load: (*fdSymlink).load})
+ state.Register("pkg/sentry/fsimpl/proc.fdInfoDirInode", (*fdInfoDirInode)(nil), state.Fns{Save: (*fdInfoDirInode).save, Load: (*fdInfoDirInode).load})
+ state.Register("pkg/sentry/fsimpl/proc.fdInfoData", (*fdInfoData)(nil), state.Fns{Save: (*fdInfoData).save, Load: (*fdInfoData).load})
+ state.Register("pkg/sentry/fsimpl/proc.auxvData", (*auxvData)(nil), state.Fns{Save: (*auxvData).save, Load: (*auxvData).load})
+ state.Register("pkg/sentry/fsimpl/proc.cmdlineData", (*cmdlineData)(nil), state.Fns{Save: (*cmdlineData).save, Load: (*cmdlineData).load})
+ state.Register("pkg/sentry/fsimpl/proc.commInode", (*commInode)(nil), state.Fns{Save: (*commInode).save, Load: (*commInode).load})
+ state.Register("pkg/sentry/fsimpl/proc.commData", (*commData)(nil), state.Fns{Save: (*commData).save, Load: (*commData).load})
+ state.Register("pkg/sentry/fsimpl/proc.idMapData", (*idMapData)(nil), state.Fns{Save: (*idMapData).save, Load: (*idMapData).load})
+ state.Register("pkg/sentry/fsimpl/proc.mapsData", (*mapsData)(nil), state.Fns{Save: (*mapsData).save, Load: (*mapsData).load})
+ state.Register("pkg/sentry/fsimpl/proc.smapsData", (*smapsData)(nil), state.Fns{Save: (*smapsData).save, Load: (*smapsData).load})
+ state.Register("pkg/sentry/fsimpl/proc.taskStatData", (*taskStatData)(nil), state.Fns{Save: (*taskStatData).save, Load: (*taskStatData).load})
+ state.Register("pkg/sentry/fsimpl/proc.statmData", (*statmData)(nil), state.Fns{Save: (*statmData).save, Load: (*statmData).load})
+ state.Register("pkg/sentry/fsimpl/proc.statusData", (*statusData)(nil), state.Fns{Save: (*statusData).save, Load: (*statusData).load})
+ state.Register("pkg/sentry/fsimpl/proc.ioData", (*ioData)(nil), state.Fns{Save: (*ioData).save, Load: (*ioData).load})
+ state.Register("pkg/sentry/fsimpl/proc.oomScoreAdj", (*oomScoreAdj)(nil), state.Fns{Save: (*oomScoreAdj).save, Load: (*oomScoreAdj).load})
+ state.Register("pkg/sentry/fsimpl/proc.exeSymlink", (*exeSymlink)(nil), state.Fns{Save: (*exeSymlink).save, Load: (*exeSymlink).load})
+ state.Register("pkg/sentry/fsimpl/proc.mountInfoData", (*mountInfoData)(nil), state.Fns{Save: (*mountInfoData).save, Load: (*mountInfoData).load})
+ state.Register("pkg/sentry/fsimpl/proc.mountsData", (*mountsData)(nil), state.Fns{Save: (*mountsData).save, Load: (*mountsData).load})
+ state.Register("pkg/sentry/fsimpl/proc.ifinet6", (*ifinet6)(nil), state.Fns{Save: (*ifinet6).save, Load: (*ifinet6).load})
+ state.Register("pkg/sentry/fsimpl/proc.netDevData", (*netDevData)(nil), state.Fns{Save: (*netDevData).save, Load: (*netDevData).load})
+ state.Register("pkg/sentry/fsimpl/proc.netUnixData", (*netUnixData)(nil), state.Fns{Save: (*netUnixData).save, Load: (*netUnixData).load})
+ state.Register("pkg/sentry/fsimpl/proc.netTCPData", (*netTCPData)(nil), state.Fns{Save: (*netTCPData).save, Load: (*netTCPData).load})
+ state.Register("pkg/sentry/fsimpl/proc.netTCP6Data", (*netTCP6Data)(nil), state.Fns{Save: (*netTCP6Data).save, Load: (*netTCP6Data).load})
+ state.Register("pkg/sentry/fsimpl/proc.netUDPData", (*netUDPData)(nil), state.Fns{Save: (*netUDPData).save, Load: (*netUDPData).load})
+ state.Register("pkg/sentry/fsimpl/proc.netSnmpData", (*netSnmpData)(nil), state.Fns{Save: (*netSnmpData).save, Load: (*netSnmpData).load})
+ state.Register("pkg/sentry/fsimpl/proc.netRouteData", (*netRouteData)(nil), state.Fns{Save: (*netRouteData).save, Load: (*netRouteData).load})
+ state.Register("pkg/sentry/fsimpl/proc.netStatData", (*netStatData)(nil), state.Fns{Save: (*netStatData).save, Load: (*netStatData).load})
+ state.Register("pkg/sentry/fsimpl/proc.tasksInode", (*tasksInode)(nil), state.Fns{Save: (*tasksInode).save, Load: (*tasksInode).load})
+ state.Register("pkg/sentry/fsimpl/proc.statData", (*statData)(nil), state.Fns{Save: (*statData).save, Load: (*statData).load})
+ state.Register("pkg/sentry/fsimpl/proc.loadavgData", (*loadavgData)(nil), state.Fns{Save: (*loadavgData).save, Load: (*loadavgData).load})
+ state.Register("pkg/sentry/fsimpl/proc.meminfoData", (*meminfoData)(nil), state.Fns{Save: (*meminfoData).save, Load: (*meminfoData).load})
+ state.Register("pkg/sentry/fsimpl/proc.uptimeData", (*uptimeData)(nil), state.Fns{Save: (*uptimeData).save, Load: (*uptimeData).load})
+ state.Register("pkg/sentry/fsimpl/proc.versionData", (*versionData)(nil), state.Fns{Save: (*versionData).save, Load: (*versionData).load})
+ state.Register("pkg/sentry/fsimpl/proc.filesystemsData", (*filesystemsData)(nil), state.Fns{Save: (*filesystemsData).save, Load: (*filesystemsData).load})
+ state.Register("pkg/sentry/fsimpl/proc.mmapMinAddrData", (*mmapMinAddrData)(nil), state.Fns{Save: (*mmapMinAddrData).save, Load: (*mmapMinAddrData).load})
+ state.Register("pkg/sentry/fsimpl/proc.hostnameData", (*hostnameData)(nil), state.Fns{Save: (*hostnameData).save, Load: (*hostnameData).load})
+ state.Register("pkg/sentry/fsimpl/proc.tcpSackData", (*tcpSackData)(nil), state.Fns{Save: (*tcpSackData).save, Load: (*tcpSackData).load})
+}
diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go
new file mode 100755
index 000000000..a21313666
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/subtasks.go
@@ -0,0 +1,135 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "sort"
+ "strconv"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// subtasksInode represents the inode for /proc/[pid]/task/ directory.
+//
+// +stateify savable
+type subtasksInode struct {
+ kernfs.InodeNotSymlink
+ kernfs.InodeDirectoryNoNewChildren
+ kernfs.InodeAttrs
+ kernfs.OrderedChildren
+ kernfs.AlwaysValid
+
+ task *kernel.Task
+ pidns *kernel.PIDNamespace
+ inoGen InoGenerator
+ cgroupControllers map[string]string
+}
+
+var _ kernfs.Inode = (*subtasksInode)(nil)
+
+func newSubtasks(task *kernel.Task, pidns *kernel.PIDNamespace, inoGen InoGenerator, cgroupControllers map[string]string) *kernfs.Dentry {
+ subInode := &subtasksInode{
+ task: task,
+ pidns: pidns,
+ inoGen: inoGen,
+ cgroupControllers: cgroupControllers,
+ }
+ // Note: credentials are overridden by taskOwnedInode.
+ subInode.InodeAttrs.Init(task.Credentials(), inoGen.NextIno(), linux.ModeDirectory|0555)
+ subInode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+
+ inode := &taskOwnedInode{Inode: subInode, owner: task}
+ dentry := &kernfs.Dentry{}
+ dentry.Init(inode)
+
+ return dentry
+}
+
+// Lookup implements kernfs.inodeDynamicLookup.
+func (i *subtasksInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) {
+ tid, err := strconv.ParseUint(name, 10, 32)
+ if err != nil {
+ return nil, syserror.ENOENT
+ }
+
+ subTask := i.pidns.TaskWithID(kernel.ThreadID(tid))
+ if subTask == nil {
+ return nil, syserror.ENOENT
+ }
+ if subTask.ThreadGroup() != i.task.ThreadGroup() {
+ return nil, syserror.ENOENT
+ }
+
+ subTaskDentry := newTaskInode(i.inoGen, subTask, i.pidns, false, i.cgroupControllers)
+ return subTaskDentry.VFSDentry(), nil
+}
+
+// IterDirents implements kernfs.inodeDynamicLookup.
+func (i *subtasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
+ tasks := i.task.ThreadGroup().MemberIDs(i.pidns)
+ if len(tasks) == 0 {
+ return offset, syserror.ENOENT
+ }
+
+ tids := make([]int, 0, len(tasks))
+ for _, tid := range tasks {
+ tids = append(tids, int(tid))
+ }
+
+ sort.Ints(tids)
+ for _, tid := range tids[relOffset:] {
+ dirent := vfs.Dirent{
+ Name: strconv.FormatUint(uint64(tid), 10),
+ Type: linux.DT_DIR,
+ Ino: i.inoGen.NextIno(),
+ NextOff: offset + 1,
+ }
+ if err := cb.Handle(dirent); err != nil {
+ return offset, err
+ }
+ offset++
+ }
+ return offset, nil
+}
+
+// Open implements kernfs.Inode.
+func (i *subtasksInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+ fd := &kernfs.GenericDirectoryFD{}
+ fd.Init(rp.Mount(), vfsd, &i.OrderedChildren, &opts)
+ return fd.VFSFileDescription(), nil
+}
+
+// Stat implements kernfs.Inode.
+func (i *subtasksInode) Stat(vsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
+ stat, err := i.InodeAttrs.Stat(vsfs, opts)
+ if err != nil {
+ return linux.Statx{}, err
+ }
+ if opts.Mask&linux.STATX_NLINK != 0 {
+ stat.Nlink += uint32(i.task.ThreadGroup().Count())
+ }
+ return stat, nil
+}
+
+// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
+func (*subtasksInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
+ return syserror.EPERM
+}
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
new file mode 100755
index 000000000..888afc0fd
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/task.go
@@ -0,0 +1,234 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+// taskInode represents the inode for /proc/PID/ directory.
+//
+// +stateify savable
+type taskInode struct {
+ kernfs.InodeNotSymlink
+ kernfs.InodeDirectoryNoNewChildren
+ kernfs.InodeNoDynamicLookup
+ kernfs.InodeAttrs
+ kernfs.OrderedChildren
+
+ task *kernel.Task
+}
+
+var _ kernfs.Inode = (*taskInode)(nil)
+
+func newTaskInode(inoGen InoGenerator, task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, cgroupControllers map[string]string) *kernfs.Dentry {
+ contents := map[string]*kernfs.Dentry{
+ "auxv": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &auxvData{task: task}),
+ "cmdline": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &cmdlineData{task: task, arg: cmdlineDataArg}),
+ "comm": newComm(task, inoGen.NextIno(), 0444),
+ "environ": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &cmdlineData{task: task, arg: environDataArg}),
+ "exe": newExeSymlink(task, inoGen.NextIno()),
+ "fd": newFDDirInode(task, inoGen),
+ "fdinfo": newFDInfoDirInode(task, inoGen),
+ "gid_map": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &idMapData{task: task, gids: true}),
+ "io": newTaskOwnedFile(task, inoGen.NextIno(), 0400, newIO(task, isThreadGroup)),
+ "maps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &mapsData{task: task}),
+ "mountinfo": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &mountInfoData{task: task}),
+ "mounts": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &mountsData{task: task}),
+ "net": newTaskNetDir(task, inoGen),
+ "ns": newTaskOwnedDir(task, inoGen.NextIno(), 0511, map[string]*kernfs.Dentry{
+ "net": newNamespaceSymlink(task, inoGen.NextIno(), "net"),
+ "pid": newNamespaceSymlink(task, inoGen.NextIno(), "pid"),
+ "user": newNamespaceSymlink(task, inoGen.NextIno(), "user"),
+ }),
+ "oom_score": newTaskOwnedFile(task, inoGen.NextIno(), 0444, newStaticFile("0\n")),
+ "oom_score_adj": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &oomScoreAdj{task: task}),
+ "smaps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &smapsData{task: task}),
+ "stat": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}),
+ "statm": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statmData{task: task}),
+ "status": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statusData{task: task, pidns: pidns}),
+ "uid_map": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &idMapData{task: task, gids: false}),
+ }
+ if isThreadGroup {
+ contents["task"] = newSubtasks(task, pidns, inoGen, cgroupControllers)
+ }
+ if len(cgroupControllers) > 0 {
+ contents["cgroup"] = newTaskOwnedFile(task, inoGen.NextIno(), 0444, newCgroupData(cgroupControllers))
+ }
+
+ taskInode := &taskInode{task: task}
+ // Note: credentials are overridden by taskOwnedInode.
+ taskInode.InodeAttrs.Init(task.Credentials(), inoGen.NextIno(), linux.ModeDirectory|0555)
+
+ inode := &taskOwnedInode{Inode: taskInode, owner: task}
+ dentry := &kernfs.Dentry{}
+ dentry.Init(inode)
+
+ taskInode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+ links := taskInode.OrderedChildren.Populate(dentry, contents)
+ taskInode.IncLinks(links)
+
+ return dentry
+}
+
+// Valid implements kernfs.inodeDynamicLookup. This inode remains valid as long
+// as the task is still running. When it's dead, another tasks with the same
+// PID could replace it.
+func (i *taskInode) Valid(ctx context.Context) bool {
+ return i.task.ExitState() != kernel.TaskExitDead
+}
+
+// Open implements kernfs.Inode.
+func (i *taskInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+ fd := &kernfs.GenericDirectoryFD{}
+ fd.Init(rp.Mount(), vfsd, &i.OrderedChildren, &opts)
+ return fd.VFSFileDescription(), nil
+}
+
+// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
+func (*taskInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
+ return syserror.EPERM
+}
+
+// taskOwnedInode implements kernfs.Inode and overrides inode owner with task
+// effective user and group.
+type taskOwnedInode struct {
+ kernfs.Inode
+
+ // owner is the task that owns this inode.
+ owner *kernel.Task
+}
+
+var _ kernfs.Inode = (*taskOwnedInode)(nil)
+
+func newTaskOwnedFile(task *kernel.Task, ino uint64, perm linux.FileMode, inode dynamicInode) *kernfs.Dentry {
+ // Note: credentials are overridden by taskOwnedInode.
+ inode.Init(task.Credentials(), ino, inode, perm)
+
+ taskInode := &taskOwnedInode{Inode: inode, owner: task}
+ d := &kernfs.Dentry{}
+ d.Init(taskInode)
+ return d
+}
+
+func newTaskOwnedDir(task *kernel.Task, ino uint64, perm linux.FileMode, children map[string]*kernfs.Dentry) *kernfs.Dentry {
+ dir := &kernfs.StaticDirectory{}
+
+ // Note: credentials are overridden by taskOwnedInode.
+ dir.Init(task.Credentials(), ino, perm)
+
+ inode := &taskOwnedInode{Inode: dir, owner: task}
+ d := &kernfs.Dentry{}
+ d.Init(inode)
+
+ dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+ links := dir.OrderedChildren.Populate(d, children)
+ dir.IncLinks(links)
+
+ return d
+}
+
+// Stat implements kernfs.Inode.
+func (i *taskOwnedInode) Stat(fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
+ stat, err := i.Inode.Stat(fs, opts)
+ if err != nil {
+ return linux.Statx{}, err
+ }
+ if opts.Mask&(linux.STATX_UID|linux.STATX_GID) != 0 {
+ uid, gid := i.getOwner(linux.FileMode(stat.Mode))
+ if opts.Mask&linux.STATX_UID != 0 {
+ stat.UID = uint32(uid)
+ }
+ if opts.Mask&linux.STATX_GID != 0 {
+ stat.GID = uint32(gid)
+ }
+ }
+ return stat, nil
+}
+
+// CheckPermissions implements kernfs.Inode.
+func (i *taskOwnedInode) CheckPermissions(_ context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
+ mode := i.Mode()
+ uid, gid := i.getOwner(mode)
+ return vfs.GenericCheckPermissions(creds, ats, mode, uid, gid)
+}
+
+func (i *taskOwnedInode) getOwner(mode linux.FileMode) (auth.KUID, auth.KGID) {
+ // By default, set the task owner as the file owner.
+ creds := i.owner.Credentials()
+ uid := creds.EffectiveKUID
+ gid := creds.EffectiveKGID
+
+ // Linux doesn't apply dumpability adjustments to world readable/executable
+ // directories so that applications can stat /proc/PID to determine the
+ // effective UID of a process. See fs/proc/base.c:task_dump_owner.
+ if mode.FileType() == linux.ModeDirectory && mode.Permissions() == 0555 {
+ return uid, gid
+ }
+
+ // If the task is not dumpable, then root (in the namespace preferred)
+ // owns the file.
+ m := getMM(i.owner)
+ if m == nil {
+ return auth.RootKUID, auth.RootKGID
+ }
+ if m.Dumpability() != mm.UserDumpable {
+ uid = auth.RootKUID
+ if kuid := creds.UserNamespace.MapToKUID(auth.RootUID); kuid.Ok() {
+ uid = kuid
+ }
+ gid = auth.RootKGID
+ if kgid := creds.UserNamespace.MapToKGID(auth.RootGID); kgid.Ok() {
+ gid = kgid
+ }
+ }
+ return uid, gid
+}
+
+func newIO(t *kernel.Task, isThreadGroup bool) *ioData {
+ if isThreadGroup {
+ return &ioData{ioUsage: t.ThreadGroup()}
+ }
+ return &ioData{ioUsage: t}
+}
+
+// newCgroupData creates inode that shows cgroup information.
+// From man 7 cgroups: "For each cgroup hierarchy of which the process is a
+// member, there is one entry containing three colon-separated fields:
+// hierarchy-ID:controller-list:cgroup-path"
+func newCgroupData(controllers map[string]string) dynamicInode {
+ var buf bytes.Buffer
+
+ // The hierarchy ids must be positive integers (for cgroup v1), but the
+ // exact number does not matter, so long as they are unique. We can
+ // just use a counter, but since linux sorts this file in descending
+ // order, we must count down to preserve this behavior.
+ i := len(controllers)
+ for name, dir := range controllers {
+ fmt.Fprintf(&buf, "%d:%s:%s\n", i, name, dir)
+ i--
+ }
+ return newStaticFile(buf.String())
+}
diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go
new file mode 100755
index 000000000..046265eca
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/task_fds.go
@@ -0,0 +1,302 @@
+// Copyright 2020 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+ "sort"
+ "strconv"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/refs"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+func getTaskFD(t *kernel.Task, fd int32) (*vfs.FileDescription, kernel.FDFlags) {
+ var (
+ file *vfs.FileDescription
+ flags kernel.FDFlags
+ )
+ t.WithMuLocked(func(t *kernel.Task) {
+ if fdt := t.FDTable(); fdt != nil {
+ file, flags = fdt.GetVFS2(fd)
+ }
+ })
+ return file, flags
+}
+
+func taskFDExists(t *kernel.Task, fd int32) bool {
+ file, _ := getTaskFD(t, fd)
+ if file == nil {
+ return false
+ }
+ file.DecRef()
+ return true
+}
+
+type fdDir struct {
+ inoGen InoGenerator
+ task *kernel.Task
+
+ // When produceSymlinks is set, dirents produces for the FDs are reported
+ // as symlink. Otherwise, they are reported as regular files.
+ produceSymlink bool
+}
+
+// IterDirents implements kernfs.inodeDynamicLookup.
+func (i *fdDir) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, absOffset, relOffset int64) (int64, error) {
+ var fds []int32
+ i.task.WithMuLocked(func(t *kernel.Task) {
+ if fdTable := t.FDTable(); fdTable != nil {
+ fds = fdTable.GetFDs()
+ }
+ })
+
+ offset := absOffset + relOffset
+ typ := uint8(linux.DT_REG)
+ if i.produceSymlink {
+ typ = linux.DT_LNK
+ }
+
+ // Find the appropriate starting point.
+ idx := sort.Search(len(fds), func(i int) bool { return fds[i] >= int32(relOffset) })
+ if idx >= len(fds) {
+ return offset, nil
+ }
+ for _, fd := range fds[idx:] {
+ dirent := vfs.Dirent{
+ Name: strconv.FormatUint(uint64(fd), 10),
+ Type: typ,
+ Ino: i.inoGen.NextIno(),
+ NextOff: offset + 1,
+ }
+ if err := cb.Handle(dirent); err != nil {
+ return offset, err
+ }
+ offset++
+ }
+ return offset, nil
+}
+
+// fdDirInode represents the inode for /proc/[pid]/fd directory.
+//
+// +stateify savable
+type fdDirInode struct {
+ kernfs.InodeNotSymlink
+ kernfs.InodeDirectoryNoNewChildren
+ kernfs.InodeAttrs
+ kernfs.OrderedChildren
+ kernfs.AlwaysValid
+ fdDir
+}
+
+var _ kernfs.Inode = (*fdDirInode)(nil)
+
+func newFDDirInode(task *kernel.Task, inoGen InoGenerator) *kernfs.Dentry {
+ inode := &fdDirInode{
+ fdDir: fdDir{
+ inoGen: inoGen,
+ task: task,
+ produceSymlink: true,
+ },
+ }
+ inode.InodeAttrs.Init(task.Credentials(), inoGen.NextIno(), linux.ModeDirectory|0555)
+
+ dentry := &kernfs.Dentry{}
+ dentry.Init(inode)
+ inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+
+ return dentry
+}
+
+// Lookup implements kernfs.inodeDynamicLookup.
+func (i *fdDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) {
+ fdInt, err := strconv.ParseInt(name, 10, 32)
+ if err != nil {
+ return nil, syserror.ENOENT
+ }
+ fd := int32(fdInt)
+ if !taskFDExists(i.task, fd) {
+ return nil, syserror.ENOENT
+ }
+ taskDentry := newFDSymlink(i.task, fd, i.inoGen.NextIno())
+ return taskDentry.VFSDentry(), nil
+}
+
+// Open implements kernfs.Inode.
+func (i *fdDirInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+ fd := &kernfs.GenericDirectoryFD{}
+ fd.Init(rp.Mount(), vfsd, &i.OrderedChildren, &opts)
+ return fd.VFSFileDescription(), nil
+}
+
+// CheckPermissions implements kernfs.Inode.
+//
+// This is to match Linux, which uses a special permission handler to guarantee
+// that a process can still access /proc/self/fd after it has executed
+// setuid. See fs/proc/fd.c:proc_fd_permission.
+func (i *fdDirInode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
+ err := i.InodeAttrs.CheckPermissions(ctx, creds, ats)
+ if err == nil {
+ // Access granted, no extra check needed.
+ return nil
+ }
+ if t := kernel.TaskFromContext(ctx); t != nil {
+ // Allow access if the task trying to access it is in the thread group
+ // corresponding to this directory.
+ if i.task.ThreadGroup() == t.ThreadGroup() {
+ // Access granted (overridden).
+ return nil
+ }
+ }
+ return err
+}
+
+// fdSymlink is an symlink for the /proc/[pid]/fd/[fd] file.
+//
+// +stateify savable
+type fdSymlink struct {
+ kernfs.InodeAttrs
+ kernfs.InodeNoopRefCount
+ kernfs.InodeSymlink
+
+ task *kernel.Task
+ fd int32
+}
+
+var _ kernfs.Inode = (*fdSymlink)(nil)
+
+func newFDSymlink(task *kernel.Task, fd int32, ino uint64) *kernfs.Dentry {
+ inode := &fdSymlink{
+ task: task,
+ fd: fd,
+ }
+ inode.Init(task.Credentials(), ino, linux.ModeSymlink|0777)
+
+ d := &kernfs.Dentry{}
+ d.Init(inode)
+ return d
+}
+
+func (s *fdSymlink) Readlink(ctx context.Context) (string, error) {
+ file, _ := getTaskFD(s.task, s.fd)
+ if file == nil {
+ return "", syserror.ENOENT
+ }
+ defer file.DecRef()
+ root := vfs.RootFromContext(ctx)
+ defer root.DecRef()
+ return s.task.Kernel().VFS().PathnameWithDeleted(ctx, root, file.VirtualDentry())
+}
+
+func (s *fdSymlink) Getlink(ctx context.Context) (vfs.VirtualDentry, string, error) {
+ file, _ := getTaskFD(s.task, s.fd)
+ if file == nil {
+ return vfs.VirtualDentry{}, "", syserror.ENOENT
+ }
+ defer file.DecRef()
+ vd := file.VirtualDentry()
+ vd.IncRef()
+ return vd, "", nil
+}
+
+// fdInfoDirInode represents the inode for /proc/[pid]/fdinfo directory.
+//
+// +stateify savable
+type fdInfoDirInode struct {
+ kernfs.InodeNotSymlink
+ kernfs.InodeDirectoryNoNewChildren
+ kernfs.InodeAttrs
+ kernfs.OrderedChildren
+ kernfs.AlwaysValid
+ fdDir
+}
+
+var _ kernfs.Inode = (*fdInfoDirInode)(nil)
+
+func newFDInfoDirInode(task *kernel.Task, inoGen InoGenerator) *kernfs.Dentry {
+ inode := &fdInfoDirInode{
+ fdDir: fdDir{
+ inoGen: inoGen,
+ task: task,
+ },
+ }
+ inode.InodeAttrs.Init(task.Credentials(), inoGen.NextIno(), linux.ModeDirectory|0555)
+
+ dentry := &kernfs.Dentry{}
+ dentry.Init(inode)
+ inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+
+ return dentry
+}
+
+// Lookup implements kernfs.inodeDynamicLookup.
+func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) {
+ fdInt, err := strconv.ParseInt(name, 10, 32)
+ if err != nil {
+ return nil, syserror.ENOENT
+ }
+ fd := int32(fdInt)
+ if !taskFDExists(i.task, fd) {
+ return nil, syserror.ENOENT
+ }
+ data := &fdInfoData{
+ task: i.task,
+ fd: fd,
+ }
+ dentry := newTaskOwnedFile(i.task, i.inoGen.NextIno(), 0444, data)
+ return dentry.VFSDentry(), nil
+}
+
+// Open implements kernfs.Inode.
+func (i *fdInfoDirInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+ fd := &kernfs.GenericDirectoryFD{}
+ fd.Init(rp.Mount(), vfsd, &i.OrderedChildren, &opts)
+ return fd.VFSFileDescription(), nil
+}
+
+// fdInfoData implements vfs.DynamicBytesSource for /proc/[pid]/fdinfo/[fd].
+//
+// +stateify savable
+type fdInfoData struct {
+ kernfs.DynamicBytesFile
+ refs.AtomicRefCount
+
+ task *kernel.Task
+ fd int32
+}
+
+var _ dynamicInode = (*fdInfoData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *fdInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ file, descriptorFlags := getTaskFD(d.task, d.fd)
+ if file == nil {
+ return syserror.ENOENT
+ }
+ defer file.DecRef()
+ // TODO(b/121266871): Include pos, locks, and other data. For now we only
+ // have flags.
+ // See https://www.kernel.org/doc/Documentation/filesystems/proc.txt
+ flags := uint(file.StatusFlags()) | descriptorFlags.ToLinuxFileFlags()
+ fmt.Fprintf(buf, "flags:\t0%o\n", flags)
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
new file mode 100755
index 000000000..2c6f8bdfc
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -0,0 +1,761 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/safemem"
+ "gvisor.dev/gvisor/pkg/sentry/fsbridge"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/limits"
+ "gvisor.dev/gvisor/pkg/sentry/mm"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// mm gets the kernel task's MemoryManager. No additional reference is taken on
+// mm here. This is safe because MemoryManager.destroy is required to leave the
+// MemoryManager in a state where it's still usable as a DynamicBytesSource.
+func getMM(task *kernel.Task) *mm.MemoryManager {
+ var tmm *mm.MemoryManager
+ task.WithMuLocked(func(t *kernel.Task) {
+ if mm := t.MemoryManager(); mm != nil {
+ tmm = mm
+ }
+ })
+ return tmm
+}
+
+// getMMIncRef returns t's MemoryManager. If getMMIncRef succeeds, the
+// MemoryManager's users count is incremented, and must be decremented by the
+// caller when it is no longer in use.
+func getMMIncRef(task *kernel.Task) (*mm.MemoryManager, error) {
+ if task.ExitState() == kernel.TaskExitDead {
+ return nil, syserror.ESRCH
+ }
+ var m *mm.MemoryManager
+ task.WithMuLocked(func(t *kernel.Task) {
+ m = t.MemoryManager()
+ })
+ if m == nil || !m.IncUsers() {
+ return nil, io.EOF
+ }
+ return m, nil
+}
+
+func checkTaskState(t *kernel.Task) error {
+ switch t.ExitState() {
+ case kernel.TaskExitZombie:
+ return syserror.EACCES
+ case kernel.TaskExitDead:
+ return syserror.ESRCH
+ }
+ return nil
+}
+
+type bufferWriter struct {
+ buf *bytes.Buffer
+}
+
+// WriteFromBlocks writes up to srcs.NumBytes() bytes from srcs and returns
+// the number of bytes written. It may return a partial write without an
+// error (i.e. (n, nil) where 0 < n < srcs.NumBytes()). It should not
+// return a full write with an error (i.e. srcs.NumBytes(), err) where err
+// != nil).
+func (w *bufferWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
+ written := srcs.NumBytes()
+ for !srcs.IsEmpty() {
+ w.buf.Write(srcs.Head().ToSlice())
+ srcs = srcs.Tail()
+ }
+ return written, nil
+}
+
+// auxvData implements vfs.DynamicBytesSource for /proc/[pid]/auxv.
+//
+// +stateify savable
+type auxvData struct {
+ kernfs.DynamicBytesFile
+
+ task *kernel.Task
+}
+
+var _ dynamicInode = (*auxvData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *auxvData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ m, err := getMMIncRef(d.task)
+ if err != nil {
+ return err
+ }
+ defer m.DecUsers(ctx)
+
+ // Space for buffer with AT_NULL (0) terminator at the end.
+ auxv := m.Auxv()
+ buf.Grow((len(auxv) + 1) * 16)
+ for _, e := range auxv {
+ var tmp [8]byte
+ usermem.ByteOrder.PutUint64(tmp[:], e.Key)
+ buf.Write(tmp[:])
+
+ usermem.ByteOrder.PutUint64(tmp[:], uint64(e.Value))
+ buf.Write(tmp[:])
+ }
+ return nil
+}
+
+// execArgType enumerates the types of exec arguments that are exposed through
+// proc.
+type execArgType int
+
+const (
+ cmdlineDataArg execArgType = iota
+ environDataArg
+)
+
+// cmdlineData implements vfs.DynamicBytesSource for /proc/[pid]/cmdline.
+//
+// +stateify savable
+type cmdlineData struct {
+ kernfs.DynamicBytesFile
+
+ task *kernel.Task
+
+ // arg is the type of exec argument this file contains.
+ arg execArgType
+}
+
+var _ dynamicInode = (*cmdlineData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ m, err := getMMIncRef(d.task)
+ if err != nil {
+ return err
+ }
+ defer m.DecUsers(ctx)
+
+ // Figure out the bounds of the exec arg we are trying to read.
+ var ar usermem.AddrRange
+ switch d.arg {
+ case cmdlineDataArg:
+ ar = usermem.AddrRange{
+ Start: m.ArgvStart(),
+ End: m.ArgvEnd(),
+ }
+ case environDataArg:
+ ar = usermem.AddrRange{
+ Start: m.EnvvStart(),
+ End: m.EnvvEnd(),
+ }
+ default:
+ panic(fmt.Sprintf("unknown exec arg type %v", d.arg))
+ }
+ if ar.Start == 0 || ar.End == 0 {
+ // Don't attempt to read before the start/end are set up.
+ return io.EOF
+ }
+
+ // N.B. Technically this should be usermem.IOOpts.IgnorePermissions = true
+ // until Linux 4.9 (272ddc8b3735 "proc: don't use FOLL_FORCE for reading
+ // cmdline and environment").
+ writer := &bufferWriter{buf: buf}
+ if n, err := m.CopyInTo(ctx, usermem.AddrRangeSeqOf(ar), writer, usermem.IOOpts{}); n == 0 || err != nil {
+ // Nothing to copy or something went wrong.
+ return err
+ }
+
+ // On Linux, if the NULL byte at the end of the argument vector has been
+ // overwritten, it continues reading the environment vector as part of
+ // the argument vector.
+ if d.arg == cmdlineDataArg && buf.Bytes()[buf.Len()-1] != 0 {
+ if end := bytes.IndexByte(buf.Bytes(), 0); end != -1 {
+ // If we found a NULL character somewhere else in argv, truncate the
+ // return up to the NULL terminator (including it).
+ buf.Truncate(end)
+ return nil
+ }
+
+ // There is no NULL terminator in the string, return into envp.
+ arEnvv := usermem.AddrRange{
+ Start: m.EnvvStart(),
+ End: m.EnvvEnd(),
+ }
+
+ // Upstream limits the returned amount to one page of slop.
+ // https://elixir.bootlin.com/linux/v4.20/source/fs/proc/base.c#L208
+ // we'll return one page total between argv and envp because of the
+ // above page restrictions.
+ if buf.Len() >= usermem.PageSize {
+ // Returned at least one page already, nothing else to add.
+ return nil
+ }
+ remaining := usermem.PageSize - buf.Len()
+ if int(arEnvv.Length()) > remaining {
+ end, ok := arEnvv.Start.AddLength(uint64(remaining))
+ if !ok {
+ return syserror.EFAULT
+ }
+ arEnvv.End = end
+ }
+ if _, err := m.CopyInTo(ctx, usermem.AddrRangeSeqOf(arEnvv), writer, usermem.IOOpts{}); err != nil {
+ return err
+ }
+
+ // Linux will return envp up to and including the first NULL character,
+ // so find it.
+ if end := bytes.IndexByte(buf.Bytes()[ar.Length():], 0); end != -1 {
+ buf.Truncate(end)
+ }
+ }
+
+ return nil
+}
+
+// +stateify savable
+type commInode struct {
+ kernfs.DynamicBytesFile
+
+ task *kernel.Task
+}
+
+func newComm(task *kernel.Task, ino uint64, perm linux.FileMode) *kernfs.Dentry {
+ inode := &commInode{task: task}
+ inode.DynamicBytesFile.Init(task.Credentials(), ino, &commData{task: task}, perm)
+
+ d := &kernfs.Dentry{}
+ d.Init(inode)
+ return d
+}
+
+func (i *commInode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
+ // This file can always be read or written by members of the same thread
+ // group. See fs/proc/base.c:proc_tid_comm_permission.
+ //
+ // N.B. This check is currently a no-op as we don't yet support writing and
+ // this file is world-readable anyways.
+ t := kernel.TaskFromContext(ctx)
+ if t != nil && t.ThreadGroup() == i.task.ThreadGroup() && !ats.MayExec() {
+ return nil
+ }
+
+ return i.DynamicBytesFile.CheckPermissions(ctx, creds, ats)
+}
+
+// commData implements vfs.DynamicBytesSource for /proc/[pid]/comm.
+//
+// +stateify savable
+type commData struct {
+ kernfs.DynamicBytesFile
+
+ task *kernel.Task
+}
+
+var _ dynamicInode = (*commData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *commData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ buf.WriteString(d.task.Name())
+ buf.WriteString("\n")
+ return nil
+}
+
+// idMapData implements vfs.DynamicBytesSource for /proc/[pid]/{gid_map|uid_map}.
+//
+// +stateify savable
+type idMapData struct {
+ kernfs.DynamicBytesFile
+
+ task *kernel.Task
+ gids bool
+}
+
+var _ dynamicInode = (*idMapData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *idMapData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ var entries []auth.IDMapEntry
+ if d.gids {
+ entries = d.task.UserNamespace().GIDMap()
+ } else {
+ entries = d.task.UserNamespace().UIDMap()
+ }
+ for _, e := range entries {
+ fmt.Fprintf(buf, "%10d %10d %10d\n", e.FirstID, e.FirstParentID, e.Length)
+ }
+ return nil
+}
+
+// mapsData implements vfs.DynamicBytesSource for /proc/[pid]/maps.
+//
+// +stateify savable
+type mapsData struct {
+ kernfs.DynamicBytesFile
+
+ task *kernel.Task
+}
+
+var _ dynamicInode = (*mapsData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *mapsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ if mm := getMM(d.task); mm != nil {
+ mm.ReadMapsDataInto(ctx, buf)
+ }
+ return nil
+}
+
+// smapsData implements vfs.DynamicBytesSource for /proc/[pid]/smaps.
+//
+// +stateify savable
+type smapsData struct {
+ kernfs.DynamicBytesFile
+
+ task *kernel.Task
+}
+
+var _ dynamicInode = (*smapsData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *smapsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ if mm := getMM(d.task); mm != nil {
+ mm.ReadSmapsDataInto(ctx, buf)
+ }
+ return nil
+}
+
+// +stateify savable
+type taskStatData struct {
+ kernfs.DynamicBytesFile
+
+ task *kernel.Task
+
+ // If tgstats is true, accumulate fault stats (not implemented) and CPU
+ // time across all tasks in t's thread group.
+ tgstats bool
+
+ // pidns is the PID namespace associated with the proc filesystem that
+ // includes the file using this statData.
+ pidns *kernel.PIDNamespace
+}
+
+var _ dynamicInode = (*taskStatData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (s *taskStatData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ fmt.Fprintf(buf, "%d ", s.pidns.IDOfTask(s.task))
+ fmt.Fprintf(buf, "(%s) ", s.task.Name())
+ fmt.Fprintf(buf, "%c ", s.task.StateStatus()[0])
+ ppid := kernel.ThreadID(0)
+ if parent := s.task.Parent(); parent != nil {
+ ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
+ }
+ fmt.Fprintf(buf, "%d ", ppid)
+ fmt.Fprintf(buf, "%d ", s.pidns.IDOfProcessGroup(s.task.ThreadGroup().ProcessGroup()))
+ fmt.Fprintf(buf, "%d ", s.pidns.IDOfSession(s.task.ThreadGroup().Session()))
+ fmt.Fprintf(buf, "0 0 " /* tty_nr tpgid */)
+ fmt.Fprintf(buf, "0 " /* flags */)
+ fmt.Fprintf(buf, "0 0 0 0 " /* minflt cminflt majflt cmajflt */)
+ var cputime usage.CPUStats
+ if s.tgstats {
+ cputime = s.task.ThreadGroup().CPUStats()
+ } else {
+ cputime = s.task.CPUStats()
+ }
+ fmt.Fprintf(buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
+ cputime = s.task.ThreadGroup().JoinedChildCPUStats()
+ fmt.Fprintf(buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
+ fmt.Fprintf(buf, "%d %d ", s.task.Priority(), s.task.Niceness())
+ fmt.Fprintf(buf, "%d ", s.task.ThreadGroup().Count())
+
+ // itrealvalue. Since kernel 2.6.17, this field is no longer
+ // maintained, and is hard coded as 0.
+ fmt.Fprintf(buf, "0 ")
+
+ // Start time is relative to boot time, expressed in clock ticks.
+ fmt.Fprintf(buf, "%d ", linux.ClockTFromDuration(s.task.StartTime().Sub(s.task.Kernel().Timekeeper().BootTime())))
+
+ var vss, rss uint64
+ s.task.WithMuLocked(func(t *kernel.Task) {
+ if mm := t.MemoryManager(); mm != nil {
+ vss = mm.VirtualMemorySize()
+ rss = mm.ResidentSetSize()
+ }
+ })
+ fmt.Fprintf(buf, "%d %d ", vss, rss/usermem.PageSize)
+
+ // rsslim.
+ fmt.Fprintf(buf, "%d ", s.task.ThreadGroup().Limits().Get(limits.Rss).Cur)
+
+ fmt.Fprintf(buf, "0 0 0 0 0 " /* startcode endcode startstack kstkesp kstkeip */)
+ fmt.Fprintf(buf, "0 0 0 0 0 " /* signal blocked sigignore sigcatch wchan */)
+ fmt.Fprintf(buf, "0 0 " /* nswap cnswap */)
+ terminationSignal := linux.Signal(0)
+ if s.task == s.task.ThreadGroup().Leader() {
+ terminationSignal = s.task.ThreadGroup().TerminationSignal()
+ }
+ fmt.Fprintf(buf, "%d ", terminationSignal)
+ fmt.Fprintf(buf, "0 0 0 " /* processor rt_priority policy */)
+ fmt.Fprintf(buf, "0 0 0 " /* delayacct_blkio_ticks guest_time cguest_time */)
+ fmt.Fprintf(buf, "0 0 0 0 0 0 0 " /* start_data end_data start_brk arg_start arg_end env_start env_end */)
+ fmt.Fprintf(buf, "0\n" /* exit_code */)
+
+ return nil
+}
+
+// statmData implements vfs.DynamicBytesSource for /proc/[pid]/statm.
+//
+// +stateify savable
+type statmData struct {
+ kernfs.DynamicBytesFile
+
+ task *kernel.Task
+}
+
+var _ dynamicInode = (*statmData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (s *statmData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ var vss, rss uint64
+ s.task.WithMuLocked(func(t *kernel.Task) {
+ if mm := t.MemoryManager(); mm != nil {
+ vss = mm.VirtualMemorySize()
+ rss = mm.ResidentSetSize()
+ }
+ })
+
+ fmt.Fprintf(buf, "%d %d 0 0 0 0 0\n", vss/usermem.PageSize, rss/usermem.PageSize)
+ return nil
+}
+
+// statusData implements vfs.DynamicBytesSource for /proc/[pid]/status.
+//
+// +stateify savable
+type statusData struct {
+ kernfs.DynamicBytesFile
+
+ task *kernel.Task
+ pidns *kernel.PIDNamespace
+}
+
+var _ dynamicInode = (*statusData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (s *statusData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ fmt.Fprintf(buf, "Name:\t%s\n", s.task.Name())
+ fmt.Fprintf(buf, "State:\t%s\n", s.task.StateStatus())
+ fmt.Fprintf(buf, "Tgid:\t%d\n", s.pidns.IDOfThreadGroup(s.task.ThreadGroup()))
+ fmt.Fprintf(buf, "Pid:\t%d\n", s.pidns.IDOfTask(s.task))
+ ppid := kernel.ThreadID(0)
+ if parent := s.task.Parent(); parent != nil {
+ ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
+ }
+ fmt.Fprintf(buf, "PPid:\t%d\n", ppid)
+ tpid := kernel.ThreadID(0)
+ if tracer := s.task.Tracer(); tracer != nil {
+ tpid = s.pidns.IDOfTask(tracer)
+ }
+ fmt.Fprintf(buf, "TracerPid:\t%d\n", tpid)
+ var fds int
+ var vss, rss, data uint64
+ s.task.WithMuLocked(func(t *kernel.Task) {
+ if fdTable := t.FDTable(); fdTable != nil {
+ fds = fdTable.Size()
+ }
+ if mm := t.MemoryManager(); mm != nil {
+ vss = mm.VirtualMemorySize()
+ rss = mm.ResidentSetSize()
+ data = mm.VirtualDataSize()
+ }
+ })
+ fmt.Fprintf(buf, "FDSize:\t%d\n", fds)
+ fmt.Fprintf(buf, "VmSize:\t%d kB\n", vss>>10)
+ fmt.Fprintf(buf, "VmRSS:\t%d kB\n", rss>>10)
+ fmt.Fprintf(buf, "VmData:\t%d kB\n", data>>10)
+ fmt.Fprintf(buf, "Threads:\t%d\n", s.task.ThreadGroup().Count())
+ creds := s.task.Credentials()
+ fmt.Fprintf(buf, "CapInh:\t%016x\n", creds.InheritableCaps)
+ fmt.Fprintf(buf, "CapPrm:\t%016x\n", creds.PermittedCaps)
+ fmt.Fprintf(buf, "CapEff:\t%016x\n", creds.EffectiveCaps)
+ fmt.Fprintf(buf, "CapBnd:\t%016x\n", creds.BoundingCaps)
+ fmt.Fprintf(buf, "Seccomp:\t%d\n", s.task.SeccompMode())
+ // We unconditionally report a single NUMA node. See
+ // pkg/sentry/syscalls/linux/sys_mempolicy.go.
+ fmt.Fprintf(buf, "Mems_allowed:\t1\n")
+ fmt.Fprintf(buf, "Mems_allowed_list:\t0\n")
+ return nil
+}
+
+// ioUsage is the /proc/[pid]/io and /proc/[pid]/task/[tid]/io data provider.
+type ioUsage interface {
+ // IOUsage returns the io usage data.
+ IOUsage() *usage.IO
+}
+
+// +stateify savable
+type ioData struct {
+ kernfs.DynamicBytesFile
+
+ ioUsage
+}
+
+var _ dynamicInode = (*ioData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (i *ioData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ io := usage.IO{}
+ io.Accumulate(i.IOUsage())
+
+ fmt.Fprintf(buf, "char: %d\n", io.CharsRead)
+ fmt.Fprintf(buf, "wchar: %d\n", io.CharsWritten)
+ fmt.Fprintf(buf, "syscr: %d\n", io.ReadSyscalls)
+ fmt.Fprintf(buf, "syscw: %d\n", io.WriteSyscalls)
+ fmt.Fprintf(buf, "read_bytes: %d\n", io.BytesRead)
+ fmt.Fprintf(buf, "write_bytes: %d\n", io.BytesWritten)
+ fmt.Fprintf(buf, "cancelled_write_bytes: %d\n", io.BytesWriteCancelled)
+ return nil
+}
+
+// oomScoreAdj is a stub of the /proc/<pid>/oom_score_adj file.
+//
+// +stateify savable
+type oomScoreAdj struct {
+ kernfs.DynamicBytesFile
+
+ task *kernel.Task
+}
+
+var _ vfs.WritableDynamicBytesSource = (*oomScoreAdj)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (o *oomScoreAdj) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ if o.task.ExitState() == kernel.TaskExitDead {
+ return syserror.ESRCH
+ }
+ fmt.Fprintf(buf, "%d\n", o.task.OOMScoreAdj())
+ return nil
+}
+
+// Write implements vfs.WritableDynamicBytesSource.Write.
+func (o *oomScoreAdj) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+ if src.NumBytes() == 0 {
+ return 0, nil
+ }
+
+ // Limit input size so as not to impact performance if input size is large.
+ src = src.TakeFirst(usermem.PageSize - 1)
+
+ var v int32
+ n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
+ if err != nil {
+ return 0, err
+ }
+
+ if o.task.ExitState() == kernel.TaskExitDead {
+ return 0, syserror.ESRCH
+ }
+ if err := o.task.SetOOMScoreAdj(v); err != nil {
+ return 0, err
+ }
+
+ return n, nil
+}
+
+// exeSymlink is an symlink for the /proc/[pid]/exe file.
+//
+// +stateify savable
+type exeSymlink struct {
+ kernfs.InodeAttrs
+ kernfs.InodeNoopRefCount
+ kernfs.InodeSymlink
+
+ task *kernel.Task
+}
+
+var _ kernfs.Inode = (*exeSymlink)(nil)
+
+func newExeSymlink(task *kernel.Task, ino uint64) *kernfs.Dentry {
+ inode := &exeSymlink{task: task}
+ inode.Init(task.Credentials(), ino, linux.ModeSymlink|0777)
+
+ d := &kernfs.Dentry{}
+ d.Init(inode)
+ return d
+}
+
+// Readlink implements kernfs.Inode.
+func (s *exeSymlink) Readlink(ctx context.Context) (string, error) {
+ if !kernel.ContextCanTrace(ctx, s.task, false) {
+ return "", syserror.EACCES
+ }
+
+ // Pull out the executable for /proc/[pid]/exe.
+ exec, err := s.executable()
+ if err != nil {
+ return "", err
+ }
+ defer exec.DecRef()
+
+ return exec.PathnameWithDeleted(ctx), nil
+}
+
+// Getlink implements kernfs.Inode.Getlink.
+func (s *exeSymlink) Getlink(ctx context.Context) (vfs.VirtualDentry, string, error) {
+ if !kernel.ContextCanTrace(ctx, s.task, false) {
+ return vfs.VirtualDentry{}, "", syserror.EACCES
+ }
+
+ exec, err := s.executable()
+ if err != nil {
+ return vfs.VirtualDentry{}, "", err
+ }
+ defer exec.DecRef()
+
+ vd := exec.(*fsbridge.VFSFile).FileDescription().VirtualDentry()
+ vd.IncRef()
+ return vd, "", nil
+}
+
+func (s *exeSymlink) executable() (file fsbridge.File, err error) {
+ if err := checkTaskState(s.task); err != nil {
+ return nil, err
+ }
+
+ s.task.WithMuLocked(func(t *kernel.Task) {
+ mm := t.MemoryManager()
+ if mm == nil {
+ err = syserror.EACCES
+ return
+ }
+
+ // The MemoryManager may be destroyed, in which case
+ // MemoryManager.destroy will simply set the executable to nil
+ // (with locks held).
+ file = mm.Executable()
+ if file == nil {
+ err = syserror.ESRCH
+ }
+ })
+ return
+}
+
+// mountInfoData is used to implement /proc/[pid]/mountinfo.
+//
+// +stateify savable
+type mountInfoData struct {
+ kernfs.DynamicBytesFile
+
+ task *kernel.Task
+}
+
+var _ dynamicInode = (*mountInfoData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (i *mountInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ var fsctx *kernel.FSContext
+ i.task.WithMuLocked(func(t *kernel.Task) {
+ fsctx = t.FSContext()
+ })
+ if fsctx == nil {
+ // The task has been destroyed. Nothing to show here.
+ return nil
+ }
+ rootDir := fsctx.RootDirectoryVFS2()
+ if !rootDir.Ok() {
+ // Root has been destroyed. Don't try to read mounts.
+ return nil
+ }
+ defer rootDir.DecRef()
+ i.task.Kernel().VFS().GenerateProcMountInfo(ctx, rootDir, buf)
+ return nil
+}
+
+// mountsData is used to implement /proc/[pid]/mounts.
+//
+// +stateify savable
+type mountsData struct {
+ kernfs.DynamicBytesFile
+
+ task *kernel.Task
+}
+
+var _ dynamicInode = (*mountsData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (i *mountsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ var fsctx *kernel.FSContext
+ i.task.WithMuLocked(func(t *kernel.Task) {
+ fsctx = t.FSContext()
+ })
+ if fsctx == nil {
+ // The task has been destroyed. Nothing to show here.
+ return nil
+ }
+ rootDir := fsctx.RootDirectoryVFS2()
+ if !rootDir.Ok() {
+ // Root has been destroyed. Don't try to read mounts.
+ return nil
+ }
+ defer rootDir.DecRef()
+ i.task.Kernel().VFS().GenerateProcMounts(ctx, rootDir, buf)
+ return nil
+}
+
+type namespaceSymlink struct {
+ kernfs.StaticSymlink
+
+ task *kernel.Task
+}
+
+func newNamespaceSymlink(task *kernel.Task, ino uint64, ns string) *kernfs.Dentry {
+ // Namespace symlinks should contain the namespace name and the inode number
+ // for the namespace instance, so for example user:[123456]. We currently fake
+ // the inode number by sticking the symlink inode in its place.
+ target := fmt.Sprintf("%s:[%d]", ns, ino)
+
+ inode := &namespaceSymlink{task: task}
+ // Note: credentials are overridden by taskOwnedInode.
+ inode.Init(task.Credentials(), ino, target)
+
+ taskInode := &taskOwnedInode{Inode: inode, owner: task}
+ d := &kernfs.Dentry{}
+ d.Init(taskInode)
+ return d
+}
+
+// Readlink implements Inode.
+func (s *namespaceSymlink) Readlink(ctx context.Context) (string, error) {
+ if err := checkTaskState(s.task); err != nil {
+ return "", err
+ }
+ return s.StaticSymlink.Readlink(ctx)
+}
+
+// Getlink implements Inode.Getlink.
+func (s *namespaceSymlink) Getlink(ctx context.Context) (vfs.VirtualDentry, string, error) {
+ if err := checkTaskState(s.task); err != nil {
+ return vfs.VirtualDentry{}, "", err
+ }
+ return s.StaticSymlink.Getlink(ctx)
+}
diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go
new file mode 100755
index 000000000..6595fcee6
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/task_net.go
@@ -0,0 +1,808 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "reflect"
+ "time"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/log"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/socket"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix"
+ "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/tcpip/header"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+func newTaskNetDir(task *kernel.Task, inoGen InoGenerator) *kernfs.Dentry {
+ k := task.Kernel()
+ pidns := task.PIDNamespace()
+ root := auth.NewRootCredentials(pidns.UserNamespace())
+
+ var contents map[string]*kernfs.Dentry
+ if stack := task.NetworkNamespace().Stack(); stack != nil {
+ const (
+ arp = "IP address HW type Flags HW address Mask Device\n"
+ netlink = "sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode\n"
+ packet = "sk RefCnt Type Proto Iface R Rmem User Inode\n"
+ protocols = "protocol size sockets memory press maxhdr slab module cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n"
+ ptype = "Type Device Function\n"
+ upd6 = " sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n"
+ )
+ psched := fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond))
+
+ // TODO(gvisor.dev/issue/1833): Make sure file contents reflect the task
+ // network namespace.
+ contents = map[string]*kernfs.Dentry{
+ "dev": newDentry(root, inoGen.NextIno(), 0444, &netDevData{stack: stack}),
+ "snmp": newDentry(root, inoGen.NextIno(), 0444, &netSnmpData{stack: stack}),
+
+ // The following files are simple stubs until they are implemented in
+ // netstack, if the file contains a header the stub is just the header
+ // otherwise it is an empty file.
+ "arp": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(arp)),
+ "netlink": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(netlink)),
+ "netstat": newDentry(root, inoGen.NextIno(), 0444, &netStatData{}),
+ "packet": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(packet)),
+ "protocols": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(protocols)),
+
+ // Linux sets psched values to: nsec per usec, psched tick in ns, 1000000,
+ // high res timer ticks per sec (ClockGetres returns 1ns resolution).
+ "psched": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(psched)),
+ "ptype": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(ptype)),
+ "route": newDentry(root, inoGen.NextIno(), 0444, &netRouteData{stack: stack}),
+ "tcp": newDentry(root, inoGen.NextIno(), 0444, &netTCPData{kernel: k}),
+ "udp": newDentry(root, inoGen.NextIno(), 0444, &netUDPData{kernel: k}),
+ "unix": newDentry(root, inoGen.NextIno(), 0444, &netUnixData{kernel: k}),
+ }
+
+ if stack.SupportsIPv6() {
+ contents["if_inet6"] = newDentry(root, inoGen.NextIno(), 0444, &ifinet6{stack: stack})
+ contents["ipv6_route"] = newDentry(root, inoGen.NextIno(), 0444, newStaticFile(""))
+ contents["tcp6"] = newDentry(root, inoGen.NextIno(), 0444, &netTCP6Data{kernel: k})
+ contents["udp6"] = newDentry(root, inoGen.NextIno(), 0444, newStaticFile(upd6))
+ }
+ }
+
+ return newTaskOwnedDir(task, inoGen.NextIno(), 0555, contents)
+}
+
+// ifinet6 implements vfs.DynamicBytesSource for /proc/net/if_inet6.
+//
+// +stateify savable
+type ifinet6 struct {
+ kernfs.DynamicBytesFile
+
+ stack inet.Stack
+}
+
+var _ dynamicInode = (*ifinet6)(nil)
+
+func (n *ifinet6) contents() []string {
+ var lines []string
+ nics := n.stack.Interfaces()
+ for id, naddrs := range n.stack.InterfaceAddrs() {
+ nic, ok := nics[id]
+ if !ok {
+ // NIC was added after NICNames was called. We'll just ignore it.
+ continue
+ }
+
+ for _, a := range naddrs {
+ // IPv6 only.
+ if a.Family != linux.AF_INET6 {
+ continue
+ }
+
+ // Fields:
+ // IPv6 address displayed in 32 hexadecimal chars without colons
+ // Netlink device number (interface index) in hexadecimal (use nic id)
+ // Prefix length in hexadecimal
+ // Scope value (use 0)
+ // Interface flags
+ // Device name
+ lines = append(lines, fmt.Sprintf("%032x %02x %02x %02x %02x %8s\n", a.Addr, id, a.PrefixLen, 0, a.Flags, nic.Name))
+ }
+ }
+ return lines
+}
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (n *ifinet6) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ for _, l := range n.contents() {
+ buf.WriteString(l)
+ }
+ return nil
+}
+
+// netDevData implements vfs.DynamicBytesSource for /proc/net/dev.
+//
+// +stateify savable
+type netDevData struct {
+ kernfs.DynamicBytesFile
+
+ stack inet.Stack
+}
+
+var _ dynamicInode = (*netDevData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (n *netDevData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ interfaces := n.stack.Interfaces()
+ buf.WriteString("Inter-| Receive | Transmit\n")
+ buf.WriteString(" face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n")
+
+ for _, i := range interfaces {
+ // Implements the same format as
+ // net/core/net-procfs.c:dev_seq_printf_stats.
+ var stats inet.StatDev
+ if err := n.stack.Statistics(&stats, i.Name); err != nil {
+ log.Warningf("Failed to retrieve interface statistics for %v: %v", i.Name, err)
+ continue
+ }
+ fmt.Fprintf(
+ buf,
+ "%6s: %7d %7d %4d %4d %4d %5d %10d %9d %8d %7d %4d %4d %4d %5d %7d %10d\n",
+ i.Name,
+ // Received
+ stats[0], // bytes
+ stats[1], // packets
+ stats[2], // errors
+ stats[3], // dropped
+ stats[4], // fifo
+ stats[5], // frame
+ stats[6], // compressed
+ stats[7], // multicast
+ // Transmitted
+ stats[8], // bytes
+ stats[9], // packets
+ stats[10], // errors
+ stats[11], // dropped
+ stats[12], // fifo
+ stats[13], // frame
+ stats[14], // compressed
+ stats[15], // multicast
+ )
+ }
+
+ return nil
+}
+
+// netUnixData implements vfs.DynamicBytesSource for /proc/net/unix.
+//
+// +stateify savable
+type netUnixData struct {
+ kernfs.DynamicBytesFile
+
+ kernel *kernel.Kernel
+}
+
+var _ dynamicInode = (*netUnixData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (n *netUnixData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ buf.WriteString("Num RefCount Protocol Flags Type St Inode Path\n")
+ for _, se := range n.kernel.ListSockets() {
+ s := se.SockVFS2
+ if !s.TryIncRef() {
+ log.Debugf("Couldn't get reference on %v in socket table, racing with destruction?", s)
+ continue
+ }
+ if family, _, _ := s.Impl().(socket.SocketVFS2).Type(); family != linux.AF_UNIX {
+ s.DecRef()
+ // Not a unix socket.
+ continue
+ }
+ sops := s.Impl().(*unix.SocketVFS2)
+
+ addr, err := sops.Endpoint().GetLocalAddress()
+ if err != nil {
+ log.Warningf("Failed to retrieve socket name from %+v: %v", s, err)
+ addr.Addr = "<unknown>"
+ }
+
+ sockFlags := 0
+ if ce, ok := sops.Endpoint().(transport.ConnectingEndpoint); ok {
+ if ce.Listening() {
+ // For unix domain sockets, linux reports a single flag
+ // value if the socket is listening, of __SO_ACCEPTCON.
+ sockFlags = linux.SO_ACCEPTCON
+ }
+ }
+
+ // Get inode number.
+ var ino uint64
+ stat, statErr := s.Stat(ctx, vfs.StatOptions{Mask: linux.STATX_INO})
+ if statErr != nil || stat.Mask&linux.STATX_INO == 0 {
+ log.Warningf("Failed to retrieve ino for socket file: %v", statErr)
+ } else {
+ ino = stat.Ino
+ }
+
+ // In the socket entry below, the value for the 'Num' field requires
+ // some consideration. Linux prints the address to the struct
+ // unix_sock representing a socket in the kernel, but may redact the
+ // value for unprivileged users depending on the kptr_restrict
+ // sysctl.
+ //
+ // One use for this field is to allow a privileged user to
+ // introspect into the kernel memory to determine information about
+ // a socket not available through procfs, such as the socket's peer.
+ //
+ // In gvisor, returning a pointer to our internal structures would
+ // be pointless, as it wouldn't match the memory layout for struct
+ // unix_sock, making introspection difficult. We could populate a
+ // struct unix_sock with the appropriate data, but even that
+ // requires consideration for which kernel version to emulate, as
+ // the definition of this struct changes over time.
+ //
+ // For now, we always redact this pointer.
+ fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %8d",
+ (*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct.
+ s.Refs()-1, // RefCount, don't count our own ref.
+ 0, // Protocol, always 0 for UDS.
+ sockFlags, // Flags.
+ sops.Endpoint().Type(), // Type.
+ sops.State(), // State.
+ ino, // Inode.
+ )
+
+ // Path
+ if len(addr.Addr) != 0 {
+ if addr.Addr[0] == 0 {
+ // Abstract path.
+ fmt.Fprintf(buf, " @%s", string(addr.Addr[1:]))
+ } else {
+ fmt.Fprintf(buf, " %s", string(addr.Addr))
+ }
+ }
+ fmt.Fprintf(buf, "\n")
+
+ s.DecRef()
+ }
+ return nil
+}
+
+func networkToHost16(n uint16) uint16 {
+ // n is in network byte order, so is big-endian. The most-significant byte
+ // should be stored in the lower address.
+ //
+ // We manually inline binary.BigEndian.Uint16() because Go does not support
+ // non-primitive consts, so binary.BigEndian is a (mutable) var, so calls to
+ // binary.BigEndian.Uint16() require a read of binary.BigEndian and an
+ // interface method call, defeating inlining.
+ buf := [2]byte{byte(n >> 8 & 0xff), byte(n & 0xff)}
+ return usermem.ByteOrder.Uint16(buf[:])
+}
+
+func writeInetAddr(w io.Writer, family int, i linux.SockAddr) {
+ switch family {
+ case linux.AF_INET:
+ var a linux.SockAddrInet
+ if i != nil {
+ a = *i.(*linux.SockAddrInet)
+ }
+
+ // linux.SockAddrInet.Port is stored in the network byte order and is
+ // printed like a number in host byte order. Note that all numbers in host
+ // byte order are printed with the most-significant byte first when
+ // formatted with %X. See get_tcp4_sock() and udp4_format_sock() in Linux.
+ port := networkToHost16(a.Port)
+
+ // linux.SockAddrInet.Addr is stored as a byte slice in big-endian order
+ // (i.e. most-significant byte in index 0). Linux represents this as a
+ // __be32 which is a typedef for an unsigned int, and is printed with
+ // %X. This means that for a little-endian machine, Linux prints the
+ // least-significant byte of the address first. To emulate this, we first
+ // invert the byte order for the address using usermem.ByteOrder.Uint32,
+ // which makes it have the equivalent encoding to a __be32 on a little
+ // endian machine. Note that this operation is a no-op on a big endian
+ // machine. Then similar to Linux, we format it with %X, which will print
+ // the most-significant byte of the __be32 address first, which is now
+ // actually the least-significant byte of the original address in
+ // linux.SockAddrInet.Addr on little endian machines, due to the conversion.
+ addr := usermem.ByteOrder.Uint32(a.Addr[:])
+
+ fmt.Fprintf(w, "%08X:%04X ", addr, port)
+ case linux.AF_INET6:
+ var a linux.SockAddrInet6
+ if i != nil {
+ a = *i.(*linux.SockAddrInet6)
+ }
+
+ port := networkToHost16(a.Port)
+ addr0 := usermem.ByteOrder.Uint32(a.Addr[0:4])
+ addr1 := usermem.ByteOrder.Uint32(a.Addr[4:8])
+ addr2 := usermem.ByteOrder.Uint32(a.Addr[8:12])
+ addr3 := usermem.ByteOrder.Uint32(a.Addr[12:16])
+ fmt.Fprintf(w, "%08X%08X%08X%08X:%04X ", addr0, addr1, addr2, addr3, port)
+ }
+}
+
+func commonGenerateTCP(ctx context.Context, buf *bytes.Buffer, k *kernel.Kernel, family int) error {
+ // t may be nil here if our caller is not part of a task goroutine. This can
+ // happen for example if we're here for "sentryctl cat". When t is nil,
+ // degrade gracefully and retrieve what we can.
+ t := kernel.TaskFromContext(ctx)
+
+ for _, se := range k.ListSockets() {
+ s := se.SockVFS2
+ if !s.TryIncRef() {
+ log.Debugf("Couldn't get reference on %v in socket table, racing with destruction?", s)
+ continue
+ }
+ sops, ok := s.Impl().(socket.SocketVFS2)
+ if !ok {
+ panic(fmt.Sprintf("Found non-socket file in socket table: %+v", s))
+ }
+ if fa, stype, _ := sops.Type(); !(family == fa && stype == linux.SOCK_STREAM) {
+ s.DecRef()
+ // Not tcp4 sockets.
+ continue
+ }
+
+ // Linux's documentation for the fields below can be found at
+ // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt.
+ // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock().
+ // Note that the header doesn't contain labels for all the fields.
+
+ // Field: sl; entry number.
+ fmt.Fprintf(buf, "%4d: ", se.ID)
+
+ // Field: local_adddress.
+ var localAddr linux.SockAddr
+ if t != nil {
+ if local, _, err := sops.GetSockName(t); err == nil {
+ localAddr = local
+ }
+ }
+ writeInetAddr(buf, family, localAddr)
+
+ // Field: rem_address.
+ var remoteAddr linux.SockAddr
+ if t != nil {
+ if remote, _, err := sops.GetPeerName(t); err == nil {
+ remoteAddr = remote
+ }
+ }
+ writeInetAddr(buf, family, remoteAddr)
+
+ // Field: state; socket state.
+ fmt.Fprintf(buf, "%02X ", sops.State())
+
+ // Field: tx_queue, rx_queue; number of packets in the transmit and
+ // receive queue. Unimplemented.
+ fmt.Fprintf(buf, "%08X:%08X ", 0, 0)
+
+ // Field: tr, tm->when; timer active state and number of jiffies
+ // until timer expires. Unimplemented.
+ fmt.Fprintf(buf, "%02X:%08X ", 0, 0)
+
+ // Field: retrnsmt; number of unrecovered RTO timeouts.
+ // Unimplemented.
+ fmt.Fprintf(buf, "%08X ", 0)
+
+ stat, statErr := s.Stat(ctx, vfs.StatOptions{Mask: linux.STATX_UID | linux.STATX_INO})
+
+ // Field: uid.
+ if statErr != nil || stat.Mask&linux.STATX_UID == 0 {
+ log.Warningf("Failed to retrieve uid for socket file: %v", statErr)
+ fmt.Fprintf(buf, "%5d ", 0)
+ } else {
+ creds := auth.CredentialsFromContext(ctx)
+ fmt.Fprintf(buf, "%5d ", uint32(auth.KUID(stat.UID).In(creds.UserNamespace).OrOverflow()))
+ }
+
+ // Field: timeout; number of unanswered 0-window probes.
+ // Unimplemented.
+ fmt.Fprintf(buf, "%8d ", 0)
+
+ // Field: inode.
+ if statErr != nil || stat.Mask&linux.STATX_INO == 0 {
+ log.Warningf("Failed to retrieve inode for socket file: %v", statErr)
+ fmt.Fprintf(buf, "%8d ", 0)
+ } else {
+ fmt.Fprintf(buf, "%8d ", stat.Ino)
+ }
+
+ // Field: refcount. Don't count the ref we obtain while deferencing
+ // the weakref to this socket.
+ fmt.Fprintf(buf, "%d ", s.Refs()-1)
+
+ // Field: Socket struct address. Redacted due to the same reason as
+ // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData.
+ fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil))
+
+ // Field: retransmit timeout. Unimplemented.
+ fmt.Fprintf(buf, "%d ", 0)
+
+ // Field: predicted tick of soft clock (delayed ACK control data).
+ // Unimplemented.
+ fmt.Fprintf(buf, "%d ", 0)
+
+ // Field: (ack.quick<<1)|ack.pingpong, Unimplemented.
+ fmt.Fprintf(buf, "%d ", 0)
+
+ // Field: sending congestion window, Unimplemented.
+ fmt.Fprintf(buf, "%d ", 0)
+
+ // Field: Slow start size threshold, -1 if threshold >= 0xFFFF.
+ // Unimplemented, report as large threshold.
+ fmt.Fprintf(buf, "%d", -1)
+
+ fmt.Fprintf(buf, "\n")
+
+ s.DecRef()
+ }
+
+ return nil
+}
+
+// netTCPData implements vfs.DynamicBytesSource for /proc/net/tcp.
+//
+// +stateify savable
+type netTCPData struct {
+ kernfs.DynamicBytesFile
+
+ kernel *kernel.Kernel
+}
+
+var _ dynamicInode = (*netTCPData)(nil)
+
+func (d *netTCPData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ buf.WriteString(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n")
+ return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET)
+}
+
+// netTCP6Data implements vfs.DynamicBytesSource for /proc/net/tcp6.
+//
+// +stateify savable
+type netTCP6Data struct {
+ kernfs.DynamicBytesFile
+
+ kernel *kernel.Kernel
+}
+
+var _ dynamicInode = (*netTCP6Data)(nil)
+
+func (d *netTCP6Data) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ buf.WriteString(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n")
+ return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET6)
+}
+
+// netUDPData implements vfs.DynamicBytesSource for /proc/net/udp.
+//
+// +stateify savable
+type netUDPData struct {
+ kernfs.DynamicBytesFile
+
+ kernel *kernel.Kernel
+}
+
+var _ dynamicInode = (*netUDPData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *netUDPData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ // t may be nil here if our caller is not part of a task goroutine. This can
+ // happen for example if we're here for "sentryctl cat". When t is nil,
+ // degrade gracefully and retrieve what we can.
+ t := kernel.TaskFromContext(ctx)
+
+ for _, se := range d.kernel.ListSockets() {
+ s := se.SockVFS2
+ if !s.TryIncRef() {
+ log.Debugf("Couldn't get reference on %v in socket table, racing with destruction?", s)
+ continue
+ }
+ sops, ok := s.Impl().(socket.SocketVFS2)
+ if !ok {
+ panic(fmt.Sprintf("Found non-socket file in socket table: %+v", s))
+ }
+ if family, stype, _ := sops.Type(); family != linux.AF_INET || stype != linux.SOCK_DGRAM {
+ s.DecRef()
+ // Not udp4 socket.
+ continue
+ }
+
+ // For Linux's implementation, see net/ipv4/udp.c:udp4_format_sock().
+
+ // Field: sl; entry number.
+ fmt.Fprintf(buf, "%5d: ", se.ID)
+
+ // Field: local_adddress.
+ var localAddr linux.SockAddrInet
+ if t != nil {
+ if local, _, err := sops.GetSockName(t); err == nil {
+ localAddr = *local.(*linux.SockAddrInet)
+ }
+ }
+ writeInetAddr(buf, linux.AF_INET, &localAddr)
+
+ // Field: rem_address.
+ var remoteAddr linux.SockAddrInet
+ if t != nil {
+ if remote, _, err := sops.GetPeerName(t); err == nil {
+ remoteAddr = *remote.(*linux.SockAddrInet)
+ }
+ }
+ writeInetAddr(buf, linux.AF_INET, &remoteAddr)
+
+ // Field: state; socket state.
+ fmt.Fprintf(buf, "%02X ", sops.State())
+
+ // Field: tx_queue, rx_queue; number of packets in the transmit and
+ // receive queue. Unimplemented.
+ fmt.Fprintf(buf, "%08X:%08X ", 0, 0)
+
+ // Field: tr, tm->when. Always 0 for UDP.
+ fmt.Fprintf(buf, "%02X:%08X ", 0, 0)
+
+ // Field: retrnsmt. Always 0 for UDP.
+ fmt.Fprintf(buf, "%08X ", 0)
+
+ stat, statErr := s.Stat(ctx, vfs.StatOptions{Mask: linux.STATX_UID | linux.STATX_INO})
+
+ // Field: uid.
+ if statErr != nil || stat.Mask&linux.STATX_UID == 0 {
+ log.Warningf("Failed to retrieve uid for socket file: %v", statErr)
+ fmt.Fprintf(buf, "%5d ", 0)
+ } else {
+ creds := auth.CredentialsFromContext(ctx)
+ fmt.Fprintf(buf, "%5d ", uint32(auth.KUID(stat.UID).In(creds.UserNamespace).OrOverflow()))
+ }
+
+ // Field: timeout. Always 0 for UDP.
+ fmt.Fprintf(buf, "%8d ", 0)
+
+ // Field: inode.
+ if statErr != nil || stat.Mask&linux.STATX_INO == 0 {
+ log.Warningf("Failed to retrieve inode for socket file: %v", statErr)
+ fmt.Fprintf(buf, "%8d ", 0)
+ } else {
+ fmt.Fprintf(buf, "%8d ", stat.Ino)
+ }
+
+ // Field: ref; reference count on the socket inode. Don't count the ref
+ // we obtain while deferencing the weakref to this socket.
+ fmt.Fprintf(buf, "%d ", s.Refs()-1)
+
+ // Field: Socket struct address. Redacted due to the same reason as
+ // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData.
+ fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil))
+
+ // Field: drops; number of dropped packets. Unimplemented.
+ fmt.Fprintf(buf, "%d", 0)
+
+ fmt.Fprintf(buf, "\n")
+
+ s.DecRef()
+ }
+ return nil
+}
+
+// netSnmpData implements vfs.DynamicBytesSource for /proc/net/snmp.
+//
+// +stateify savable
+type netSnmpData struct {
+ kernfs.DynamicBytesFile
+
+ stack inet.Stack
+}
+
+var _ dynamicInode = (*netSnmpData)(nil)
+
+type snmpLine struct {
+ prefix string
+ header string
+}
+
+var snmp = []snmpLine{
+ {
+ prefix: "Ip",
+ header: "Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates",
+ },
+ {
+ prefix: "Icmp",
+ header: "InMsgs InErrors InCsumErrors InDestUnreachs InTimeExcds InParmProbs InSrcQuenchs InRedirects InEchos InEchoReps InTimestamps InTimestampReps InAddrMasks InAddrMaskReps OutMsgs OutErrors OutDestUnreachs OutTimeExcds OutParmProbs OutSrcQuenchs OutRedirects OutEchos OutEchoReps OutTimestamps OutTimestampReps OutAddrMasks OutAddrMaskReps",
+ },
+ {
+ prefix: "IcmpMsg",
+ },
+ {
+ prefix: "Tcp",
+ header: "RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts InCsumErrors",
+ },
+ {
+ prefix: "Udp",
+ header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti",
+ },
+ {
+ prefix: "UdpLite",
+ header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti",
+ },
+}
+
+func toSlice(a interface{}) []uint64 {
+ v := reflect.Indirect(reflect.ValueOf(a))
+ return v.Slice(0, v.Len()).Interface().([]uint64)
+}
+
+func sprintSlice(s []uint64) string {
+ if len(s) == 0 {
+ return ""
+ }
+ r := fmt.Sprint(s)
+ return r[1 : len(r)-1] // Remove "[]" introduced by fmt of slice.
+}
+
+// Generate implements vfs.DynamicBytesSource.
+func (d *netSnmpData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ types := []interface{}{
+ &inet.StatSNMPIP{},
+ &inet.StatSNMPICMP{},
+ nil, // TODO(gvisor.dev/issue/628): Support IcmpMsg stats.
+ &inet.StatSNMPTCP{},
+ &inet.StatSNMPUDP{},
+ &inet.StatSNMPUDPLite{},
+ }
+ for i, stat := range types {
+ line := snmp[i]
+ if stat == nil {
+ fmt.Fprintf(buf, "%s:\n", line.prefix)
+ fmt.Fprintf(buf, "%s:\n", line.prefix)
+ continue
+ }
+ if err := d.stack.Statistics(stat, line.prefix); err != nil {
+ if err == syserror.EOPNOTSUPP {
+ log.Infof("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err)
+ } else {
+ log.Warningf("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err)
+ }
+ }
+
+ fmt.Fprintf(buf, "%s: %s\n", line.prefix, line.header)
+
+ if line.prefix == "Tcp" {
+ tcp := stat.(*inet.StatSNMPTCP)
+ // "Tcp" needs special processing because MaxConn is signed. RFC 2012.
+ fmt.Fprintf(buf, "%s: %s %d %s\n", line.prefix, sprintSlice(tcp[:3]), int64(tcp[3]), sprintSlice(tcp[4:]))
+ } else {
+ fmt.Fprintf(buf, "%s: %s\n", line.prefix, sprintSlice(toSlice(stat)))
+ }
+ }
+ return nil
+}
+
+// netRouteData implements vfs.DynamicBytesSource for /proc/net/route.
+//
+// +stateify savable
+type netRouteData struct {
+ kernfs.DynamicBytesFile
+
+ stack inet.Stack
+}
+
+var _ dynamicInode = (*netRouteData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.
+// See Linux's net/ipv4/fib_trie.c:fib_route_seq_show.
+func (d *netRouteData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ fmt.Fprintf(buf, "%-127s\n", "Iface\tDestination\tGateway\tFlags\tRefCnt\tUse\tMetric\tMask\tMTU\tWindow\tIRTT")
+
+ interfaces := d.stack.Interfaces()
+ for _, rt := range d.stack.RouteTable() {
+ // /proc/net/route only includes ipv4 routes.
+ if rt.Family != linux.AF_INET {
+ continue
+ }
+
+ // /proc/net/route does not include broadcast or multicast routes.
+ if rt.Type == linux.RTN_BROADCAST || rt.Type == linux.RTN_MULTICAST {
+ continue
+ }
+
+ iface, ok := interfaces[rt.OutputInterface]
+ if !ok || iface.Name == "lo" {
+ continue
+ }
+
+ var (
+ gw uint32
+ prefix uint32
+ flags = linux.RTF_UP
+ )
+ if len(rt.GatewayAddr) == header.IPv4AddressSize {
+ flags |= linux.RTF_GATEWAY
+ gw = usermem.ByteOrder.Uint32(rt.GatewayAddr)
+ }
+ if len(rt.DstAddr) == header.IPv4AddressSize {
+ prefix = usermem.ByteOrder.Uint32(rt.DstAddr)
+ }
+ l := fmt.Sprintf(
+ "%s\t%08X\t%08X\t%04X\t%d\t%d\t%d\t%08X\t%d\t%d\t%d",
+ iface.Name,
+ prefix,
+ gw,
+ flags,
+ 0, // RefCnt.
+ 0, // Use.
+ 0, // Metric.
+ (uint32(1)<<rt.DstLen)-1,
+ 0, // MTU.
+ 0, // Window.
+ 0, // RTT.
+ )
+ fmt.Fprintf(buf, "%-127s\n", l)
+ }
+ return nil
+}
+
+// netStatData implements vfs.DynamicBytesSource for /proc/net/netstat.
+//
+// +stateify savable
+type netStatData struct {
+ kernfs.DynamicBytesFile
+
+ stack inet.Stack
+}
+
+var _ dynamicInode = (*netStatData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.
+// See Linux's net/ipv4/fib_trie.c:fib_route_seq_show.
+func (d *netStatData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ buf.WriteString("TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed " +
+ "EmbryonicRsts PruneCalled RcvPruned OfoPruned OutOfWindowIcmps " +
+ "LockDroppedIcmps ArpFilter TW TWRecycled TWKilled PAWSPassive " +
+ "PAWSActive PAWSEstab DelayedACKs DelayedACKLocked DelayedACKLost " +
+ "ListenOverflows ListenDrops TCPPrequeued TCPDirectCopyFromBacklog " +
+ "TCPDirectCopyFromPrequeue TCPPrequeueDropped TCPHPHits TCPHPHitsToUser " +
+ "TCPPureAcks TCPHPAcks TCPRenoRecovery TCPSackRecovery TCPSACKReneging " +
+ "TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder TCPFullUndo " +
+ "TCPPartialUndo TCPDSACKUndo TCPLossUndo TCPLostRetransmit " +
+ "TCPRenoFailures TCPSackFailures TCPLossFailures TCPFastRetrans " +
+ "TCPForwardRetrans TCPSlowStartRetrans TCPTimeouts TCPLossProbes " +
+ "TCPLossProbeRecovery TCPRenoRecoveryFail TCPSackRecoveryFail " +
+ "TCPSchedulerFailed TCPRcvCollapsed TCPDSACKOldSent TCPDSACKOfoSent " +
+ "TCPDSACKRecv TCPDSACKOfoRecv TCPAbortOnData TCPAbortOnClose " +
+ "TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger TCPAbortFailed " +
+ "TCPMemoryPressures TCPSACKDiscard TCPDSACKIgnoredOld " +
+ "TCPDSACKIgnoredNoUndo TCPSpuriousRTOs TCPMD5NotFound TCPMD5Unexpected " +
+ "TCPMD5Failure TCPSackShifted TCPSackMerged TCPSackShiftFallback " +
+ "TCPBacklogDrop TCPMinTTLDrop TCPDeferAcceptDrop IPReversePathFilter " +
+ "TCPTimeWaitOverflow TCPReqQFullDoCookies TCPReqQFullDrop TCPRetransFail " +
+ "TCPRcvCoalesce TCPOFOQueue TCPOFODrop TCPOFOMerge TCPChallengeACK " +
+ "TCPSYNChallenge TCPFastOpenActive TCPFastOpenActiveFail " +
+ "TCPFastOpenPassive TCPFastOpenPassiveFail TCPFastOpenListenOverflow " +
+ "TCPFastOpenCookieReqd TCPSpuriousRtxHostQueues BusyPollRxPackets " +
+ "TCPAutoCorking TCPFromZeroWindowAdv TCPToZeroWindowAdv " +
+ "TCPWantZeroWindowAdv TCPSynRetrans TCPOrigDataSent TCPHystartTrainDetect " +
+ "TCPHystartTrainCwnd TCPHystartDelayDetect TCPHystartDelayCwnd " +
+ "TCPACKSkippedSynRecv TCPACKSkippedPAWS TCPACKSkippedSeq " +
+ "TCPACKSkippedFinWait2 TCPACKSkippedTimeWait TCPACKSkippedChallenge " +
+ "TCPWinProbe TCPKeepAlive TCPMTUPFail TCPMTUPSuccess\n")
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
new file mode 100755
index 000000000..9f2ef8200
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/tasks.go
@@ -0,0 +1,257 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "sort"
+ "strconv"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+)
+
+const (
+ selfName = "self"
+ threadSelfName = "thread-self"
+)
+
+// InoGenerator generates unique inode numbers for a given filesystem.
+type InoGenerator interface {
+ NextIno() uint64
+}
+
+// tasksInode represents the inode for /proc/ directory.
+//
+// +stateify savable
+type tasksInode struct {
+ kernfs.InodeNotSymlink
+ kernfs.InodeDirectoryNoNewChildren
+ kernfs.InodeAttrs
+ kernfs.OrderedChildren
+ kernfs.AlwaysValid
+
+ inoGen InoGenerator
+ pidns *kernel.PIDNamespace
+
+ // '/proc/self' and '/proc/thread-self' have custom directory offsets in
+ // Linux. So handle them outside of OrderedChildren.
+ selfSymlink *vfs.Dentry
+ threadSelfSymlink *vfs.Dentry
+
+ // cgroupControllers is a map of controller name to directory in the
+ // cgroup hierarchy. These controllers are immutable and will be listed
+ // in /proc/pid/cgroup if not nil.
+ cgroupControllers map[string]string
+}
+
+var _ kernfs.Inode = (*tasksInode)(nil)
+
+func newTasksInode(inoGen InoGenerator, k *kernel.Kernel, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) (*tasksInode, *kernfs.Dentry) {
+ root := auth.NewRootCredentials(pidns.UserNamespace())
+ contents := map[string]*kernfs.Dentry{
+ "cpuinfo": newDentry(root, inoGen.NextIno(), 0444, newStaticFileSetStat(cpuInfoData(k))),
+ "filesystems": newDentry(root, inoGen.NextIno(), 0444, &filesystemsData{}),
+ "loadavg": newDentry(root, inoGen.NextIno(), 0444, &loadavgData{}),
+ "sys": newSysDir(root, inoGen, k),
+ "meminfo": newDentry(root, inoGen.NextIno(), 0444, &meminfoData{}),
+ "mounts": kernfs.NewStaticSymlink(root, inoGen.NextIno(), "self/mounts"),
+ "net": kernfs.NewStaticSymlink(root, inoGen.NextIno(), "self/net"),
+ "stat": newDentry(root, inoGen.NextIno(), 0444, &statData{}),
+ "uptime": newDentry(root, inoGen.NextIno(), 0444, &uptimeData{}),
+ "version": newDentry(root, inoGen.NextIno(), 0444, &versionData{}),
+ }
+
+ inode := &tasksInode{
+ pidns: pidns,
+ inoGen: inoGen,
+ selfSymlink: newSelfSymlink(root, inoGen.NextIno(), pidns).VFSDentry(),
+ threadSelfSymlink: newThreadSelfSymlink(root, inoGen.NextIno(), pidns).VFSDentry(),
+ cgroupControllers: cgroupControllers,
+ }
+ inode.InodeAttrs.Init(root, inoGen.NextIno(), linux.ModeDirectory|0555)
+
+ dentry := &kernfs.Dentry{}
+ dentry.Init(inode)
+
+ inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+ links := inode.OrderedChildren.Populate(dentry, contents)
+ inode.IncLinks(links)
+
+ return inode, dentry
+}
+
+// Lookup implements kernfs.inodeDynamicLookup.
+func (i *tasksInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) {
+ // Try to lookup a corresponding task.
+ tid, err := strconv.ParseUint(name, 10, 64)
+ if err != nil {
+ // If it failed to parse, check if it's one of the special handled files.
+ switch name {
+ case selfName:
+ return i.selfSymlink, nil
+ case threadSelfName:
+ return i.threadSelfSymlink, nil
+ }
+ return nil, syserror.ENOENT
+ }
+
+ task := i.pidns.TaskWithID(kernel.ThreadID(tid))
+ if task == nil {
+ return nil, syserror.ENOENT
+ }
+
+ taskDentry := newTaskInode(i.inoGen, task, i.pidns, true, i.cgroupControllers)
+ return taskDentry.VFSDentry(), nil
+}
+
+// IterDirents implements kernfs.inodeDynamicLookup.
+func (i *tasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, _ int64) (int64, error) {
+ // fs/proc/internal.h: #define FIRST_PROCESS_ENTRY 256
+ const FIRST_PROCESS_ENTRY = 256
+
+ // Use maxTaskID to shortcut searches that will result in 0 entries.
+ const maxTaskID = kernel.TasksLimit + 1
+ if offset >= maxTaskID {
+ return offset, nil
+ }
+
+ // According to Linux (fs/proc/base.c:proc_pid_readdir()), process directories
+ // start at offset FIRST_PROCESS_ENTRY with '/proc/self', followed by
+ // '/proc/thread-self' and then '/proc/[pid]'.
+ if offset < FIRST_PROCESS_ENTRY {
+ offset = FIRST_PROCESS_ENTRY
+ }
+
+ if offset == FIRST_PROCESS_ENTRY {
+ dirent := vfs.Dirent{
+ Name: selfName,
+ Type: linux.DT_LNK,
+ Ino: i.inoGen.NextIno(),
+ NextOff: offset + 1,
+ }
+ if err := cb.Handle(dirent); err != nil {
+ return offset, err
+ }
+ offset++
+ }
+ if offset == FIRST_PROCESS_ENTRY+1 {
+ dirent := vfs.Dirent{
+ Name: threadSelfName,
+ Type: linux.DT_LNK,
+ Ino: i.inoGen.NextIno(),
+ NextOff: offset + 1,
+ }
+ if err := cb.Handle(dirent); err != nil {
+ return offset, err
+ }
+ offset++
+ }
+
+ // Collect all tasks that TGIDs are greater than the offset specified. Per
+ // Linux we only include in directory listings if it's the leader. But for
+ // whatever crazy reason, you can still walk to the given node.
+ var tids []int
+ startTid := offset - FIRST_PROCESS_ENTRY - 2
+ for _, tg := range i.pidns.ThreadGroups() {
+ tid := i.pidns.IDOfThreadGroup(tg)
+ if int64(tid) < startTid {
+ continue
+ }
+ if leader := tg.Leader(); leader != nil {
+ tids = append(tids, int(tid))
+ }
+ }
+
+ if len(tids) == 0 {
+ return offset, nil
+ }
+
+ sort.Ints(tids)
+ for _, tid := range tids {
+ dirent := vfs.Dirent{
+ Name: strconv.FormatUint(uint64(tid), 10),
+ Type: linux.DT_DIR,
+ Ino: i.inoGen.NextIno(),
+ NextOff: FIRST_PROCESS_ENTRY + 2 + int64(tid) + 1,
+ }
+ if err := cb.Handle(dirent); err != nil {
+ return offset, err
+ }
+ offset++
+ }
+ return maxTaskID, nil
+}
+
+// Open implements kernfs.Inode.
+func (i *tasksInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
+ fd := &kernfs.GenericDirectoryFD{}
+ fd.Init(rp.Mount(), vfsd, &i.OrderedChildren, &opts)
+ return fd.VFSFileDescription(), nil
+}
+
+func (i *tasksInode) Stat(vsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
+ stat, err := i.InodeAttrs.Stat(vsfs, opts)
+ if err != nil {
+ return linux.Statx{}, err
+ }
+
+ if opts.Mask&linux.STATX_NLINK != 0 {
+ // Add dynamic children to link count.
+ for _, tg := range i.pidns.ThreadGroups() {
+ if leader := tg.Leader(); leader != nil {
+ stat.Nlink++
+ }
+ }
+ }
+
+ return stat, nil
+}
+
+// staticFileSetStat implements a special static file that allows inode
+// attributes to be set. This is to support /proc files that are readonly, but
+// allow attributes to be set.
+type staticFileSetStat struct {
+ dynamicBytesFileSetAttr
+ vfs.StaticData
+}
+
+var _ dynamicInode = (*staticFileSetStat)(nil)
+
+func newStaticFileSetStat(data string) *staticFileSetStat {
+ return &staticFileSetStat{StaticData: vfs.StaticData{Data: data}}
+}
+
+func cpuInfoData(k *kernel.Kernel) string {
+ features := k.FeatureSet()
+ if features == nil {
+ // Kernel is always initialized with a FeatureSet.
+ panic("cpuinfo read with nil FeatureSet")
+ }
+ var buf bytes.Buffer
+ for i, max := uint(0), k.ApplicationCores(); i < max; i++ {
+ features.WriteCPUInfoTo(i, &buf)
+ }
+ return buf.String()
+}
+
+func shmData(v uint64) dynamicInode {
+ return newStaticFile(strconv.FormatUint(v, 10))
+}
diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go
new file mode 100755
index 000000000..4621e2de0
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/tasks_files.go
@@ -0,0 +1,380 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+ "strconv"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/time"
+ "gvisor.dev/gvisor/pkg/sentry/usage"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+type selfSymlink struct {
+ kernfs.InodeAttrs
+ kernfs.InodeNoopRefCount
+ kernfs.InodeSymlink
+
+ pidns *kernel.PIDNamespace
+}
+
+var _ kernfs.Inode = (*selfSymlink)(nil)
+
+func newSelfSymlink(creds *auth.Credentials, ino uint64, pidns *kernel.PIDNamespace) *kernfs.Dentry {
+ inode := &selfSymlink{pidns: pidns}
+ inode.Init(creds, ino, linux.ModeSymlink|0777)
+
+ d := &kernfs.Dentry{}
+ d.Init(inode)
+ return d
+}
+
+func (s *selfSymlink) Readlink(ctx context.Context) (string, error) {
+ t := kernel.TaskFromContext(ctx)
+ if t == nil {
+ // Who is reading this link?
+ return "", syserror.EINVAL
+ }
+ tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup())
+ if tgid == 0 {
+ return "", syserror.ENOENT
+ }
+ return strconv.FormatUint(uint64(tgid), 10), nil
+}
+
+func (s *selfSymlink) Getlink(ctx context.Context) (vfs.VirtualDentry, string, error) {
+ target, err := s.Readlink(ctx)
+ return vfs.VirtualDentry{}, target, err
+}
+
+// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
+func (*selfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
+ return syserror.EPERM
+}
+
+type threadSelfSymlink struct {
+ kernfs.InodeAttrs
+ kernfs.InodeNoopRefCount
+ kernfs.InodeSymlink
+
+ pidns *kernel.PIDNamespace
+}
+
+var _ kernfs.Inode = (*threadSelfSymlink)(nil)
+
+func newThreadSelfSymlink(creds *auth.Credentials, ino uint64, pidns *kernel.PIDNamespace) *kernfs.Dentry {
+ inode := &threadSelfSymlink{pidns: pidns}
+ inode.Init(creds, ino, linux.ModeSymlink|0777)
+
+ d := &kernfs.Dentry{}
+ d.Init(inode)
+ return d
+}
+
+func (s *threadSelfSymlink) Readlink(ctx context.Context) (string, error) {
+ t := kernel.TaskFromContext(ctx)
+ if t == nil {
+ // Who is reading this link?
+ return "", syserror.EINVAL
+ }
+ tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup())
+ tid := s.pidns.IDOfTask(t)
+ if tid == 0 || tgid == 0 {
+ return "", syserror.ENOENT
+ }
+ return fmt.Sprintf("%d/task/%d", tgid, tid), nil
+}
+
+func (s *threadSelfSymlink) Getlink(ctx context.Context) (vfs.VirtualDentry, string, error) {
+ target, err := s.Readlink(ctx)
+ return vfs.VirtualDentry{}, target, err
+}
+
+// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
+func (*threadSelfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
+ return syserror.EPERM
+}
+
+// dynamicBytesFileSetAttr implements a special file that allows inode
+// attributes to be set. This is to support /proc files that are readonly, but
+// allow attributes to be set.
+type dynamicBytesFileSetAttr struct {
+ kernfs.DynamicBytesFile
+}
+
+// SetStat implements Inode.SetStat.
+func (d *dynamicBytesFileSetAttr) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
+ return d.DynamicBytesFile.InodeAttrs.SetStat(ctx, fs, creds, opts)
+}
+
+// cpuStats contains the breakdown of CPU time for /proc/stat.
+type cpuStats struct {
+ // user is time spent in userspace tasks with non-positive niceness.
+ user uint64
+
+ // nice is time spent in userspace tasks with positive niceness.
+ nice uint64
+
+ // system is time spent in non-interrupt kernel context.
+ system uint64
+
+ // idle is time spent idle.
+ idle uint64
+
+ // ioWait is time spent waiting for IO.
+ ioWait uint64
+
+ // irq is time spent in interrupt context.
+ irq uint64
+
+ // softirq is time spent in software interrupt context.
+ softirq uint64
+
+ // steal is involuntary wait time.
+ steal uint64
+
+ // guest is time spent in guests with non-positive niceness.
+ guest uint64
+
+ // guestNice is time spent in guests with positive niceness.
+ guestNice uint64
+}
+
+// String implements fmt.Stringer.
+func (c cpuStats) String() string {
+ return fmt.Sprintf("%d %d %d %d %d %d %d %d %d %d", c.user, c.nice, c.system, c.idle, c.ioWait, c.irq, c.softirq, c.steal, c.guest, c.guestNice)
+}
+
+// statData implements vfs.DynamicBytesSource for /proc/stat.
+//
+// +stateify savable
+type statData struct {
+ dynamicBytesFileSetAttr
+}
+
+var _ dynamicInode = (*statData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (*statData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ // TODO(b/37226836): We currently export only zero CPU stats. We could
+ // at least provide some aggregate stats.
+ var cpu cpuStats
+ fmt.Fprintf(buf, "cpu %s\n", cpu)
+
+ k := kernel.KernelFromContext(ctx)
+ for c, max := uint(0), k.ApplicationCores(); c < max; c++ {
+ fmt.Fprintf(buf, "cpu%d %s\n", c, cpu)
+ }
+
+ // The total number of interrupts is dependent on the CPUs and PCI
+ // devices on the system. See arch_probe_nr_irqs.
+ //
+ // Since we don't report real interrupt stats, just choose an arbitrary
+ // value from a representative VM.
+ const numInterrupts = 256
+
+ // The Kernel doesn't handle real interrupts, so report all zeroes.
+ // TODO(b/37226836): We could count page faults as #PF.
+ fmt.Fprintf(buf, "intr 0") // total
+ for i := 0; i < numInterrupts; i++ {
+ fmt.Fprintf(buf, " 0")
+ }
+ fmt.Fprintf(buf, "\n")
+
+ // Total number of context switches.
+ // TODO(b/37226836): Count this.
+ fmt.Fprintf(buf, "ctxt 0\n")
+
+ // CLOCK_REALTIME timestamp from boot, in seconds.
+ fmt.Fprintf(buf, "btime %d\n", k.Timekeeper().BootTime().Seconds())
+
+ // Total number of clones.
+ // TODO(b/37226836): Count this.
+ fmt.Fprintf(buf, "processes 0\n")
+
+ // Number of runnable tasks.
+ // TODO(b/37226836): Count this.
+ fmt.Fprintf(buf, "procs_running 0\n")
+
+ // Number of tasks waiting on IO.
+ // TODO(b/37226836): Count this.
+ fmt.Fprintf(buf, "procs_blocked 0\n")
+
+ // Number of each softirq handled.
+ fmt.Fprintf(buf, "softirq 0") // total
+ for i := 0; i < linux.NumSoftIRQ; i++ {
+ fmt.Fprintf(buf, " 0")
+ }
+ fmt.Fprintf(buf, "\n")
+ return nil
+}
+
+// loadavgData backs /proc/loadavg.
+//
+// +stateify savable
+type loadavgData struct {
+ dynamicBytesFileSetAttr
+}
+
+var _ dynamicInode = (*loadavgData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (*loadavgData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ // TODO(b/62345059): Include real data in fields.
+ // Column 1-3: CPU and IO utilization of the last 1, 5, and 10 minute periods.
+ // Column 4-5: currently running processes and the total number of processes.
+ // Column 6: the last process ID used.
+ fmt.Fprintf(buf, "%.2f %.2f %.2f %d/%d %d\n", 0.00, 0.00, 0.00, 0, 0, 0)
+ return nil
+}
+
+// meminfoData implements vfs.DynamicBytesSource for /proc/meminfo.
+//
+// +stateify savable
+type meminfoData struct {
+ dynamicBytesFileSetAttr
+}
+
+var _ dynamicInode = (*meminfoData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (*meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ k := kernel.KernelFromContext(ctx)
+ mf := k.MemoryFile()
+ mf.UpdateUsage()
+ snapshot, totalUsage := usage.MemoryAccounting.Copy()
+ totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage)
+ anon := snapshot.Anonymous + snapshot.Tmpfs
+ file := snapshot.PageCache + snapshot.Mapped
+ // We don't actually have active/inactive LRUs, so just make up numbers.
+ activeFile := (file / 2) &^ (usermem.PageSize - 1)
+ inactiveFile := file - activeFile
+
+ fmt.Fprintf(buf, "MemTotal: %8d kB\n", totalSize/1024)
+ memFree := (totalSize - totalUsage) / 1024
+ // We use MemFree as MemAvailable because we don't swap.
+ // TODO(rahat): When reclaim is implemented the value of MemAvailable
+ // should change.
+ fmt.Fprintf(buf, "MemFree: %8d kB\n", memFree)
+ fmt.Fprintf(buf, "MemAvailable: %8d kB\n", memFree)
+ fmt.Fprintf(buf, "Buffers: 0 kB\n") // memory usage by block devices
+ fmt.Fprintf(buf, "Cached: %8d kB\n", (file+snapshot.Tmpfs)/1024)
+ // Emulate a system with no swap, which disables inactivation of anon pages.
+ fmt.Fprintf(buf, "SwapCache: 0 kB\n")
+ fmt.Fprintf(buf, "Active: %8d kB\n", (anon+activeFile)/1024)
+ fmt.Fprintf(buf, "Inactive: %8d kB\n", inactiveFile/1024)
+ fmt.Fprintf(buf, "Active(anon): %8d kB\n", anon/1024)
+ fmt.Fprintf(buf, "Inactive(anon): 0 kB\n")
+ fmt.Fprintf(buf, "Active(file): %8d kB\n", activeFile/1024)
+ fmt.Fprintf(buf, "Inactive(file): %8d kB\n", inactiveFile/1024)
+ fmt.Fprintf(buf, "Unevictable: 0 kB\n") // TODO(b/31823263)
+ fmt.Fprintf(buf, "Mlocked: 0 kB\n") // TODO(b/31823263)
+ fmt.Fprintf(buf, "SwapTotal: 0 kB\n")
+ fmt.Fprintf(buf, "SwapFree: 0 kB\n")
+ fmt.Fprintf(buf, "Dirty: 0 kB\n")
+ fmt.Fprintf(buf, "Writeback: 0 kB\n")
+ fmt.Fprintf(buf, "AnonPages: %8d kB\n", anon/1024)
+ fmt.Fprintf(buf, "Mapped: %8d kB\n", file/1024) // doesn't count mapped tmpfs, which we don't know
+ fmt.Fprintf(buf, "Shmem: %8d kB\n", snapshot.Tmpfs/1024)
+ return nil
+}
+
+// uptimeData implements vfs.DynamicBytesSource for /proc/uptime.
+//
+// +stateify savable
+type uptimeData struct {
+ dynamicBytesFileSetAttr
+}
+
+var _ dynamicInode = (*uptimeData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (*uptimeData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ k := kernel.KernelFromContext(ctx)
+ now := time.NowFromContext(ctx)
+
+ // Pretend that we've spent zero time sleeping (second number).
+ fmt.Fprintf(buf, "%.2f 0.00\n", now.Sub(k.Timekeeper().BootTime()).Seconds())
+ return nil
+}
+
+// versionData implements vfs.DynamicBytesSource for /proc/version.
+//
+// +stateify savable
+type versionData struct {
+ dynamicBytesFileSetAttr
+}
+
+var _ dynamicInode = (*versionData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (*versionData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ k := kernel.KernelFromContext(ctx)
+ init := k.GlobalInit()
+ if init == nil {
+ // Attempted to read before the init Task is created. This can
+ // only occur during startup, which should never need to read
+ // this file.
+ panic("Attempted to read version before initial Task is available")
+ }
+
+ // /proc/version takes the form:
+ //
+ // "SYSNAME version RELEASE (COMPILE_USER@COMPILE_HOST)
+ // (COMPILER_VERSION) VERSION"
+ //
+ // where:
+ // - SYSNAME, RELEASE, and VERSION are the same as returned by
+ // sys_utsname
+ // - COMPILE_USER is the user that build the kernel
+ // - COMPILE_HOST is the hostname of the machine on which the kernel
+ // was built
+ // - COMPILER_VERSION is the version reported by the building compiler
+ //
+ // Since we don't really want to expose build information to
+ // applications, those fields are omitted.
+ //
+ // FIXME(mpratt): Using Version from the init task SyscallTable
+ // disregards the different version a task may have (e.g., in a uts
+ // namespace).
+ ver := init.Leader().SyscallTable().Version
+ fmt.Fprintf(buf, "%s version %s %s\n", ver.Sysname, ver.Release, ver.Version)
+ return nil
+}
+
+// filesystemsData backs /proc/filesystems.
+//
+// +stateify savable
+type filesystemsData struct {
+ kernfs.DynamicBytesFile
+}
+
+var _ dynamicInode = (*filesystemsData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *filesystemsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ k := kernel.KernelFromContext(ctx)
+ k.VFS().GenerateProcFilesystems(buf)
+ return nil
+}
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go
new file mode 100755
index 000000000..3d5dc463c
--- /dev/null
+++ b/pkg/sentry/fsimpl/proc/tasks_sys.go
@@ -0,0 +1,211 @@
+// Copyright 2019 The gVisor Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package proc
+
+import (
+ "bytes"
+ "fmt"
+
+ "gvisor.dev/gvisor/pkg/abi/linux"
+ "gvisor.dev/gvisor/pkg/context"
+ "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
+ "gvisor.dev/gvisor/pkg/sentry/inet"
+ "gvisor.dev/gvisor/pkg/sentry/kernel"
+ "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
+ "gvisor.dev/gvisor/pkg/sentry/vfs"
+ "gvisor.dev/gvisor/pkg/syserror"
+ "gvisor.dev/gvisor/pkg/usermem"
+)
+
+// newSysDir returns the dentry corresponding to /proc/sys directory.
+func newSysDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *kernfs.Dentry {
+ return kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
+ "kernel": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
+ "hostname": newDentry(root, inoGen.NextIno(), 0444, &hostnameData{}),
+ "shmall": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMALL)),
+ "shmmax": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMMAX)),
+ "shmmni": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMMNI)),
+ }),
+ "vm": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
+ "mmap_min_addr": newDentry(root, inoGen.NextIno(), 0444, &mmapMinAddrData{}),
+ "overcommit_memory": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0\n")),
+ }),
+ "net": newSysNetDir(root, inoGen, k),
+ })
+}
+
+// newSysNetDir returns the dentry corresponding to /proc/sys/net directory.
+func newSysNetDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *kernfs.Dentry {
+ var contents map[string]*kernfs.Dentry
+
+ // TODO(gvisor.dev/issue/1833): Support for using the network stack in the
+ // network namespace of the calling process.
+ if stack := k.RootNetworkNamespace().Stack(); stack != nil {
+ contents = map[string]*kernfs.Dentry{
+ "ipv4": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
+ "tcp_sack": newDentry(root, inoGen.NextIno(), 0644, &tcpSackData{stack: stack}),
+
+ // The following files are simple stubs until they are implemented in
+ // netstack, most of these files are configuration related. We use the
+ // value closest to the actual netstack behavior or any empty file, all
+ // of these files will have mode 0444 (read-only for all users).
+ "ip_local_port_range": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("16000 65535")),
+ "ip_local_reserved_ports": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")),
+ "ipfrag_time": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("30")),
+ "ip_nonlocal_bind": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "ip_no_pmtu_disc": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
+
+ // tcp_allowed_congestion_control tell the user what they are able to
+ // do as an unprivledged process so we leave it empty.
+ "tcp_allowed_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")),
+ "tcp_available_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("reno")),
+ "tcp_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("reno")),
+
+ // Many of the following stub files are features netstack doesn't
+ // support. The unsupported features return "0" to indicate they are
+ // disabled.
+ "tcp_base_mss": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1280")),
+ "tcp_dsack": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_early_retrans": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_fack": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_fastopen": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_fastopen_key": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")),
+ "tcp_invalid_ratelimit": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_keepalive_intvl": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_keepalive_probes": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_keepalive_time": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("7200")),
+ "tcp_mtu_probing": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_no_metrics_save": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
+ "tcp_probe_interval": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_probe_threshold": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "tcp_retries1": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("3")),
+ "tcp_retries2": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("15")),
+ "tcp_rfc1337": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
+ "tcp_slow_start_after_idle": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
+ "tcp_synack_retries": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("5")),
+ "tcp_syn_retries": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("3")),
+ "tcp_timestamps": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
+ }),
+ "core": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
+ "default_qdisc": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("pfifo_fast")),
+ "message_burst": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("10")),
+ "message_cost": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("5")),
+ "optmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
+ "rmem_default": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")),
+ "rmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")),
+ "somaxconn": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("128")),
+ "wmem_default": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")),
+ "wmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")),
+ }),
+ }
+ }
+
+ return kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
+ "net": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, contents),
+ })
+}
+
+// mmapMinAddrData implements vfs.DynamicBytesSource for
+// /proc/sys/vm/mmap_min_addr.
+//
+// +stateify savable
+type mmapMinAddrData struct {
+ kernfs.DynamicBytesFile
+
+ k *kernel.Kernel
+}
+
+var _ dynamicInode = (*mmapMinAddrData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (d *mmapMinAddrData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ fmt.Fprintf(buf, "%d\n", d.k.Platform.MinUserAddress())
+ return nil
+}
+
+// hostnameData implements vfs.DynamicBytesSource for /proc/sys/kernel/hostname.
+//
+// +stateify savable
+type hostnameData struct {
+ kernfs.DynamicBytesFile
+}
+
+var _ dynamicInode = (*hostnameData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.Generate.
+func (*hostnameData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ utsns := kernel.UTSNamespaceFromContext(ctx)
+ buf.WriteString(utsns.HostName())
+ buf.WriteString("\n")
+ return nil
+}
+
+// tcpSackData implements vfs.WritableDynamicBytesSource for
+// /proc/sys/net/tcp_sack.
+//
+// +stateify savable
+type tcpSackData struct {
+ kernfs.DynamicBytesFile
+
+ stack inet.Stack `state:"wait"`
+ enabled *bool
+}
+
+var _ vfs.WritableDynamicBytesSource = (*tcpSackData)(nil)
+
+// Generate implements vfs.DynamicBytesSource.
+func (d *tcpSackData) Generate(ctx context.Context, buf *bytes.Buffer) error {
+ if d.enabled == nil {
+ sack, err := d.stack.TCPSACKEnabled()
+ if err != nil {
+ return err
+ }
+ d.enabled = &sack
+ }
+
+ val := "0\n"
+ if *d.enabled {
+ // Technically, this is not quite compatible with Linux. Linux stores these
+ // as an integer, so if you write "2" into tcp_sack, you should get 2 back.
+ // Tough luck.
+ val = "1\n"
+ }
+ buf.WriteString(val)
+ return nil
+}
+
+func (d *tcpSackData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
+ if offset != 0 {
+ // No need to handle partial writes thus far.
+ return 0, syserror.EINVAL
+ }
+ if src.NumBytes() == 0 {
+ return 0, nil
+ }
+
+ // Limit the amount of memory allocated.
+ src = src.TakeFirst(usermem.PageSize - 1)
+
+ var v int32
+ n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
+ if err != nil {
+ return n, err
+ }
+ if d.enabled == nil {
+ d.enabled = new(bool)
+ }
+ *d.enabled = v != 0
+ return n, d.stack.SetTCPSACKEnabled(*d.enabled)
+}