diff options
Diffstat (limited to 'pkg/sentry/fsimpl/proc')
-rw-r--r-- | pkg/sentry/fsimpl/proc/BUILD | 66 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/filesystem.go | 100 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/subtasks.go | 135 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/task.go | 234 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/task_fds.go | 302 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/task_files.go | 761 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/task_net.go | 808 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/tasks.go | 257 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/tasks_files.go | 380 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/tasks_sys.go | 211 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/tasks_sys_test.go | 78 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/tasks_test.go | 505 |
12 files changed, 0 insertions, 3837 deletions
diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD deleted file mode 100644 index 17c1342b5..000000000 --- a/pkg/sentry/fsimpl/proc/BUILD +++ /dev/null @@ -1,66 +0,0 @@ -load("//tools:defs.bzl", "go_library", "go_test") - -licenses(["notice"]) - -go_library( - name = "proc", - srcs = [ - "filesystem.go", - "subtasks.go", - "task.go", - "task_fds.go", - "task_files.go", - "task_net.go", - "tasks.go", - "tasks_files.go", - "tasks_sys.go", - ], - visibility = ["//pkg/sentry:internal"], - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/log", - "//pkg/refs", - "//pkg/safemem", - "//pkg/sentry/fsbridge", - "//pkg/sentry/fsimpl/kernfs", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/kernel/time", - "//pkg/sentry/limits", - "//pkg/sentry/mm", - "//pkg/sentry/socket", - "//pkg/sentry/socket/unix", - "//pkg/sentry/socket/unix/transport", - "//pkg/sentry/usage", - "//pkg/sentry/vfs", - "//pkg/syserror", - "//pkg/tcpip/header", - "//pkg/usermem", - ], -) - -go_test( - name = "proc_test", - size = "small", - srcs = [ - "tasks_sys_test.go", - "tasks_test.go", - ], - library = ":proc", - deps = [ - "//pkg/abi/linux", - "//pkg/context", - "//pkg/fspath", - "//pkg/sentry/contexttest", - "//pkg/sentry/fsimpl/testutil", - "//pkg/sentry/fsimpl/tmpfs", - "//pkg/sentry/inet", - "//pkg/sentry/kernel", - "//pkg/sentry/kernel/auth", - "//pkg/sentry/vfs", - "//pkg/syserror", - "//pkg/usermem", - ], -) diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go deleted file mode 100644 index 104fc9030..000000000 --- a/pkg/sentry/fsimpl/proc/filesystem.go +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package proc implements a partial in-memory file system for procfs. -package proc - -import ( - "fmt" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" -) - -// Name is the default filesystem name. -const Name = "proc" - -// FilesystemType is the factory class for procfs. -// -// +stateify savable -type FilesystemType struct{} - -var _ vfs.FilesystemType = (*FilesystemType)(nil) - -// Name implements vfs.FilesystemType.Name. -func (FilesystemType) Name() string { - return Name -} - -// GetFilesystem implements vfs.FilesystemType.GetFilesystem. -func (ft FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) { - k := kernel.KernelFromContext(ctx) - if k == nil { - return nil, nil, fmt.Errorf("procfs requires a kernel") - } - pidns := kernel.PIDNamespaceFromContext(ctx) - if pidns == nil { - return nil, nil, fmt.Errorf("procfs requires a PID namespace") - } - - procfs := &kernfs.Filesystem{} - procfs.VFSFilesystem().Init(vfsObj, &ft, procfs) - - var cgroups map[string]string - if opts.InternalData != nil { - data := opts.InternalData.(*InternalData) - cgroups = data.Cgroups - } - - _, dentry := newTasksInode(procfs, k, pidns, cgroups) - return procfs.VFSFilesystem(), dentry.VFSDentry(), nil -} - -// dynamicInode is an overfitted interface for common Inodes with -// dynamicByteSource types used in procfs. -type dynamicInode interface { - kernfs.Inode - vfs.DynamicBytesSource - - Init(creds *auth.Credentials, ino uint64, data vfs.DynamicBytesSource, perm linux.FileMode) -} - -func newDentry(creds *auth.Credentials, ino uint64, perm linux.FileMode, inode dynamicInode) *kernfs.Dentry { - inode.Init(creds, ino, inode, perm) - - d := &kernfs.Dentry{} - d.Init(inode) - return d -} - -type staticFile struct { - kernfs.DynamicBytesFile - vfs.StaticData -} - -var _ dynamicInode = (*staticFile)(nil) - -func newStaticFile(data string) *staticFile { - return &staticFile{StaticData: vfs.StaticData{Data: data}} -} - -// InternalData contains internal data passed in to the procfs mount via -// vfs.GetFilesystemOptions.InternalData. -type InternalData struct { - Cgroups map[string]string -} diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go deleted file mode 100644 index a21313666..000000000 --- a/pkg/sentry/fsimpl/proc/subtasks.go +++ /dev/null @@ -1,135 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "sort" - "strconv" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// subtasksInode represents the inode for /proc/[pid]/task/ directory. -// -// +stateify savable -type subtasksInode struct { - kernfs.InodeNotSymlink - kernfs.InodeDirectoryNoNewChildren - kernfs.InodeAttrs - kernfs.OrderedChildren - kernfs.AlwaysValid - - task *kernel.Task - pidns *kernel.PIDNamespace - inoGen InoGenerator - cgroupControllers map[string]string -} - -var _ kernfs.Inode = (*subtasksInode)(nil) - -func newSubtasks(task *kernel.Task, pidns *kernel.PIDNamespace, inoGen InoGenerator, cgroupControllers map[string]string) *kernfs.Dentry { - subInode := &subtasksInode{ - task: task, - pidns: pidns, - inoGen: inoGen, - cgroupControllers: cgroupControllers, - } - // Note: credentials are overridden by taskOwnedInode. - subInode.InodeAttrs.Init(task.Credentials(), inoGen.NextIno(), linux.ModeDirectory|0555) - subInode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) - - inode := &taskOwnedInode{Inode: subInode, owner: task} - dentry := &kernfs.Dentry{} - dentry.Init(inode) - - return dentry -} - -// Lookup implements kernfs.inodeDynamicLookup. -func (i *subtasksInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) { - tid, err := strconv.ParseUint(name, 10, 32) - if err != nil { - return nil, syserror.ENOENT - } - - subTask := i.pidns.TaskWithID(kernel.ThreadID(tid)) - if subTask == nil { - return nil, syserror.ENOENT - } - if subTask.ThreadGroup() != i.task.ThreadGroup() { - return nil, syserror.ENOENT - } - - subTaskDentry := newTaskInode(i.inoGen, subTask, i.pidns, false, i.cgroupControllers) - return subTaskDentry.VFSDentry(), nil -} - -// IterDirents implements kernfs.inodeDynamicLookup. -func (i *subtasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) { - tasks := i.task.ThreadGroup().MemberIDs(i.pidns) - if len(tasks) == 0 { - return offset, syserror.ENOENT - } - - tids := make([]int, 0, len(tasks)) - for _, tid := range tasks { - tids = append(tids, int(tid)) - } - - sort.Ints(tids) - for _, tid := range tids[relOffset:] { - dirent := vfs.Dirent{ - Name: strconv.FormatUint(uint64(tid), 10), - Type: linux.DT_DIR, - Ino: i.inoGen.NextIno(), - NextOff: offset + 1, - } - if err := cb.Handle(dirent); err != nil { - return offset, err - } - offset++ - } - return offset, nil -} - -// Open implements kernfs.Inode. -func (i *subtasksInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd := &kernfs.GenericDirectoryFD{} - fd.Init(rp.Mount(), vfsd, &i.OrderedChildren, &opts) - return fd.VFSFileDescription(), nil -} - -// Stat implements kernfs.Inode. -func (i *subtasksInode) Stat(vsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { - stat, err := i.InodeAttrs.Stat(vsfs, opts) - if err != nil { - return linux.Statx{}, err - } - if opts.Mask&linux.STATX_NLINK != 0 { - stat.Nlink += uint32(i.task.ThreadGroup().Count()) - } - return stat, nil -} - -// SetStat implements Inode.SetStat not allowing inode attributes to be changed. -func (*subtasksInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { - return syserror.EPERM -} diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go deleted file mode 100644 index 888afc0fd..000000000 --- a/pkg/sentry/fsimpl/proc/task.go +++ /dev/null @@ -1,234 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -// taskInode represents the inode for /proc/PID/ directory. -// -// +stateify savable -type taskInode struct { - kernfs.InodeNotSymlink - kernfs.InodeDirectoryNoNewChildren - kernfs.InodeNoDynamicLookup - kernfs.InodeAttrs - kernfs.OrderedChildren - - task *kernel.Task -} - -var _ kernfs.Inode = (*taskInode)(nil) - -func newTaskInode(inoGen InoGenerator, task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, cgroupControllers map[string]string) *kernfs.Dentry { - contents := map[string]*kernfs.Dentry{ - "auxv": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &auxvData{task: task}), - "cmdline": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &cmdlineData{task: task, arg: cmdlineDataArg}), - "comm": newComm(task, inoGen.NextIno(), 0444), - "environ": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &cmdlineData{task: task, arg: environDataArg}), - "exe": newExeSymlink(task, inoGen.NextIno()), - "fd": newFDDirInode(task, inoGen), - "fdinfo": newFDInfoDirInode(task, inoGen), - "gid_map": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &idMapData{task: task, gids: true}), - "io": newTaskOwnedFile(task, inoGen.NextIno(), 0400, newIO(task, isThreadGroup)), - "maps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &mapsData{task: task}), - "mountinfo": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &mountInfoData{task: task}), - "mounts": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &mountsData{task: task}), - "net": newTaskNetDir(task, inoGen), - "ns": newTaskOwnedDir(task, inoGen.NextIno(), 0511, map[string]*kernfs.Dentry{ - "net": newNamespaceSymlink(task, inoGen.NextIno(), "net"), - "pid": newNamespaceSymlink(task, inoGen.NextIno(), "pid"), - "user": newNamespaceSymlink(task, inoGen.NextIno(), "user"), - }), - "oom_score": newTaskOwnedFile(task, inoGen.NextIno(), 0444, newStaticFile("0\n")), - "oom_score_adj": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &oomScoreAdj{task: task}), - "smaps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &smapsData{task: task}), - "stat": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}), - "statm": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statmData{task: task}), - "status": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statusData{task: task, pidns: pidns}), - "uid_map": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &idMapData{task: task, gids: false}), - } - if isThreadGroup { - contents["task"] = newSubtasks(task, pidns, inoGen, cgroupControllers) - } - if len(cgroupControllers) > 0 { - contents["cgroup"] = newTaskOwnedFile(task, inoGen.NextIno(), 0444, newCgroupData(cgroupControllers)) - } - - taskInode := &taskInode{task: task} - // Note: credentials are overridden by taskOwnedInode. - taskInode.InodeAttrs.Init(task.Credentials(), inoGen.NextIno(), linux.ModeDirectory|0555) - - inode := &taskOwnedInode{Inode: taskInode, owner: task} - dentry := &kernfs.Dentry{} - dentry.Init(inode) - - taskInode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) - links := taskInode.OrderedChildren.Populate(dentry, contents) - taskInode.IncLinks(links) - - return dentry -} - -// Valid implements kernfs.inodeDynamicLookup. This inode remains valid as long -// as the task is still running. When it's dead, another tasks with the same -// PID could replace it. -func (i *taskInode) Valid(ctx context.Context) bool { - return i.task.ExitState() != kernel.TaskExitDead -} - -// Open implements kernfs.Inode. -func (i *taskInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd := &kernfs.GenericDirectoryFD{} - fd.Init(rp.Mount(), vfsd, &i.OrderedChildren, &opts) - return fd.VFSFileDescription(), nil -} - -// SetStat implements Inode.SetStat not allowing inode attributes to be changed. -func (*taskInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { - return syserror.EPERM -} - -// taskOwnedInode implements kernfs.Inode and overrides inode owner with task -// effective user and group. -type taskOwnedInode struct { - kernfs.Inode - - // owner is the task that owns this inode. - owner *kernel.Task -} - -var _ kernfs.Inode = (*taskOwnedInode)(nil) - -func newTaskOwnedFile(task *kernel.Task, ino uint64, perm linux.FileMode, inode dynamicInode) *kernfs.Dentry { - // Note: credentials are overridden by taskOwnedInode. - inode.Init(task.Credentials(), ino, inode, perm) - - taskInode := &taskOwnedInode{Inode: inode, owner: task} - d := &kernfs.Dentry{} - d.Init(taskInode) - return d -} - -func newTaskOwnedDir(task *kernel.Task, ino uint64, perm linux.FileMode, children map[string]*kernfs.Dentry) *kernfs.Dentry { - dir := &kernfs.StaticDirectory{} - - // Note: credentials are overridden by taskOwnedInode. - dir.Init(task.Credentials(), ino, perm) - - inode := &taskOwnedInode{Inode: dir, owner: task} - d := &kernfs.Dentry{} - d.Init(inode) - - dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) - links := dir.OrderedChildren.Populate(d, children) - dir.IncLinks(links) - - return d -} - -// Stat implements kernfs.Inode. -func (i *taskOwnedInode) Stat(fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { - stat, err := i.Inode.Stat(fs, opts) - if err != nil { - return linux.Statx{}, err - } - if opts.Mask&(linux.STATX_UID|linux.STATX_GID) != 0 { - uid, gid := i.getOwner(linux.FileMode(stat.Mode)) - if opts.Mask&linux.STATX_UID != 0 { - stat.UID = uint32(uid) - } - if opts.Mask&linux.STATX_GID != 0 { - stat.GID = uint32(gid) - } - } - return stat, nil -} - -// CheckPermissions implements kernfs.Inode. -func (i *taskOwnedInode) CheckPermissions(_ context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error { - mode := i.Mode() - uid, gid := i.getOwner(mode) - return vfs.GenericCheckPermissions(creds, ats, mode, uid, gid) -} - -func (i *taskOwnedInode) getOwner(mode linux.FileMode) (auth.KUID, auth.KGID) { - // By default, set the task owner as the file owner. - creds := i.owner.Credentials() - uid := creds.EffectiveKUID - gid := creds.EffectiveKGID - - // Linux doesn't apply dumpability adjustments to world readable/executable - // directories so that applications can stat /proc/PID to determine the - // effective UID of a process. See fs/proc/base.c:task_dump_owner. - if mode.FileType() == linux.ModeDirectory && mode.Permissions() == 0555 { - return uid, gid - } - - // If the task is not dumpable, then root (in the namespace preferred) - // owns the file. - m := getMM(i.owner) - if m == nil { - return auth.RootKUID, auth.RootKGID - } - if m.Dumpability() != mm.UserDumpable { - uid = auth.RootKUID - if kuid := creds.UserNamespace.MapToKUID(auth.RootUID); kuid.Ok() { - uid = kuid - } - gid = auth.RootKGID - if kgid := creds.UserNamespace.MapToKGID(auth.RootGID); kgid.Ok() { - gid = kgid - } - } - return uid, gid -} - -func newIO(t *kernel.Task, isThreadGroup bool) *ioData { - if isThreadGroup { - return &ioData{ioUsage: t.ThreadGroup()} - } - return &ioData{ioUsage: t} -} - -// newCgroupData creates inode that shows cgroup information. -// From man 7 cgroups: "For each cgroup hierarchy of which the process is a -// member, there is one entry containing three colon-separated fields: -// hierarchy-ID:controller-list:cgroup-path" -func newCgroupData(controllers map[string]string) dynamicInode { - var buf bytes.Buffer - - // The hierarchy ids must be positive integers (for cgroup v1), but the - // exact number does not matter, so long as they are unique. We can - // just use a counter, but since linux sorts this file in descending - // order, we must count down to preserve this behavior. - i := len(controllers) - for name, dir := range controllers { - fmt.Fprintf(&buf, "%d:%s:%s\n", i, name, dir) - i-- - } - return newStaticFile(buf.String()) -} diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go deleted file mode 100644 index 046265eca..000000000 --- a/pkg/sentry/fsimpl/proc/task_fds.go +++ /dev/null @@ -1,302 +0,0 @@ -// Copyright 2020 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - "sort" - "strconv" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/refs" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -func getTaskFD(t *kernel.Task, fd int32) (*vfs.FileDescription, kernel.FDFlags) { - var ( - file *vfs.FileDescription - flags kernel.FDFlags - ) - t.WithMuLocked(func(t *kernel.Task) { - if fdt := t.FDTable(); fdt != nil { - file, flags = fdt.GetVFS2(fd) - } - }) - return file, flags -} - -func taskFDExists(t *kernel.Task, fd int32) bool { - file, _ := getTaskFD(t, fd) - if file == nil { - return false - } - file.DecRef() - return true -} - -type fdDir struct { - inoGen InoGenerator - task *kernel.Task - - // When produceSymlinks is set, dirents produces for the FDs are reported - // as symlink. Otherwise, they are reported as regular files. - produceSymlink bool -} - -// IterDirents implements kernfs.inodeDynamicLookup. -func (i *fdDir) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, absOffset, relOffset int64) (int64, error) { - var fds []int32 - i.task.WithMuLocked(func(t *kernel.Task) { - if fdTable := t.FDTable(); fdTable != nil { - fds = fdTable.GetFDs() - } - }) - - offset := absOffset + relOffset - typ := uint8(linux.DT_REG) - if i.produceSymlink { - typ = linux.DT_LNK - } - - // Find the appropriate starting point. - idx := sort.Search(len(fds), func(i int) bool { return fds[i] >= int32(relOffset) }) - if idx >= len(fds) { - return offset, nil - } - for _, fd := range fds[idx:] { - dirent := vfs.Dirent{ - Name: strconv.FormatUint(uint64(fd), 10), - Type: typ, - Ino: i.inoGen.NextIno(), - NextOff: offset + 1, - } - if err := cb.Handle(dirent); err != nil { - return offset, err - } - offset++ - } - return offset, nil -} - -// fdDirInode represents the inode for /proc/[pid]/fd directory. -// -// +stateify savable -type fdDirInode struct { - kernfs.InodeNotSymlink - kernfs.InodeDirectoryNoNewChildren - kernfs.InodeAttrs - kernfs.OrderedChildren - kernfs.AlwaysValid - fdDir -} - -var _ kernfs.Inode = (*fdDirInode)(nil) - -func newFDDirInode(task *kernel.Task, inoGen InoGenerator) *kernfs.Dentry { - inode := &fdDirInode{ - fdDir: fdDir{ - inoGen: inoGen, - task: task, - produceSymlink: true, - }, - } - inode.InodeAttrs.Init(task.Credentials(), inoGen.NextIno(), linux.ModeDirectory|0555) - - dentry := &kernfs.Dentry{} - dentry.Init(inode) - inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) - - return dentry -} - -// Lookup implements kernfs.inodeDynamicLookup. -func (i *fdDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) { - fdInt, err := strconv.ParseInt(name, 10, 32) - if err != nil { - return nil, syserror.ENOENT - } - fd := int32(fdInt) - if !taskFDExists(i.task, fd) { - return nil, syserror.ENOENT - } - taskDentry := newFDSymlink(i.task, fd, i.inoGen.NextIno()) - return taskDentry.VFSDentry(), nil -} - -// Open implements kernfs.Inode. -func (i *fdDirInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd := &kernfs.GenericDirectoryFD{} - fd.Init(rp.Mount(), vfsd, &i.OrderedChildren, &opts) - return fd.VFSFileDescription(), nil -} - -// CheckPermissions implements kernfs.Inode. -// -// This is to match Linux, which uses a special permission handler to guarantee -// that a process can still access /proc/self/fd after it has executed -// setuid. See fs/proc/fd.c:proc_fd_permission. -func (i *fdDirInode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error { - err := i.InodeAttrs.CheckPermissions(ctx, creds, ats) - if err == nil { - // Access granted, no extra check needed. - return nil - } - if t := kernel.TaskFromContext(ctx); t != nil { - // Allow access if the task trying to access it is in the thread group - // corresponding to this directory. - if i.task.ThreadGroup() == t.ThreadGroup() { - // Access granted (overridden). - return nil - } - } - return err -} - -// fdSymlink is an symlink for the /proc/[pid]/fd/[fd] file. -// -// +stateify savable -type fdSymlink struct { - kernfs.InodeAttrs - kernfs.InodeNoopRefCount - kernfs.InodeSymlink - - task *kernel.Task - fd int32 -} - -var _ kernfs.Inode = (*fdSymlink)(nil) - -func newFDSymlink(task *kernel.Task, fd int32, ino uint64) *kernfs.Dentry { - inode := &fdSymlink{ - task: task, - fd: fd, - } - inode.Init(task.Credentials(), ino, linux.ModeSymlink|0777) - - d := &kernfs.Dentry{} - d.Init(inode) - return d -} - -func (s *fdSymlink) Readlink(ctx context.Context) (string, error) { - file, _ := getTaskFD(s.task, s.fd) - if file == nil { - return "", syserror.ENOENT - } - defer file.DecRef() - root := vfs.RootFromContext(ctx) - defer root.DecRef() - return s.task.Kernel().VFS().PathnameWithDeleted(ctx, root, file.VirtualDentry()) -} - -func (s *fdSymlink) Getlink(ctx context.Context) (vfs.VirtualDentry, string, error) { - file, _ := getTaskFD(s.task, s.fd) - if file == nil { - return vfs.VirtualDentry{}, "", syserror.ENOENT - } - defer file.DecRef() - vd := file.VirtualDentry() - vd.IncRef() - return vd, "", nil -} - -// fdInfoDirInode represents the inode for /proc/[pid]/fdinfo directory. -// -// +stateify savable -type fdInfoDirInode struct { - kernfs.InodeNotSymlink - kernfs.InodeDirectoryNoNewChildren - kernfs.InodeAttrs - kernfs.OrderedChildren - kernfs.AlwaysValid - fdDir -} - -var _ kernfs.Inode = (*fdInfoDirInode)(nil) - -func newFDInfoDirInode(task *kernel.Task, inoGen InoGenerator) *kernfs.Dentry { - inode := &fdInfoDirInode{ - fdDir: fdDir{ - inoGen: inoGen, - task: task, - }, - } - inode.InodeAttrs.Init(task.Credentials(), inoGen.NextIno(), linux.ModeDirectory|0555) - - dentry := &kernfs.Dentry{} - dentry.Init(inode) - inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) - - return dentry -} - -// Lookup implements kernfs.inodeDynamicLookup. -func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) { - fdInt, err := strconv.ParseInt(name, 10, 32) - if err != nil { - return nil, syserror.ENOENT - } - fd := int32(fdInt) - if !taskFDExists(i.task, fd) { - return nil, syserror.ENOENT - } - data := &fdInfoData{ - task: i.task, - fd: fd, - } - dentry := newTaskOwnedFile(i.task, i.inoGen.NextIno(), 0444, data) - return dentry.VFSDentry(), nil -} - -// Open implements kernfs.Inode. -func (i *fdInfoDirInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd := &kernfs.GenericDirectoryFD{} - fd.Init(rp.Mount(), vfsd, &i.OrderedChildren, &opts) - return fd.VFSFileDescription(), nil -} - -// fdInfoData implements vfs.DynamicBytesSource for /proc/[pid]/fdinfo/[fd]. -// -// +stateify savable -type fdInfoData struct { - kernfs.DynamicBytesFile - refs.AtomicRefCount - - task *kernel.Task - fd int32 -} - -var _ dynamicInode = (*fdInfoData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *fdInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { - file, descriptorFlags := getTaskFD(d.task, d.fd) - if file == nil { - return syserror.ENOENT - } - defer file.DecRef() - // TODO(b/121266871): Include pos, locks, and other data. For now we only - // have flags. - // See https://www.kernel.org/doc/Documentation/filesystems/proc.txt - flags := uint(file.StatusFlags()) | descriptorFlags.ToLinuxFileFlags() - fmt.Fprintf(buf, "flags:\t0%o\n", flags) - return nil -} diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go deleted file mode 100644 index 2c6f8bdfc..000000000 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ /dev/null @@ -1,761 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - "io" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/safemem" - "gvisor.dev/gvisor/pkg/sentry/fsbridge" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/limits" - "gvisor.dev/gvisor/pkg/sentry/mm" - "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -// mm gets the kernel task's MemoryManager. No additional reference is taken on -// mm here. This is safe because MemoryManager.destroy is required to leave the -// MemoryManager in a state where it's still usable as a DynamicBytesSource. -func getMM(task *kernel.Task) *mm.MemoryManager { - var tmm *mm.MemoryManager - task.WithMuLocked(func(t *kernel.Task) { - if mm := t.MemoryManager(); mm != nil { - tmm = mm - } - }) - return tmm -} - -// getMMIncRef returns t's MemoryManager. If getMMIncRef succeeds, the -// MemoryManager's users count is incremented, and must be decremented by the -// caller when it is no longer in use. -func getMMIncRef(task *kernel.Task) (*mm.MemoryManager, error) { - if task.ExitState() == kernel.TaskExitDead { - return nil, syserror.ESRCH - } - var m *mm.MemoryManager - task.WithMuLocked(func(t *kernel.Task) { - m = t.MemoryManager() - }) - if m == nil || !m.IncUsers() { - return nil, io.EOF - } - return m, nil -} - -func checkTaskState(t *kernel.Task) error { - switch t.ExitState() { - case kernel.TaskExitZombie: - return syserror.EACCES - case kernel.TaskExitDead: - return syserror.ESRCH - } - return nil -} - -type bufferWriter struct { - buf *bytes.Buffer -} - -// WriteFromBlocks writes up to srcs.NumBytes() bytes from srcs and returns -// the number of bytes written. It may return a partial write without an -// error (i.e. (n, nil) where 0 < n < srcs.NumBytes()). It should not -// return a full write with an error (i.e. srcs.NumBytes(), err) where err -// != nil). -func (w *bufferWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) { - written := srcs.NumBytes() - for !srcs.IsEmpty() { - w.buf.Write(srcs.Head().ToSlice()) - srcs = srcs.Tail() - } - return written, nil -} - -// auxvData implements vfs.DynamicBytesSource for /proc/[pid]/auxv. -// -// +stateify savable -type auxvData struct { - kernfs.DynamicBytesFile - - task *kernel.Task -} - -var _ dynamicInode = (*auxvData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *auxvData) Generate(ctx context.Context, buf *bytes.Buffer) error { - m, err := getMMIncRef(d.task) - if err != nil { - return err - } - defer m.DecUsers(ctx) - - // Space for buffer with AT_NULL (0) terminator at the end. - auxv := m.Auxv() - buf.Grow((len(auxv) + 1) * 16) - for _, e := range auxv { - var tmp [8]byte - usermem.ByteOrder.PutUint64(tmp[:], e.Key) - buf.Write(tmp[:]) - - usermem.ByteOrder.PutUint64(tmp[:], uint64(e.Value)) - buf.Write(tmp[:]) - } - return nil -} - -// execArgType enumerates the types of exec arguments that are exposed through -// proc. -type execArgType int - -const ( - cmdlineDataArg execArgType = iota - environDataArg -) - -// cmdlineData implements vfs.DynamicBytesSource for /proc/[pid]/cmdline. -// -// +stateify savable -type cmdlineData struct { - kernfs.DynamicBytesFile - - task *kernel.Task - - // arg is the type of exec argument this file contains. - arg execArgType -} - -var _ dynamicInode = (*cmdlineData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error { - m, err := getMMIncRef(d.task) - if err != nil { - return err - } - defer m.DecUsers(ctx) - - // Figure out the bounds of the exec arg we are trying to read. - var ar usermem.AddrRange - switch d.arg { - case cmdlineDataArg: - ar = usermem.AddrRange{ - Start: m.ArgvStart(), - End: m.ArgvEnd(), - } - case environDataArg: - ar = usermem.AddrRange{ - Start: m.EnvvStart(), - End: m.EnvvEnd(), - } - default: - panic(fmt.Sprintf("unknown exec arg type %v", d.arg)) - } - if ar.Start == 0 || ar.End == 0 { - // Don't attempt to read before the start/end are set up. - return io.EOF - } - - // N.B. Technically this should be usermem.IOOpts.IgnorePermissions = true - // until Linux 4.9 (272ddc8b3735 "proc: don't use FOLL_FORCE for reading - // cmdline and environment"). - writer := &bufferWriter{buf: buf} - if n, err := m.CopyInTo(ctx, usermem.AddrRangeSeqOf(ar), writer, usermem.IOOpts{}); n == 0 || err != nil { - // Nothing to copy or something went wrong. - return err - } - - // On Linux, if the NULL byte at the end of the argument vector has been - // overwritten, it continues reading the environment vector as part of - // the argument vector. - if d.arg == cmdlineDataArg && buf.Bytes()[buf.Len()-1] != 0 { - if end := bytes.IndexByte(buf.Bytes(), 0); end != -1 { - // If we found a NULL character somewhere else in argv, truncate the - // return up to the NULL terminator (including it). - buf.Truncate(end) - return nil - } - - // There is no NULL terminator in the string, return into envp. - arEnvv := usermem.AddrRange{ - Start: m.EnvvStart(), - End: m.EnvvEnd(), - } - - // Upstream limits the returned amount to one page of slop. - // https://elixir.bootlin.com/linux/v4.20/source/fs/proc/base.c#L208 - // we'll return one page total between argv and envp because of the - // above page restrictions. - if buf.Len() >= usermem.PageSize { - // Returned at least one page already, nothing else to add. - return nil - } - remaining := usermem.PageSize - buf.Len() - if int(arEnvv.Length()) > remaining { - end, ok := arEnvv.Start.AddLength(uint64(remaining)) - if !ok { - return syserror.EFAULT - } - arEnvv.End = end - } - if _, err := m.CopyInTo(ctx, usermem.AddrRangeSeqOf(arEnvv), writer, usermem.IOOpts{}); err != nil { - return err - } - - // Linux will return envp up to and including the first NULL character, - // so find it. - if end := bytes.IndexByte(buf.Bytes()[ar.Length():], 0); end != -1 { - buf.Truncate(end) - } - } - - return nil -} - -// +stateify savable -type commInode struct { - kernfs.DynamicBytesFile - - task *kernel.Task -} - -func newComm(task *kernel.Task, ino uint64, perm linux.FileMode) *kernfs.Dentry { - inode := &commInode{task: task} - inode.DynamicBytesFile.Init(task.Credentials(), ino, &commData{task: task}, perm) - - d := &kernfs.Dentry{} - d.Init(inode) - return d -} - -func (i *commInode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error { - // This file can always be read or written by members of the same thread - // group. See fs/proc/base.c:proc_tid_comm_permission. - // - // N.B. This check is currently a no-op as we don't yet support writing and - // this file is world-readable anyways. - t := kernel.TaskFromContext(ctx) - if t != nil && t.ThreadGroup() == i.task.ThreadGroup() && !ats.MayExec() { - return nil - } - - return i.DynamicBytesFile.CheckPermissions(ctx, creds, ats) -} - -// commData implements vfs.DynamicBytesSource for /proc/[pid]/comm. -// -// +stateify savable -type commData struct { - kernfs.DynamicBytesFile - - task *kernel.Task -} - -var _ dynamicInode = (*commData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *commData) Generate(ctx context.Context, buf *bytes.Buffer) error { - buf.WriteString(d.task.Name()) - buf.WriteString("\n") - return nil -} - -// idMapData implements vfs.DynamicBytesSource for /proc/[pid]/{gid_map|uid_map}. -// -// +stateify savable -type idMapData struct { - kernfs.DynamicBytesFile - - task *kernel.Task - gids bool -} - -var _ dynamicInode = (*idMapData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *idMapData) Generate(ctx context.Context, buf *bytes.Buffer) error { - var entries []auth.IDMapEntry - if d.gids { - entries = d.task.UserNamespace().GIDMap() - } else { - entries = d.task.UserNamespace().UIDMap() - } - for _, e := range entries { - fmt.Fprintf(buf, "%10d %10d %10d\n", e.FirstID, e.FirstParentID, e.Length) - } - return nil -} - -// mapsData implements vfs.DynamicBytesSource for /proc/[pid]/maps. -// -// +stateify savable -type mapsData struct { - kernfs.DynamicBytesFile - - task *kernel.Task -} - -var _ dynamicInode = (*mapsData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *mapsData) Generate(ctx context.Context, buf *bytes.Buffer) error { - if mm := getMM(d.task); mm != nil { - mm.ReadMapsDataInto(ctx, buf) - } - return nil -} - -// smapsData implements vfs.DynamicBytesSource for /proc/[pid]/smaps. -// -// +stateify savable -type smapsData struct { - kernfs.DynamicBytesFile - - task *kernel.Task -} - -var _ dynamicInode = (*smapsData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *smapsData) Generate(ctx context.Context, buf *bytes.Buffer) error { - if mm := getMM(d.task); mm != nil { - mm.ReadSmapsDataInto(ctx, buf) - } - return nil -} - -// +stateify savable -type taskStatData struct { - kernfs.DynamicBytesFile - - task *kernel.Task - - // If tgstats is true, accumulate fault stats (not implemented) and CPU - // time across all tasks in t's thread group. - tgstats bool - - // pidns is the PID namespace associated with the proc filesystem that - // includes the file using this statData. - pidns *kernel.PIDNamespace -} - -var _ dynamicInode = (*taskStatData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (s *taskStatData) Generate(ctx context.Context, buf *bytes.Buffer) error { - fmt.Fprintf(buf, "%d ", s.pidns.IDOfTask(s.task)) - fmt.Fprintf(buf, "(%s) ", s.task.Name()) - fmt.Fprintf(buf, "%c ", s.task.StateStatus()[0]) - ppid := kernel.ThreadID(0) - if parent := s.task.Parent(); parent != nil { - ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup()) - } - fmt.Fprintf(buf, "%d ", ppid) - fmt.Fprintf(buf, "%d ", s.pidns.IDOfProcessGroup(s.task.ThreadGroup().ProcessGroup())) - fmt.Fprintf(buf, "%d ", s.pidns.IDOfSession(s.task.ThreadGroup().Session())) - fmt.Fprintf(buf, "0 0 " /* tty_nr tpgid */) - fmt.Fprintf(buf, "0 " /* flags */) - fmt.Fprintf(buf, "0 0 0 0 " /* minflt cminflt majflt cmajflt */) - var cputime usage.CPUStats - if s.tgstats { - cputime = s.task.ThreadGroup().CPUStats() - } else { - cputime = s.task.CPUStats() - } - fmt.Fprintf(buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime)) - cputime = s.task.ThreadGroup().JoinedChildCPUStats() - fmt.Fprintf(buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime)) - fmt.Fprintf(buf, "%d %d ", s.task.Priority(), s.task.Niceness()) - fmt.Fprintf(buf, "%d ", s.task.ThreadGroup().Count()) - - // itrealvalue. Since kernel 2.6.17, this field is no longer - // maintained, and is hard coded as 0. - fmt.Fprintf(buf, "0 ") - - // Start time is relative to boot time, expressed in clock ticks. - fmt.Fprintf(buf, "%d ", linux.ClockTFromDuration(s.task.StartTime().Sub(s.task.Kernel().Timekeeper().BootTime()))) - - var vss, rss uint64 - s.task.WithMuLocked(func(t *kernel.Task) { - if mm := t.MemoryManager(); mm != nil { - vss = mm.VirtualMemorySize() - rss = mm.ResidentSetSize() - } - }) - fmt.Fprintf(buf, "%d %d ", vss, rss/usermem.PageSize) - - // rsslim. - fmt.Fprintf(buf, "%d ", s.task.ThreadGroup().Limits().Get(limits.Rss).Cur) - - fmt.Fprintf(buf, "0 0 0 0 0 " /* startcode endcode startstack kstkesp kstkeip */) - fmt.Fprintf(buf, "0 0 0 0 0 " /* signal blocked sigignore sigcatch wchan */) - fmt.Fprintf(buf, "0 0 " /* nswap cnswap */) - terminationSignal := linux.Signal(0) - if s.task == s.task.ThreadGroup().Leader() { - terminationSignal = s.task.ThreadGroup().TerminationSignal() - } - fmt.Fprintf(buf, "%d ", terminationSignal) - fmt.Fprintf(buf, "0 0 0 " /* processor rt_priority policy */) - fmt.Fprintf(buf, "0 0 0 " /* delayacct_blkio_ticks guest_time cguest_time */) - fmt.Fprintf(buf, "0 0 0 0 0 0 0 " /* start_data end_data start_brk arg_start arg_end env_start env_end */) - fmt.Fprintf(buf, "0\n" /* exit_code */) - - return nil -} - -// statmData implements vfs.DynamicBytesSource for /proc/[pid]/statm. -// -// +stateify savable -type statmData struct { - kernfs.DynamicBytesFile - - task *kernel.Task -} - -var _ dynamicInode = (*statmData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (s *statmData) Generate(ctx context.Context, buf *bytes.Buffer) error { - var vss, rss uint64 - s.task.WithMuLocked(func(t *kernel.Task) { - if mm := t.MemoryManager(); mm != nil { - vss = mm.VirtualMemorySize() - rss = mm.ResidentSetSize() - } - }) - - fmt.Fprintf(buf, "%d %d 0 0 0 0 0\n", vss/usermem.PageSize, rss/usermem.PageSize) - return nil -} - -// statusData implements vfs.DynamicBytesSource for /proc/[pid]/status. -// -// +stateify savable -type statusData struct { - kernfs.DynamicBytesFile - - task *kernel.Task - pidns *kernel.PIDNamespace -} - -var _ dynamicInode = (*statusData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (s *statusData) Generate(ctx context.Context, buf *bytes.Buffer) error { - fmt.Fprintf(buf, "Name:\t%s\n", s.task.Name()) - fmt.Fprintf(buf, "State:\t%s\n", s.task.StateStatus()) - fmt.Fprintf(buf, "Tgid:\t%d\n", s.pidns.IDOfThreadGroup(s.task.ThreadGroup())) - fmt.Fprintf(buf, "Pid:\t%d\n", s.pidns.IDOfTask(s.task)) - ppid := kernel.ThreadID(0) - if parent := s.task.Parent(); parent != nil { - ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup()) - } - fmt.Fprintf(buf, "PPid:\t%d\n", ppid) - tpid := kernel.ThreadID(0) - if tracer := s.task.Tracer(); tracer != nil { - tpid = s.pidns.IDOfTask(tracer) - } - fmt.Fprintf(buf, "TracerPid:\t%d\n", tpid) - var fds int - var vss, rss, data uint64 - s.task.WithMuLocked(func(t *kernel.Task) { - if fdTable := t.FDTable(); fdTable != nil { - fds = fdTable.Size() - } - if mm := t.MemoryManager(); mm != nil { - vss = mm.VirtualMemorySize() - rss = mm.ResidentSetSize() - data = mm.VirtualDataSize() - } - }) - fmt.Fprintf(buf, "FDSize:\t%d\n", fds) - fmt.Fprintf(buf, "VmSize:\t%d kB\n", vss>>10) - fmt.Fprintf(buf, "VmRSS:\t%d kB\n", rss>>10) - fmt.Fprintf(buf, "VmData:\t%d kB\n", data>>10) - fmt.Fprintf(buf, "Threads:\t%d\n", s.task.ThreadGroup().Count()) - creds := s.task.Credentials() - fmt.Fprintf(buf, "CapInh:\t%016x\n", creds.InheritableCaps) - fmt.Fprintf(buf, "CapPrm:\t%016x\n", creds.PermittedCaps) - fmt.Fprintf(buf, "CapEff:\t%016x\n", creds.EffectiveCaps) - fmt.Fprintf(buf, "CapBnd:\t%016x\n", creds.BoundingCaps) - fmt.Fprintf(buf, "Seccomp:\t%d\n", s.task.SeccompMode()) - // We unconditionally report a single NUMA node. See - // pkg/sentry/syscalls/linux/sys_mempolicy.go. - fmt.Fprintf(buf, "Mems_allowed:\t1\n") - fmt.Fprintf(buf, "Mems_allowed_list:\t0\n") - return nil -} - -// ioUsage is the /proc/[pid]/io and /proc/[pid]/task/[tid]/io data provider. -type ioUsage interface { - // IOUsage returns the io usage data. - IOUsage() *usage.IO -} - -// +stateify savable -type ioData struct { - kernfs.DynamicBytesFile - - ioUsage -} - -var _ dynamicInode = (*ioData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (i *ioData) Generate(ctx context.Context, buf *bytes.Buffer) error { - io := usage.IO{} - io.Accumulate(i.IOUsage()) - - fmt.Fprintf(buf, "char: %d\n", io.CharsRead) - fmt.Fprintf(buf, "wchar: %d\n", io.CharsWritten) - fmt.Fprintf(buf, "syscr: %d\n", io.ReadSyscalls) - fmt.Fprintf(buf, "syscw: %d\n", io.WriteSyscalls) - fmt.Fprintf(buf, "read_bytes: %d\n", io.BytesRead) - fmt.Fprintf(buf, "write_bytes: %d\n", io.BytesWritten) - fmt.Fprintf(buf, "cancelled_write_bytes: %d\n", io.BytesWriteCancelled) - return nil -} - -// oomScoreAdj is a stub of the /proc/<pid>/oom_score_adj file. -// -// +stateify savable -type oomScoreAdj struct { - kernfs.DynamicBytesFile - - task *kernel.Task -} - -var _ vfs.WritableDynamicBytesSource = (*oomScoreAdj)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (o *oomScoreAdj) Generate(ctx context.Context, buf *bytes.Buffer) error { - if o.task.ExitState() == kernel.TaskExitDead { - return syserror.ESRCH - } - fmt.Fprintf(buf, "%d\n", o.task.OOMScoreAdj()) - return nil -} - -// Write implements vfs.WritableDynamicBytesSource.Write. -func (o *oomScoreAdj) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { - if src.NumBytes() == 0 { - return 0, nil - } - - // Limit input size so as not to impact performance if input size is large. - src = src.TakeFirst(usermem.PageSize - 1) - - var v int32 - n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) - if err != nil { - return 0, err - } - - if o.task.ExitState() == kernel.TaskExitDead { - return 0, syserror.ESRCH - } - if err := o.task.SetOOMScoreAdj(v); err != nil { - return 0, err - } - - return n, nil -} - -// exeSymlink is an symlink for the /proc/[pid]/exe file. -// -// +stateify savable -type exeSymlink struct { - kernfs.InodeAttrs - kernfs.InodeNoopRefCount - kernfs.InodeSymlink - - task *kernel.Task -} - -var _ kernfs.Inode = (*exeSymlink)(nil) - -func newExeSymlink(task *kernel.Task, ino uint64) *kernfs.Dentry { - inode := &exeSymlink{task: task} - inode.Init(task.Credentials(), ino, linux.ModeSymlink|0777) - - d := &kernfs.Dentry{} - d.Init(inode) - return d -} - -// Readlink implements kernfs.Inode. -func (s *exeSymlink) Readlink(ctx context.Context) (string, error) { - if !kernel.ContextCanTrace(ctx, s.task, false) { - return "", syserror.EACCES - } - - // Pull out the executable for /proc/[pid]/exe. - exec, err := s.executable() - if err != nil { - return "", err - } - defer exec.DecRef() - - return exec.PathnameWithDeleted(ctx), nil -} - -// Getlink implements kernfs.Inode.Getlink. -func (s *exeSymlink) Getlink(ctx context.Context) (vfs.VirtualDentry, string, error) { - if !kernel.ContextCanTrace(ctx, s.task, false) { - return vfs.VirtualDentry{}, "", syserror.EACCES - } - - exec, err := s.executable() - if err != nil { - return vfs.VirtualDentry{}, "", err - } - defer exec.DecRef() - - vd := exec.(*fsbridge.VFSFile).FileDescription().VirtualDentry() - vd.IncRef() - return vd, "", nil -} - -func (s *exeSymlink) executable() (file fsbridge.File, err error) { - if err := checkTaskState(s.task); err != nil { - return nil, err - } - - s.task.WithMuLocked(func(t *kernel.Task) { - mm := t.MemoryManager() - if mm == nil { - err = syserror.EACCES - return - } - - // The MemoryManager may be destroyed, in which case - // MemoryManager.destroy will simply set the executable to nil - // (with locks held). - file = mm.Executable() - if file == nil { - err = syserror.ESRCH - } - }) - return -} - -// mountInfoData is used to implement /proc/[pid]/mountinfo. -// -// +stateify savable -type mountInfoData struct { - kernfs.DynamicBytesFile - - task *kernel.Task -} - -var _ dynamicInode = (*mountInfoData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (i *mountInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { - var fsctx *kernel.FSContext - i.task.WithMuLocked(func(t *kernel.Task) { - fsctx = t.FSContext() - }) - if fsctx == nil { - // The task has been destroyed. Nothing to show here. - return nil - } - rootDir := fsctx.RootDirectoryVFS2() - if !rootDir.Ok() { - // Root has been destroyed. Don't try to read mounts. - return nil - } - defer rootDir.DecRef() - i.task.Kernel().VFS().GenerateProcMountInfo(ctx, rootDir, buf) - return nil -} - -// mountsData is used to implement /proc/[pid]/mounts. -// -// +stateify savable -type mountsData struct { - kernfs.DynamicBytesFile - - task *kernel.Task -} - -var _ dynamicInode = (*mountsData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (i *mountsData) Generate(ctx context.Context, buf *bytes.Buffer) error { - var fsctx *kernel.FSContext - i.task.WithMuLocked(func(t *kernel.Task) { - fsctx = t.FSContext() - }) - if fsctx == nil { - // The task has been destroyed. Nothing to show here. - return nil - } - rootDir := fsctx.RootDirectoryVFS2() - if !rootDir.Ok() { - // Root has been destroyed. Don't try to read mounts. - return nil - } - defer rootDir.DecRef() - i.task.Kernel().VFS().GenerateProcMounts(ctx, rootDir, buf) - return nil -} - -type namespaceSymlink struct { - kernfs.StaticSymlink - - task *kernel.Task -} - -func newNamespaceSymlink(task *kernel.Task, ino uint64, ns string) *kernfs.Dentry { - // Namespace symlinks should contain the namespace name and the inode number - // for the namespace instance, so for example user:[123456]. We currently fake - // the inode number by sticking the symlink inode in its place. - target := fmt.Sprintf("%s:[%d]", ns, ino) - - inode := &namespaceSymlink{task: task} - // Note: credentials are overridden by taskOwnedInode. - inode.Init(task.Credentials(), ino, target) - - taskInode := &taskOwnedInode{Inode: inode, owner: task} - d := &kernfs.Dentry{} - d.Init(taskInode) - return d -} - -// Readlink implements Inode. -func (s *namespaceSymlink) Readlink(ctx context.Context) (string, error) { - if err := checkTaskState(s.task); err != nil { - return "", err - } - return s.StaticSymlink.Readlink(ctx) -} - -// Getlink implements Inode.Getlink. -func (s *namespaceSymlink) Getlink(ctx context.Context) (vfs.VirtualDentry, string, error) { - if err := checkTaskState(s.task); err != nil { - return vfs.VirtualDentry{}, "", err - } - return s.StaticSymlink.Getlink(ctx) -} diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go deleted file mode 100644 index 6595fcee6..000000000 --- a/pkg/sentry/fsimpl/proc/task_net.go +++ /dev/null @@ -1,808 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - "io" - "reflect" - "time" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/log" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/inet" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/socket" - "gvisor.dev/gvisor/pkg/sentry/socket/unix" - "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/tcpip/header" - "gvisor.dev/gvisor/pkg/usermem" -) - -func newTaskNetDir(task *kernel.Task, inoGen InoGenerator) *kernfs.Dentry { - k := task.Kernel() - pidns := task.PIDNamespace() - root := auth.NewRootCredentials(pidns.UserNamespace()) - - var contents map[string]*kernfs.Dentry - if stack := task.NetworkNamespace().Stack(); stack != nil { - const ( - arp = "IP address HW type Flags HW address Mask Device\n" - netlink = "sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode\n" - packet = "sk RefCnt Type Proto Iface R Rmem User Inode\n" - protocols = "protocol size sockets memory press maxhdr slab module cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n" - ptype = "Type Device Function\n" - upd6 = " sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n" - ) - psched := fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond)) - - // TODO(gvisor.dev/issue/1833): Make sure file contents reflect the task - // network namespace. - contents = map[string]*kernfs.Dentry{ - "dev": newDentry(root, inoGen.NextIno(), 0444, &netDevData{stack: stack}), - "snmp": newDentry(root, inoGen.NextIno(), 0444, &netSnmpData{stack: stack}), - - // The following files are simple stubs until they are implemented in - // netstack, if the file contains a header the stub is just the header - // otherwise it is an empty file. - "arp": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(arp)), - "netlink": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(netlink)), - "netstat": newDentry(root, inoGen.NextIno(), 0444, &netStatData{}), - "packet": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(packet)), - "protocols": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(protocols)), - - // Linux sets psched values to: nsec per usec, psched tick in ns, 1000000, - // high res timer ticks per sec (ClockGetres returns 1ns resolution). - "psched": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(psched)), - "ptype": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(ptype)), - "route": newDentry(root, inoGen.NextIno(), 0444, &netRouteData{stack: stack}), - "tcp": newDentry(root, inoGen.NextIno(), 0444, &netTCPData{kernel: k}), - "udp": newDentry(root, inoGen.NextIno(), 0444, &netUDPData{kernel: k}), - "unix": newDentry(root, inoGen.NextIno(), 0444, &netUnixData{kernel: k}), - } - - if stack.SupportsIPv6() { - contents["if_inet6"] = newDentry(root, inoGen.NextIno(), 0444, &ifinet6{stack: stack}) - contents["ipv6_route"] = newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")) - contents["tcp6"] = newDentry(root, inoGen.NextIno(), 0444, &netTCP6Data{kernel: k}) - contents["udp6"] = newDentry(root, inoGen.NextIno(), 0444, newStaticFile(upd6)) - } - } - - return newTaskOwnedDir(task, inoGen.NextIno(), 0555, contents) -} - -// ifinet6 implements vfs.DynamicBytesSource for /proc/net/if_inet6. -// -// +stateify savable -type ifinet6 struct { - kernfs.DynamicBytesFile - - stack inet.Stack -} - -var _ dynamicInode = (*ifinet6)(nil) - -func (n *ifinet6) contents() []string { - var lines []string - nics := n.stack.Interfaces() - for id, naddrs := range n.stack.InterfaceAddrs() { - nic, ok := nics[id] - if !ok { - // NIC was added after NICNames was called. We'll just ignore it. - continue - } - - for _, a := range naddrs { - // IPv6 only. - if a.Family != linux.AF_INET6 { - continue - } - - // Fields: - // IPv6 address displayed in 32 hexadecimal chars without colons - // Netlink device number (interface index) in hexadecimal (use nic id) - // Prefix length in hexadecimal - // Scope value (use 0) - // Interface flags - // Device name - lines = append(lines, fmt.Sprintf("%032x %02x %02x %02x %02x %8s\n", a.Addr, id, a.PrefixLen, 0, a.Flags, nic.Name)) - } - } - return lines -} - -// Generate implements vfs.DynamicBytesSource.Generate. -func (n *ifinet6) Generate(ctx context.Context, buf *bytes.Buffer) error { - for _, l := range n.contents() { - buf.WriteString(l) - } - return nil -} - -// netDevData implements vfs.DynamicBytesSource for /proc/net/dev. -// -// +stateify savable -type netDevData struct { - kernfs.DynamicBytesFile - - stack inet.Stack -} - -var _ dynamicInode = (*netDevData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (n *netDevData) Generate(ctx context.Context, buf *bytes.Buffer) error { - interfaces := n.stack.Interfaces() - buf.WriteString("Inter-| Receive | Transmit\n") - buf.WriteString(" face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n") - - for _, i := range interfaces { - // Implements the same format as - // net/core/net-procfs.c:dev_seq_printf_stats. - var stats inet.StatDev - if err := n.stack.Statistics(&stats, i.Name); err != nil { - log.Warningf("Failed to retrieve interface statistics for %v: %v", i.Name, err) - continue - } - fmt.Fprintf( - buf, - "%6s: %7d %7d %4d %4d %4d %5d %10d %9d %8d %7d %4d %4d %4d %5d %7d %10d\n", - i.Name, - // Received - stats[0], // bytes - stats[1], // packets - stats[2], // errors - stats[3], // dropped - stats[4], // fifo - stats[5], // frame - stats[6], // compressed - stats[7], // multicast - // Transmitted - stats[8], // bytes - stats[9], // packets - stats[10], // errors - stats[11], // dropped - stats[12], // fifo - stats[13], // frame - stats[14], // compressed - stats[15], // multicast - ) - } - - return nil -} - -// netUnixData implements vfs.DynamicBytesSource for /proc/net/unix. -// -// +stateify savable -type netUnixData struct { - kernfs.DynamicBytesFile - - kernel *kernel.Kernel -} - -var _ dynamicInode = (*netUnixData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (n *netUnixData) Generate(ctx context.Context, buf *bytes.Buffer) error { - buf.WriteString("Num RefCount Protocol Flags Type St Inode Path\n") - for _, se := range n.kernel.ListSockets() { - s := se.SockVFS2 - if !s.TryIncRef() { - log.Debugf("Couldn't get reference on %v in socket table, racing with destruction?", s) - continue - } - if family, _, _ := s.Impl().(socket.SocketVFS2).Type(); family != linux.AF_UNIX { - s.DecRef() - // Not a unix socket. - continue - } - sops := s.Impl().(*unix.SocketVFS2) - - addr, err := sops.Endpoint().GetLocalAddress() - if err != nil { - log.Warningf("Failed to retrieve socket name from %+v: %v", s, err) - addr.Addr = "<unknown>" - } - - sockFlags := 0 - if ce, ok := sops.Endpoint().(transport.ConnectingEndpoint); ok { - if ce.Listening() { - // For unix domain sockets, linux reports a single flag - // value if the socket is listening, of __SO_ACCEPTCON. - sockFlags = linux.SO_ACCEPTCON - } - } - - // Get inode number. - var ino uint64 - stat, statErr := s.Stat(ctx, vfs.StatOptions{Mask: linux.STATX_INO}) - if statErr != nil || stat.Mask&linux.STATX_INO == 0 { - log.Warningf("Failed to retrieve ino for socket file: %v", statErr) - } else { - ino = stat.Ino - } - - // In the socket entry below, the value for the 'Num' field requires - // some consideration. Linux prints the address to the struct - // unix_sock representing a socket in the kernel, but may redact the - // value for unprivileged users depending on the kptr_restrict - // sysctl. - // - // One use for this field is to allow a privileged user to - // introspect into the kernel memory to determine information about - // a socket not available through procfs, such as the socket's peer. - // - // In gvisor, returning a pointer to our internal structures would - // be pointless, as it wouldn't match the memory layout for struct - // unix_sock, making introspection difficult. We could populate a - // struct unix_sock with the appropriate data, but even that - // requires consideration for which kernel version to emulate, as - // the definition of this struct changes over time. - // - // For now, we always redact this pointer. - fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %8d", - (*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct. - s.Refs()-1, // RefCount, don't count our own ref. - 0, // Protocol, always 0 for UDS. - sockFlags, // Flags. - sops.Endpoint().Type(), // Type. - sops.State(), // State. - ino, // Inode. - ) - - // Path - if len(addr.Addr) != 0 { - if addr.Addr[0] == 0 { - // Abstract path. - fmt.Fprintf(buf, " @%s", string(addr.Addr[1:])) - } else { - fmt.Fprintf(buf, " %s", string(addr.Addr)) - } - } - fmt.Fprintf(buf, "\n") - - s.DecRef() - } - return nil -} - -func networkToHost16(n uint16) uint16 { - // n is in network byte order, so is big-endian. The most-significant byte - // should be stored in the lower address. - // - // We manually inline binary.BigEndian.Uint16() because Go does not support - // non-primitive consts, so binary.BigEndian is a (mutable) var, so calls to - // binary.BigEndian.Uint16() require a read of binary.BigEndian and an - // interface method call, defeating inlining. - buf := [2]byte{byte(n >> 8 & 0xff), byte(n & 0xff)} - return usermem.ByteOrder.Uint16(buf[:]) -} - -func writeInetAddr(w io.Writer, family int, i linux.SockAddr) { - switch family { - case linux.AF_INET: - var a linux.SockAddrInet - if i != nil { - a = *i.(*linux.SockAddrInet) - } - - // linux.SockAddrInet.Port is stored in the network byte order and is - // printed like a number in host byte order. Note that all numbers in host - // byte order are printed with the most-significant byte first when - // formatted with %X. See get_tcp4_sock() and udp4_format_sock() in Linux. - port := networkToHost16(a.Port) - - // linux.SockAddrInet.Addr is stored as a byte slice in big-endian order - // (i.e. most-significant byte in index 0). Linux represents this as a - // __be32 which is a typedef for an unsigned int, and is printed with - // %X. This means that for a little-endian machine, Linux prints the - // least-significant byte of the address first. To emulate this, we first - // invert the byte order for the address using usermem.ByteOrder.Uint32, - // which makes it have the equivalent encoding to a __be32 on a little - // endian machine. Note that this operation is a no-op on a big endian - // machine. Then similar to Linux, we format it with %X, which will print - // the most-significant byte of the __be32 address first, which is now - // actually the least-significant byte of the original address in - // linux.SockAddrInet.Addr on little endian machines, due to the conversion. - addr := usermem.ByteOrder.Uint32(a.Addr[:]) - - fmt.Fprintf(w, "%08X:%04X ", addr, port) - case linux.AF_INET6: - var a linux.SockAddrInet6 - if i != nil { - a = *i.(*linux.SockAddrInet6) - } - - port := networkToHost16(a.Port) - addr0 := usermem.ByteOrder.Uint32(a.Addr[0:4]) - addr1 := usermem.ByteOrder.Uint32(a.Addr[4:8]) - addr2 := usermem.ByteOrder.Uint32(a.Addr[8:12]) - addr3 := usermem.ByteOrder.Uint32(a.Addr[12:16]) - fmt.Fprintf(w, "%08X%08X%08X%08X:%04X ", addr0, addr1, addr2, addr3, port) - } -} - -func commonGenerateTCP(ctx context.Context, buf *bytes.Buffer, k *kernel.Kernel, family int) error { - // t may be nil here if our caller is not part of a task goroutine. This can - // happen for example if we're here for "sentryctl cat". When t is nil, - // degrade gracefully and retrieve what we can. - t := kernel.TaskFromContext(ctx) - - for _, se := range k.ListSockets() { - s := se.SockVFS2 - if !s.TryIncRef() { - log.Debugf("Couldn't get reference on %v in socket table, racing with destruction?", s) - continue - } - sops, ok := s.Impl().(socket.SocketVFS2) - if !ok { - panic(fmt.Sprintf("Found non-socket file in socket table: %+v", s)) - } - if fa, stype, _ := sops.Type(); !(family == fa && stype == linux.SOCK_STREAM) { - s.DecRef() - // Not tcp4 sockets. - continue - } - - // Linux's documentation for the fields below can be found at - // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt. - // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock(). - // Note that the header doesn't contain labels for all the fields. - - // Field: sl; entry number. - fmt.Fprintf(buf, "%4d: ", se.ID) - - // Field: local_adddress. - var localAddr linux.SockAddr - if t != nil { - if local, _, err := sops.GetSockName(t); err == nil { - localAddr = local - } - } - writeInetAddr(buf, family, localAddr) - - // Field: rem_address. - var remoteAddr linux.SockAddr - if t != nil { - if remote, _, err := sops.GetPeerName(t); err == nil { - remoteAddr = remote - } - } - writeInetAddr(buf, family, remoteAddr) - - // Field: state; socket state. - fmt.Fprintf(buf, "%02X ", sops.State()) - - // Field: tx_queue, rx_queue; number of packets in the transmit and - // receive queue. Unimplemented. - fmt.Fprintf(buf, "%08X:%08X ", 0, 0) - - // Field: tr, tm->when; timer active state and number of jiffies - // until timer expires. Unimplemented. - fmt.Fprintf(buf, "%02X:%08X ", 0, 0) - - // Field: retrnsmt; number of unrecovered RTO timeouts. - // Unimplemented. - fmt.Fprintf(buf, "%08X ", 0) - - stat, statErr := s.Stat(ctx, vfs.StatOptions{Mask: linux.STATX_UID | linux.STATX_INO}) - - // Field: uid. - if statErr != nil || stat.Mask&linux.STATX_UID == 0 { - log.Warningf("Failed to retrieve uid for socket file: %v", statErr) - fmt.Fprintf(buf, "%5d ", 0) - } else { - creds := auth.CredentialsFromContext(ctx) - fmt.Fprintf(buf, "%5d ", uint32(auth.KUID(stat.UID).In(creds.UserNamespace).OrOverflow())) - } - - // Field: timeout; number of unanswered 0-window probes. - // Unimplemented. - fmt.Fprintf(buf, "%8d ", 0) - - // Field: inode. - if statErr != nil || stat.Mask&linux.STATX_INO == 0 { - log.Warningf("Failed to retrieve inode for socket file: %v", statErr) - fmt.Fprintf(buf, "%8d ", 0) - } else { - fmt.Fprintf(buf, "%8d ", stat.Ino) - } - - // Field: refcount. Don't count the ref we obtain while deferencing - // the weakref to this socket. - fmt.Fprintf(buf, "%d ", s.Refs()-1) - - // Field: Socket struct address. Redacted due to the same reason as - // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. - fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil)) - - // Field: retransmit timeout. Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: predicted tick of soft clock (delayed ACK control data). - // Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: (ack.quick<<1)|ack.pingpong, Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: sending congestion window, Unimplemented. - fmt.Fprintf(buf, "%d ", 0) - - // Field: Slow start size threshold, -1 if threshold >= 0xFFFF. - // Unimplemented, report as large threshold. - fmt.Fprintf(buf, "%d", -1) - - fmt.Fprintf(buf, "\n") - - s.DecRef() - } - - return nil -} - -// netTCPData implements vfs.DynamicBytesSource for /proc/net/tcp. -// -// +stateify savable -type netTCPData struct { - kernfs.DynamicBytesFile - - kernel *kernel.Kernel -} - -var _ dynamicInode = (*netTCPData)(nil) - -func (d *netTCPData) Generate(ctx context.Context, buf *bytes.Buffer) error { - buf.WriteString(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n") - return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET) -} - -// netTCP6Data implements vfs.DynamicBytesSource for /proc/net/tcp6. -// -// +stateify savable -type netTCP6Data struct { - kernfs.DynamicBytesFile - - kernel *kernel.Kernel -} - -var _ dynamicInode = (*netTCP6Data)(nil) - -func (d *netTCP6Data) Generate(ctx context.Context, buf *bytes.Buffer) error { - buf.WriteString(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n") - return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET6) -} - -// netUDPData implements vfs.DynamicBytesSource for /proc/net/udp. -// -// +stateify savable -type netUDPData struct { - kernfs.DynamicBytesFile - - kernel *kernel.Kernel -} - -var _ dynamicInode = (*netUDPData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *netUDPData) Generate(ctx context.Context, buf *bytes.Buffer) error { - // t may be nil here if our caller is not part of a task goroutine. This can - // happen for example if we're here for "sentryctl cat". When t is nil, - // degrade gracefully and retrieve what we can. - t := kernel.TaskFromContext(ctx) - - for _, se := range d.kernel.ListSockets() { - s := se.SockVFS2 - if !s.TryIncRef() { - log.Debugf("Couldn't get reference on %v in socket table, racing with destruction?", s) - continue - } - sops, ok := s.Impl().(socket.SocketVFS2) - if !ok { - panic(fmt.Sprintf("Found non-socket file in socket table: %+v", s)) - } - if family, stype, _ := sops.Type(); family != linux.AF_INET || stype != linux.SOCK_DGRAM { - s.DecRef() - // Not udp4 socket. - continue - } - - // For Linux's implementation, see net/ipv4/udp.c:udp4_format_sock(). - - // Field: sl; entry number. - fmt.Fprintf(buf, "%5d: ", se.ID) - - // Field: local_adddress. - var localAddr linux.SockAddrInet - if t != nil { - if local, _, err := sops.GetSockName(t); err == nil { - localAddr = *local.(*linux.SockAddrInet) - } - } - writeInetAddr(buf, linux.AF_INET, &localAddr) - - // Field: rem_address. - var remoteAddr linux.SockAddrInet - if t != nil { - if remote, _, err := sops.GetPeerName(t); err == nil { - remoteAddr = *remote.(*linux.SockAddrInet) - } - } - writeInetAddr(buf, linux.AF_INET, &remoteAddr) - - // Field: state; socket state. - fmt.Fprintf(buf, "%02X ", sops.State()) - - // Field: tx_queue, rx_queue; number of packets in the transmit and - // receive queue. Unimplemented. - fmt.Fprintf(buf, "%08X:%08X ", 0, 0) - - // Field: tr, tm->when. Always 0 for UDP. - fmt.Fprintf(buf, "%02X:%08X ", 0, 0) - - // Field: retrnsmt. Always 0 for UDP. - fmt.Fprintf(buf, "%08X ", 0) - - stat, statErr := s.Stat(ctx, vfs.StatOptions{Mask: linux.STATX_UID | linux.STATX_INO}) - - // Field: uid. - if statErr != nil || stat.Mask&linux.STATX_UID == 0 { - log.Warningf("Failed to retrieve uid for socket file: %v", statErr) - fmt.Fprintf(buf, "%5d ", 0) - } else { - creds := auth.CredentialsFromContext(ctx) - fmt.Fprintf(buf, "%5d ", uint32(auth.KUID(stat.UID).In(creds.UserNamespace).OrOverflow())) - } - - // Field: timeout. Always 0 for UDP. - fmt.Fprintf(buf, "%8d ", 0) - - // Field: inode. - if statErr != nil || stat.Mask&linux.STATX_INO == 0 { - log.Warningf("Failed to retrieve inode for socket file: %v", statErr) - fmt.Fprintf(buf, "%8d ", 0) - } else { - fmt.Fprintf(buf, "%8d ", stat.Ino) - } - - // Field: ref; reference count on the socket inode. Don't count the ref - // we obtain while deferencing the weakref to this socket. - fmt.Fprintf(buf, "%d ", s.Refs()-1) - - // Field: Socket struct address. Redacted due to the same reason as - // the 'Num' field in /proc/net/unix, see netUnix.ReadSeqFileData. - fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil)) - - // Field: drops; number of dropped packets. Unimplemented. - fmt.Fprintf(buf, "%d", 0) - - fmt.Fprintf(buf, "\n") - - s.DecRef() - } - return nil -} - -// netSnmpData implements vfs.DynamicBytesSource for /proc/net/snmp. -// -// +stateify savable -type netSnmpData struct { - kernfs.DynamicBytesFile - - stack inet.Stack -} - -var _ dynamicInode = (*netSnmpData)(nil) - -type snmpLine struct { - prefix string - header string -} - -var snmp = []snmpLine{ - { - prefix: "Ip", - header: "Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates", - }, - { - prefix: "Icmp", - header: "InMsgs InErrors InCsumErrors InDestUnreachs InTimeExcds InParmProbs InSrcQuenchs InRedirects InEchos InEchoReps InTimestamps InTimestampReps InAddrMasks InAddrMaskReps OutMsgs OutErrors OutDestUnreachs OutTimeExcds OutParmProbs OutSrcQuenchs OutRedirects OutEchos OutEchoReps OutTimestamps OutTimestampReps OutAddrMasks OutAddrMaskReps", - }, - { - prefix: "IcmpMsg", - }, - { - prefix: "Tcp", - header: "RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts InCsumErrors", - }, - { - prefix: "Udp", - header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti", - }, - { - prefix: "UdpLite", - header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti", - }, -} - -func toSlice(a interface{}) []uint64 { - v := reflect.Indirect(reflect.ValueOf(a)) - return v.Slice(0, v.Len()).Interface().([]uint64) -} - -func sprintSlice(s []uint64) string { - if len(s) == 0 { - return "" - } - r := fmt.Sprint(s) - return r[1 : len(r)-1] // Remove "[]" introduced by fmt of slice. -} - -// Generate implements vfs.DynamicBytesSource. -func (d *netSnmpData) Generate(ctx context.Context, buf *bytes.Buffer) error { - types := []interface{}{ - &inet.StatSNMPIP{}, - &inet.StatSNMPICMP{}, - nil, // TODO(gvisor.dev/issue/628): Support IcmpMsg stats. - &inet.StatSNMPTCP{}, - &inet.StatSNMPUDP{}, - &inet.StatSNMPUDPLite{}, - } - for i, stat := range types { - line := snmp[i] - if stat == nil { - fmt.Fprintf(buf, "%s:\n", line.prefix) - fmt.Fprintf(buf, "%s:\n", line.prefix) - continue - } - if err := d.stack.Statistics(stat, line.prefix); err != nil { - if err == syserror.EOPNOTSUPP { - log.Infof("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err) - } else { - log.Warningf("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err) - } - } - - fmt.Fprintf(buf, "%s: %s\n", line.prefix, line.header) - - if line.prefix == "Tcp" { - tcp := stat.(*inet.StatSNMPTCP) - // "Tcp" needs special processing because MaxConn is signed. RFC 2012. - fmt.Fprintf(buf, "%s: %s %d %s\n", line.prefix, sprintSlice(tcp[:3]), int64(tcp[3]), sprintSlice(tcp[4:])) - } else { - fmt.Fprintf(buf, "%s: %s\n", line.prefix, sprintSlice(toSlice(stat))) - } - } - return nil -} - -// netRouteData implements vfs.DynamicBytesSource for /proc/net/route. -// -// +stateify savable -type netRouteData struct { - kernfs.DynamicBytesFile - - stack inet.Stack -} - -var _ dynamicInode = (*netRouteData)(nil) - -// Generate implements vfs.DynamicBytesSource. -// See Linux's net/ipv4/fib_trie.c:fib_route_seq_show. -func (d *netRouteData) Generate(ctx context.Context, buf *bytes.Buffer) error { - fmt.Fprintf(buf, "%-127s\n", "Iface\tDestination\tGateway\tFlags\tRefCnt\tUse\tMetric\tMask\tMTU\tWindow\tIRTT") - - interfaces := d.stack.Interfaces() - for _, rt := range d.stack.RouteTable() { - // /proc/net/route only includes ipv4 routes. - if rt.Family != linux.AF_INET { - continue - } - - // /proc/net/route does not include broadcast or multicast routes. - if rt.Type == linux.RTN_BROADCAST || rt.Type == linux.RTN_MULTICAST { - continue - } - - iface, ok := interfaces[rt.OutputInterface] - if !ok || iface.Name == "lo" { - continue - } - - var ( - gw uint32 - prefix uint32 - flags = linux.RTF_UP - ) - if len(rt.GatewayAddr) == header.IPv4AddressSize { - flags |= linux.RTF_GATEWAY - gw = usermem.ByteOrder.Uint32(rt.GatewayAddr) - } - if len(rt.DstAddr) == header.IPv4AddressSize { - prefix = usermem.ByteOrder.Uint32(rt.DstAddr) - } - l := fmt.Sprintf( - "%s\t%08X\t%08X\t%04X\t%d\t%d\t%d\t%08X\t%d\t%d\t%d", - iface.Name, - prefix, - gw, - flags, - 0, // RefCnt. - 0, // Use. - 0, // Metric. - (uint32(1)<<rt.DstLen)-1, - 0, // MTU. - 0, // Window. - 0, // RTT. - ) - fmt.Fprintf(buf, "%-127s\n", l) - } - return nil -} - -// netStatData implements vfs.DynamicBytesSource for /proc/net/netstat. -// -// +stateify savable -type netStatData struct { - kernfs.DynamicBytesFile - - stack inet.Stack -} - -var _ dynamicInode = (*netStatData)(nil) - -// Generate implements vfs.DynamicBytesSource. -// See Linux's net/ipv4/fib_trie.c:fib_route_seq_show. -func (d *netStatData) Generate(ctx context.Context, buf *bytes.Buffer) error { - buf.WriteString("TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed " + - "EmbryonicRsts PruneCalled RcvPruned OfoPruned OutOfWindowIcmps " + - "LockDroppedIcmps ArpFilter TW TWRecycled TWKilled PAWSPassive " + - "PAWSActive PAWSEstab DelayedACKs DelayedACKLocked DelayedACKLost " + - "ListenOverflows ListenDrops TCPPrequeued TCPDirectCopyFromBacklog " + - "TCPDirectCopyFromPrequeue TCPPrequeueDropped TCPHPHits TCPHPHitsToUser " + - "TCPPureAcks TCPHPAcks TCPRenoRecovery TCPSackRecovery TCPSACKReneging " + - "TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder TCPFullUndo " + - "TCPPartialUndo TCPDSACKUndo TCPLossUndo TCPLostRetransmit " + - "TCPRenoFailures TCPSackFailures TCPLossFailures TCPFastRetrans " + - "TCPForwardRetrans TCPSlowStartRetrans TCPTimeouts TCPLossProbes " + - "TCPLossProbeRecovery TCPRenoRecoveryFail TCPSackRecoveryFail " + - "TCPSchedulerFailed TCPRcvCollapsed TCPDSACKOldSent TCPDSACKOfoSent " + - "TCPDSACKRecv TCPDSACKOfoRecv TCPAbortOnData TCPAbortOnClose " + - "TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger TCPAbortFailed " + - "TCPMemoryPressures TCPSACKDiscard TCPDSACKIgnoredOld " + - "TCPDSACKIgnoredNoUndo TCPSpuriousRTOs TCPMD5NotFound TCPMD5Unexpected " + - "TCPMD5Failure TCPSackShifted TCPSackMerged TCPSackShiftFallback " + - "TCPBacklogDrop TCPMinTTLDrop TCPDeferAcceptDrop IPReversePathFilter " + - "TCPTimeWaitOverflow TCPReqQFullDoCookies TCPReqQFullDrop TCPRetransFail " + - "TCPRcvCoalesce TCPOFOQueue TCPOFODrop TCPOFOMerge TCPChallengeACK " + - "TCPSYNChallenge TCPFastOpenActive TCPFastOpenActiveFail " + - "TCPFastOpenPassive TCPFastOpenPassiveFail TCPFastOpenListenOverflow " + - "TCPFastOpenCookieReqd TCPSpuriousRtxHostQueues BusyPollRxPackets " + - "TCPAutoCorking TCPFromZeroWindowAdv TCPToZeroWindowAdv " + - "TCPWantZeroWindowAdv TCPSynRetrans TCPOrigDataSent TCPHystartTrainDetect " + - "TCPHystartTrainCwnd TCPHystartDelayDetect TCPHystartDelayCwnd " + - "TCPACKSkippedSynRecv TCPACKSkippedPAWS TCPACKSkippedSeq " + - "TCPACKSkippedFinWait2 TCPACKSkippedTimeWait TCPACKSkippedChallenge " + - "TCPWinProbe TCPKeepAlive TCPMTUPFail TCPMTUPSuccess\n") - return nil -} diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go deleted file mode 100644 index 9f2ef8200..000000000 --- a/pkg/sentry/fsimpl/proc/tasks.go +++ /dev/null @@ -1,257 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "sort" - "strconv" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" -) - -const ( - selfName = "self" - threadSelfName = "thread-self" -) - -// InoGenerator generates unique inode numbers for a given filesystem. -type InoGenerator interface { - NextIno() uint64 -} - -// tasksInode represents the inode for /proc/ directory. -// -// +stateify savable -type tasksInode struct { - kernfs.InodeNotSymlink - kernfs.InodeDirectoryNoNewChildren - kernfs.InodeAttrs - kernfs.OrderedChildren - kernfs.AlwaysValid - - inoGen InoGenerator - pidns *kernel.PIDNamespace - - // '/proc/self' and '/proc/thread-self' have custom directory offsets in - // Linux. So handle them outside of OrderedChildren. - selfSymlink *vfs.Dentry - threadSelfSymlink *vfs.Dentry - - // cgroupControllers is a map of controller name to directory in the - // cgroup hierarchy. These controllers are immutable and will be listed - // in /proc/pid/cgroup if not nil. - cgroupControllers map[string]string -} - -var _ kernfs.Inode = (*tasksInode)(nil) - -func newTasksInode(inoGen InoGenerator, k *kernel.Kernel, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) (*tasksInode, *kernfs.Dentry) { - root := auth.NewRootCredentials(pidns.UserNamespace()) - contents := map[string]*kernfs.Dentry{ - "cpuinfo": newDentry(root, inoGen.NextIno(), 0444, newStaticFileSetStat(cpuInfoData(k))), - "filesystems": newDentry(root, inoGen.NextIno(), 0444, &filesystemsData{}), - "loadavg": newDentry(root, inoGen.NextIno(), 0444, &loadavgData{}), - "sys": newSysDir(root, inoGen, k), - "meminfo": newDentry(root, inoGen.NextIno(), 0444, &meminfoData{}), - "mounts": kernfs.NewStaticSymlink(root, inoGen.NextIno(), "self/mounts"), - "net": kernfs.NewStaticSymlink(root, inoGen.NextIno(), "self/net"), - "stat": newDentry(root, inoGen.NextIno(), 0444, &statData{}), - "uptime": newDentry(root, inoGen.NextIno(), 0444, &uptimeData{}), - "version": newDentry(root, inoGen.NextIno(), 0444, &versionData{}), - } - - inode := &tasksInode{ - pidns: pidns, - inoGen: inoGen, - selfSymlink: newSelfSymlink(root, inoGen.NextIno(), pidns).VFSDentry(), - threadSelfSymlink: newThreadSelfSymlink(root, inoGen.NextIno(), pidns).VFSDentry(), - cgroupControllers: cgroupControllers, - } - inode.InodeAttrs.Init(root, inoGen.NextIno(), linux.ModeDirectory|0555) - - dentry := &kernfs.Dentry{} - dentry.Init(inode) - - inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) - links := inode.OrderedChildren.Populate(dentry, contents) - inode.IncLinks(links) - - return inode, dentry -} - -// Lookup implements kernfs.inodeDynamicLookup. -func (i *tasksInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) { - // Try to lookup a corresponding task. - tid, err := strconv.ParseUint(name, 10, 64) - if err != nil { - // If it failed to parse, check if it's one of the special handled files. - switch name { - case selfName: - return i.selfSymlink, nil - case threadSelfName: - return i.threadSelfSymlink, nil - } - return nil, syserror.ENOENT - } - - task := i.pidns.TaskWithID(kernel.ThreadID(tid)) - if task == nil { - return nil, syserror.ENOENT - } - - taskDentry := newTaskInode(i.inoGen, task, i.pidns, true, i.cgroupControllers) - return taskDentry.VFSDentry(), nil -} - -// IterDirents implements kernfs.inodeDynamicLookup. -func (i *tasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, _ int64) (int64, error) { - // fs/proc/internal.h: #define FIRST_PROCESS_ENTRY 256 - const FIRST_PROCESS_ENTRY = 256 - - // Use maxTaskID to shortcut searches that will result in 0 entries. - const maxTaskID = kernel.TasksLimit + 1 - if offset >= maxTaskID { - return offset, nil - } - - // According to Linux (fs/proc/base.c:proc_pid_readdir()), process directories - // start at offset FIRST_PROCESS_ENTRY with '/proc/self', followed by - // '/proc/thread-self' and then '/proc/[pid]'. - if offset < FIRST_PROCESS_ENTRY { - offset = FIRST_PROCESS_ENTRY - } - - if offset == FIRST_PROCESS_ENTRY { - dirent := vfs.Dirent{ - Name: selfName, - Type: linux.DT_LNK, - Ino: i.inoGen.NextIno(), - NextOff: offset + 1, - } - if err := cb.Handle(dirent); err != nil { - return offset, err - } - offset++ - } - if offset == FIRST_PROCESS_ENTRY+1 { - dirent := vfs.Dirent{ - Name: threadSelfName, - Type: linux.DT_LNK, - Ino: i.inoGen.NextIno(), - NextOff: offset + 1, - } - if err := cb.Handle(dirent); err != nil { - return offset, err - } - offset++ - } - - // Collect all tasks that TGIDs are greater than the offset specified. Per - // Linux we only include in directory listings if it's the leader. But for - // whatever crazy reason, you can still walk to the given node. - var tids []int - startTid := offset - FIRST_PROCESS_ENTRY - 2 - for _, tg := range i.pidns.ThreadGroups() { - tid := i.pidns.IDOfThreadGroup(tg) - if int64(tid) < startTid { - continue - } - if leader := tg.Leader(); leader != nil { - tids = append(tids, int(tid)) - } - } - - if len(tids) == 0 { - return offset, nil - } - - sort.Ints(tids) - for _, tid := range tids { - dirent := vfs.Dirent{ - Name: strconv.FormatUint(uint64(tid), 10), - Type: linux.DT_DIR, - Ino: i.inoGen.NextIno(), - NextOff: FIRST_PROCESS_ENTRY + 2 + int64(tid) + 1, - } - if err := cb.Handle(dirent); err != nil { - return offset, err - } - offset++ - } - return maxTaskID, nil -} - -// Open implements kernfs.Inode. -func (i *tasksInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { - fd := &kernfs.GenericDirectoryFD{} - fd.Init(rp.Mount(), vfsd, &i.OrderedChildren, &opts) - return fd.VFSFileDescription(), nil -} - -func (i *tasksInode) Stat(vsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) { - stat, err := i.InodeAttrs.Stat(vsfs, opts) - if err != nil { - return linux.Statx{}, err - } - - if opts.Mask&linux.STATX_NLINK != 0 { - // Add dynamic children to link count. - for _, tg := range i.pidns.ThreadGroups() { - if leader := tg.Leader(); leader != nil { - stat.Nlink++ - } - } - } - - return stat, nil -} - -// staticFileSetStat implements a special static file that allows inode -// attributes to be set. This is to support /proc files that are readonly, but -// allow attributes to be set. -type staticFileSetStat struct { - dynamicBytesFileSetAttr - vfs.StaticData -} - -var _ dynamicInode = (*staticFileSetStat)(nil) - -func newStaticFileSetStat(data string) *staticFileSetStat { - return &staticFileSetStat{StaticData: vfs.StaticData{Data: data}} -} - -func cpuInfoData(k *kernel.Kernel) string { - features := k.FeatureSet() - if features == nil { - // Kernel is always initialized with a FeatureSet. - panic("cpuinfo read with nil FeatureSet") - } - var buf bytes.Buffer - for i, max := uint(0), k.ApplicationCores(); i < max; i++ { - features.WriteCPUInfoTo(i, &buf) - } - return buf.String() -} - -func shmData(v uint64) dynamicInode { - return newStaticFile(strconv.FormatUint(v, 10)) -} diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go deleted file mode 100644 index 4621e2de0..000000000 --- a/pkg/sentry/fsimpl/proc/tasks_files.go +++ /dev/null @@ -1,380 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - "strconv" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/kernel/time" - "gvisor.dev/gvisor/pkg/sentry/usage" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -type selfSymlink struct { - kernfs.InodeAttrs - kernfs.InodeNoopRefCount - kernfs.InodeSymlink - - pidns *kernel.PIDNamespace -} - -var _ kernfs.Inode = (*selfSymlink)(nil) - -func newSelfSymlink(creds *auth.Credentials, ino uint64, pidns *kernel.PIDNamespace) *kernfs.Dentry { - inode := &selfSymlink{pidns: pidns} - inode.Init(creds, ino, linux.ModeSymlink|0777) - - d := &kernfs.Dentry{} - d.Init(inode) - return d -} - -func (s *selfSymlink) Readlink(ctx context.Context) (string, error) { - t := kernel.TaskFromContext(ctx) - if t == nil { - // Who is reading this link? - return "", syserror.EINVAL - } - tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup()) - if tgid == 0 { - return "", syserror.ENOENT - } - return strconv.FormatUint(uint64(tgid), 10), nil -} - -func (s *selfSymlink) Getlink(ctx context.Context) (vfs.VirtualDentry, string, error) { - target, err := s.Readlink(ctx) - return vfs.VirtualDentry{}, target, err -} - -// SetStat implements Inode.SetStat not allowing inode attributes to be changed. -func (*selfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { - return syserror.EPERM -} - -type threadSelfSymlink struct { - kernfs.InodeAttrs - kernfs.InodeNoopRefCount - kernfs.InodeSymlink - - pidns *kernel.PIDNamespace -} - -var _ kernfs.Inode = (*threadSelfSymlink)(nil) - -func newThreadSelfSymlink(creds *auth.Credentials, ino uint64, pidns *kernel.PIDNamespace) *kernfs.Dentry { - inode := &threadSelfSymlink{pidns: pidns} - inode.Init(creds, ino, linux.ModeSymlink|0777) - - d := &kernfs.Dentry{} - d.Init(inode) - return d -} - -func (s *threadSelfSymlink) Readlink(ctx context.Context) (string, error) { - t := kernel.TaskFromContext(ctx) - if t == nil { - // Who is reading this link? - return "", syserror.EINVAL - } - tgid := s.pidns.IDOfThreadGroup(t.ThreadGroup()) - tid := s.pidns.IDOfTask(t) - if tid == 0 || tgid == 0 { - return "", syserror.ENOENT - } - return fmt.Sprintf("%d/task/%d", tgid, tid), nil -} - -func (s *threadSelfSymlink) Getlink(ctx context.Context) (vfs.VirtualDentry, string, error) { - target, err := s.Readlink(ctx) - return vfs.VirtualDentry{}, target, err -} - -// SetStat implements Inode.SetStat not allowing inode attributes to be changed. -func (*threadSelfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { - return syserror.EPERM -} - -// dynamicBytesFileSetAttr implements a special file that allows inode -// attributes to be set. This is to support /proc files that are readonly, but -// allow attributes to be set. -type dynamicBytesFileSetAttr struct { - kernfs.DynamicBytesFile -} - -// SetStat implements Inode.SetStat. -func (d *dynamicBytesFileSetAttr) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error { - return d.DynamicBytesFile.InodeAttrs.SetStat(ctx, fs, creds, opts) -} - -// cpuStats contains the breakdown of CPU time for /proc/stat. -type cpuStats struct { - // user is time spent in userspace tasks with non-positive niceness. - user uint64 - - // nice is time spent in userspace tasks with positive niceness. - nice uint64 - - // system is time spent in non-interrupt kernel context. - system uint64 - - // idle is time spent idle. - idle uint64 - - // ioWait is time spent waiting for IO. - ioWait uint64 - - // irq is time spent in interrupt context. - irq uint64 - - // softirq is time spent in software interrupt context. - softirq uint64 - - // steal is involuntary wait time. - steal uint64 - - // guest is time spent in guests with non-positive niceness. - guest uint64 - - // guestNice is time spent in guests with positive niceness. - guestNice uint64 -} - -// String implements fmt.Stringer. -func (c cpuStats) String() string { - return fmt.Sprintf("%d %d %d %d %d %d %d %d %d %d", c.user, c.nice, c.system, c.idle, c.ioWait, c.irq, c.softirq, c.steal, c.guest, c.guestNice) -} - -// statData implements vfs.DynamicBytesSource for /proc/stat. -// -// +stateify savable -type statData struct { - dynamicBytesFileSetAttr -} - -var _ dynamicInode = (*statData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (*statData) Generate(ctx context.Context, buf *bytes.Buffer) error { - // TODO(b/37226836): We currently export only zero CPU stats. We could - // at least provide some aggregate stats. - var cpu cpuStats - fmt.Fprintf(buf, "cpu %s\n", cpu) - - k := kernel.KernelFromContext(ctx) - for c, max := uint(0), k.ApplicationCores(); c < max; c++ { - fmt.Fprintf(buf, "cpu%d %s\n", c, cpu) - } - - // The total number of interrupts is dependent on the CPUs and PCI - // devices on the system. See arch_probe_nr_irqs. - // - // Since we don't report real interrupt stats, just choose an arbitrary - // value from a representative VM. - const numInterrupts = 256 - - // The Kernel doesn't handle real interrupts, so report all zeroes. - // TODO(b/37226836): We could count page faults as #PF. - fmt.Fprintf(buf, "intr 0") // total - for i := 0; i < numInterrupts; i++ { - fmt.Fprintf(buf, " 0") - } - fmt.Fprintf(buf, "\n") - - // Total number of context switches. - // TODO(b/37226836): Count this. - fmt.Fprintf(buf, "ctxt 0\n") - - // CLOCK_REALTIME timestamp from boot, in seconds. - fmt.Fprintf(buf, "btime %d\n", k.Timekeeper().BootTime().Seconds()) - - // Total number of clones. - // TODO(b/37226836): Count this. - fmt.Fprintf(buf, "processes 0\n") - - // Number of runnable tasks. - // TODO(b/37226836): Count this. - fmt.Fprintf(buf, "procs_running 0\n") - - // Number of tasks waiting on IO. - // TODO(b/37226836): Count this. - fmt.Fprintf(buf, "procs_blocked 0\n") - - // Number of each softirq handled. - fmt.Fprintf(buf, "softirq 0") // total - for i := 0; i < linux.NumSoftIRQ; i++ { - fmt.Fprintf(buf, " 0") - } - fmt.Fprintf(buf, "\n") - return nil -} - -// loadavgData backs /proc/loadavg. -// -// +stateify savable -type loadavgData struct { - dynamicBytesFileSetAttr -} - -var _ dynamicInode = (*loadavgData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (*loadavgData) Generate(ctx context.Context, buf *bytes.Buffer) error { - // TODO(b/62345059): Include real data in fields. - // Column 1-3: CPU and IO utilization of the last 1, 5, and 10 minute periods. - // Column 4-5: currently running processes and the total number of processes. - // Column 6: the last process ID used. - fmt.Fprintf(buf, "%.2f %.2f %.2f %d/%d %d\n", 0.00, 0.00, 0.00, 0, 0, 0) - return nil -} - -// meminfoData implements vfs.DynamicBytesSource for /proc/meminfo. -// -// +stateify savable -type meminfoData struct { - dynamicBytesFileSetAttr -} - -var _ dynamicInode = (*meminfoData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (*meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { - k := kernel.KernelFromContext(ctx) - mf := k.MemoryFile() - mf.UpdateUsage() - snapshot, totalUsage := usage.MemoryAccounting.Copy() - totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage) - anon := snapshot.Anonymous + snapshot.Tmpfs - file := snapshot.PageCache + snapshot.Mapped - // We don't actually have active/inactive LRUs, so just make up numbers. - activeFile := (file / 2) &^ (usermem.PageSize - 1) - inactiveFile := file - activeFile - - fmt.Fprintf(buf, "MemTotal: %8d kB\n", totalSize/1024) - memFree := (totalSize - totalUsage) / 1024 - // We use MemFree as MemAvailable because we don't swap. - // TODO(rahat): When reclaim is implemented the value of MemAvailable - // should change. - fmt.Fprintf(buf, "MemFree: %8d kB\n", memFree) - fmt.Fprintf(buf, "MemAvailable: %8d kB\n", memFree) - fmt.Fprintf(buf, "Buffers: 0 kB\n") // memory usage by block devices - fmt.Fprintf(buf, "Cached: %8d kB\n", (file+snapshot.Tmpfs)/1024) - // Emulate a system with no swap, which disables inactivation of anon pages. - fmt.Fprintf(buf, "SwapCache: 0 kB\n") - fmt.Fprintf(buf, "Active: %8d kB\n", (anon+activeFile)/1024) - fmt.Fprintf(buf, "Inactive: %8d kB\n", inactiveFile/1024) - fmt.Fprintf(buf, "Active(anon): %8d kB\n", anon/1024) - fmt.Fprintf(buf, "Inactive(anon): 0 kB\n") - fmt.Fprintf(buf, "Active(file): %8d kB\n", activeFile/1024) - fmt.Fprintf(buf, "Inactive(file): %8d kB\n", inactiveFile/1024) - fmt.Fprintf(buf, "Unevictable: 0 kB\n") // TODO(b/31823263) - fmt.Fprintf(buf, "Mlocked: 0 kB\n") // TODO(b/31823263) - fmt.Fprintf(buf, "SwapTotal: 0 kB\n") - fmt.Fprintf(buf, "SwapFree: 0 kB\n") - fmt.Fprintf(buf, "Dirty: 0 kB\n") - fmt.Fprintf(buf, "Writeback: 0 kB\n") - fmt.Fprintf(buf, "AnonPages: %8d kB\n", anon/1024) - fmt.Fprintf(buf, "Mapped: %8d kB\n", file/1024) // doesn't count mapped tmpfs, which we don't know - fmt.Fprintf(buf, "Shmem: %8d kB\n", snapshot.Tmpfs/1024) - return nil -} - -// uptimeData implements vfs.DynamicBytesSource for /proc/uptime. -// -// +stateify savable -type uptimeData struct { - dynamicBytesFileSetAttr -} - -var _ dynamicInode = (*uptimeData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (*uptimeData) Generate(ctx context.Context, buf *bytes.Buffer) error { - k := kernel.KernelFromContext(ctx) - now := time.NowFromContext(ctx) - - // Pretend that we've spent zero time sleeping (second number). - fmt.Fprintf(buf, "%.2f 0.00\n", now.Sub(k.Timekeeper().BootTime()).Seconds()) - return nil -} - -// versionData implements vfs.DynamicBytesSource for /proc/version. -// -// +stateify savable -type versionData struct { - dynamicBytesFileSetAttr -} - -var _ dynamicInode = (*versionData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (*versionData) Generate(ctx context.Context, buf *bytes.Buffer) error { - k := kernel.KernelFromContext(ctx) - init := k.GlobalInit() - if init == nil { - // Attempted to read before the init Task is created. This can - // only occur during startup, which should never need to read - // this file. - panic("Attempted to read version before initial Task is available") - } - - // /proc/version takes the form: - // - // "SYSNAME version RELEASE (COMPILE_USER@COMPILE_HOST) - // (COMPILER_VERSION) VERSION" - // - // where: - // - SYSNAME, RELEASE, and VERSION are the same as returned by - // sys_utsname - // - COMPILE_USER is the user that build the kernel - // - COMPILE_HOST is the hostname of the machine on which the kernel - // was built - // - COMPILER_VERSION is the version reported by the building compiler - // - // Since we don't really want to expose build information to - // applications, those fields are omitted. - // - // FIXME(mpratt): Using Version from the init task SyscallTable - // disregards the different version a task may have (e.g., in a uts - // namespace). - ver := init.Leader().SyscallTable().Version - fmt.Fprintf(buf, "%s version %s %s\n", ver.Sysname, ver.Release, ver.Version) - return nil -} - -// filesystemsData backs /proc/filesystems. -// -// +stateify savable -type filesystemsData struct { - kernfs.DynamicBytesFile -} - -var _ dynamicInode = (*filesystemsData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *filesystemsData) Generate(ctx context.Context, buf *bytes.Buffer) error { - k := kernel.KernelFromContext(ctx) - k.VFS().GenerateProcFilesystems(buf) - return nil -} diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go deleted file mode 100644 index 3d5dc463c..000000000 --- a/pkg/sentry/fsimpl/proc/tasks_sys.go +++ /dev/null @@ -1,211 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "fmt" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" - "gvisor.dev/gvisor/pkg/sentry/inet" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -// newSysDir returns the dentry corresponding to /proc/sys directory. -func newSysDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *kernfs.Dentry { - return kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ - "kernel": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ - "hostname": newDentry(root, inoGen.NextIno(), 0444, &hostnameData{}), - "shmall": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMALL)), - "shmmax": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMMAX)), - "shmmni": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMMNI)), - }), - "vm": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ - "mmap_min_addr": newDentry(root, inoGen.NextIno(), 0444, &mmapMinAddrData{}), - "overcommit_memory": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0\n")), - }), - "net": newSysNetDir(root, inoGen, k), - }) -} - -// newSysNetDir returns the dentry corresponding to /proc/sys/net directory. -func newSysNetDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *kernfs.Dentry { - var contents map[string]*kernfs.Dentry - - // TODO(gvisor.dev/issue/1833): Support for using the network stack in the - // network namespace of the calling process. - if stack := k.RootNetworkNamespace().Stack(); stack != nil { - contents = map[string]*kernfs.Dentry{ - "ipv4": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ - "tcp_sack": newDentry(root, inoGen.NextIno(), 0644, &tcpSackData{stack: stack}), - - // The following files are simple stubs until they are implemented in - // netstack, most of these files are configuration related. We use the - // value closest to the actual netstack behavior or any empty file, all - // of these files will have mode 0444 (read-only for all users). - "ip_local_port_range": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("16000 65535")), - "ip_local_reserved_ports": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")), - "ipfrag_time": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("30")), - "ip_nonlocal_bind": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), - "ip_no_pmtu_disc": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")), - - // tcp_allowed_congestion_control tell the user what they are able to - // do as an unprivledged process so we leave it empty. - "tcp_allowed_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")), - "tcp_available_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("reno")), - "tcp_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("reno")), - - // Many of the following stub files are features netstack doesn't - // support. The unsupported features return "0" to indicate they are - // disabled. - "tcp_base_mss": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1280")), - "tcp_dsack": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), - "tcp_early_retrans": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), - "tcp_fack": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), - "tcp_fastopen": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), - "tcp_fastopen_key": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")), - "tcp_invalid_ratelimit": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), - "tcp_keepalive_intvl": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), - "tcp_keepalive_probes": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), - "tcp_keepalive_time": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("7200")), - "tcp_mtu_probing": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), - "tcp_no_metrics_save": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")), - "tcp_probe_interval": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), - "tcp_probe_threshold": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), - "tcp_retries1": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("3")), - "tcp_retries2": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("15")), - "tcp_rfc1337": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")), - "tcp_slow_start_after_idle": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")), - "tcp_synack_retries": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("5")), - "tcp_syn_retries": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("3")), - "tcp_timestamps": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")), - }), - "core": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ - "default_qdisc": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("pfifo_fast")), - "message_burst": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("10")), - "message_cost": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("5")), - "optmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")), - "rmem_default": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")), - "rmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")), - "somaxconn": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("128")), - "wmem_default": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")), - "wmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")), - }), - } - } - - return kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{ - "net": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, contents), - }) -} - -// mmapMinAddrData implements vfs.DynamicBytesSource for -// /proc/sys/vm/mmap_min_addr. -// -// +stateify savable -type mmapMinAddrData struct { - kernfs.DynamicBytesFile - - k *kernel.Kernel -} - -var _ dynamicInode = (*mmapMinAddrData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (d *mmapMinAddrData) Generate(ctx context.Context, buf *bytes.Buffer) error { - fmt.Fprintf(buf, "%d\n", d.k.Platform.MinUserAddress()) - return nil -} - -// hostnameData implements vfs.DynamicBytesSource for /proc/sys/kernel/hostname. -// -// +stateify savable -type hostnameData struct { - kernfs.DynamicBytesFile -} - -var _ dynamicInode = (*hostnameData)(nil) - -// Generate implements vfs.DynamicBytesSource.Generate. -func (*hostnameData) Generate(ctx context.Context, buf *bytes.Buffer) error { - utsns := kernel.UTSNamespaceFromContext(ctx) - buf.WriteString(utsns.HostName()) - buf.WriteString("\n") - return nil -} - -// tcpSackData implements vfs.WritableDynamicBytesSource for -// /proc/sys/net/tcp_sack. -// -// +stateify savable -type tcpSackData struct { - kernfs.DynamicBytesFile - - stack inet.Stack `state:"wait"` - enabled *bool -} - -var _ vfs.WritableDynamicBytesSource = (*tcpSackData)(nil) - -// Generate implements vfs.DynamicBytesSource. -func (d *tcpSackData) Generate(ctx context.Context, buf *bytes.Buffer) error { - if d.enabled == nil { - sack, err := d.stack.TCPSACKEnabled() - if err != nil { - return err - } - d.enabled = &sack - } - - val := "0\n" - if *d.enabled { - // Technically, this is not quite compatible with Linux. Linux stores these - // as an integer, so if you write "2" into tcp_sack, you should get 2 back. - // Tough luck. - val = "1\n" - } - buf.WriteString(val) - return nil -} - -func (d *tcpSackData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) { - if offset != 0 { - // No need to handle partial writes thus far. - return 0, syserror.EINVAL - } - if src.NumBytes() == 0 { - return 0, nil - } - - // Limit the amount of memory allocated. - src = src.TakeFirst(usermem.PageSize - 1) - - var v int32 - n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) - if err != nil { - return n, err - } - if d.enabled == nil { - d.enabled = new(bool) - } - *d.enabled = v != 0 - return n, d.stack.SetTCPSACKEnabled(*d.enabled) -} diff --git a/pkg/sentry/fsimpl/proc/tasks_sys_test.go b/pkg/sentry/fsimpl/proc/tasks_sys_test.go deleted file mode 100644 index be54897bb..000000000 --- a/pkg/sentry/fsimpl/proc/tasks_sys_test.go +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "bytes" - "reflect" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/sentry/contexttest" - "gvisor.dev/gvisor/pkg/sentry/inet" -) - -func newIPv6TestStack() *inet.TestStack { - s := inet.NewTestStack() - s.SupportsIPv6Flag = true - return s -} - -func TestIfinet6NoAddresses(t *testing.T) { - n := &ifinet6{stack: newIPv6TestStack()} - var buf bytes.Buffer - n.Generate(contexttest.Context(t), &buf) - if buf.Len() > 0 { - t.Errorf("n.Generate() generated = %v, want = %v", buf.Bytes(), []byte{}) - } -} - -func TestIfinet6(t *testing.T) { - s := newIPv6TestStack() - s.InterfacesMap[1] = inet.Interface{Name: "eth0"} - s.InterfaceAddrsMap[1] = []inet.InterfaceAddr{ - { - Family: linux.AF_INET6, - PrefixLen: 128, - Addr: []byte("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"), - }, - } - s.InterfacesMap[2] = inet.Interface{Name: "eth1"} - s.InterfaceAddrsMap[2] = []inet.InterfaceAddr{ - { - Family: linux.AF_INET6, - PrefixLen: 128, - Addr: []byte("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"), - }, - } - want := map[string]struct{}{ - "000102030405060708090a0b0c0d0e0f 01 80 00 00 eth0\n": {}, - "101112131415161718191a1b1c1d1e1f 02 80 00 00 eth1\n": {}, - } - - n := &ifinet6{stack: s} - contents := n.contents() - if len(contents) != len(want) { - t.Errorf("Got len(n.contents()) = %d, want = %d", len(contents), len(want)) - } - got := map[string]struct{}{} - for _, l := range contents { - got[l] = struct{}{} - } - - if !reflect.DeepEqual(got, want) { - t.Errorf("Got n.contents() = %v, want = %v", got, want) - } -} diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go deleted file mode 100644 index d0f97c137..000000000 --- a/pkg/sentry/fsimpl/proc/tasks_test.go +++ /dev/null @@ -1,505 +0,0 @@ -// Copyright 2019 The gVisor Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package proc - -import ( - "fmt" - "math" - "path" - "strconv" - "testing" - - "gvisor.dev/gvisor/pkg/abi/linux" - "gvisor.dev/gvisor/pkg/context" - "gvisor.dev/gvisor/pkg/fspath" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil" - "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs" - "gvisor.dev/gvisor/pkg/sentry/kernel" - "gvisor.dev/gvisor/pkg/sentry/kernel/auth" - "gvisor.dev/gvisor/pkg/sentry/vfs" - "gvisor.dev/gvisor/pkg/syserror" - "gvisor.dev/gvisor/pkg/usermem" -) - -var ( - // Next offset 256 by convention. Adds 1 for the next offset. - selfLink = vfs.Dirent{Type: linux.DT_LNK, NextOff: 256 + 0 + 1} - threadSelfLink = vfs.Dirent{Type: linux.DT_LNK, NextOff: 256 + 1 + 1} - - // /proc/[pid] next offset starts at 256+2 (files above), then adds the - // PID, and adds 1 for the next offset. - proc1 = vfs.Dirent{Type: linux.DT_DIR, NextOff: 258 + 1 + 1} - proc2 = vfs.Dirent{Type: linux.DT_DIR, NextOff: 258 + 2 + 1} - proc3 = vfs.Dirent{Type: linux.DT_DIR, NextOff: 258 + 3 + 1} -) - -var ( - tasksStaticFiles = map[string]testutil.DirentType{ - "cpuinfo": linux.DT_REG, - "filesystems": linux.DT_REG, - "loadavg": linux.DT_REG, - "meminfo": linux.DT_REG, - "mounts": linux.DT_LNK, - "net": linux.DT_LNK, - "self": linux.DT_LNK, - "stat": linux.DT_REG, - "sys": linux.DT_DIR, - "thread-self": linux.DT_LNK, - "uptime": linux.DT_REG, - "version": linux.DT_REG, - } - tasksStaticFilesNextOffs = map[string]int64{ - "self": selfLink.NextOff, - "thread-self": threadSelfLink.NextOff, - } - taskStaticFiles = map[string]testutil.DirentType{ - "auxv": linux.DT_REG, - "cgroup": linux.DT_REG, - "cmdline": linux.DT_REG, - "comm": linux.DT_REG, - "environ": linux.DT_REG, - "exe": linux.DT_LNK, - "fd": linux.DT_DIR, - "fdinfo": linux.DT_DIR, - "gid_map": linux.DT_REG, - "io": linux.DT_REG, - "maps": linux.DT_REG, - "mountinfo": linux.DT_REG, - "mounts": linux.DT_REG, - "net": linux.DT_DIR, - "ns": linux.DT_DIR, - "oom_score": linux.DT_REG, - "oom_score_adj": linux.DT_REG, - "smaps": linux.DT_REG, - "stat": linux.DT_REG, - "statm": linux.DT_REG, - "status": linux.DT_REG, - "task": linux.DT_DIR, - "uid_map": linux.DT_REG, - } -) - -func setup(t *testing.T) *testutil.System { - k, err := testutil.Boot() - if err != nil { - t.Fatalf("Error creating kernel: %v", err) - } - - ctx := k.SupervisorContext() - creds := auth.CredentialsFromContext(ctx) - - k.VFS().MustRegisterFilesystemType(Name, &FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{ - AllowUserMount: true, - }) - - mntns, err := k.VFS().NewMountNamespace(ctx, creds, "", tmpfs.Name, &vfs.GetFilesystemOptions{}) - if err != nil { - t.Fatalf("NewMountNamespace(): %v", err) - } - pop := &vfs.PathOperation{ - Root: mntns.Root(), - Start: mntns.Root(), - Path: fspath.Parse("/proc"), - } - if err := k.VFS().MkdirAt(ctx, creds, pop, &vfs.MkdirOptions{Mode: 0777}); err != nil { - t.Fatalf("MkDir(/proc): %v", err) - } - - pop = &vfs.PathOperation{ - Root: mntns.Root(), - Start: mntns.Root(), - Path: fspath.Parse("/proc"), - } - mntOpts := &vfs.MountOptions{ - GetFilesystemOptions: vfs.GetFilesystemOptions{ - InternalData: &InternalData{ - Cgroups: map[string]string{ - "cpuset": "/foo/cpuset", - "memory": "/foo/memory", - }, - }, - }, - } - if err := k.VFS().MountAt(ctx, creds, "", pop, Name, mntOpts); err != nil { - t.Fatalf("MountAt(/proc): %v", err) - } - return testutil.NewSystem(ctx, t, k.VFS(), mntns) -} - -func TestTasksEmpty(t *testing.T) { - s := setup(t) - defer s.Destroy() - - collector := s.ListDirents(s.PathOpAtRoot("/proc")) - s.AssertAllDirentTypes(collector, tasksStaticFiles) - s.AssertDirentOffsets(collector, tasksStaticFilesNextOffs) -} - -func TestTasks(t *testing.T) { - s := setup(t) - defer s.Destroy() - - expectedDirents := make(map[string]testutil.DirentType) - for n, d := range tasksStaticFiles { - expectedDirents[n] = d - } - - k := kernel.KernelFromContext(s.Ctx) - var tasks []*kernel.Task - for i := 0; i < 5; i++ { - tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - task, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc, s.MntNs, s.Root, s.Root) - if err != nil { - t.Fatalf("CreateTask(): %v", err) - } - tasks = append(tasks, task) - expectedDirents[fmt.Sprintf("%d", i+1)] = linux.DT_DIR - } - - collector := s.ListDirents(s.PathOpAtRoot("/proc")) - s.AssertAllDirentTypes(collector, expectedDirents) - s.AssertDirentOffsets(collector, tasksStaticFilesNextOffs) - - lastPid := 0 - dirents := collector.OrderedDirents() - doneSkippingNonTaskDirs := false - for _, d := range dirents { - pid, err := strconv.Atoi(d.Name) - if err != nil { - if !doneSkippingNonTaskDirs { - // We haven't gotten to the task dirs yet. - continue - } - t.Fatalf("Invalid process directory %q", d.Name) - } - doneSkippingNonTaskDirs = true - if lastPid > pid { - t.Errorf("pids not in order: %v", dirents) - } - found := false - for _, t := range tasks { - if k.TaskSet().Root.IDOfTask(t) == kernel.ThreadID(pid) { - found = true - } - } - if !found { - t.Errorf("Additional task ID %d listed: %v", pid, tasks) - } - // Next offset starts at 256+2 ('self' and 'thread-self'), then adds the - // PID, and adds 1 for the next offset. - if want := int64(256 + 2 + pid + 1); d.NextOff != want { - t.Errorf("Wrong dirent offset want: %d got: %d: %+v", want, d.NextOff, d) - } - } - if !doneSkippingNonTaskDirs { - t.Fatalf("Never found any process directories.") - } - - // Test lookup. - for _, path := range []string{"/proc/1", "/proc/2"} { - fd, err := s.VFS.OpenAt( - s.Ctx, - s.Creds, - s.PathOpAtRoot(path), - &vfs.OpenOptions{}, - ) - if err != nil { - t.Fatalf("vfsfs.OpenAt(%q) failed: %v", path, err) - } - defer fd.DecRef() - buf := make([]byte, 1) - bufIOSeq := usermem.BytesIOSequence(buf) - if _, err := fd.Read(s.Ctx, bufIOSeq, vfs.ReadOptions{}); err != syserror.EISDIR { - t.Errorf("wrong error reading directory: %v", err) - } - } - - if _, err := s.VFS.OpenAt( - s.Ctx, - s.Creds, - s.PathOpAtRoot("/proc/9999"), - &vfs.OpenOptions{}, - ); err != syserror.ENOENT { - t.Fatalf("wrong error from vfsfs.OpenAt(/proc/9999): %v", err) - } -} - -func TestTasksOffset(t *testing.T) { - s := setup(t) - defer s.Destroy() - - k := kernel.KernelFromContext(s.Ctx) - for i := 0; i < 3; i++ { - tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - if _, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc, s.MntNs, s.Root, s.Root); err != nil { - t.Fatalf("CreateTask(): %v", err) - } - } - - for _, tc := range []struct { - name string - offset int64 - wants map[string]vfs.Dirent - }{ - { - name: "small offset", - offset: 100, - wants: map[string]vfs.Dirent{ - "self": selfLink, - "thread-self": threadSelfLink, - "1": proc1, - "2": proc2, - "3": proc3, - }, - }, - { - name: "offset at start", - offset: 256, - wants: map[string]vfs.Dirent{ - "self": selfLink, - "thread-self": threadSelfLink, - "1": proc1, - "2": proc2, - "3": proc3, - }, - }, - { - name: "skip /proc/self", - offset: 257, - wants: map[string]vfs.Dirent{ - "thread-self": threadSelfLink, - "1": proc1, - "2": proc2, - "3": proc3, - }, - }, - { - name: "skip symlinks", - offset: 258, - wants: map[string]vfs.Dirent{ - "1": proc1, - "2": proc2, - "3": proc3, - }, - }, - { - name: "skip first process", - offset: 260, - wants: map[string]vfs.Dirent{ - "2": proc2, - "3": proc3, - }, - }, - { - name: "last process", - offset: 261, - wants: map[string]vfs.Dirent{ - "3": proc3, - }, - }, - { - name: "after last", - offset: 262, - wants: nil, - }, - { - name: "TaskLimit+1", - offset: kernel.TasksLimit + 1, - wants: nil, - }, - { - name: "max", - offset: math.MaxInt64, - wants: nil, - }, - } { - t.Run(tc.name, func(t *testing.T) { - s := s.WithSubtest(t) - fd, err := s.VFS.OpenAt( - s.Ctx, - s.Creds, - s.PathOpAtRoot("/proc"), - &vfs.OpenOptions{}, - ) - if err != nil { - t.Fatalf("vfsfs.OpenAt(/) failed: %v", err) - } - defer fd.DecRef() - if _, err := fd.Seek(s.Ctx, tc.offset, linux.SEEK_SET); err != nil { - t.Fatalf("Seek(%d, SEEK_SET): %v", tc.offset, err) - } - - var collector testutil.DirentCollector - if err := fd.IterDirents(s.Ctx, &collector); err != nil { - t.Fatalf("IterDirent(): %v", err) - } - - expectedTypes := make(map[string]testutil.DirentType) - expectedOffsets := make(map[string]int64) - for name, want := range tc.wants { - expectedTypes[name] = want.Type - if want.NextOff != 0 { - expectedOffsets[name] = want.NextOff - } - } - - collector.SkipDotsChecks(true) // We seek()ed past the dots. - s.AssertAllDirentTypes(&collector, expectedTypes) - s.AssertDirentOffsets(&collector, expectedOffsets) - }) - } -} - -func TestTask(t *testing.T) { - s := setup(t) - defer s.Destroy() - - k := kernel.KernelFromContext(s.Ctx) - tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - _, err := testutil.CreateTask(s.Ctx, "name", tc, s.MntNs, s.Root, s.Root) - if err != nil { - t.Fatalf("CreateTask(): %v", err) - } - - collector := s.ListDirents(s.PathOpAtRoot("/proc/1")) - s.AssertAllDirentTypes(collector, taskStaticFiles) -} - -func TestProcSelf(t *testing.T) { - s := setup(t) - defer s.Destroy() - - k := kernel.KernelFromContext(s.Ctx) - tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - task, err := testutil.CreateTask(s.Ctx, "name", tc, s.MntNs, s.Root, s.Root) - if err != nil { - t.Fatalf("CreateTask(): %v", err) - } - - collector := s.WithTemporaryContext(task).ListDirents(&vfs.PathOperation{ - Root: s.Root, - Start: s.Root, - Path: fspath.Parse("/proc/self/"), - FollowFinalSymlink: true, - }) - s.AssertAllDirentTypes(collector, taskStaticFiles) -} - -func iterateDir(ctx context.Context, t *testing.T, s *testutil.System, fd *vfs.FileDescription) { - t.Logf("Iterating: %s", fd.MappedName(ctx)) - - var collector testutil.DirentCollector - if err := fd.IterDirents(ctx, &collector); err != nil { - t.Fatalf("IterDirents(): %v", err) - } - if err := collector.Contains(".", linux.DT_DIR); err != nil { - t.Error(err.Error()) - } - if err := collector.Contains("..", linux.DT_DIR); err != nil { - t.Error(err.Error()) - } - - for _, d := range collector.Dirents() { - if d.Name == "." || d.Name == ".." { - continue - } - childPath := path.Join(fd.MappedName(ctx), d.Name) - if d.Type == linux.DT_LNK { - link, err := s.VFS.ReadlinkAt( - ctx, - auth.CredentialsFromContext(ctx), - &vfs.PathOperation{Root: s.Root, Start: s.Root, Path: fspath.Parse(childPath)}, - ) - if err != nil { - t.Errorf("vfsfs.ReadlinkAt(%v) failed: %v", childPath, err) - } else { - t.Logf("Skipping symlink: /proc%s => %s", childPath, link) - } - continue - } - - t.Logf("Opening: /proc%s", childPath) - child, err := s.VFS.OpenAt( - ctx, - auth.CredentialsFromContext(ctx), - &vfs.PathOperation{Root: s.Root, Start: s.Root, Path: fspath.Parse(childPath)}, - &vfs.OpenOptions{}, - ) - if err != nil { - t.Errorf("vfsfs.OpenAt(%v) failed: %v", childPath, err) - continue - } - defer child.DecRef() - stat, err := child.Stat(ctx, vfs.StatOptions{}) - if err != nil { - t.Errorf("Stat(%v) failed: %v", childPath, err) - } - if got := linux.FileMode(stat.Mode).DirentType(); got != d.Type { - t.Errorf("wrong file mode, stat: %v, dirent: %v", got, d.Type) - } - if d.Type == linux.DT_DIR { - // Found another dir, let's do it again! - iterateDir(ctx, t, s, child) - } - } -} - -// TestTree iterates all directories and stats every file. -func TestTree(t *testing.T) { - s := setup(t) - defer s.Destroy() - - k := kernel.KernelFromContext(s.Ctx) - - pop := &vfs.PathOperation{ - Root: s.Root, - Start: s.Root, - Path: fspath.Parse("test-file"), - } - opts := &vfs.OpenOptions{ - Flags: linux.O_RDONLY | linux.O_CREAT, - Mode: 0777, - } - file, err := s.VFS.OpenAt(s.Ctx, s.Creds, pop, opts) - if err != nil { - t.Fatalf("failed to create test file: %v", err) - } - defer file.DecRef() - - var tasks []*kernel.Task - for i := 0; i < 5; i++ { - tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits()) - task, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc, s.MntNs, s.Root, s.Root) - if err != nil { - t.Fatalf("CreateTask(): %v", err) - } - // Add file to populate /proc/[pid]/fd and fdinfo directories. - task.FDTable().NewFDVFS2(task, 0, file, kernel.FDFlags{}) - tasks = append(tasks, task) - } - - ctx := tasks[0] - fd, err := s.VFS.OpenAt( - ctx, - auth.CredentialsFromContext(s.Ctx), - &vfs.PathOperation{Root: s.Root, Start: s.Root, Path: fspath.Parse("/proc")}, - &vfs.OpenOptions{}, - ) - if err != nil { - t.Fatalf("vfsfs.OpenAt(/proc) failed: %v", err) - } - iterateDir(ctx, t, s, fd) - fd.DecRef() -} |