summary refs log tree commit diff homepage
path: root/pkg/sentry/fsimpl/proc
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fsimpl/proc')
-rw-r--r-- pkg/sentry/fsimpl/proc/BUILD 67
-rw-r--r-- pkg/sentry/fsimpl/proc/filesystem.go 95
-rw-r--r-- pkg/sentry/fsimpl/proc/subtasks.go 135
-rw-r--r-- pkg/sentry/fsimpl/proc/task.go 257
-rw-r--r-- pkg/sentry/fsimpl/proc/task_fds.go 287
-rw-r--r-- pkg/sentry/fsimpl/proc/task_files.go 821
-rw-r--r-- pkg/sentry/fsimpl/proc/task_net.go 790
-rw-r--r-- pkg/sentry/fsimpl/proc/tasks.go 257
-rw-r--r-- pkg/sentry/fsimpl/proc/tasks_files.go 370
-rw-r--r-- pkg/sentry/fsimpl/proc/tasks_sys.go 211
-rw-r--r-- pkg/sentry/fsimpl/proc/tasks_sys_test.go 78
-rw-r--r-- pkg/sentry/fsimpl/proc/tasks_test.go 505
12 files changed, 0 insertions, 3873 deletions
diff --git a/pkg/sentry/fsimpl/proc/BUILD b/pkg/sentry/fsimpl/proc/BUILD
deleted file mode 100644
index 8156984eb..000000000
--- a/pkg/sentry/fsimpl/proc/BUILD
+++ /dev/null
@@ -1,67 +0,0 @@
-load("//tools:defs.bzl", "go_library", "go_test")
-
-licenses(["notice"])
-
-go_library(
- name = "proc",
- srcs = [
- "filesystem.go",
- "subtasks.go",
- "task.go",
- "task_fds.go",
- "task_files.go",
- "task_net.go",
- "tasks.go",
- "tasks_files.go",
- "tasks_sys.go",
- ],
- visibility = ["//pkg/sentry:internal"],
- deps = [
- "//pkg/abi/linux",
- "//pkg/context",
- "//pkg/log",
- "//pkg/refs",
- "//pkg/safemem",
- "//pkg/sentry/fs",
- "//pkg/sentry/fsbridge",
- "//pkg/sentry/fsimpl/kernfs",
- "//pkg/sentry/inet",
- "//pkg/sentry/kernel",
- "//pkg/sentry/kernel/auth",
- "//pkg/sentry/kernel/time",
- "//pkg/sentry/limits",
- "//pkg/sentry/mm",
- "//pkg/sentry/socket",
- "//pkg/sentry/socket/unix",
- "//pkg/sentry/socket/unix/transport",
- "//pkg/sentry/usage",
- "//pkg/sentry/vfs",
- "//pkg/syserror",
- "//pkg/tcpip/header",
- "//pkg/usermem",
- ],
-)
-
-go_test(
- name = "proc_test",
- size = "small",
- srcs = [
- "tasks_sys_test.go",
- "tasks_test.go",
- ],
- library = ":proc",
- deps = [
- "//pkg/abi/linux",
- "//pkg/context",
- "//pkg/fspath",
- "//pkg/sentry/contexttest",
- "//pkg/sentry/fsimpl/testutil",
- "//pkg/sentry/fsimpl/tmpfs",
- "//pkg/sentry/inet",
- "//pkg/sentry/kernel",
- "//pkg/sentry/kernel/auth",
- "//pkg/sentry/vfs",
- "//pkg/syserror",
- "//pkg/usermem",
- ],
-)
diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go
deleted file mode 100644
index 5c19d5522..000000000
--- a/pkg/sentry/fsimpl/proc/filesystem.go
+++ /dev/null
@@ -1,95 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package proc implements a partial in-memory file system for procfs.
-package proc
-
-import (
- "fmt"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
-)
-
-// Name is the default filesystem name.
-const Name = "proc"
-
-// FilesystemType is the factory class for procfs.
-//
-// +stateify savable
-type FilesystemType struct{}
-
-var _ vfs.FilesystemType = (*FilesystemType)(nil)
-
-// GetFilesystem implements vfs.FilesystemType.
-func (ft *FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
- k := kernel.KernelFromContext(ctx)
- if k == nil {
- return nil, nil, fmt.Errorf("procfs requires a kernel")
- }
- pidns := kernel.PIDNamespaceFromContext(ctx)
- if pidns == nil {
- return nil, nil, fmt.Errorf("procfs requires a PID namespace")
- }
-
- procfs := &kernfs.Filesystem{}
- procfs.VFSFilesystem().Init(vfsObj, procfs)
-
- var cgroups map[string]string
- if opts.InternalData != nil {
- data := opts.InternalData.(*InternalData)
- cgroups = data.Cgroups
- }
-
- _, dentry := newTasksInode(procfs, k, pidns, cgroups)
- return procfs.VFSFilesystem(), dentry.VFSDentry(), nil
-}
-
-// dynamicInode is an overfitted interface for common Inodes with
-// vfs.DynamicBytesSource types used in procfs.
-type dynamicInode interface {
- kernfs.Inode
- vfs.DynamicBytesSource
-
- Init(creds *auth.Credentials, ino uint64, data vfs.DynamicBytesSource, perm linux.FileMode)
-}
-
-func newDentry(creds *auth.Credentials, ino uint64, perm linux.FileMode, inode dynamicInode) *kernfs.Dentry {
- inode.Init(creds, ino, inode, perm)
-
- d := &kernfs.Dentry{}
- d.Init(inode)
- return d
-}
-
-type staticFile struct {
- kernfs.DynamicBytesFile
- vfs.StaticData
-}
-
-var _ dynamicInode = (*staticFile)(nil)
-
-func newStaticFile(data string) *staticFile {
- return &staticFile{StaticData: vfs.StaticData{Data: data}}
-}
-
-// InternalData contains internal data passed in to the procfs mount via
-// vfs.GetFilesystemOptions.InternalData.
-type InternalData struct {
- Cgroups map[string]string
-}
diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go
deleted file mode 100644
index a21313666..000000000
--- a/pkg/sentry/fsimpl/proc/subtasks.go
+++ /dev/null
@@ -1,135 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package proc
-
-import (
- "sort"
- "strconv"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// subtasksInode represents the inode for /proc/[pid]/task/ directory.
-//
-// +stateify savable
-type subtasksInode struct {
- kernfs.InodeNotSymlink
- kernfs.InodeDirectoryNoNewChildren
- kernfs.InodeAttrs
- kernfs.OrderedChildren
- kernfs.AlwaysValid
-
- task *kernel.Task
- pidns *kernel.PIDNamespace
- inoGen InoGenerator
- cgroupControllers map[string]string
-}
-
-var _ kernfs.Inode = (*subtasksInode)(nil)
-
-func newSubtasks(task *kernel.Task, pidns *kernel.PIDNamespace, inoGen InoGenerator, cgroupControllers map[string]string) *kernfs.Dentry {
- subInode := &subtasksInode{
- task: task,
- pidns: pidns,
- inoGen: inoGen,
- cgroupControllers: cgroupControllers,
- }
- // Note: credentials are overridden by taskOwnedInode.
- subInode.InodeAttrs.Init(task.Credentials(), inoGen.NextIno(), linux.ModeDirectory|0555)
- subInode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
-
- inode := &taskOwnedInode{Inode: subInode, owner: task}
- dentry := &kernfs.Dentry{}
- dentry.Init(inode)
-
- return dentry
-}
-
-// Lookup implements kernfs.inodeDynamicLookup.
-func (i *subtasksInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) {
- tid, err := strconv.ParseUint(name, 10, 32)
- if err != nil {
- return nil, syserror.ENOENT
- }
-
- subTask := i.pidns.TaskWithID(kernel.ThreadID(tid))
- if subTask == nil {
- return nil, syserror.ENOENT
- }
- if subTask.ThreadGroup() != i.task.ThreadGroup() {
- return nil, syserror.ENOENT
- }
-
- subTaskDentry := newTaskInode(i.inoGen, subTask, i.pidns, false, i.cgroupControllers)
- return subTaskDentry.VFSDentry(), nil
-}
-
-// IterDirents implements kernfs.inodeDynamicLookup.
-func (i *subtasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
- tasks := i.task.ThreadGroup().MemberIDs(i.pidns)
- if len(tasks) == 0 {
- return offset, syserror.ENOENT
- }
-
- tids := make([]int, 0, len(tasks))
- for _, tid := range tasks {
- tids = append(tids, int(tid))
- }
-
- sort.Ints(tids)
- for _, tid := range tids[relOffset:] {
- dirent := vfs.Dirent{
- Name: strconv.FormatUint(uint64(tid), 10),
- Type: linux.DT_DIR,
- Ino: i.inoGen.NextIno(),
- NextOff: offset + 1,
- }
- if err := cb.Handle(dirent); err != nil {
- return offset, err
- }
- offset++
- }
- return offset, nil
-}
-
-// Open implements kernfs.Inode.
-func (i *subtasksInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
- fd := &kernfs.GenericDirectoryFD{}
- fd.Init(rp.Mount(), vfsd, &i.OrderedChildren, &opts)
- return fd.VFSFileDescription(), nil
-}
-
-// Stat implements kernfs.Inode.
-func (i *subtasksInode) Stat(vsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
- stat, err := i.InodeAttrs.Stat(vsfs, opts)
- if err != nil {
- return linux.Statx{}, err
- }
- if opts.Mask&linux.STATX_NLINK != 0 {
- stat.Nlink += uint32(i.task.ThreadGroup().Count())
- }
- return stat, nil
-}
-
-// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
-func (*subtasksInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
- return syserror.EPERM
-}
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
deleted file mode 100644
index 49d6efb0e..000000000
--- a/pkg/sentry/fsimpl/proc/task.go
+++ /dev/null
@@ -1,257 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package proc
-
-import (
- "bytes"
- "fmt"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/mm"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// taskInode represents the inode for /proc/PID/ directory.
-//
-// +stateify savable
-type taskInode struct {
- kernfs.InodeNotSymlink
- kernfs.InodeDirectoryNoNewChildren
- kernfs.InodeNoDynamicLookup
- kernfs.InodeAttrs
- kernfs.OrderedChildren
-
- task *kernel.Task
-}
-
-var _ kernfs.Inode = (*taskInode)(nil)
-
-func newTaskInode(inoGen InoGenerator, task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, cgroupControllers map[string]string) *kernfs.Dentry {
- contents := map[string]*kernfs.Dentry{
- "auxv": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &auxvData{task: task}),
- "cmdline": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &cmdlineData{task: task, arg: cmdlineDataArg}),
- "comm": newComm(task, inoGen.NextIno(), 0444),
- "environ": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &cmdlineData{task: task, arg: environDataArg}),
- "exe": newExeSymlink(task, inoGen.NextIno()),
- "fd": newFDDirInode(task, inoGen),
- "fdinfo": newFDInfoDirInode(task, inoGen),
- "gid_map": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &idMapData{task: task, gids: true}),
- "io": newTaskOwnedFile(task, inoGen.NextIno(), 0400, newIO(task, isThreadGroup)),
- "maps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &mapsData{task: task}),
- "mountinfo": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &mountInfoData{task: task}),
- "mounts": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &mountsData{task: task}),
- "net": newTaskNetDir(task, inoGen),
- "ns": newTaskOwnedDir(task, inoGen.NextIno(), 0511, map[string]*kernfs.Dentry{
- "net": newNamespaceSymlink(task, inoGen.NextIno(), "net"),
- "pid": newNamespaceSymlink(task, inoGen.NextIno(), "pid"),
- "user": newNamespaceSymlink(task, inoGen.NextIno(), "user"),
- }),
- "oom_score": newTaskOwnedFile(task, inoGen.NextIno(), 0444, newStaticFile("0\n")),
- "oom_score_adj": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &oomScoreAdj{task: task}),
- "smaps": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &smapsData{task: task}),
- "stat": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}),
- "statm": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statmData{task: task}),
- "status": newTaskOwnedFile(task, inoGen.NextIno(), 0444, &statusData{task: task, pidns: pidns}),
- "uid_map": newTaskOwnedFile(task, inoGen.NextIno(), 0644, &idMapData{task: task, gids: false}),
- }
- if isThreadGroup {
- contents["task"] = newSubtasks(task, pidns, inoGen, cgroupControllers)
- }
- if len(cgroupControllers) > 0 {
- contents["cgroup"] = newTaskOwnedFile(task, inoGen.NextIno(), 0444, newCgroupData(cgroupControllers))
- }
-
- taskInode := &taskInode{task: task}
- // Note: credentials are overridden by taskOwnedInode.
- taskInode.InodeAttrs.Init(task.Credentials(), inoGen.NextIno(), linux.ModeDirectory|0555)
-
- inode := &taskOwnedInode{Inode: taskInode, owner: task}
- dentry := &kernfs.Dentry{}
- dentry.Init(inode)
-
- taskInode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
- links := taskInode.OrderedChildren.Populate(dentry, contents)
- taskInode.IncLinks(links)
-
- return dentry
-}
-
-// Valid implements kernfs.inodeDynamicLookup. This inode remains valid as long
-// as the task is still running. When it's dead, another task with the same
-// PID could replace it.
-func (i *taskInode) Valid(ctx context.Context) bool {
- return i.task.ExitState() != kernel.TaskExitDead
-}
-
-// Open implements kernfs.Inode.
-func (i *taskInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
- fd := &kernfs.GenericDirectoryFD{}
- fd.Init(rp.Mount(), vfsd, &i.OrderedChildren, &opts)
- return fd.VFSFileDescription(), nil
-}
-
-// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
-func (*taskInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
- return syserror.EPERM
-}
-
-// taskOwnedInode implements kernfs.Inode and overrides inode owner with task
-// effective user and group.
-type taskOwnedInode struct {
- kernfs.Inode
-
- // owner is the task that owns this inode.
- owner *kernel.Task
-}
-
-var _ kernfs.Inode = (*taskOwnedInode)(nil)
-
-func newTaskOwnedFile(task *kernel.Task, ino uint64, perm linux.FileMode, inode dynamicInode) *kernfs.Dentry {
- // Note: credentials are overridden by taskOwnedInode.
- inode.Init(task.Credentials(), ino, inode, perm)
-
- taskInode := &taskOwnedInode{Inode: inode, owner: task}
- d := &kernfs.Dentry{}
- d.Init(taskInode)
- return d
-}
-
-func newTaskOwnedDir(task *kernel.Task, ino uint64, perm linux.FileMode, children map[string]*kernfs.Dentry) *kernfs.Dentry {
- dir := &kernfs.StaticDirectory{}
-
- // Note: credentials are overridden by taskOwnedInode.
- dir.Init(task.Credentials(), ino, perm)
-
- inode := &taskOwnedInode{Inode: dir, owner: task}
- d := &kernfs.Dentry{}
- d.Init(inode)
-
- dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
- links := dir.OrderedChildren.Populate(d, children)
- dir.IncLinks(links)
-
- return d
-}
-
-// Stat implements kernfs.Inode.
-func (i *taskOwnedInode) Stat(fs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
- stat, err := i.Inode.Stat(fs, opts)
- if err != nil {
- return linux.Statx{}, err
- }
- if opts.Mask&(linux.STATX_UID|linux.STATX_GID) != 0 {
- uid, gid := i.getOwner(linux.FileMode(stat.Mode))
- if opts.Mask&linux.STATX_UID != 0 {
- stat.UID = uint32(uid)
- }
- if opts.Mask&linux.STATX_GID != 0 {
- stat.GID = uint32(gid)
- }
- }
- return stat, nil
-}
-
-// CheckPermissions implements kernfs.Inode.
-func (i *taskOwnedInode) CheckPermissions(_ context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
- mode := i.Mode()
- uid, gid := i.getOwner(mode)
- return vfs.GenericCheckPermissions(
- creds,
- ats,
- mode.FileType() == linux.ModeDirectory,
- uint16(mode),
- uid,
- gid,
- )
-}
-
-func (i *taskOwnedInode) getOwner(mode linux.FileMode) (auth.KUID, auth.KGID) {
- // By default, set the task owner as the file owner.
- creds := i.owner.Credentials()
- uid := creds.EffectiveKUID
- gid := creds.EffectiveKGID
-
- // Linux doesn't apply dumpability adjustments to world readable/executable
- // directories so that applications can stat /proc/PID to determine the
- // effective UID of a process. See fs/proc/base.c:task_dump_owner.
- if mode.FileType() == linux.ModeDirectory && mode.Permissions() == 0555 {
- return uid, gid
- }
-
- // If the task is not dumpable, then root (in the namespace preferred)
- // owns the file.
- m := getMM(i.owner)
- if m == nil {
- return auth.RootKUID, auth.RootKGID
- }
- if m.Dumpability() != mm.UserDumpable {
- uid = auth.RootKUID
- if kuid := creds.UserNamespace.MapToKUID(auth.RootUID); kuid.Ok() {
- uid = kuid
- }
- gid = auth.RootKGID
- if kgid := creds.UserNamespace.MapToKGID(auth.RootGID); kgid.Ok() {
- gid = kgid
- }
- }
- return uid, gid
-}
-
-func newIO(t *kernel.Task, isThreadGroup bool) *ioData {
- if isThreadGroup {
- return &ioData{ioUsage: t.ThreadGroup()}
- }
- return &ioData{ioUsage: t}
-}
-
-func newNamespaceSymlink(task *kernel.Task, ino uint64, ns string) *kernfs.Dentry {
- // Namespace symlinks should contain the namespace name and the inode number
- // for the namespace instance, so for example user:[123456]. We currently fake
- // the inode number by sticking the symlink inode in its place.
- target := fmt.Sprintf("%s:[%d]", ns, ino)
-
- inode := &kernfs.StaticSymlink{}
- // Note: credentials are overridden by taskOwnedInode.
- inode.Init(task.Credentials(), ino, target)
-
- taskInode := &taskOwnedInode{Inode: inode, owner: task}
- d := &kernfs.Dentry{}
- d.Init(taskInode)
- return d
-}
-
-// newCgroupData creates an inode that shows cgroup information.
-// From man 7 cgroups: "For each cgroup hierarchy of which the process is a
-// member, there is one entry containing three colon-separated fields:
-// hierarchy-ID:controller-list:cgroup-path"
-func newCgroupData(controllers map[string]string) dynamicInode {
- var buf bytes.Buffer
-
- // The hierarchy ids must be positive integers (for cgroup v1), but the
- // exact number does not matter, so long as they are unique. We can
- // just use a counter, but since linux sorts this file in descending
- // order, we must count down to preserve this behavior.
- i := len(controllers)
- for name, dir := range controllers {
- fmt.Fprintf(&buf, "%d:%s:%s\n", i, name, dir)
- i--
- }
- return newStaticFile(buf.String())
-}
diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go
deleted file mode 100644
index 76bfc5307..000000000
--- a/pkg/sentry/fsimpl/proc/task_fds.go
+++ /dev/null
@@ -1,287 +0,0 @@
-// Copyright 2020 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package proc
-
-import (
- "bytes"
- "fmt"
- "sort"
- "strconv"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/refs"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-type fdDir struct {
- inoGen InoGenerator
- task *kernel.Task
-
- // When produceSymlink is set, dirents produced for the FDs are reported
- // as symlinks. Otherwise, they are reported as regular files.
- produceSymlink bool
-}
-
-func (i *fdDir) lookup(name string) (*vfs.FileDescription, kernel.FDFlags, error) {
- fd, err := strconv.ParseUint(name, 10, 64)
- if err != nil {
- return nil, kernel.FDFlags{}, syserror.ENOENT
- }
-
- var (
- file *vfs.FileDescription
- flags kernel.FDFlags
- )
- i.task.WithMuLocked(func(t *kernel.Task) {
- if fdTable := t.FDTable(); fdTable != nil {
- file, flags = fdTable.GetVFS2(int32(fd))
- }
- })
- if file == nil {
- return nil, kernel.FDFlags{}, syserror.ENOENT
- }
- return file, flags, nil
-}
-
-// IterDirents implements kernfs.inodeDynamicLookup.
-func (i *fdDir) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, absOffset, relOffset int64) (int64, error) {
- var fds []int32
- i.task.WithMuLocked(func(t *kernel.Task) {
- if fdTable := t.FDTable(); fdTable != nil {
- fds = fdTable.GetFDs()
- }
- })
-
- offset := absOffset + relOffset
- typ := uint8(linux.DT_REG)
- if i.produceSymlink {
- typ = linux.DT_LNK
- }
-
- // Find the appropriate starting point.
- idx := sort.Search(len(fds), func(i int) bool { return fds[i] >= int32(relOffset) })
- if idx >= len(fds) {
- return offset, nil
- }
- for _, fd := range fds[idx:] {
- dirent := vfs.Dirent{
- Name: strconv.FormatUint(uint64(fd), 10),
- Type: typ,
- Ino: i.inoGen.NextIno(),
- NextOff: offset + 1,
- }
- if err := cb.Handle(dirent); err != nil {
- return offset, err
- }
- offset++
- }
- return offset, nil
-}
-
-// fdDirInode represents the inode for /proc/[pid]/fd directory.
-//
-// +stateify savable
-type fdDirInode struct {
- kernfs.InodeNotSymlink
- kernfs.InodeDirectoryNoNewChildren
- kernfs.InodeAttrs
- kernfs.OrderedChildren
- kernfs.AlwaysValid
- fdDir
-}
-
-var _ kernfs.Inode = (*fdDirInode)(nil)
-
-func newFDDirInode(task *kernel.Task, inoGen InoGenerator) *kernfs.Dentry {
- inode := &fdDirInode{
- fdDir: fdDir{
- inoGen: inoGen,
- task: task,
- produceSymlink: true,
- },
- }
- inode.InodeAttrs.Init(task.Credentials(), inoGen.NextIno(), linux.ModeDirectory|0555)
-
- dentry := &kernfs.Dentry{}
- dentry.Init(inode)
- inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
-
- return dentry
-}
-
-// Lookup implements kernfs.inodeDynamicLookup.
-func (i *fdDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) {
- file, _, err := i.lookup(name)
- if err != nil {
- return nil, err
- }
- taskDentry := newFDSymlink(i.task.Credentials(), file, i.inoGen.NextIno())
- return taskDentry.VFSDentry(), nil
-}
-
-// Open implements kernfs.Inode.
-func (i *fdDirInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
- fd := &kernfs.GenericDirectoryFD{}
- fd.Init(rp.Mount(), vfsd, &i.OrderedChildren, &opts)
- return fd.VFSFileDescription(), nil
-}
-
-// CheckPermissions implements kernfs.Inode.
-//
-// This is to match Linux, which uses a special permission handler to guarantee
-// that a process can still access /proc/self/fd after it has executed
-// setuid. See fs/proc/fd.c:proc_fd_permission.
-func (i *fdDirInode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
- err := i.InodeAttrs.CheckPermissions(ctx, creds, ats)
- if err == nil {
- // Access granted, no extra check needed.
- return nil
- }
- if t := kernel.TaskFromContext(ctx); t != nil {
- // Allow access if the task trying to access it is in the thread group
- // corresponding to this directory.
- if i.task.ThreadGroup() == t.ThreadGroup() {
- // Access granted (overridden).
- return nil
- }
- }
- return err
-}
-
-// fdSymlink is a symlink for the /proc/[pid]/fd/[fd] file.
-//
-// +stateify savable
-type fdSymlink struct {
- refs.AtomicRefCount
- kernfs.InodeAttrs
- kernfs.InodeSymlink
-
- file *vfs.FileDescription
-}
-
-var _ kernfs.Inode = (*fdSymlink)(nil)
-
-func newFDSymlink(creds *auth.Credentials, file *vfs.FileDescription, ino uint64) *kernfs.Dentry {
- file.IncRef()
- inode := &fdSymlink{file: file}
- inode.Init(creds, ino, linux.ModeSymlink|0777)
-
- d := &kernfs.Dentry{}
- d.Init(inode)
- return d
-}
-
-func (s *fdSymlink) Readlink(ctx context.Context) (string, error) {
- root := vfs.RootFromContext(ctx)
- defer root.DecRef()
-
- vfsObj := s.file.VirtualDentry().Mount().Filesystem().VirtualFilesystem()
- return vfsObj.PathnameWithDeleted(ctx, root, s.file.VirtualDentry())
-}
-
-func (s *fdSymlink) DecRef() {
- s.AtomicRefCount.DecRefWithDestructor(func() {
- s.Destroy()
- })
-}
-
-func (s *fdSymlink) Destroy() {
- s.file.DecRef()
-}
-
-// fdInfoDirInode represents the inode for /proc/[pid]/fdinfo directory.
-//
-// +stateify savable
-type fdInfoDirInode struct {
- kernfs.InodeNotSymlink
- kernfs.InodeDirectoryNoNewChildren
- kernfs.InodeAttrs
- kernfs.OrderedChildren
- kernfs.AlwaysValid
- fdDir
-}
-
-var _ kernfs.Inode = (*fdInfoDirInode)(nil)
-
-func newFDInfoDirInode(task *kernel.Task, inoGen InoGenerator) *kernfs.Dentry {
- inode := &fdInfoDirInode{
- fdDir: fdDir{
- inoGen: inoGen,
- task: task,
- },
- }
- inode.InodeAttrs.Init(task.Credentials(), inoGen.NextIno(), linux.ModeDirectory|0555)
-
- dentry := &kernfs.Dentry{}
- dentry.Init(inode)
- inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
-
- return dentry
-}
-
-// Lookup implements kernfs.inodeDynamicLookup.
-func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) {
- file, flags, err := i.lookup(name)
- if err != nil {
- return nil, err
- }
-
- data := &fdInfoData{file: file, flags: flags}
- dentry := newTaskOwnedFile(i.task, i.inoGen.NextIno(), 0444, data)
- return dentry.VFSDentry(), nil
-}
-
-// Open implements kernfs.Inode.
-func (i *fdInfoDirInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
- fd := &kernfs.GenericDirectoryFD{}
- fd.Init(rp.Mount(), vfsd, &i.OrderedChildren, &opts)
- return fd.VFSFileDescription(), nil
-}
-
-// fdInfoData implements vfs.DynamicBytesSource for /proc/[pid]/fdinfo/[fd].
-//
-// +stateify savable
-type fdInfoData struct {
- kernfs.DynamicBytesFile
- refs.AtomicRefCount
-
- file *vfs.FileDescription
- flags kernel.FDFlags
-}
-
-var _ dynamicInode = (*fdInfoData)(nil)
-
-func (d *fdInfoData) DecRef() {
- d.AtomicRefCount.DecRefWithDestructor(d.destroy)
-}
-
-func (d *fdInfoData) destroy() {
- d.file.DecRef()
-}
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (d *fdInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- // TODO(b/121266871): Include pos, locks, and other data. For now we only
- // have flags.
- // See https://www.kernel.org/doc/Documentation/filesystems/proc.txt
- flags := uint(d.file.StatusFlags()) | d.flags.ToLinuxFileFlags()
- fmt.Fprintf(buf, "flags:\t0%o\n", flags)
- return nil
-}
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
deleted file mode 100644
index 8c743df8d..000000000
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ /dev/null
@@ -1,821 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package proc
-
-import (
- "bytes"
- "fmt"
- "io"
- "sort"
- "strings"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/safemem"
- "gvisor.dev/gvisor/pkg/sentry/fs"
- "gvisor.dev/gvisor/pkg/sentry/fsbridge"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/limits"
- "gvisor.dev/gvisor/pkg/sentry/mm"
- "gvisor.dev/gvisor/pkg/sentry/usage"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// mm gets the kernel task's MemoryManager. No additional reference is taken on
-// mm here. This is safe because MemoryManager.destroy is required to leave the
-// MemoryManager in a state where it's still usable as a DynamicBytesSource.
-func getMM(task *kernel.Task) *mm.MemoryManager {
- var tmm *mm.MemoryManager
- task.WithMuLocked(func(t *kernel.Task) {
- if mm := t.MemoryManager(); mm != nil {
- tmm = mm
- }
- })
- return tmm
-}
-
-// getMMIncRef returns t's MemoryManager. If getMMIncRef succeeds, the
-// MemoryManager's users count is incremented, and must be decremented by the
-// caller when it is no longer in use.
-func getMMIncRef(task *kernel.Task) (*mm.MemoryManager, error) {
- if task.ExitState() == kernel.TaskExitDead {
- return nil, syserror.ESRCH
- }
- var m *mm.MemoryManager
- task.WithMuLocked(func(t *kernel.Task) {
- m = t.MemoryManager()
- })
- if m == nil || !m.IncUsers() {
- return nil, io.EOF
- }
- return m, nil
-}
-
-type bufferWriter struct {
- buf *bytes.Buffer
-}
-
-// WriteFromBlocks writes up to srcs.NumBytes() bytes from srcs and returns
-// the number of bytes written. It may return a partial write without an
-// error (i.e. (n, nil) where 0 < n < srcs.NumBytes()). It should not
-// return a full write with an error (i.e. srcs.NumBytes(), err) where err
-// != nil).
-func (w *bufferWriter) WriteFromBlocks(srcs safemem.BlockSeq) (uint64, error) {
- written := srcs.NumBytes()
- for !srcs.IsEmpty() {
- w.buf.Write(srcs.Head().ToSlice())
- srcs = srcs.Tail()
- }
- return written, nil
-}
-
-// auxvData implements vfs.DynamicBytesSource for /proc/[pid]/auxv.
-//
-// +stateify savable
-type auxvData struct {
- kernfs.DynamicBytesFile
-
- task *kernel.Task
-}
-
-var _ dynamicInode = (*auxvData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (d *auxvData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- m, err := getMMIncRef(d.task)
- if err != nil {
- return err
- }
- defer m.DecUsers(ctx)
-
- // Space for buffer with AT_NULL (0) terminator at the end.
- auxv := m.Auxv()
- buf.Grow((len(auxv) + 1) * 16)
- for _, e := range auxv {
- var tmp [8]byte
- usermem.ByteOrder.PutUint64(tmp[:], e.Key)
- buf.Write(tmp[:])
-
- usermem.ByteOrder.PutUint64(tmp[:], uint64(e.Value))
- buf.Write(tmp[:])
- }
- return nil
-}
-
-// execArgType enumerates the types of exec arguments that are exposed through
-// proc.
-type execArgType int
-
-const (
- cmdlineDataArg execArgType = iota
- environDataArg
-)
-
-// cmdlineData implements vfs.DynamicBytesSource for /proc/[pid]/cmdline.
-//
-// +stateify savable
-type cmdlineData struct {
- kernfs.DynamicBytesFile
-
- task *kernel.Task
-
- // arg is the type of exec argument this file contains.
- arg execArgType
-}
-
-var _ dynamicInode = (*cmdlineData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (d *cmdlineData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- m, err := getMMIncRef(d.task)
- if err != nil {
- return err
- }
- defer m.DecUsers(ctx)
-
- // Figure out the bounds of the exec arg we are trying to read.
- var ar usermem.AddrRange
- switch d.arg {
- case cmdlineDataArg:
- ar = usermem.AddrRange{
- Start: m.ArgvStart(),
- End: m.ArgvEnd(),
- }
- case environDataArg:
- ar = usermem.AddrRange{
- Start: m.EnvvStart(),
- End: m.EnvvEnd(),
- }
- default:
- panic(fmt.Sprintf("unknown exec arg type %v", d.arg))
- }
- if ar.Start == 0 || ar.End == 0 {
- // Don't attempt to read before the start/end are set up.
- return io.EOF
- }
-
- // N.B. Technically this should be usermem.IOOpts.IgnorePermissions = true
- // until Linux 4.9 (272ddc8b3735 "proc: don't use FOLL_FORCE for reading
- // cmdline and environment").
- writer := &bufferWriter{buf: buf}
- if n, err := m.CopyInTo(ctx, usermem.AddrRangeSeqOf(ar), writer, usermem.IOOpts{}); n == 0 || err != nil {
- // Nothing to copy or something went wrong.
- return err
- }
-
- // On Linux, if the NULL byte at the end of the argument vector has been
- // overwritten, it continues reading the environment vector as part of
- // the argument vector.
- if d.arg == cmdlineDataArg && buf.Bytes()[buf.Len()-1] != 0 {
- if end := bytes.IndexByte(buf.Bytes(), 0); end != -1 {
- // If we found a NULL character somewhere else in argv, truncate the
- // return up to the NULL terminator (including it).
- buf.Truncate(end)
- return nil
- }
-
- // There is no NULL terminator in the string, return into envp.
- arEnvv := usermem.AddrRange{
- Start: m.EnvvStart(),
- End: m.EnvvEnd(),
- }
-
- // Upstream limits the returned amount to one page of slop.
- // https://elixir.bootlin.com/linux/v4.20/source/fs/proc/base.c#L208
- // we'll return one page total between argv and envp because of the
- // above page restrictions.
- if buf.Len() >= usermem.PageSize {
- // Returned at least one page already, nothing else to add.
- return nil
- }
- remaining := usermem.PageSize - buf.Len()
- if int(arEnvv.Length()) > remaining {
- end, ok := arEnvv.Start.AddLength(uint64(remaining))
- if !ok {
- return syserror.EFAULT
- }
- arEnvv.End = end
- }
- if _, err := m.CopyInTo(ctx, usermem.AddrRangeSeqOf(arEnvv), writer, usermem.IOOpts{}); err != nil {
- return err
- }
-
- // Linux will return envp up to and including the first NULL character,
- // so find it.
- if end := bytes.IndexByte(buf.Bytes()[ar.Length():], 0); end != -1 {
- buf.Truncate(end)
- }
- }
-
- return nil
-}
-
-// +stateify savable
-type commInode struct {
- kernfs.DynamicBytesFile
-
- task *kernel.Task
-}
-
-func newComm(task *kernel.Task, ino uint64, perm linux.FileMode) *kernfs.Dentry {
- inode := &commInode{task: task}
- inode.DynamicBytesFile.Init(task.Credentials(), ino, &commData{task: task}, perm)
-
- d := &kernfs.Dentry{}
- d.Init(inode)
- return d
-}
-
-func (i *commInode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
- // This file can always be read or written by members of the same thread
- // group. See fs/proc/base.c:proc_tid_comm_permission.
- //
- // N.B. This check is currently a no-op as we don't yet support writing and
- // this file is world-readable anyways.
- t := kernel.TaskFromContext(ctx)
- if t != nil && t.ThreadGroup() == i.task.ThreadGroup() && !ats.MayExec() {
- return nil
- }
-
- return i.DynamicBytesFile.CheckPermissions(ctx, creds, ats)
-}
-
-// commData implements vfs.DynamicBytesSource for /proc/[pid]/comm.
-//
-// +stateify savable
-type commData struct {
- kernfs.DynamicBytesFile
-
- task *kernel.Task
-}
-
-var _ dynamicInode = (*commData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (d *commData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- buf.WriteString(d.task.Name())
- buf.WriteString("\n")
- return nil
-}
-
-// idMapData implements vfs.DynamicBytesSource for /proc/[pid]/{gid_map|uid_map}.
-//
-// +stateify savable
-type idMapData struct {
- kernfs.DynamicBytesFile
-
- task *kernel.Task
- gids bool
-}
-
-var _ dynamicInode = (*idMapData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (d *idMapData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- var entries []auth.IDMapEntry
- if d.gids {
- entries = d.task.UserNamespace().GIDMap()
- } else {
- entries = d.task.UserNamespace().UIDMap()
- }
- for _, e := range entries {
- fmt.Fprintf(buf, "%10d %10d %10d\n", e.FirstID, e.FirstParentID, e.Length)
- }
- return nil
-}
-
-// mapsData implements vfs.DynamicBytesSource for /proc/[pid]/maps.
-//
-// +stateify savable
-type mapsData struct {
- kernfs.DynamicBytesFile
-
- task *kernel.Task
-}
-
-var _ dynamicInode = (*mapsData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (d *mapsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- if mm := getMM(d.task); mm != nil {
- mm.ReadMapsDataInto(ctx, buf)
- }
- return nil
-}
-
-// smapsData implements vfs.DynamicBytesSource for /proc/[pid]/smaps.
-//
-// +stateify savable
-type smapsData struct {
- kernfs.DynamicBytesFile
-
- task *kernel.Task
-}
-
-var _ dynamicInode = (*smapsData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (d *smapsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- if mm := getMM(d.task); mm != nil {
- mm.ReadSmapsDataInto(ctx, buf)
- }
- return nil
-}
-
-// +stateify savable
-type taskStatData struct {
- kernfs.DynamicBytesFile
-
- task *kernel.Task
-
- // If tgstats is true, accumulate fault stats (not implemented) and CPU
- // time across all tasks in t's thread group.
- tgstats bool
-
- // pidns is the PID namespace associated with the proc filesystem that
- // includes the file using this statData.
- pidns *kernel.PIDNamespace
-}
-
-var _ dynamicInode = (*taskStatData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (s *taskStatData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- fmt.Fprintf(buf, "%d ", s.pidns.IDOfTask(s.task))
- fmt.Fprintf(buf, "(%s) ", s.task.Name())
- fmt.Fprintf(buf, "%c ", s.task.StateStatus()[0])
- ppid := kernel.ThreadID(0)
- if parent := s.task.Parent(); parent != nil {
- ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
- }
- fmt.Fprintf(buf, "%d ", ppid)
- fmt.Fprintf(buf, "%d ", s.pidns.IDOfProcessGroup(s.task.ThreadGroup().ProcessGroup()))
- fmt.Fprintf(buf, "%d ", s.pidns.IDOfSession(s.task.ThreadGroup().Session()))
- fmt.Fprintf(buf, "0 0 " /* tty_nr tpgid */)
- fmt.Fprintf(buf, "0 " /* flags */)
- fmt.Fprintf(buf, "0 0 0 0 " /* minflt cminflt majflt cmajflt */)
- var cputime usage.CPUStats
- if s.tgstats {
- cputime = s.task.ThreadGroup().CPUStats()
- } else {
- cputime = s.task.CPUStats()
- }
- fmt.Fprintf(buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
- cputime = s.task.ThreadGroup().JoinedChildCPUStats()
- fmt.Fprintf(buf, "%d %d ", linux.ClockTFromDuration(cputime.UserTime), linux.ClockTFromDuration(cputime.SysTime))
- fmt.Fprintf(buf, "%d %d ", s.task.Priority(), s.task.Niceness())
- fmt.Fprintf(buf, "%d ", s.task.ThreadGroup().Count())
-
- // itrealvalue. Since kernel 2.6.17, this field is no longer
- // maintained, and is hard coded as 0.
- fmt.Fprintf(buf, "0 ")
-
- // Start time is relative to boot time, expressed in clock ticks.
- fmt.Fprintf(buf, "%d ", linux.ClockTFromDuration(s.task.StartTime().Sub(s.task.Kernel().Timekeeper().BootTime())))
-
- var vss, rss uint64
- s.task.WithMuLocked(func(t *kernel.Task) {
- if mm := t.MemoryManager(); mm != nil {
- vss = mm.VirtualMemorySize()
- rss = mm.ResidentSetSize()
- }
- })
- fmt.Fprintf(buf, "%d %d ", vss, rss/usermem.PageSize)
-
- // rsslim.
- fmt.Fprintf(buf, "%d ", s.task.ThreadGroup().Limits().Get(limits.Rss).Cur)
-
- fmt.Fprintf(buf, "0 0 0 0 0 " /* startcode endcode startstack kstkesp kstkeip */)
- fmt.Fprintf(buf, "0 0 0 0 0 " /* signal blocked sigignore sigcatch wchan */)
- fmt.Fprintf(buf, "0 0 " /* nswap cnswap */)
- terminationSignal := linux.Signal(0)
- if s.task == s.task.ThreadGroup().Leader() {
- terminationSignal = s.task.ThreadGroup().TerminationSignal()
- }
- fmt.Fprintf(buf, "%d ", terminationSignal)
- fmt.Fprintf(buf, "0 0 0 " /* processor rt_priority policy */)
- fmt.Fprintf(buf, "0 0 0 " /* delayacct_blkio_ticks guest_time cguest_time */)
- fmt.Fprintf(buf, "0 0 0 0 0 0 0 " /* start_data end_data start_brk arg_start arg_end env_start env_end */)
- fmt.Fprintf(buf, "0\n" /* exit_code */)
-
- return nil
-}
-
-// statmData implements vfs.DynamicBytesSource for /proc/[pid]/statm.
-//
-// +stateify savable
-type statmData struct {
- kernfs.DynamicBytesFile
-
- task *kernel.Task
-}
-
-var _ dynamicInode = (*statmData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (s *statmData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- var vss, rss uint64
- s.task.WithMuLocked(func(t *kernel.Task) {
- if mm := t.MemoryManager(); mm != nil {
- vss = mm.VirtualMemorySize()
- rss = mm.ResidentSetSize()
- }
- })
-
- fmt.Fprintf(buf, "%d %d 0 0 0 0 0\n", vss/usermem.PageSize, rss/usermem.PageSize)
- return nil
-}
-
-// statusData implements vfs.DynamicBytesSource for /proc/[pid]/status.
-//
-// +stateify savable
-type statusData struct {
- kernfs.DynamicBytesFile
-
- task *kernel.Task
- pidns *kernel.PIDNamespace
-}
-
-var _ dynamicInode = (*statusData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (s *statusData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- fmt.Fprintf(buf, "Name:\t%s\n", s.task.Name())
- fmt.Fprintf(buf, "State:\t%s\n", s.task.StateStatus())
- fmt.Fprintf(buf, "Tgid:\t%d\n", s.pidns.IDOfThreadGroup(s.task.ThreadGroup()))
- fmt.Fprintf(buf, "Pid:\t%d\n", s.pidns.IDOfTask(s.task))
- ppid := kernel.ThreadID(0)
- if parent := s.task.Parent(); parent != nil {
- ppid = s.pidns.IDOfThreadGroup(parent.ThreadGroup())
- }
- fmt.Fprintf(buf, "PPid:\t%d\n", ppid)
- tpid := kernel.ThreadID(0)
- if tracer := s.task.Tracer(); tracer != nil {
- tpid = s.pidns.IDOfTask(tracer)
- }
- fmt.Fprintf(buf, "TracerPid:\t%d\n", tpid)
- var fds int
- var vss, rss, data uint64
- s.task.WithMuLocked(func(t *kernel.Task) {
- if fdTable := t.FDTable(); fdTable != nil {
- fds = fdTable.Size()
- }
- if mm := t.MemoryManager(); mm != nil {
- vss = mm.VirtualMemorySize()
- rss = mm.ResidentSetSize()
- data = mm.VirtualDataSize()
- }
- })
- fmt.Fprintf(buf, "FDSize:\t%d\n", fds)
- fmt.Fprintf(buf, "VmSize:\t%d kB\n", vss>>10)
- fmt.Fprintf(buf, "VmRSS:\t%d kB\n", rss>>10)
- fmt.Fprintf(buf, "VmData:\t%d kB\n", data>>10)
- fmt.Fprintf(buf, "Threads:\t%d\n", s.task.ThreadGroup().Count())
- creds := s.task.Credentials()
- fmt.Fprintf(buf, "CapInh:\t%016x\n", creds.InheritableCaps)
- fmt.Fprintf(buf, "CapPrm:\t%016x\n", creds.PermittedCaps)
- fmt.Fprintf(buf, "CapEff:\t%016x\n", creds.EffectiveCaps)
- fmt.Fprintf(buf, "CapBnd:\t%016x\n", creds.BoundingCaps)
- fmt.Fprintf(buf, "Seccomp:\t%d\n", s.task.SeccompMode())
- // We unconditionally report a single NUMA node. See
- // pkg/sentry/syscalls/linux/sys_mempolicy.go.
- fmt.Fprintf(buf, "Mems_allowed:\t1\n")
- fmt.Fprintf(buf, "Mems_allowed_list:\t0\n")
- return nil
-}
-
-// ioUsage is the /proc/[pid]/io and /proc/[pid]/task/[tid]/io data provider.
-type ioUsage interface {
- // IOUsage returns the io usage data.
- IOUsage() *usage.IO
-}
-
-// +stateify savable
-type ioData struct {
- kernfs.DynamicBytesFile
-
- ioUsage
-}
-
-var _ dynamicInode = (*ioData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (i *ioData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- io := usage.IO{}
- io.Accumulate(i.IOUsage())
-
- fmt.Fprintf(buf, "char: %d\n", io.CharsRead)
- fmt.Fprintf(buf, "wchar: %d\n", io.CharsWritten)
- fmt.Fprintf(buf, "syscr: %d\n", io.ReadSyscalls)
- fmt.Fprintf(buf, "syscw: %d\n", io.WriteSyscalls)
- fmt.Fprintf(buf, "read_bytes: %d\n", io.BytesRead)
- fmt.Fprintf(buf, "write_bytes: %d\n", io.BytesWritten)
- fmt.Fprintf(buf, "cancelled_write_bytes: %d\n", io.BytesWriteCancelled)
- return nil
-}
-
-// oomScoreAdj is a stub of the /proc/<pid>/oom_score_adj file.
-//
-// +stateify savable
-type oomScoreAdj struct {
- kernfs.DynamicBytesFile
-
- task *kernel.Task
-}
-
-var _ vfs.WritableDynamicBytesSource = (*oomScoreAdj)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (o *oomScoreAdj) Generate(ctx context.Context, buf *bytes.Buffer) error {
- if o.task.ExitState() == kernel.TaskExitDead {
- return syserror.ESRCH
- }
- fmt.Fprintf(buf, "%d\n", o.task.OOMScoreAdj())
- return nil
-}
-
-// Write implements vfs.WritableDynamicBytesSource.Write.
-func (o *oomScoreAdj) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
- if src.NumBytes() == 0 {
- return 0, nil
- }
-
- // Limit input size so as not to impact performance if input size is large.
- src = src.TakeFirst(usermem.PageSize - 1)
-
- var v int32
- n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
- if err != nil {
- return 0, err
- }
-
- if o.task.ExitState() == kernel.TaskExitDead {
- return 0, syserror.ESRCH
- }
- if err := o.task.SetOOMScoreAdj(v); err != nil {
- return 0, err
- }
-
- return n, nil
-}
-
-// exeSymlink is an symlink for the /proc/[pid]/exe file.
-//
-// +stateify savable
-type exeSymlink struct {
- kernfs.InodeAttrs
- kernfs.InodeNoopRefCount
- kernfs.InodeSymlink
-
- task *kernel.Task
-}
-
-var _ kernfs.Inode = (*exeSymlink)(nil)
-
-func newExeSymlink(task *kernel.Task, ino uint64) *kernfs.Dentry {
- inode := &exeSymlink{task: task}
- inode.Init(task.Credentials(), ino, linux.ModeSymlink|0777)
-
- d := &kernfs.Dentry{}
- d.Init(inode)
- return d
-}
-
-// Readlink implements kernfs.Inode.
-func (s *exeSymlink) Readlink(ctx context.Context) (string, error) {
- if !kernel.ContextCanTrace(ctx, s.task, false) {
- return "", syserror.EACCES
- }
-
- // Pull out the executable for /proc/[pid]/exe.
- exec, err := s.executable()
- if err != nil {
- return "", err
- }
- defer exec.DecRef()
-
- return exec.PathnameWithDeleted(ctx), nil
-}
-
-func (s *exeSymlink) executable() (file fsbridge.File, err error) {
- s.task.WithMuLocked(func(t *kernel.Task) {
- mm := t.MemoryManager()
- if mm == nil {
- // TODO(b/34851096): Check shouldn't allow Readlink once the
- // Task is zombied.
- err = syserror.EACCES
- return
- }
-
- // The MemoryManager may be destroyed, in which case
- // MemoryManager.destroy will simply set the executable to nil
- // (with locks held).
- file = mm.Executable()
- if file == nil {
- err = syserror.ENOENT
- }
- })
- return
-}
-
-// forEachMountSource runs f for the process root mount and each mount that is
-// a descendant of the root.
-func forEachMount(t *kernel.Task, fn func(string, *fs.Mount)) {
- var fsctx *kernel.FSContext
- t.WithMuLocked(func(t *kernel.Task) {
- fsctx = t.FSContext()
- })
- if fsctx == nil {
- // The task has been destroyed. Nothing to show here.
- return
- }
-
- // All mount points must be relative to the rootDir, and mounts outside
- // will be excluded.
- rootDir := fsctx.RootDirectory()
- if rootDir == nil {
- // The task has been destroyed. Nothing to show here.
- return
- }
- defer rootDir.DecRef()
-
- mnt := t.MountNamespace().FindMount(rootDir)
- if mnt == nil {
- // Has it just been unmounted?
- return
- }
- ms := t.MountNamespace().AllMountsUnder(mnt)
- sort.Slice(ms, func(i, j int) bool {
- return ms[i].ID < ms[j].ID
- })
- for _, m := range ms {
- mroot := m.Root()
- if mroot == nil {
- continue // No longer valid.
- }
- mountPath, desc := mroot.FullName(rootDir)
- mroot.DecRef()
- if !desc {
- // MountSources that are not descendants of the chroot jail are ignored.
- continue
- }
- fn(mountPath, m)
- }
-}
-
-// mountInfoData is used to implement /proc/[pid]/mountinfo.
-//
-// +stateify savable
-type mountInfoData struct {
- kernfs.DynamicBytesFile
-
- task *kernel.Task
-}
-
-var _ dynamicInode = (*mountInfoData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (i *mountInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- forEachMount(i.task, func(mountPath string, m *fs.Mount) {
- mroot := m.Root()
- if mroot == nil {
- return // No longer valid.
- }
- defer mroot.DecRef()
-
- // Format:
- // 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
- // (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
-
- // (1) MountSource ID.
- fmt.Fprintf(buf, "%d ", m.ID)
-
- // (2) Parent ID (or this ID if there is no parent).
- pID := m.ID
- if !m.IsRoot() && !m.IsUndo() {
- pID = m.ParentID
- }
- fmt.Fprintf(buf, "%d ", pID)
-
- // (3) Major:Minor device ID. We don't have a superblock, so we
- // just use the root inode device number.
- sa := mroot.Inode.StableAttr
- fmt.Fprintf(buf, "%d:%d ", sa.DeviceFileMajor, sa.DeviceFileMinor)
-
- // (4) Root: the pathname of the directory in the filesystem
- // which forms the root of this mount.
- //
- // NOTE(b/78135857): This will always be "/" until we implement
- // bind mounts.
- fmt.Fprintf(buf, "/ ")
-
- // (5) Mount point (relative to process root).
- fmt.Fprintf(buf, "%s ", mountPath)
-
- // (6) Mount options.
- flags := mroot.Inode.MountSource.Flags
- opts := "rw"
- if flags.ReadOnly {
- opts = "ro"
- }
- if flags.NoAtime {
- opts += ",noatime"
- }
- if flags.NoExec {
- opts += ",noexec"
- }
- fmt.Fprintf(buf, "%s ", opts)
-
- // (7) Optional fields: zero or more fields of the form "tag[:value]".
- // (8) Separator: the end of the optional fields is marked by a single hyphen.
- fmt.Fprintf(buf, "- ")
-
- // (9) Filesystem type.
- fmt.Fprintf(buf, "%s ", mroot.Inode.MountSource.FilesystemType)
-
- // (10) Mount source: filesystem-specific information or "none".
- fmt.Fprintf(buf, "none ")
-
- // (11) Superblock options, and final newline.
- fmt.Fprintf(buf, "%s\n", superBlockOpts(mountPath, mroot.Inode.MountSource))
- })
- return nil
-}
-
-func superBlockOpts(mountPath string, msrc *fs.MountSource) string {
- // gVisor doesn't (yet) have a concept of super block options, so we
- // use the ro/rw bit from the mount flag.
- opts := "rw"
- if msrc.Flags.ReadOnly {
- opts = "ro"
- }
-
- // NOTE(b/147673608): If the mount is a cgroup, we also need to include
- // the cgroup name in the options. For now we just read that from the
- // path.
- // TODO(gvisor.dev/issues/190): Once gVisor has full cgroup support, we
- // should get this value from the cgroup itself, and not rely on the
- // path.
- if msrc.FilesystemType == "cgroup" {
- splitPath := strings.Split(mountPath, "/")
- cgroupType := splitPath[len(splitPath)-1]
- opts += "," + cgroupType
- }
- return opts
-}
-
-// mountsData is used to implement /proc/[pid]/mounts.
-//
-// +stateify savable
-type mountsData struct {
- kernfs.DynamicBytesFile
-
- task *kernel.Task
-}
-
-var _ dynamicInode = (*mountInfoData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (i *mountsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- forEachMount(i.task, func(mountPath string, m *fs.Mount) {
- // Format:
- // <special device or remote filesystem> <mount point> <filesystem type> <mount options> <needs dump> <fsck order>
- //
- // We use the filesystem name as the first field, since there
- // is no real block device we can point to, and we also should
- // not expose anything about the remote filesystem.
- //
- // Only ro/rw option is supported for now.
- //
- // The "needs dump"and fsck flags are always 0, which is allowed.
- root := m.Root()
- if root == nil {
- return // No longer valid.
- }
- defer root.DecRef()
-
- flags := root.Inode.MountSource.Flags
- opts := "rw"
- if flags.ReadOnly {
- opts = "ro"
- }
- fmt.Fprintf(buf, "%s %s %s %s %d %d\n", "none", mountPath, root.Inode.MountSource.FilesystemType, opts, 0, 0)
- })
- return nil
-}
diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go
deleted file mode 100644
index 373a7b17d..000000000
--- a/pkg/sentry/fsimpl/proc/task_net.go
+++ /dev/null
@@ -1,790 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package proc
-
-import (
- "bytes"
- "fmt"
- "io"
- "reflect"
- "time"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/log"
- "gvisor.dev/gvisor/pkg/sentry/fs"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
- "gvisor.dev/gvisor/pkg/sentry/inet"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/socket"
- "gvisor.dev/gvisor/pkg/sentry/socket/unix"
- "gvisor.dev/gvisor/pkg/sentry/socket/unix/transport"
- "gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/tcpip/header"
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-func newTaskNetDir(task *kernel.Task, inoGen InoGenerator) *kernfs.Dentry {
- k := task.Kernel()
- pidns := task.PIDNamespace()
- root := auth.NewRootCredentials(pidns.UserNamespace())
-
- var contents map[string]*kernfs.Dentry
- if stack := task.NetworkNamespace().Stack(); stack != nil {
- const (
- arp = "IP address HW type Flags HW address Mask Device\n"
- netlink = "sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode\n"
- packet = "sk RefCnt Type Proto Iface R Rmem User Inode\n"
- protocols = "protocol size sockets memory press maxhdr slab module cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n"
- ptype = "Type Device Function\n"
- upd6 = " sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n"
- )
- psched := fmt.Sprintf("%08x %08x %08x %08x\n", uint64(time.Microsecond/time.Nanosecond), 64, 1000000, uint64(time.Second/time.Nanosecond))
-
- // TODO(gvisor.dev/issue/1833): Make sure file contents reflect the task
- // network namespace.
- contents = map[string]*kernfs.Dentry{
- "dev": newDentry(root, inoGen.NextIno(), 0444, &netDevData{stack: stack}),
- "snmp": newDentry(root, inoGen.NextIno(), 0444, &netSnmpData{stack: stack}),
-
- // The following files are simple stubs until they are implemented in
- // netstack, if the file contains a header the stub is just the header
- // otherwise it is an empty file.
- "arp": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(arp)),
- "netlink": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(netlink)),
- "netstat": newDentry(root, inoGen.NextIno(), 0444, &netStatData{}),
- "packet": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(packet)),
- "protocols": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(protocols)),
-
- // Linux sets psched values to: nsec per usec, psched tick in ns, 1000000,
- // high res timer ticks per sec (ClockGetres returns 1ns resolution).
- "psched": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(psched)),
- "ptype": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(ptype)),
- "route": newDentry(root, inoGen.NextIno(), 0444, &netRouteData{stack: stack}),
- "tcp": newDentry(root, inoGen.NextIno(), 0444, &netTCPData{kernel: k}),
- "udp": newDentry(root, inoGen.NextIno(), 0444, &netUDPData{kernel: k}),
- "unix": newDentry(root, inoGen.NextIno(), 0444, &netUnixData{kernel: k}),
- }
-
- if stack.SupportsIPv6() {
- contents["if_inet6"] = newDentry(root, inoGen.NextIno(), 0444, &ifinet6{stack: stack})
- contents["ipv6_route"] = newDentry(root, inoGen.NextIno(), 0444, newStaticFile(""))
- contents["tcp6"] = newDentry(root, inoGen.NextIno(), 0444, &netTCP6Data{kernel: k})
- contents["udp6"] = newDentry(root, inoGen.NextIno(), 0444, newStaticFile(upd6))
- }
- }
-
- return newTaskOwnedDir(task, inoGen.NextIno(), 0555, contents)
-}
-
-// ifinet6 implements vfs.DynamicBytesSource for /proc/net/if_inet6.
-//
-// +stateify savable
-type ifinet6 struct {
- kernfs.DynamicBytesFile
-
- stack inet.Stack
-}
-
-var _ dynamicInode = (*ifinet6)(nil)
-
-func (n *ifinet6) contents() []string {
- var lines []string
- nics := n.stack.Interfaces()
- for id, naddrs := range n.stack.InterfaceAddrs() {
- nic, ok := nics[id]
- if !ok {
- // NIC was added after NICNames was called. We'll just ignore it.
- continue
- }
-
- for _, a := range naddrs {
- // IPv6 only.
- if a.Family != linux.AF_INET6 {
- continue
- }
-
- // Fields:
- // IPv6 address displayed in 32 hexadecimal chars without colons
- // Netlink device number (interface index) in hexadecimal (use nic id)
- // Prefix length in hexadecimal
- // Scope value (use 0)
- // Interface flags
- // Device name
- lines = append(lines, fmt.Sprintf("%032x %02x %02x %02x %02x %8s\n", a.Addr, id, a.PrefixLen, 0, a.Flags, nic.Name))
- }
- }
- return lines
-}
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (n *ifinet6) Generate(ctx context.Context, buf *bytes.Buffer) error {
- for _, l := range n.contents() {
- buf.WriteString(l)
- }
- return nil
-}
-
-// netDevData implements vfs.DynamicBytesSource for /proc/net/dev.
-//
-// +stateify savable
-type netDevData struct {
- kernfs.DynamicBytesFile
-
- stack inet.Stack
-}
-
-var _ dynamicInode = (*netDevData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (n *netDevData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- interfaces := n.stack.Interfaces()
- buf.WriteString("Inter-| Receive | Transmit\n")
- buf.WriteString(" face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed\n")
-
- for _, i := range interfaces {
- // Implements the same format as
- // net/core/net-procfs.c:dev_seq_printf_stats.
- var stats inet.StatDev
- if err := n.stack.Statistics(&stats, i.Name); err != nil {
- log.Warningf("Failed to retrieve interface statistics for %v: %v", i.Name, err)
- continue
- }
- fmt.Fprintf(
- buf,
- "%6s: %7d %7d %4d %4d %4d %5d %10d %9d %8d %7d %4d %4d %4d %5d %7d %10d\n",
- i.Name,
- // Received
- stats[0], // bytes
- stats[1], // packets
- stats[2], // errors
- stats[3], // dropped
- stats[4], // fifo
- stats[5], // frame
- stats[6], // compressed
- stats[7], // multicast
- // Transmitted
- stats[8], // bytes
- stats[9], // packets
- stats[10], // errors
- stats[11], // dropped
- stats[12], // fifo
- stats[13], // frame
- stats[14], // compressed
- stats[15], // multicast
- )
- }
-
- return nil
-}
-
-// netUnixData implements vfs.DynamicBytesSource for /proc/net/unix.
-//
-// +stateify savable
-type netUnixData struct {
- kernfs.DynamicBytesFile
-
- kernel *kernel.Kernel
-}
-
-var _ dynamicInode = (*netUnixData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-//
-// It writes a header line followed by one line per AF_UNIX socket found in
-// the kernel's socket table. Sockets of other families are skipped. Every
-// reference obtained from the socket table entry is released with DecRef
-// before moving on to the next entry.
-func (n *netUnixData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- buf.WriteString("Num RefCount Protocol Flags Type St Inode Path\n")
- for _, se := range n.kernel.ListSockets() {
- s := se.Sock.Get()
- if s == nil {
- log.Debugf("Couldn't resolve weakref %v in socket table, racing with destruction?", se.Sock)
- continue
- }
- sfile := s.(*fs.File)
- if family, _, _ := sfile.FileOperations.(socket.Socket).Type(); family != linux.AF_UNIX {
- s.DecRef()
- // Not a unix socket.
- continue
- }
- sops := sfile.FileOperations.(*unix.SocketOperations)
-
- // A failure to read the local address is not fatal: the entry is still
- // listed, with a placeholder in the Path column.
- addr, err := sops.Endpoint().GetLocalAddress()
- if err != nil {
- log.Warningf("Failed to retrieve socket name from %+v: %v", sfile, err)
- addr.Addr = "<unknown>"
- }
-
- sockFlags := 0
- if ce, ok := sops.Endpoint().(transport.ConnectingEndpoint); ok {
- if ce.Listening() {
- // For unix domain sockets, linux reports a single flag
- // value if the socket is listening, of __SO_ACCEPTCON.
- sockFlags = linux.SO_ACCEPTCON
- }
- }
-
- // In the socket entry below, the value for the 'Num' field requires
- // some consideration. Linux prints the address to the struct
- // unix_sock representing a socket in the kernel, but may redact the
- // value for unprivileged users depending on the kptr_restrict
- // sysctl.
- //
- // One use for this field is to allow a privileged user to
- // introspect into the kernel memory to determine information about
- // a socket not available through procfs, such as the socket's peer.
- //
- // In gvisor, returning a pointer to our internal structures would
- // be pointless, as it wouldn't match the memory layout for struct
- // unix_sock, making introspection difficult. We could populate a
- // struct unix_sock with the appropriate data, but even that
- // requires consideration for which kernel version to emulate, as
- // the definition of this struct changes over time.
- //
- // For now, we always redact this pointer.
- fmt.Fprintf(buf, "%#016p: %08X %08X %08X %04X %02X %5d",
- (*unix.SocketOperations)(nil), // Num, pointer to kernel socket struct.
- sfile.ReadRefs()-1, // RefCount, don't count our own ref.
- 0, // Protocol, always 0 for UDS.
- sockFlags, // Flags.
- sops.Endpoint().Type(), // Type.
- sops.State(), // State.
- sfile.InodeID(), // Inode.
- )
-
- // Path
- if len(addr.Addr) != 0 {
- if addr.Addr[0] == 0 {
- // Abstract path: the leading NUL byte is shown as '@'.
- fmt.Fprintf(buf, " @%s", string(addr.Addr[1:]))
- } else {
- fmt.Fprintf(buf, " %s", string(addr.Addr))
- }
- }
- fmt.Fprintf(buf, "\n")
-
- // Release the reference taken by se.Sock.Get() above.
- s.DecRef()
- }
- return nil
-}
-
-func networkToHost16(n uint16) uint16 {
- // n is in network byte order, so is big-endian. The most-significant byte
- // should be stored in the lower address.
- //
- // We manually inline binary.BigEndian.Uint16() because Go does not support
- // non-primitive consts, so binary.BigEndian is a (mutable) var, so calls to
- // binary.BigEndian.Uint16() require a read of binary.BigEndian and an
- // interface method call, defeating inlining.
- buf := [2]byte{byte(n >> 8 & 0xff), byte(n & 0xff)}
- return usermem.ByteOrder.Uint16(buf[:])
-}
-
-// writeInetAddr writes the socket address i to w as "ADDR:PORT " in the
-// hexadecimal layout used by the /proc/net socket tables.
-//
-// family selects the concrete type expected behind i: *linux.SockAddrInet for
-// AF_INET and *linux.SockAddrInet6 for AF_INET6. A nil i is written as the
-// zero address and port. Any other family writes nothing.
-func writeInetAddr(w io.Writer, family int, i linux.SockAddr) {
-	if family == linux.AF_INET {
-		var sa linux.SockAddrInet
-		if i != nil {
-			sa = *i.(*linux.SockAddrInet)
-		}
-
-		// The port is kept in network byte order but Linux prints it as a
-		// host-order number (see get_tcp4_sock() and udp4_format_sock()), so
-		// it is converted before formatting with %X.
-		hostPort := networkToHost16(sa.Port)
-
-		// The address bytes are big-endian (most-significant byte at index
-		// 0). Linux holds them in a __be32 and prints that with %X, which on
-		// a little-endian machine shows the least-significant address byte
-		// first. Reading the bytes with usermem.ByteOrder reproduces exactly
-		// that value; on a big-endian machine the read is a no-op
-		// reinterpretation.
-		word := usermem.ByteOrder.Uint32(sa.Addr[:])
-
-		fmt.Fprintf(w, "%08X:%04X ", word, hostPort)
-		return
-	}
-
-	if family == linux.AF_INET6 {
-		var sa linux.SockAddrInet6
-		if i != nil {
-			sa = *i.(*linux.SockAddrInet6)
-		}
-
-		// Same treatment as IPv4, applied to each of the four 32-bit words.
-		hostPort := networkToHost16(sa.Port)
-		fmt.Fprintf(
-			w,
-			"%08X%08X%08X%08X:%04X ",
-			usermem.ByteOrder.Uint32(sa.Addr[0:4]),
-			usermem.ByteOrder.Uint32(sa.Addr[4:8]),
-			usermem.ByteOrder.Uint32(sa.Addr[8:12]),
-			usermem.ByteOrder.Uint32(sa.Addr[12:16]),
-			hostPort,
-		)
-	}
-}
-
-// commonGenerateTCP writes one line to buf for every SOCK_STREAM socket of the
-// given address family found in k's socket table. It is shared by the
-// /proc/net/tcp and /proc/net/tcp6 generators, which write their own header
-// line before calling it.
-func commonGenerateTCP(ctx context.Context, buf *bytes.Buffer, k *kernel.Kernel, family int) error {
- // t may be nil here if our caller is not part of a task goroutine. This can
- // happen for example if we're here for "sentryctl cat". When t is nil,
- // degrade gracefully and retrieve what we can.
- t := kernel.TaskFromContext(ctx)
-
- for _, se := range k.ListSockets() {
- s := se.Sock.Get()
- if s == nil {
- log.Debugf("Couldn't resolve weakref with ID %v in socket table, racing with destruction?", se.ID)
- continue
- }
- sfile := s.(*fs.File)
- sops, ok := sfile.FileOperations.(socket.Socket)
- if !ok {
- panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile))
- }
- if fa, stype, _ := sops.Type(); !(family == fa && stype == linux.SOCK_STREAM) {
- s.DecRef()
- // Not a stream socket of the requested family.
- continue
- }
-
- // Linux's documentation for the fields below can be found at
- // https://www.kernel.org/doc/Documentation/networking/proc_net_tcp.txt.
- // For Linux's implementation, see net/ipv4/tcp_ipv4.c:get_tcp4_sock().
- // Note that the header doesn't contain labels for all the fields.
-
- // Field: sl; entry number.
- fmt.Fprintf(buf, "%4d: ", se.ID)
-
- // Field: local_address. Left nil (printed as zeros) when there is no
- // task or the name cannot be retrieved.
- var localAddr linux.SockAddr
- if t != nil {
- if local, _, err := sops.GetSockName(t); err == nil {
- localAddr = local
- }
- }
- writeInetAddr(buf, family, localAddr)
-
- // Field: rem_address. Left nil (printed as zeros) when there is no
- // task or the peer name cannot be retrieved.
- var remoteAddr linux.SockAddr
- if t != nil {
- if remote, _, err := sops.GetPeerName(t); err == nil {
- remoteAddr = remote
- }
- }
- writeInetAddr(buf, family, remoteAddr)
-
- // Field: state; socket state.
- fmt.Fprintf(buf, "%02X ", sops.State())
-
- // Field: tx_queue, rx_queue; number of packets in the transmit and
- // receive queue. Unimplemented.
- fmt.Fprintf(buf, "%08X:%08X ", 0, 0)
-
- // Field: tr, tm->when; timer active state and number of jiffies
- // until timer expires. Unimplemented.
- fmt.Fprintf(buf, "%02X:%08X ", 0, 0)
-
- // Field: retrnsmt; number of unrecovered RTO timeouts.
- // Unimplemented.
- fmt.Fprintf(buf, "%08X ", 0)
-
- // Field: uid. Reported as 0 when the attributes cannot be read.
- uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx)
- if err != nil {
- log.Warningf("Failed to retrieve unstable attr for socket file: %v", err)
- fmt.Fprintf(buf, "%5d ", 0)
- } else {
- creds := auth.CredentialsFromContext(ctx)
- fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(creds.UserNamespace).OrOverflow()))
- }
-
- // Field: timeout; number of unanswered 0-window probes.
- // Unimplemented.
- fmt.Fprintf(buf, "%8d ", 0)
-
- // Field: inode.
- fmt.Fprintf(buf, "%8d ", sfile.InodeID())
-
- // Field: refcount. Don't count the ref we obtain while dereferencing
- // the weakref to this socket.
- fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1)
-
- // Field: Socket struct address. Redacted due to the same reason as
- // the 'Num' field in /proc/net/unix, see netUnixData.Generate.
- fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil))
-
- // Field: retransmit timeout. Unimplemented.
- fmt.Fprintf(buf, "%d ", 0)
-
- // Field: predicted tick of soft clock (delayed ACK control data).
- // Unimplemented.
- fmt.Fprintf(buf, "%d ", 0)
-
- // Field: (ack.quick<<1)|ack.pingpong, Unimplemented.
- fmt.Fprintf(buf, "%d ", 0)
-
- // Field: sending congestion window, Unimplemented.
- fmt.Fprintf(buf, "%d ", 0)
-
- // Field: Slow start size threshold, -1 if threshold >= 0xFFFF.
- // Unimplemented, report as large threshold.
- fmt.Fprintf(buf, "%d", -1)
-
- fmt.Fprintf(buf, "\n")
-
- // Release the reference taken by se.Sock.Get() above.
- s.DecRef()
- }
-
- return nil
-}
-
-// netTCPData implements vfs.DynamicBytesSource for /proc/net/tcp.
-//
-// +stateify savable
-type netTCPData struct {
- kernfs.DynamicBytesFile
-
- // kernel is passed to commonGenerateTCP, which walks its socket table.
- kernel *kernel.Kernel
-}
-
-// Compile-time check that netTCPData satisfies dynamicInode.
-var _ dynamicInode = (*netTCPData)(nil)
-
-func (d *netTCPData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- buf.WriteString(" sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode \n")
- return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET)
-}
-
-// netTCP6Data implements vfs.DynamicBytesSource for /proc/net/tcp6.
-//
-// +stateify savable
-type netTCP6Data struct {
- kernfs.DynamicBytesFile
-
- // kernel is passed to commonGenerateTCP, which walks its socket table.
- kernel *kernel.Kernel
-}
-
-// Compile-time check that netTCP6Data satisfies dynamicInode.
-var _ dynamicInode = (*netTCP6Data)(nil)
-
-func (d *netTCP6Data) Generate(ctx context.Context, buf *bytes.Buffer) error {
- buf.WriteString(" sl local_address remote_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode\n")
- return commonGenerateTCP(ctx, buf, d.kernel, linux.AF_INET6)
-}
-
-// netUDPData implements vfs.DynamicBytesSource for /proc/net/udp.
-//
-// +stateify savable
-type netUDPData struct {
- kernfs.DynamicBytesFile
-
- // kernel provides the socket table (via ListSockets) that Generate walks.
- kernel *kernel.Kernel
-}
-
-// Compile-time check that netUDPData satisfies dynamicInode.
-var _ dynamicInode = (*netUDPData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-//
-// It writes one line per AF_INET datagram socket found in the kernel's socket
-// table. No header line is written by this function.
-func (d *netUDPData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- // t may be nil here if our caller is not part of a task goroutine. This can
- // happen for example if we're here for "sentryctl cat". When t is nil,
- // degrade gracefully and retrieve what we can.
- t := kernel.TaskFromContext(ctx)
-
- for _, se := range d.kernel.ListSockets() {
- s := se.Sock.Get()
- if s == nil {
- log.Debugf("Couldn't resolve weakref with ID %v in socket table, racing with destruction?", se.ID)
- continue
- }
- sfile := s.(*fs.File)
- sops, ok := sfile.FileOperations.(socket.Socket)
- if !ok {
- panic(fmt.Sprintf("Found non-socket file in socket table: %+v", sfile))
- }
- if family, stype, _ := sops.Type(); family != linux.AF_INET || stype != linux.SOCK_DGRAM {
- s.DecRef()
- // Not udp4 socket.
- continue
- }
-
- // For Linux's implementation, see net/ipv4/udp.c:udp4_format_sock().
-
- // Field: sl; entry number.
- fmt.Fprintf(buf, "%5d: ", se.ID)
-
- // Field: local_address. Stays zero when there is no task or the name
- // cannot be retrieved.
- // NOTE(review): the unchecked assertion assumes GetSockName returns a
- // non-nil *linux.SockAddrInet for AF_INET sockets; confirm.
- var localAddr linux.SockAddrInet
- if t != nil {
- if local, _, err := sops.GetSockName(t); err == nil {
- localAddr = *local.(*linux.SockAddrInet)
- }
- }
- writeInetAddr(buf, linux.AF_INET, &localAddr)
-
- // Field: rem_address. Stays zero when there is no task or the peer name
- // cannot be retrieved.
- var remoteAddr linux.SockAddrInet
- if t != nil {
- if remote, _, err := sops.GetPeerName(t); err == nil {
- remoteAddr = *remote.(*linux.SockAddrInet)
- }
- }
- writeInetAddr(buf, linux.AF_INET, &remoteAddr)
-
- // Field: state; socket state.
- fmt.Fprintf(buf, "%02X ", sops.State())
-
- // Field: tx_queue, rx_queue; number of packets in the transmit and
- // receive queue. Unimplemented.
- fmt.Fprintf(buf, "%08X:%08X ", 0, 0)
-
- // Field: tr, tm->when. Always 0 for UDP.
- fmt.Fprintf(buf, "%02X:%08X ", 0, 0)
-
- // Field: retrnsmt. Always 0 for UDP.
- fmt.Fprintf(buf, "%08X ", 0)
-
- // Field: uid. Reported as 0 when the attributes cannot be read.
- uattr, err := sfile.Dirent.Inode.UnstableAttr(ctx)
- if err != nil {
- log.Warningf("Failed to retrieve unstable attr for socket file: %v", err)
- fmt.Fprintf(buf, "%5d ", 0)
- } else {
- creds := auth.CredentialsFromContext(ctx)
- fmt.Fprintf(buf, "%5d ", uint32(uattr.Owner.UID.In(creds.UserNamespace).OrOverflow()))
- }
-
- // Field: timeout. Always 0 for UDP.
- fmt.Fprintf(buf, "%8d ", 0)
-
- // Field: inode.
- fmt.Fprintf(buf, "%8d ", sfile.InodeID())
-
- // Field: ref; reference count on the socket inode. Don't count the ref
- // we obtain while dereferencing the weakref to this socket.
- fmt.Fprintf(buf, "%d ", sfile.ReadRefs()-1)
-
- // Field: Socket struct address. Redacted due to the same reason as
- // the 'Num' field in /proc/net/unix, see netUnixData.Generate.
- fmt.Fprintf(buf, "%#016p ", (*socket.Socket)(nil))
-
- // Field: drops; number of dropped packets. Unimplemented.
- fmt.Fprintf(buf, "%d", 0)
-
- fmt.Fprintf(buf, "\n")
-
- // Release the reference taken by se.Sock.Get() above.
- s.DecRef()
- }
- return nil
-}
-
-// netSnmpData implements vfs.DynamicBytesSource for /proc/net/snmp.
-//
-// +stateify savable
-type netSnmpData struct {
- kernfs.DynamicBytesFile
-
- // stack is queried through Statistics for each protocol group.
- stack inet.Stack
-}
-
-// Compile-time check that netSnmpData satisfies dynamicInode.
-var _ dynamicInode = (*netSnmpData)(nil)
-
-// snmpLine describes one protocol group of /proc/net/snmp.
-type snmpLine struct {
- // prefix is the group name printed before the colon on each line. It is
- // also the argument passed to inet.Stack.Statistics for that group.
- prefix string
- // header is the space-separated list of counter names for the group.
- header string
-}
-
-// snmp lists the groups of /proc/net/snmp in output order.
-// netSnmpData.Generate indexes this slice in parallel with its own list of
-// statistics containers, so the two must stay in the same order. The
-// "IcmpMsg" entry has no header.
-var snmp = []snmpLine{
- {
- prefix: "Ip",
- header: "Forwarding DefaultTTL InReceives InHdrErrors InAddrErrors ForwDatagrams InUnknownProtos InDiscards InDelivers OutRequests OutDiscards OutNoRoutes ReasmTimeout ReasmReqds ReasmOKs ReasmFails FragOKs FragFails FragCreates",
- },
- {
- prefix: "Icmp",
- header: "InMsgs InErrors InCsumErrors InDestUnreachs InTimeExcds InParmProbs InSrcQuenchs InRedirects InEchos InEchoReps InTimestamps InTimestampReps InAddrMasks InAddrMaskReps OutMsgs OutErrors OutDestUnreachs OutTimeExcds OutParmProbs OutSrcQuenchs OutRedirects OutEchos OutEchoReps OutTimestamps OutTimestampReps OutAddrMasks OutAddrMaskReps",
- },
- {
- prefix: "IcmpMsg",
- },
- {
- prefix: "Tcp",
- header: "RtoAlgorithm RtoMin RtoMax MaxConn ActiveOpens PassiveOpens AttemptFails EstabResets CurrEstab InSegs OutSegs RetransSegs InErrs OutRsts InCsumErrors",
- },
- {
- prefix: "Udp",
- header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti",
- },
- {
- prefix: "UdpLite",
- header: "InDatagrams NoPorts InErrors OutDatagrams RcvbufErrors SndbufErrors InCsumErrors IgnoredMulti",
- },
-}
-
-// toSlice returns a []uint64 view over a, which must be a pointer to an array
-// of uint64 (or a type whose underlying type is one). It panics for any other
-// kind of argument.
-func toSlice(a interface{}) []uint64 {
-	arr := reflect.ValueOf(a)
-	arr = reflect.Indirect(arr)
-	n := arr.Len()
-	full := arr.Slice(0, n)
-	return full.Interface().([]uint64)
-}
-
-// sprintSlice formats the elements of s as decimal numbers separated by
-// single spaces. An empty or nil slice yields the empty string.
-func sprintSlice(s []uint64) string {
-	if len(s) > 0 {
-		// fmt renders a slice as "[a b c]"; drop the surrounding brackets.
-		bracketed := fmt.Sprint(s)
-		return bracketed[1 : len(bracketed)-1]
-	}
-	return ""
-}
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-//
-// For each group in snmp it writes two lines to buf: "<prefix>: <header>"
-// followed by "<prefix>: <values>". Groups without a statistics container
-// (currently IcmpMsg) are written as two bare "<prefix>:" lines. When the
-// stack fails to provide statistics for a group the failure is logged and the
-// container's current (zero) values are printed, so the file keeps its shape.
-func (d *netSnmpData) Generate(ctx context.Context, buf *bytes.Buffer) error {
-	// types is indexed in parallel with snmp; keep the two in the same order.
-	types := []interface{}{
-		&inet.StatSNMPIP{},
-		&inet.StatSNMPICMP{},
-		nil, // TODO(gvisor.dev/issue/628): Support IcmpMsg stats.
-		&inet.StatSNMPTCP{},
-		&inet.StatSNMPUDP{},
-		&inet.StatSNMPUDPLite{},
-	}
-	for i, stat := range types {
-		line := snmp[i]
-		if stat == nil {
-			fmt.Fprintf(buf, "%s:\n", line.prefix)
-			fmt.Fprintf(buf, "%s:\n", line.prefix)
-			continue
-		}
-		if err := d.stack.Statistics(stat, line.prefix); err != nil {
-			// An unsupported group is expected on some stacks, so it is
-			// logged at a lower severity than other failures.
-			if err == syserror.EOPNOTSUPP {
-				log.Infof("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err)
-			} else {
-				log.Warningf("Failed to retrieve %s of /proc/net/snmp: %v", line.prefix, err)
-			}
-		}
-
-		fmt.Fprintf(buf, "%s: %s\n", line.prefix, line.header)
-
-		// The value line must be written to buf. Formatting it with
-		// fmt.Sprintf and dropping the result would leave the file with
-		// header lines only.
-		if line.prefix == "Tcp" {
-			tcp := stat.(*inet.StatSNMPTCP)
-			// "Tcp" needs special processing because MaxConn is signed. RFC 2012.
-			fmt.Fprintf(buf, "%s: %s %d %s\n", line.prefix, sprintSlice(tcp[:3]), int64(tcp[3]), sprintSlice(tcp[4:]))
-		} else {
-			fmt.Fprintf(buf, "%s: %s\n", line.prefix, sprintSlice(toSlice(stat)))
-		}
-	}
-	return nil
-}
-
-// netRouteData implements vfs.DynamicBytesSource for /proc/net/route.
-//
-// +stateify savable
-type netRouteData struct {
- kernfs.DynamicBytesFile
-
- // stack supplies the interfaces and routing table rendered by Generate.
- stack inet.Stack
-}
-
-// Compile-time check that netRouteData satisfies dynamicInode.
-var _ dynamicInode = (*netRouteData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-//
-// It writes a header row followed by one row per IPv4 route, each padded to
-// 127 characters. Broadcast and multicast routes, routes whose output
-// interface is unknown, and routes through "lo" are left out.
-// See Linux's net/ipv4/fib_trie.c:fib_route_seq_show.
-func (d *netRouteData) Generate(ctx context.Context, buf *bytes.Buffer) error {
-	const rowFormat = "%-127s\n"
-	fmt.Fprintf(buf, rowFormat, "Iface\tDestination\tGateway\tFlags\tRefCnt\tUse\tMetric\tMask\tMTU\tWindow\tIRTT")
-
-	ifaces := d.stack.Interfaces()
-	for _, route := range d.stack.RouteTable() {
-		switch {
-		case route.Family != linux.AF_INET:
-			// /proc/net/route only includes ipv4 routes.
-			continue
-		case route.Type == linux.RTN_BROADCAST, route.Type == linux.RTN_MULTICAST:
-			// /proc/net/route does not include broadcast or multicast routes.
-			continue
-		}
-
-		nic, found := ifaces[route.OutputInterface]
-		if !found || nic.Name == "lo" {
-			continue
-		}
-
-		// Addresses of any length other than IPv4's are printed as zero.
-		flags := linux.RTF_UP
-		var gateway, dest uint32
-		if len(route.GatewayAddr) == header.IPv4AddressSize {
-			flags |= linux.RTF_GATEWAY
-			gateway = usermem.ByteOrder.Uint32(route.GatewayAddr)
-		}
-		if len(route.DstAddr) == header.IPv4AddressSize {
-			dest = usermem.ByteOrder.Uint32(route.DstAddr)
-		}
-		mask := (uint32(1) << route.DstLen) - 1
-
-		row := fmt.Sprintf(
-			"%s\t%08X\t%08X\t%04X\t%d\t%d\t%d\t%08X\t%d\t%d\t%d",
-			nic.Name,
-			dest,
-			gateway,
-			flags,
-			0, // RefCnt.
-			0, // Use.
-			0, // Metric.
-			mask,
-			0, // MTU.
-			0, // Window.
-			0, // RTT.
-		)
-		fmt.Fprintf(buf, rowFormat, row)
-	}
-	return nil
-}
-
-// netStatData implements vfs.DynamicBytesSource for /proc/net/netstat.
-//
-// +stateify savable
-type netStatData struct {
- kernfs.DynamicBytesFile
-
- // stack is not read by Generate in this file.
- stack inet.Stack
-}
-
-// Compile-time check that netStatData satisfies dynamicInode.
-var _ dynamicInode = (*netStatData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-//
-// Only the "TcpExt:" header line listing the counter names is written; no
-// value line follows it and d.stack is not consulted.
-// See Linux's net/ipv4/proc.c:netstat_seq_show.
-func (d *netStatData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- buf.WriteString("TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed " +
- "EmbryonicRsts PruneCalled RcvPruned OfoPruned OutOfWindowIcmps " +
- "LockDroppedIcmps ArpFilter TW TWRecycled TWKilled PAWSPassive " +
- "PAWSActive PAWSEstab DelayedACKs DelayedACKLocked DelayedACKLost " +
- "ListenOverflows ListenDrops TCPPrequeued TCPDirectCopyFromBacklog " +
- "TCPDirectCopyFromPrequeue TCPPrequeueDropped TCPHPHits TCPHPHitsToUser " +
- "TCPPureAcks TCPHPAcks TCPRenoRecovery TCPSackRecovery TCPSACKReneging " +
- "TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder TCPFullUndo " +
- "TCPPartialUndo TCPDSACKUndo TCPLossUndo TCPLostRetransmit " +
- "TCPRenoFailures TCPSackFailures TCPLossFailures TCPFastRetrans " +
- "TCPForwardRetrans TCPSlowStartRetrans TCPTimeouts TCPLossProbes " +
- "TCPLossProbeRecovery TCPRenoRecoveryFail TCPSackRecoveryFail " +
- "TCPSchedulerFailed TCPRcvCollapsed TCPDSACKOldSent TCPDSACKOfoSent " +
- "TCPDSACKRecv TCPDSACKOfoRecv TCPAbortOnData TCPAbortOnClose " +
- "TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger TCPAbortFailed " +
- "TCPMemoryPressures TCPSACKDiscard TCPDSACKIgnoredOld " +
- "TCPDSACKIgnoredNoUndo TCPSpuriousRTOs TCPMD5NotFound TCPMD5Unexpected " +
- "TCPMD5Failure TCPSackShifted TCPSackMerged TCPSackShiftFallback " +
- "TCPBacklogDrop TCPMinTTLDrop TCPDeferAcceptDrop IPReversePathFilter " +
- "TCPTimeWaitOverflow TCPReqQFullDoCookies TCPReqQFullDrop TCPRetransFail " +
- "TCPRcvCoalesce TCPOFOQueue TCPOFODrop TCPOFOMerge TCPChallengeACK " +
- "TCPSYNChallenge TCPFastOpenActive TCPFastOpenActiveFail " +
- "TCPFastOpenPassive TCPFastOpenPassiveFail TCPFastOpenListenOverflow " +
- "TCPFastOpenCookieReqd TCPSpuriousRtxHostQueues BusyPollRxPackets " +
- "TCPAutoCorking TCPFromZeroWindowAdv TCPToZeroWindowAdv " +
- "TCPWantZeroWindowAdv TCPSynRetrans TCPOrigDataSent TCPHystartTrainDetect " +
- "TCPHystartTrainCwnd TCPHystartDelayDetect TCPHystartDelayCwnd " +
- "TCPACKSkippedSynRecv TCPACKSkippedPAWS TCPACKSkippedSeq " +
- "TCPACKSkippedFinWait2 TCPACKSkippedTimeWait TCPACKSkippedChallenge " +
- "TCPWinProbe TCPKeepAlive TCPMTUPFail TCPMTUPSuccess\n")
- return nil
-}
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
deleted file mode 100644
index 9f2ef8200..000000000
--- a/pkg/sentry/fsimpl/proc/tasks.go
+++ /dev/null
@@ -1,257 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package proc
-
-import (
- "bytes"
- "sort"
- "strconv"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
-)
-
-// Names of the two special symlinks in /proc that tasksInode serves itself
-// instead of through its OrderedChildren.
-const (
- selfName = "self"
- threadSelfName = "thread-self"
-)
-
-// InoGenerator generates unique inode numbers for a given filesystem.
-type InoGenerator interface {
- // NextIno returns the next inode number to use.
- NextIno() uint64
-}
-
-// tasksInode represents the inode for /proc/ directory.
-//
-// +stateify savable
-type tasksInode struct {
- kernfs.InodeNotSymlink
- kernfs.InodeDirectoryNoNewChildren
- kernfs.InodeAttrs
- kernfs.OrderedChildren
- kernfs.AlwaysValid
-
- // inoGen supplies inode numbers for the directory entries reported by
- // IterDirents and for the task inodes created in Lookup.
- inoGen InoGenerator
- // pidns is the PID namespace whose tasks and thread groups are exposed.
- pidns *kernel.PIDNamespace
-
- // '/proc/self' and '/proc/thread-self' have custom directory offsets in
- // Linux. So handle them outside of OrderedChildren.
- selfSymlink *vfs.Dentry
- threadSelfSymlink *vfs.Dentry
-
- // cgroupControllers is a map of controller name to directory in the
- // cgroup hierarchy. These controllers are immutable and will be listed
- // in /proc/pid/cgroup if not nil.
- cgroupControllers map[string]string
-}
-
-// Compile-time check that tasksInode satisfies kernfs.Inode.
-var _ kernfs.Inode = (*tasksInode)(nil)
-
-// newTasksInode creates the /proc root directory inode for pidns together
-// with its dentry. The static children listed in contents are owned by root
-// credentials of pidns's user namespace; inode numbers for all of them, for
-// the self/thread-self symlinks and for the directory itself are drawn from
-// inoGen.
-func newTasksInode(inoGen InoGenerator, k *kernel.Kernel, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) (*tasksInode, *kernfs.Dentry) {
- root := auth.NewRootCredentials(pidns.UserNamespace())
- contents := map[string]*kernfs.Dentry{
- "cpuinfo": newDentry(root, inoGen.NextIno(), 0444, newStaticFileSetStat(cpuInfoData(k))),
- "filesystems": newDentry(root, inoGen.NextIno(), 0444, &filesystemsData{}),
- "loadavg": newDentry(root, inoGen.NextIno(), 0444, &loadavgData{}),
- "sys": newSysDir(root, inoGen, k),
- "meminfo": newDentry(root, inoGen.NextIno(), 0444, &meminfoData{}),
- "mounts": kernfs.NewStaticSymlink(root, inoGen.NextIno(), "self/mounts"),
- "net": kernfs.NewStaticSymlink(root, inoGen.NextIno(), "self/net"),
- "stat": newDentry(root, inoGen.NextIno(), 0444, &statData{}),
- "uptime": newDentry(root, inoGen.NextIno(), 0444, &uptimeData{}),
- "version": newDentry(root, inoGen.NextIno(), 0444, &versionData{}),
- }
-
- // The two symlinks are kept on the inode itself rather than in contents;
- // Lookup and IterDirents handle them explicitly.
- inode := &tasksInode{
- pidns: pidns,
- inoGen: inoGen,
- selfSymlink: newSelfSymlink(root, inoGen.NextIno(), pidns).VFSDentry(),
- threadSelfSymlink: newThreadSelfSymlink(root, inoGen.NextIno(), pidns).VFSDentry(),
- cgroupControllers: cgroupControllers,
- }
- inode.InodeAttrs.Init(root, inoGen.NextIno(), linux.ModeDirectory|0555)
-
- dentry := &kernfs.Dentry{}
- dentry.Init(inode)
-
- // Register the static children and account for the links Populate reports.
- inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
- links := inode.OrderedChildren.Populate(dentry, contents)
- inode.IncLinks(links)
-
- return inode, dentry
-}
-
-// Lookup implements kernfs.inodeDynamicLookup.
-//
-// A numeric name is resolved as a thread ID in the inode's PID namespace and
-// yields a freshly created task directory. The names "self" and "thread-self"
-// yield the corresponding symlinks. Everything else is ENOENT.
-func (i *tasksInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, error) {
-	tid, err := strconv.ParseUint(name, 10, 64)
-	if err != nil {
-		// Not a number: only the two special symlinks can match.
-		if name == selfName {
-			return i.selfSymlink, nil
-		}
-		if name == threadSelfName {
-			return i.threadSelfSymlink, nil
-		}
-		return nil, syserror.ENOENT
-	}
-
-	if task := i.pidns.TaskWithID(kernel.ThreadID(tid)); task != nil {
-		return newTaskInode(i.inoGen, task, i.pidns, true, i.cgroupControllers).VFSDentry(), nil
-	}
-	return nil, syserror.ENOENT
-}
-
-// IterDirents implements kernfs.inodeDynamicLookup.
-//
-// It reports the dynamic entries of /proc: "self", "thread-self" and one
-// directory per thread group that has a leader, in ascending TGID order.
-// offset is the absolute directory offset to resume from; the second offset
-// argument is unused.
-//
-// The returned offset is where the next call should resume. If cb.Handle
-// fails, it is the offset of the entry that could not be delivered, so a
-// later call neither skips nor repeats entries.
-func (i *tasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, _ int64) (int64, error) {
-	// fs/proc/internal.h: #define FIRST_PROCESS_ENTRY 256
-	const FIRST_PROCESS_ENTRY = 256
-
-	// Use maxTaskID to shortcut searches that will result in 0 entries.
-	const maxTaskID = kernel.TasksLimit + 1
-	if offset >= maxTaskID {
-		return offset, nil
-	}
-
-	// According to Linux (fs/proc/base.c:proc_pid_readdir()), process directories
-	// start at offset FIRST_PROCESS_ENTRY with '/proc/self', followed by
-	// '/proc/thread-self' and then '/proc/[pid]'.
-	if offset < FIRST_PROCESS_ENTRY {
-		offset = FIRST_PROCESS_ENTRY
-	}
-
-	if offset == FIRST_PROCESS_ENTRY {
-		dirent := vfs.Dirent{
-			Name:    selfName,
-			Type:    linux.DT_LNK,
-			Ino:     i.inoGen.NextIno(),
-			NextOff: offset + 1,
-		}
-		if err := cb.Handle(dirent); err != nil {
-			return offset, err
-		}
-		offset++
-	}
-	if offset == FIRST_PROCESS_ENTRY+1 {
-		dirent := vfs.Dirent{
-			Name:    threadSelfName,
-			Type:    linux.DT_LNK,
-			Ino:     i.inoGen.NextIno(),
-			NextOff: offset + 1,
-		}
-		if err := cb.Handle(dirent); err != nil {
-			return offset, err
-		}
-		offset++
-	}
-
-	// Collect all tasks that TGIDs are greater than the offset specified. Per
-	// Linux we only include in directory listings if it's the leader. But for
-	// whatever crazy reason, you can still walk to the given node.
-	var tids []int
-	startTid := offset - FIRST_PROCESS_ENTRY - 2
-	for _, tg := range i.pidns.ThreadGroups() {
-		tid := i.pidns.IDOfThreadGroup(tg)
-		if int64(tid) < startTid {
-			continue
-		}
-		if leader := tg.Leader(); leader != nil {
-			tids = append(tids, int(tid))
-		}
-	}
-
-	if len(tids) == 0 {
-		return offset, nil
-	}
-
-	sort.Ints(tids)
-	for _, tid := range tids {
-		dirent := vfs.Dirent{
-			Name:    strconv.FormatUint(uint64(tid), 10),
-			Type:    linux.DT_DIR,
-			Ino:     i.inoGen.NextIno(),
-			NextOff: FIRST_PROCESS_ENTRY + 2 + int64(tid) + 1,
-		}
-		if err := cb.Handle(dirent); err != nil {
-			return offset, err
-		}
-		// Offsets in the process region encode the TGID, so advance to the
-		// offset following this entry rather than counting entries. Counting
-		// would hand back an offset that maps to a lower TGID than the ones
-		// already emitted, and a resumed listing would repeat them.
-		offset = dirent.NextOff
-	}
-	return maxTaskID, nil
-}
-
-// Open implements kernfs.Inode.
-//
-// It returns a generic directory file description backed by this inode's
-// OrderedChildren.
-func (i *tasksInode) Open(rp *vfs.ResolvingPath, vfsd *vfs.Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
-	dirFD := new(kernfs.GenericDirectoryFD)
-	dirFD.Init(rp.Mount(), vfsd, &i.OrderedChildren, &opts)
-	return dirFD.VFSFileDescription(), nil
-}
-
-// Stat returns the attributes kept in InodeAttrs. When the link count is
-// requested (STATX_NLINK), one link is added for every thread group in the
-// PID namespace that has a leader, since those directories are dynamic
-// children not tracked by InodeAttrs.
-func (i *tasksInode) Stat(vsfs *vfs.Filesystem, opts vfs.StatOptions) (linux.Statx, error) {
-	stat, err := i.InodeAttrs.Stat(vsfs, opts)
-	if err != nil {
-		return linux.Statx{}, err
-	}
-	if opts.Mask&linux.STATX_NLINK == 0 {
-		return stat, nil
-	}
-
-	// Add dynamic children to link count.
-	for _, tg := range i.pidns.ThreadGroups() {
-		if tg.Leader() != nil {
-			stat.Nlink++
-		}
-	}
-	return stat, nil
-}
-
-// staticFileSetStat implements a special static file that allows inode
-// attributes to be set. This is to support /proc files that are readonly, but
-// allow attributes to be set.
-type staticFileSetStat struct {
- dynamicBytesFileSetAttr
- // StaticData holds the fixed file contents.
- vfs.StaticData
-}
-
-// Compile-time check that staticFileSetStat satisfies dynamicInode.
-var _ dynamicInode = (*staticFileSetStat)(nil)
-
-// newStaticFileSetStat returns a staticFileSetStat whose contents are data.
-func newStaticFileSetStat(data string) *staticFileSetStat {
-	file := &staticFileSetStat{}
-	file.StaticData = vfs.StaticData{Data: data}
-	return file
-}
-
-// cpuInfoData renders the contents of /proc/cpuinfo: one section, written by
-// the kernel's FeatureSet, for each application core. It panics if the kernel
-// has no FeatureSet.
-func cpuInfoData(k *kernel.Kernel) string {
-	fs := k.FeatureSet()
-	if fs == nil {
-		// Kernel is always initialized with a FeatureSet.
-		panic("cpuinfo read with nil FeatureSet")
-	}
-
-	cores := k.ApplicationCores()
-	var out bytes.Buffer
-	for cpu := uint(0); cpu < cores; cpu++ {
-		fs.WriteCPUInfoTo(cpu, &out)
-	}
-	return out.String()
-}
-
-// shmData returns a static file whose contents are v formatted in decimal.
-func shmData(v uint64) dynamicInode {
-	text := strconv.FormatUint(v, 10)
-	return newStaticFile(text)
-}
diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go
deleted file mode 100644
index 882c1981e..000000000
--- a/pkg/sentry/fsimpl/proc/tasks_files.go
+++ /dev/null
@@ -1,370 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package proc
-
-import (
- "bytes"
- "fmt"
- "strconv"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/kernel/time"
- "gvisor.dev/gvisor/pkg/sentry/usage"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// selfSymlink is a symlink inode whose target is the thread group ID of the
-// task that reads it.
-type selfSymlink struct {
-	kernfs.InodeAttrs
-	kernfs.InodeNoopRefCount
-	kernfs.InodeSymlink
-
-	// pidns is the PID namespace in which the reader's thread group ID is
-	// looked up.
-	pidns *kernel.PIDNamespace
-}
-
-var _ kernfs.Inode = (*selfSymlink)(nil)
-
-// newSelfSymlink returns a dentry wrapping a selfSymlink inode with mode
-// ModeSymlink|0777.
-func newSelfSymlink(creds *auth.Credentials, ino uint64, pidns *kernel.PIDNamespace) *kernfs.Dentry {
-	link := &selfSymlink{pidns: pidns}
-	link.Init(creds, ino, linux.ModeSymlink|0777)
-
-	dentry := new(kernfs.Dentry)
-	dentry.Init(link)
-	return dentry
-}
-
-// Readlink returns the decimal thread group ID of the calling task as seen
-// from s.pidns. It fails with EINVAL when ctx carries no task and with ENOENT
-// when the thread group has no ID in the namespace.
-func (s *selfSymlink) Readlink(ctx context.Context) (string, error) {
-	task := kernel.TaskFromContext(ctx)
-	if task == nil {
-		// Who is reading this link?
-		return "", syserror.EINVAL
-	}
-	if tgid := s.pidns.IDOfThreadGroup(task.ThreadGroup()); tgid != 0 {
-		return strconv.FormatUint(uint64(tgid), 10), nil
-	}
-	return "", syserror.ENOENT
-}
-
-// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
-func (*selfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
-	return syserror.EPERM
-}
-
-// threadSelfSymlink is a symlink inode whose target is
-// "<tgid>/task/<tid>" for the task that reads it.
-type threadSelfSymlink struct {
-	kernfs.InodeAttrs
-	kernfs.InodeNoopRefCount
-	kernfs.InodeSymlink
-
-	// pidns is the PID namespace in which the reader's IDs are looked up.
-	pidns *kernel.PIDNamespace
-}
-
-var _ kernfs.Inode = (*threadSelfSymlink)(nil)
-
-// newThreadSelfSymlink returns a dentry wrapping a threadSelfSymlink inode
-// with mode ModeSymlink|0777.
-func newThreadSelfSymlink(creds *auth.Credentials, ino uint64, pidns *kernel.PIDNamespace) *kernfs.Dentry {
-	link := &threadSelfSymlink{pidns: pidns}
-	link.Init(creds, ino, linux.ModeSymlink|0777)
-
-	dentry := new(kernfs.Dentry)
-	dentry.Init(link)
-	return dentry
-}
-
-// Readlink returns "<tgid>/task/<tid>" for the calling task as seen from
-// s.pidns. It fails with EINVAL when ctx carries no task and with ENOENT when
-// either ID is zero in the namespace.
-func (s *threadSelfSymlink) Readlink(ctx context.Context) (string, error) {
-	task := kernel.TaskFromContext(ctx)
-	if task == nil {
-		// Who is reading this link?
-		return "", syserror.EINVAL
-	}
-	tgid := s.pidns.IDOfThreadGroup(task.ThreadGroup())
-	tid := s.pidns.IDOfTask(task)
-	if tgid != 0 && tid != 0 {
-		return fmt.Sprintf("%d/task/%d", tgid, tid), nil
-	}
-	return "", syserror.ENOENT
-}
-
-// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
-func (*threadSelfSymlink) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
-	return syserror.EPERM
-}
-
-// dynamicBytesFileSetAttr implements a special file that allows inode
-// attributes to be set. This is to support /proc files that are readonly, but
-// allow attributes to be set.
-type dynamicBytesFileSetAttr struct {
- kernfs.DynamicBytesFile
-}
-
-// SetStat implements Inode.SetStat by forwarding the request to the SetStat
-// of the InodeAttrs embedded in DynamicBytesFile.
-func (d *dynamicBytesFileSetAttr) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
- return d.DynamicBytesFile.InodeAttrs.SetStat(ctx, fs, creds, opts)
-}
-
-// cpuStats contains the breakdown of CPU time for /proc/stat. Its String
-// method prints the fields space-separated in declaration order.
-type cpuStats struct {
- // user is time spent in userspace tasks with non-positive niceness.
- user uint64
-
- // nice is time spent in userspace tasks with positive niceness.
- nice uint64
-
- // system is time spent in non-interrupt kernel context.
- system uint64
-
- // idle is time spent idle.
- idle uint64
-
- // ioWait is time spent waiting for IO.
- ioWait uint64
-
- // irq is time spent in interrupt context.
- irq uint64
-
- // softirq is time spent in software interrupt context.
- softirq uint64
-
- // steal is involuntary wait time.
- steal uint64
-
- // guest is time spent in guests with non-positive niceness.
- guest uint64
-
- // guestNice is time spent in guests with positive niceness.
- guestNice uint64
-}
-
-// String implements fmt.Stringer. The ten counters are printed in
-// declaration order separated by single spaces.
-func (c cpuStats) String() string {
-	// fmt.Sprint inserts a space between adjacent non-string operands, which
-	// yields the same text as a "%d %d ..." format.
-	return fmt.Sprint(
-		c.user, c.nice, c.system, c.idle, c.ioWait,
-		c.irq, c.softirq, c.steal, c.guest, c.guestNice,
-	)
-}
-
-// statData implements vfs.DynamicBytesSource for /proc/stat.
-//
-// +stateify savable
-type statData struct {
-	dynamicBytesFileSetAttr
-}
-
-var _ dynamicInode = (*statData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-//
-// Every counter except btime is written as zero.
-func (*statData) Generate(ctx context.Context, buf *bytes.Buffer) error {
-	// The total number of interrupts is dependent on the CPUs and PCI
-	// devices on the system. See arch_probe_nr_irqs.
-	//
-	// Since we don't report real interrupt stats, just choose an arbitrary
-	// value from a representative VM.
-	const numInterrupts = 256
-
-	// writeZeros emits header followed by n " 0" columns and a newline.
-	writeZeros := func(header string, n int) {
-		buf.WriteString(header)
-		for ; n > 0; n-- {
-			buf.WriteString(" 0")
-		}
-		buf.WriteString("\n")
-	}
-
-	// TODO(b/37226836): We currently export only zero CPU stats. We could
-	// at least provide some aggregate stats.
-	var zero cpuStats
-	fmt.Fprintf(buf, "cpu %s\n", zero)
-
-	k := kernel.KernelFromContext(ctx)
-	cores := k.ApplicationCores()
-	for cpu := uint(0); cpu < cores; cpu++ {
-		fmt.Fprintf(buf, "cpu%d %s\n", cpu, zero)
-	}
-
-	// The Kernel doesn't handle real interrupts, so report all zeroes. The
-	// 0 in the header is the total.
-	// TODO(b/37226836): We could count page faults as #PF.
-	writeZeros("intr 0", numInterrupts)
-
-	// Total number of context switches.
-	// TODO(b/37226836): Count this.
-	buf.WriteString("ctxt 0\n")
-
-	// CLOCK_REALTIME timestamp from boot, in seconds.
-	fmt.Fprintf(buf, "btime %d\n", k.Timekeeper().BootTime().Seconds())
-
-	// Total number of clones.
-	// TODO(b/37226836): Count this.
-	buf.WriteString("processes 0\n")
-
-	// Number of runnable tasks.
-	// TODO(b/37226836): Count this.
-	buf.WriteString("procs_running 0\n")
-
-	// Number of tasks waiting on IO.
-	// TODO(b/37226836): Count this.
-	buf.WriteString("procs_blocked 0\n")
-
-	// Number of each softirq handled. The 0 in the header is the total.
-	writeZeros("softirq 0", linux.NumSoftIRQ)
-	return nil
-}
-
-// loadavgData backs /proc/loadavg.
-//
-// +stateify savable
-type loadavgData struct {
- dynamicBytesFileSetAttr
-}
-
-var _ dynamicInode = (*loadavgData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (*loadavgData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- // TODO(b/62345059): Include real data in fields.
- // Column 1-3: load averages over the last 1, 5, and 15 minute periods.
- // Column 4-5: currently running processes and the total number of processes.
- // Column 6: the last process ID used.
- fmt.Fprintf(buf, "%.2f %.2f %.2f %d/%d %d\n", 0.00, 0.00, 0.00, 0, 0, 0)
- return nil
-}
-
-// meminfoData implements vfs.DynamicBytesSource for /proc/meminfo.
-//
-// +stateify savable
-type meminfoData struct {
-	dynamicBytesFileSetAttr
-}
-
-var _ dynamicInode = (*meminfoData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-//
-// Values are derived from a snapshot of usage.MemoryAccounting taken after
-// refreshing the memory file's usage. All sizes are printed in kB.
-func (*meminfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
-	k := kernel.KernelFromContext(ctx)
-	mf := k.MemoryFile()
-	mf.UpdateUsage()
-	snapshot, totalUsage := usage.MemoryAccounting.Copy()
-	totalSize := usage.TotalMemory(mf.TotalSize(), totalUsage)
-	anon := snapshot.Anonymous + snapshot.Tmpfs
-	file := snapshot.PageCache + snapshot.Mapped
-	// We don't actually have active/inactive LRUs, so just make up numbers.
-	activeFile := (file / 2) &^ (usermem.PageSize - 1)
-	inactiveFile := file - activeFile
-
-	fmt.Fprintf(buf, "MemTotal: %8d kB\n", totalSize/1024)
-	// The subtraction is unsigned: if usage ever exceeds the reported total
-	// it would wrap around to an enormous value, so report zero free memory
-	// in that case instead.
-	var memFree uint64
-	if totalSize > totalUsage {
-		memFree = (totalSize - totalUsage) / 1024
-	}
-	// We use MemFree as MemAvailable because we don't swap.
-	// TODO(rahat): When reclaim is implemented the value of MemAvailable
-	// should change.
-	fmt.Fprintf(buf, "MemFree: %8d kB\n", memFree)
-	fmt.Fprintf(buf, "MemAvailable: %8d kB\n", memFree)
-	fmt.Fprintf(buf, "Buffers: 0 kB\n") // memory usage by block devices
-	fmt.Fprintf(buf, "Cached: %8d kB\n", (file+snapshot.Tmpfs)/1024)
-	// Emulate a system with no swap, which disables inactivation of anon pages.
-	fmt.Fprintf(buf, "SwapCache: 0 kB\n")
-	fmt.Fprintf(buf, "Active: %8d kB\n", (anon+activeFile)/1024)
-	fmt.Fprintf(buf, "Inactive: %8d kB\n", inactiveFile/1024)
-	fmt.Fprintf(buf, "Active(anon): %8d kB\n", anon/1024)
-	fmt.Fprintf(buf, "Inactive(anon): 0 kB\n")
-	fmt.Fprintf(buf, "Active(file): %8d kB\n", activeFile/1024)
-	fmt.Fprintf(buf, "Inactive(file): %8d kB\n", inactiveFile/1024)
-	fmt.Fprintf(buf, "Unevictable: 0 kB\n") // TODO(b/31823263)
-	fmt.Fprintf(buf, "Mlocked: 0 kB\n") // TODO(b/31823263)
-	fmt.Fprintf(buf, "SwapTotal: 0 kB\n")
-	fmt.Fprintf(buf, "SwapFree: 0 kB\n")
-	fmt.Fprintf(buf, "Dirty: 0 kB\n")
-	fmt.Fprintf(buf, "Writeback: 0 kB\n")
-	fmt.Fprintf(buf, "AnonPages: %8d kB\n", anon/1024)
-	fmt.Fprintf(buf, "Mapped: %8d kB\n", file/1024) // doesn't count mapped tmpfs, which we don't know
-	fmt.Fprintf(buf, "Shmem: %8d kB\n", snapshot.Tmpfs/1024)
-	return nil
-}
-
-// uptimeData implements vfs.DynamicBytesSource for /proc/uptime.
-//
-// +stateify savable
-type uptimeData struct {
-	dynamicBytesFileSetAttr
-}
-
-var _ dynamicInode = (*uptimeData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate. The first column is
-// the time elapsed since the Timekeeper's boot time, in seconds.
-func (*uptimeData) Generate(ctx context.Context, buf *bytes.Buffer) error {
-	k := kernel.KernelFromContext(ctx)
-	now := time.NowFromContext(ctx)
-	uptime := now.Sub(k.Timekeeper().BootTime())
-
-	// Pretend that we've spent zero time sleeping (second number).
-	fmt.Fprintf(buf, "%.2f 0.00\n", uptime.Seconds())
-	return nil
-}
-
-// versionData implements vfs.DynamicBytesSource for /proc/version.
-//
-// +stateify savable
-type versionData struct {
- dynamicBytesFileSetAttr
-}
-
-var _ dynamicInode = (*versionData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate.
-func (*versionData) Generate(ctx context.Context, buf *bytes.Buffer) error {
- k := kernel.KernelFromContext(ctx)
- init := k.GlobalInit()
- if init == nil {
- // Attempted to read before the init Task is created. This can
- // only occur during startup, which should never need to read
- // this file.
- panic("Attempted to read version before initial Task is available")
- }
-
- // /proc/version takes the form:
- //
- // "SYSNAME version RELEASE (COMPILE_USER@COMPILE_HOST)
- // (COMPILER_VERSION) VERSION"
- //
- // where:
- // - SYSNAME, RELEASE, and VERSION are the same as returned by
- // sys_utsname
- // - COMPILE_USER is the user that built the kernel
- // - COMPILE_HOST is the hostname of the machine on which the kernel
- // was built
- // - COMPILER_VERSION is the version reported by the building compiler
- //
- // Since we don't really want to expose build information to
- // applications, those fields are omitted.
- //
- // FIXME(mpratt): Using Version from the init task SyscallTable
- // disregards the different version a task may have (e.g., in a uts
- // namespace).
- ver := init.Leader().SyscallTable().Version
- fmt.Fprintf(buf, "%s version %s %s\n", ver.Sysname, ver.Release, ver.Version)
- return nil
-}
-
-// filesystemsData backs /proc/filesystems.
-//
-// +stateify savable
-type filesystemsData struct {
-	kernfs.DynamicBytesFile
-}
-
-var _ dynamicInode = (*filesystemsData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate by delegating to the
-// kernel VFS's GenerateProcFilesystems.
-func (*filesystemsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
-	kernel.KernelFromContext(ctx).VFS().GenerateProcFilesystems(buf)
-	return nil
-}
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go
deleted file mode 100644
index 3d5dc463c..000000000
--- a/pkg/sentry/fsimpl/proc/tasks_sys.go
+++ /dev/null
@@ -1,211 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package proc
-
-import (
- "bytes"
- "fmt"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
- "gvisor.dev/gvisor/pkg/sentry/inet"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-// newSysDir returns the dentry corresponding to /proc/sys directory.
-//
-// The directory holds three static subdirectories: "kernel", "vm" and "net".
-// k is handed to the files that need the kernel when they are read.
-func newSysDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *kernfs.Dentry {
-	return kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
-		"kernel": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
-			"hostname": newDentry(root, inoGen.NextIno(), 0444, &hostnameData{}),
-			"shmall": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMALL)),
-			"shmmax": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMMAX)),
-			"shmmni": newDentry(root, inoGen.NextIno(), 0444, shmData(linux.SHMMNI)),
-		}),
-		"vm": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
-			// mmapMinAddrData.Generate dereferences its k field, so it
-			// must be populated here; a zero value would panic on read.
-			"mmap_min_addr": newDentry(root, inoGen.NextIno(), 0444, &mmapMinAddrData{k: k}),
-			"overcommit_memory": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0\n")),
-		}),
-		"net": newSysNetDir(root, inoGen, k),
-	})
-}
-
-// newSysNetDir returns the dentry corresponding to /proc/sys/net directory.
-//
-// The returned dentry is the "net" directory itself (newSysDir registers it
-// under that name), so its direct children are "ipv4" and "core". If the root
-// network namespace has no stack, the directory is empty.
-func newSysNetDir(root *auth.Credentials, inoGen InoGenerator, k *kernel.Kernel) *kernfs.Dentry {
-	var contents map[string]*kernfs.Dentry
-
-	// TODO(gvisor.dev/issue/1833): Support for using the network stack in the
-	// network namespace of the calling process.
-	if stack := k.RootNetworkNamespace().Stack(); stack != nil {
-		contents = map[string]*kernfs.Dentry{
-			"ipv4": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
-				"tcp_sack": newDentry(root, inoGen.NextIno(), 0644, &tcpSackData{stack: stack}),
-
-				// The following files are simple stubs until they are implemented in
-				// netstack, most of these files are configuration related. We use the
-				// value closest to the actual netstack behavior or any empty file, all
-				// of these files will have mode 0444 (read-only for all users).
-				"ip_local_port_range": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("16000 65535")),
-				"ip_local_reserved_ports": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")),
-				"ipfrag_time": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("30")),
-				"ip_nonlocal_bind": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
-				"ip_no_pmtu_disc": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
-
-				// tcp_allowed_congestion_control tells the user what they are able to
-				// do as an unprivileged process so we leave it empty.
-				"tcp_allowed_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")),
-				"tcp_available_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("reno")),
-				"tcp_congestion_control": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("reno")),
-
-				// Many of the following stub files are features netstack doesn't
-				// support. The unsupported features return "0" to indicate they are
-				// disabled.
-				"tcp_base_mss": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1280")),
-				"tcp_dsack": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
-				"tcp_early_retrans": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
-				"tcp_fack": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
-				"tcp_fastopen": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
-				"tcp_fastopen_key": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("")),
-				"tcp_invalid_ratelimit": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
-				"tcp_keepalive_intvl": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
-				"tcp_keepalive_probes": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
-				"tcp_keepalive_time": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("7200")),
-				"tcp_mtu_probing": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
-				"tcp_no_metrics_save": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
-				"tcp_probe_interval": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
-				"tcp_probe_threshold": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
-				"tcp_retries1": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("3")),
-				"tcp_retries2": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("15")),
-				"tcp_rfc1337": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
-				"tcp_slow_start_after_idle": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
-				"tcp_synack_retries": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("5")),
-				"tcp_syn_retries": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("3")),
-				"tcp_timestamps": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("1")),
-			}),
-			"core": kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, map[string]*kernfs.Dentry{
-				"default_qdisc": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("pfifo_fast")),
-				"message_burst": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("10")),
-				"message_cost": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("5")),
-				"optmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("0")),
-				"rmem_default": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")),
-				"rmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")),
-				"somaxconn": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("128")),
-				"wmem_default": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")),
-				"wmem_max": newDentry(root, inoGen.NextIno(), 0444, newStaticFile("212992")),
-			}),
-		}
-	}
-
-	// Return the directory holding contents directly. Wrapping it in another
-	// "net" directory would expose the files under /proc/sys/net/net.
-	return kernfs.NewStaticDir(root, inoGen.NextIno(), 0555, contents)
-}
-
-// mmapMinAddrData implements vfs.DynamicBytesSource for
-// /proc/sys/vm/mmap_min_addr.
-//
-// +stateify savable
-type mmapMinAddrData struct {
-	kernfs.DynamicBytesFile
-
-	// k is the kernel whose platform supplies the minimum user address.
-	k *kernel.Kernel
-}
-
-var _ dynamicInode = (*mmapMinAddrData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate. It writes the
-// platform's minimum user address in decimal followed by a newline.
-func (d *mmapMinAddrData) Generate(ctx context.Context, buf *bytes.Buffer) error {
-	minAddr := d.k.Platform.MinUserAddress()
-	fmt.Fprintf(buf, "%d\n", minAddr)
-	return nil
-}
-
-// hostnameData implements vfs.DynamicBytesSource for /proc/sys/kernel/hostname.
-//
-// +stateify savable
-type hostnameData struct {
-	kernfs.DynamicBytesFile
-}
-
-var _ dynamicInode = (*hostnameData)(nil)
-
-// Generate implements vfs.DynamicBytesSource.Generate. It writes the host
-// name of the UTS namespace found in ctx, followed by a newline.
-func (*hostnameData) Generate(ctx context.Context, buf *bytes.Buffer) error {
-	hostname := kernel.UTSNamespaceFromContext(ctx).HostName()
-	buf.WriteString(hostname + "\n")
-	return nil
-}
-
-// tcpSackData implements vfs.WritableDynamicBytesSource for
-// /proc/sys/net/ipv4/tcp_sack.
-//
-// +stateify savable
-type tcpSackData struct {
-	kernfs.DynamicBytesFile
-
-	stack inet.Stack `state:"wait"`
-
-	// enabled caches the SACK setting. It is nil until the value has been
-	// read from stack by Generate or accepted by stack in Write.
-	enabled *bool
-}
-
-var _ vfs.WritableDynamicBytesSource = (*tcpSackData)(nil)
-
-// Generate implements vfs.DynamicBytesSource. It writes "1\n" when SACK is
-// enabled and "0\n" otherwise, querying the stack only if no value is cached.
-func (d *tcpSackData) Generate(ctx context.Context, buf *bytes.Buffer) error {
-	if d.enabled == nil {
-		sack, err := d.stack.TCPSACKEnabled()
-		if err != nil {
-			return err
-		}
-		d.enabled = &sack
-	}
-
-	val := "0\n"
-	if *d.enabled {
-		// Technically, this is not quite compatible with Linux. Linux stores these
-		// as an integer, so if you write "2" into tcp_sack, you should get 2 back.
-		// Tough luck.
-		val = "1\n"
-	}
-	buf.WriteString(val)
-	return nil
-}
-
-// Write implements vfs.WritableDynamicBytesSource. It parses an integer from
-// src and enables SACK on the stack when the integer is non-zero. Writes at a
-// non-zero offset fail with EINVAL; empty writes succeed without effect.
-func (d *tcpSackData) Write(ctx context.Context, src usermem.IOSequence, offset int64) (int64, error) {
-	if offset != 0 {
-		// No need to handle partial writes thus far.
-		return 0, syserror.EINVAL
-	}
-	if src.NumBytes() == 0 {
-		return 0, nil
-	}
-
-	// Limit the amount of memory allocated.
-	src = src.TakeFirst(usermem.PageSize - 1)
-
-	var v int32
-	n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts)
-	if err != nil {
-		return n, err
-	}
-
-	enabled := v != 0
-	if err := d.stack.SetTCPSACKEnabled(enabled); err != nil {
-		// The stack rejected the change: leave the cache untouched so that
-		// Generate does not report a value the stack never accepted.
-		return n, err
-	}
-	d.enabled = &enabled
-	return n, nil
-}
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys_test.go b/pkg/sentry/fsimpl/proc/tasks_sys_test.go
deleted file mode 100644
index be54897bb..000000000
--- a/pkg/sentry/fsimpl/proc/tasks_sys_test.go
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package proc
-
-import (
- "bytes"
- "reflect"
- "testing"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/sentry/contexttest"
- "gvisor.dev/gvisor/pkg/sentry/inet"
-)
-
-// newIPv6TestStack returns an inet.TestStack with SupportsIPv6Flag set.
-func newIPv6TestStack() *inet.TestStack {
-	stack := inet.NewTestStack()
-	stack.SupportsIPv6Flag = true
-	return stack
-}
-
-// TestIfinet6NoAddresses checks that ifinet6 generates no output for a stack
-// without any interface addresses.
-func TestIfinet6NoAddresses(t *testing.T) {
-	n := &ifinet6{stack: newIPv6TestStack()}
-	var buf bytes.Buffer
-	// A generation failure would otherwise look like a legitimately empty
-	// file, so it must not be ignored.
-	if err := n.Generate(contexttest.Context(t), &buf); err != nil {
-		t.Fatalf("n.Generate() failed: %v", err)
-	}
-	if buf.Len() > 0 {
-		t.Errorf("n.Generate() generated = %v, want = %v", buf.Bytes(), []byte{})
-	}
-}
-
-// TestIfinet6 checks the lines ifinet6 produces for two interfaces that each
-// carry one IPv6 address. Line order is not significant.
-func TestIfinet6(t *testing.T) {
-	stack := newIPv6TestStack()
-	stack.InterfacesMap[1] = inet.Interface{Name: "eth0"}
-	stack.InterfaceAddrsMap[1] = []inet.InterfaceAddr{{
-		Family:    linux.AF_INET6,
-		PrefixLen: 128,
-		Addr:      []byte("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"),
-	}}
-	stack.InterfacesMap[2] = inet.Interface{Name: "eth1"}
-	stack.InterfaceAddrsMap[2] = []inet.InterfaceAddr{{
-		Family:    linux.AF_INET6,
-		PrefixLen: 128,
-		Addr:      []byte("\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"),
-	}}
-	want := map[string]struct{}{
-		"000102030405060708090a0b0c0d0e0f 01 80 00 00 eth0\n": {},
-		"101112131415161718191a1b1c1d1e1f 02 80 00 00 eth1\n": {},
-	}
-
-	lines := (&ifinet6{stack: stack}).contents()
-	if len(lines) != len(want) {
-		t.Errorf("Got len(n.contents()) = %d, want = %d", len(lines), len(want))
-	}
-
-	// Compare as sets so that the order of the lines does not matter.
-	got := make(map[string]struct{}, len(lines))
-	for _, line := range lines {
-		got[line] = struct{}{}
-	}
-	if !reflect.DeepEqual(got, want) {
-		t.Errorf("Got n.contents() = %v, want = %v", got, want)
-	}
-}
diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go
deleted file mode 100644
index d0f97c137..000000000
--- a/pkg/sentry/fsimpl/proc/tasks_test.go
+++ /dev/null
@@ -1,505 +0,0 @@
-// Copyright 2019 The gVisor Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package proc
-
-import (
- "fmt"
- "math"
- "path"
- "strconv"
- "testing"
-
- "gvisor.dev/gvisor/pkg/abi/linux"
- "gvisor.dev/gvisor/pkg/context"
- "gvisor.dev/gvisor/pkg/fspath"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/testutil"
- "gvisor.dev/gvisor/pkg/sentry/fsimpl/tmpfs"
- "gvisor.dev/gvisor/pkg/sentry/kernel"
- "gvisor.dev/gvisor/pkg/sentry/kernel/auth"
- "gvisor.dev/gvisor/pkg/sentry/vfs"
- "gvisor.dev/gvisor/pkg/syserror"
- "gvisor.dev/gvisor/pkg/usermem"
-)
-
-var (
- // Dynamic entries start at offset 256 by convention. NextOff is the
- // entry's own offset plus 1.
- selfLink = vfs.Dirent{Type: linux.DT_LNK, NextOff: 256 + 0 + 1}
- threadSelfLink = vfs.Dirent{Type: linux.DT_LNK, NextOff: 256 + 1 + 1}
-
- // /proc/[pid] next offset starts at 256+2 (files above), then adds the
- // PID, and adds 1 for the next offset.
- proc1 = vfs.Dirent{Type: linux.DT_DIR, NextOff: 258 + 1 + 1}
- proc2 = vfs.Dirent{Type: linux.DT_DIR, NextOff: 258 + 2 + 1}
- proc3 = vfs.Dirent{Type: linux.DT_DIR, NextOff: 258 + 3 + 1}
-)
-
-var (
- // tasksStaticFiles lists the entries, with their dirent types, that the
- // tests expect directly under /proc when no task directories exist.
- tasksStaticFiles = map[string]testutil.DirentType{
- "cpuinfo": linux.DT_REG,
- "filesystems": linux.DT_REG,
- "loadavg": linux.DT_REG,
- "meminfo": linux.DT_REG,
- "mounts": linux.DT_LNK,
- "net": linux.DT_LNK,
- "self": linux.DT_LNK,
- "stat": linux.DT_REG,
- "sys": linux.DT_DIR,
- "thread-self": linux.DT_LNK,
- "uptime": linux.DT_REG,
- "version": linux.DT_REG,
- }
- // tasksStaticFilesNextOffs holds the expected NextOff of the /proc
- // entries whose offsets the tests verify.
- tasksStaticFilesNextOffs = map[string]int64{
- "self": selfLink.NextOff,
- "thread-self": threadSelfLink.NextOff,
- }
- // taskStaticFiles lists the entries, with their dirent types, that the
- // tests expect under a /proc/[pid] directory.
- taskStaticFiles = map[string]testutil.DirentType{
- "auxv": linux.DT_REG,
- "cgroup": linux.DT_REG,
- "cmdline": linux.DT_REG,
- "comm": linux.DT_REG,
- "environ": linux.DT_REG,
- "exe": linux.DT_LNK,
- "fd": linux.DT_DIR,
- "fdinfo": linux.DT_DIR,
- "gid_map": linux.DT_REG,
- "io": linux.DT_REG,
- "maps": linux.DT_REG,
- "mountinfo": linux.DT_REG,
- "mounts": linux.DT_REG,
- "net": linux.DT_DIR,
- "ns": linux.DT_DIR,
- "oom_score": linux.DT_REG,
- "oom_score_adj": linux.DT_REG,
- "smaps": linux.DT_REG,
- "stat": linux.DT_REG,
- "statm": linux.DT_REG,
- "status": linux.DT_REG,
- "task": linux.DT_DIR,
- "uid_map": linux.DT_REG,
- }
-)
-
-// setup boots a test kernel, registers the proc filesystem type, creates a
-// tmpfs-rooted mount namespace, and mounts proc at /proc with two cgroup
-// entries as internal data. Any failure aborts the test.
-func setup(t *testing.T) *testutil.System {
-	k, err := testutil.Boot()
-	if err != nil {
-		t.Fatalf("Error creating kernel: %v", err)
-	}
-
-	ctx := k.SupervisorContext()
-	creds := auth.CredentialsFromContext(ctx)
-
-	k.VFS().MustRegisterFilesystemType(Name, &FilesystemType{}, &vfs.RegisterFilesystemTypeOptions{
-		AllowUserMount: true,
-	})
-
-	mntns, err := k.VFS().NewMountNamespace(ctx, creds, "", tmpfs.Name, &vfs.GetFilesystemOptions{})
-	if err != nil {
-		t.Fatalf("NewMountNamespace(): %v", err)
-	}
-
-	// procPath builds a fresh path operation for /proc relative to the root
-	// of the new mount namespace.
-	procPath := func() *vfs.PathOperation {
-		return &vfs.PathOperation{
-			Root:  mntns.Root(),
-			Start: mntns.Root(),
-			Path:  fspath.Parse("/proc"),
-		}
-	}
-
-	// Create the mount point, then mount proc on top of it.
-	if err := k.VFS().MkdirAt(ctx, creds, procPath(), &vfs.MkdirOptions{Mode: 0777}); err != nil {
-		t.Fatalf("MkDir(/proc): %v", err)
-	}
-
-	cgroups := map[string]string{
-		"cpuset": "/foo/cpuset",
-		"memory": "/foo/memory",
-	}
-	mntOpts := &vfs.MountOptions{
-		GetFilesystemOptions: vfs.GetFilesystemOptions{
-			InternalData: &InternalData{Cgroups: cgroups},
-		},
-	}
-	if err := k.VFS().MountAt(ctx, creds, "", procPath(), Name, mntOpts); err != nil {
-		t.Fatalf("MountAt(/proc): %v", err)
-	}
-	return testutil.NewSystem(ctx, t, k.VFS(), mntns)
-}
-
-// TestTasksEmpty lists /proc without any tasks and checks the static entries
-// and the offsets of the self symlinks.
-func TestTasksEmpty(t *testing.T) {
-	sys := setup(t)
-	defer sys.Destroy()
-
-	dirents := sys.ListDirents(sys.PathOpAtRoot("/proc"))
-	sys.AssertAllDirentTypes(dirents, tasksStaticFiles)
-	sys.AssertDirentOffsets(dirents, tasksStaticFilesNextOffs)
-}
-
-// TestTasks creates five tasks and checks that /proc lists one directory per
-// task in ascending PID order with the expected offsets, that task
-// directories can be opened but not read, and that an unknown PID yields
-// ENOENT.
-func TestTasks(t *testing.T) {
-	s := setup(t)
-	defer s.Destroy()
-
-	expectedDirents := make(map[string]testutil.DirentType)
-	for n, d := range tasksStaticFiles {
-		expectedDirents[n] = d
-	}
-
-	k := kernel.KernelFromContext(s.Ctx)
-	var tasks []*kernel.Task
-	for i := 0; i < 5; i++ {
-		tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits())
-		task, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc, s.MntNs, s.Root, s.Root)
-		if err != nil {
-			t.Fatalf("CreateTask(): %v", err)
-		}
-		tasks = append(tasks, task)
-		expectedDirents[fmt.Sprintf("%d", i+1)] = linux.DT_DIR
-	}
-
-	collector := s.ListDirents(s.PathOpAtRoot("/proc"))
-	s.AssertAllDirentTypes(collector, expectedDirents)
-	s.AssertDirentOffsets(collector, tasksStaticFilesNextOffs)
-
-	lastPid := 0
-	dirents := collector.OrderedDirents()
-	doneSkippingNonTaskDirs := false
-	for _, d := range dirents {
-		pid, err := strconv.Atoi(d.Name)
-		if err != nil {
-			if !doneSkippingNonTaskDirs {
-				// We haven't gotten to the task dirs yet.
-				continue
-			}
-			t.Fatalf("Invalid process directory %q", d.Name)
-		}
-		doneSkippingNonTaskDirs = true
-		if lastPid > pid {
-			t.Errorf("pids not in order: %v", dirents)
-		}
-		// Remember this PID so the next iteration really compares against
-		// its predecessor; without this the order check can never fail.
-		lastPid = pid
-
-		found := false
-		for _, task := range tasks {
-			if k.TaskSet().Root.IDOfTask(task) == kernel.ThreadID(pid) {
-				found = true
-				break
-			}
-		}
-		if !found {
-			t.Errorf("Additional task ID %d listed: %v", pid, tasks)
-		}
-		// Next offset starts at 256+2 ('self' and 'thread-self'), then adds the
-		// PID, and adds 1 for the next offset.
-		if want := int64(256 + 2 + pid + 1); d.NextOff != want {
-			t.Errorf("Wrong dirent offset want: %d got: %d: %+v", want, d.NextOff, d)
-		}
-	}
-	if !doneSkippingNonTaskDirs {
-		t.Fatalf("Never found any process directories.")
-	}
-
-	// Test lookup.
-	for _, path := range []string{"/proc/1", "/proc/2"} {
-		fd, err := s.VFS.OpenAt(
-			s.Ctx,
-			s.Creds,
-			s.PathOpAtRoot(path),
-			&vfs.OpenOptions{},
-		)
-		if err != nil {
-			t.Fatalf("vfsfs.OpenAt(%q) failed: %v", path, err)
-		}
-		defer fd.DecRef()
-		buf := make([]byte, 1)
-		bufIOSeq := usermem.BytesIOSequence(buf)
-		if _, err := fd.Read(s.Ctx, bufIOSeq, vfs.ReadOptions{}); err != syserror.EISDIR {
-			t.Errorf("wrong error reading directory: %v", err)
-		}
-	}
-
-	if _, err := s.VFS.OpenAt(
-		s.Ctx,
-		s.Creds,
-		s.PathOpAtRoot("/proc/9999"),
-		&vfs.OpenOptions{},
-	); err != syserror.ENOENT {
-		t.Fatalf("wrong error from vfsfs.OpenAt(/proc/9999): %v", err)
-	}
-}
-
-// TestTasksOffset creates three tasks and, for a range of seek offsets,
-// checks which /proc entries are still returned by IterDirents and that
-// their types and next offsets match.
-func TestTasksOffset(t *testing.T) {
-	s := setup(t)
-	defer s.Destroy()
-
-	k := kernel.KernelFromContext(s.Ctx)
-	for i := 0; i < 3; i++ {
-		tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits())
-		if _, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc, s.MntNs, s.Root, s.Root); err != nil {
-			t.Fatalf("CreateTask(): %v", err)
-		}
-	}
-
-	for _, tc := range []struct {
-		name   string
-		offset int64
-		wants  map[string]vfs.Dirent
-	}{
-		{
-			name:   "small offset",
-			offset: 100,
-			wants: map[string]vfs.Dirent{
-				"self":        selfLink,
-				"thread-self": threadSelfLink,
-				"1":           proc1,
-				"2":           proc2,
-				"3":           proc3,
-			},
-		},
-		{
-			name:   "offset at start",
-			offset: 256,
-			wants: map[string]vfs.Dirent{
-				"self":        selfLink,
-				"thread-self": threadSelfLink,
-				"1":           proc1,
-				"2":           proc2,
-				"3":           proc3,
-			},
-		},
-		{
-			name:   "skip /proc/self",
-			offset: 257,
-			wants: map[string]vfs.Dirent{
-				"thread-self": threadSelfLink,
-				"1":           proc1,
-				"2":           proc2,
-				"3":           proc3,
-			},
-		},
-		{
-			name:   "skip symlinks",
-			offset: 258,
-			wants: map[string]vfs.Dirent{
-				"1": proc1,
-				"2": proc2,
-				"3": proc3,
-			},
-		},
-		{
-			name:   "skip first process",
-			offset: 260,
-			wants: map[string]vfs.Dirent{
-				"2": proc2,
-				"3": proc3,
-			},
-		},
-		{
-			name:   "last process",
-			offset: 261,
-			wants: map[string]vfs.Dirent{
-				"3": proc3,
-			},
-		},
-		{
-			name:   "after last",
-			offset: 262,
-			wants:  nil,
-		},
-		{
-			name:   "TaskLimit+1",
-			offset: kernel.TasksLimit + 1,
-			wants:  nil,
-		},
-		{
-			name:   "max",
-			offset: math.MaxInt64,
-			wants:  nil,
-		},
-	} {
-		t.Run(tc.name, func(t *testing.T) {
-			s := s.WithSubtest(t)
-			fd, err := s.VFS.OpenAt(
-				s.Ctx,
-				s.Creds,
-				s.PathOpAtRoot("/proc"),
-				&vfs.OpenOptions{},
-			)
-			if err != nil {
-				// Name the path that is actually opened.
-				t.Fatalf("vfsfs.OpenAt(/proc) failed: %v", err)
-			}
-			defer fd.DecRef()
-			if _, err := fd.Seek(s.Ctx, tc.offset, linux.SEEK_SET); err != nil {
-				t.Fatalf("Seek(%d, SEEK_SET): %v", tc.offset, err)
-			}
-
-			var collector testutil.DirentCollector
-			if err := fd.IterDirents(s.Ctx, &collector); err != nil {
-				t.Fatalf("IterDirents(): %v", err)
-			}
-
-			// Split the wanted dirents into the two maps the assertion
-			// helpers take; a zero NextOff means "do not check".
-			expectedTypes := make(map[string]testutil.DirentType)
-			expectedOffsets := make(map[string]int64)
-			for name, want := range tc.wants {
-				expectedTypes[name] = want.Type
-				if want.NextOff != 0 {
-					expectedOffsets[name] = want.NextOff
-				}
-			}
-
-			collector.SkipDotsChecks(true) // We seek()ed past the dots.
-			s.AssertAllDirentTypes(&collector, expectedTypes)
-			s.AssertDirentOffsets(&collector, expectedOffsets)
-		})
-	}
-}
-
-func TestTask(t *testing.T) {
- s := setup(t)
- defer s.Destroy()
-
- k := kernel.KernelFromContext(s.Ctx)
- tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits())
- _, err := testutil.CreateTask(s.Ctx, "name", tc, s.MntNs, s.Root, s.Root)
- if err != nil {
- t.Fatalf("CreateTask(): %v", err)
- }
-
- collector := s.ListDirents(s.PathOpAtRoot("/proc/1"))
- s.AssertAllDirentTypes(collector, taskStaticFiles)
-}
-
-func TestProcSelf(t *testing.T) {
- s := setup(t)
- defer s.Destroy()
-
- k := kernel.KernelFromContext(s.Ctx)
- tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits())
- task, err := testutil.CreateTask(s.Ctx, "name", tc, s.MntNs, s.Root, s.Root)
- if err != nil {
- t.Fatalf("CreateTask(): %v", err)
- }
-
- collector := s.WithTemporaryContext(task).ListDirents(&vfs.PathOperation{
- Root: s.Root,
- Start: s.Root,
- Path: fspath.Parse("/proc/self/"),
- FollowFinalSymlink: true,
- })
- s.AssertAllDirentTypes(collector, taskStaticFiles)
-}
-
-func iterateDir(ctx context.Context, t *testing.T, s *testutil.System, fd *vfs.FileDescription) {
- t.Logf("Iterating: %s", fd.MappedName(ctx))
-
- var collector testutil.DirentCollector
- if err := fd.IterDirents(ctx, &collector); err != nil {
- t.Fatalf("IterDirents(): %v", err)
- }
- if err := collector.Contains(".", linux.DT_DIR); err != nil {
- t.Error(err.Error())
- }
- if err := collector.Contains("..", linux.DT_DIR); err != nil {
- t.Error(err.Error())
- }
-
- for _, d := range collector.Dirents() {
- if d.Name == "." || d.Name == ".." {
- continue
- }
- childPath := path.Join(fd.MappedName(ctx), d.Name)
- if d.Type == linux.DT_LNK {
- link, err := s.VFS.ReadlinkAt(
- ctx,
- auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: s.Root, Start: s.Root, Path: fspath.Parse(childPath)},
- )
- if err != nil {
- t.Errorf("vfsfs.ReadlinkAt(%v) failed: %v", childPath, err)
- } else {
- t.Logf("Skipping symlink: /proc%s => %s", childPath, link)
- }
- continue
- }
-
- t.Logf("Opening: /proc%s", childPath)
- child, err := s.VFS.OpenAt(
- ctx,
- auth.CredentialsFromContext(ctx),
- &vfs.PathOperation{Root: s.Root, Start: s.Root, Path: fspath.Parse(childPath)},
- &vfs.OpenOptions{},
- )
- if err != nil {
- t.Errorf("vfsfs.OpenAt(%v) failed: %v", childPath, err)
- continue
- }
- defer child.DecRef()
- stat, err := child.Stat(ctx, vfs.StatOptions{})
- if err != nil {
- t.Errorf("Stat(%v) failed: %v", childPath, err)
- }
- if got := linux.FileMode(stat.Mode).DirentType(); got != d.Type {
- t.Errorf("wrong file mode, stat: %v, dirent: %v", got, d.Type)
- }
- if d.Type == linux.DT_DIR {
- // Found another dir, let's do it again!
- iterateDir(ctx, t, s, child)
- }
- }
-}
-
-// TestTree iterates all directories and stats every file.
-func TestTree(t *testing.T) {
- s := setup(t)
- defer s.Destroy()
-
- k := kernel.KernelFromContext(s.Ctx)
-
- pop := &vfs.PathOperation{
- Root: s.Root,
- Start: s.Root,
- Path: fspath.Parse("test-file"),
- }
- opts := &vfs.OpenOptions{
- Flags: linux.O_RDONLY | linux.O_CREAT,
- Mode: 0777,
- }
- file, err := s.VFS.OpenAt(s.Ctx, s.Creds, pop, opts)
- if err != nil {
- t.Fatalf("failed to create test file: %v", err)
- }
- defer file.DecRef()
-
- var tasks []*kernel.Task
- for i := 0; i < 5; i++ {
- tc := k.NewThreadGroup(nil, k.RootPIDNamespace(), kernel.NewSignalHandlers(), linux.SIGCHLD, k.GlobalInit().Limits())
- task, err := testutil.CreateTask(s.Ctx, fmt.Sprintf("name-%d", i), tc, s.MntNs, s.Root, s.Root)
- if err != nil {
- t.Fatalf("CreateTask(): %v", err)
- }
- // Add file to populate /proc/[pid]/fd and fdinfo directories.
- task.FDTable().NewFDVFS2(task, 0, file, kernel.FDFlags{})
- tasks = append(tasks, task)
- }
-
- ctx := tasks[0]
- fd, err := s.VFS.OpenAt(
- ctx,
- auth.CredentialsFromContext(s.Ctx),
- &vfs.PathOperation{Root: s.Root, Start: s.Root, Path: fspath.Parse("/proc")},
- &vfs.OpenOptions{},
- )
- if err != nil {
- t.Fatalf("vfsfs.OpenAt(/proc) failed: %v", err)
- }
- iterateDir(ctx, t, s, fd)
- fd.DecRef()
-}