summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry/fsimpl/proc/tasks.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry/fsimpl/proc/tasks.go')
-rw-r--r--pkg/sentry/fsimpl/proc/tasks.go140
1 files changed, 110 insertions, 30 deletions
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
index 50b2a832f..51f634716 100644
--- a/pkg/sentry/fsimpl/proc/tasks.go
+++ b/pkg/sentry/fsimpl/proc/tasks.go
@@ -15,6 +15,7 @@
package proc
import (
+ "bytes"
"sort"
"strconv"
@@ -27,7 +28,10 @@ import (
"gvisor.dev/gvisor/pkg/syserror"
)
-const defaultPermission = 0444
+const (
+ selfName = "self"
+ threadSelfName = "thread-self"
+)
// InoGenerator generates unique inode numbers for a given filesystem.
type InoGenerator interface {
@@ -45,29 +49,41 @@ type tasksInode struct {
inoGen InoGenerator
pidns *kernel.PIDNamespace
+
+ // '/proc/self' and '/proc/thread-self' have custom directory offsets in
+ // Linux. So handle them outside of OrderedChildren.
+ selfSymlink *vfs.Dentry
+ threadSelfSymlink *vfs.Dentry
+
+ // cgroupControllers is a map of controller name to directory in the
+ // cgroup hierarchy. These controllers are immutable and will be listed
+ // in /proc/pid/cgroup if not nil.
+ cgroupControllers map[string]string
}
var _ kernfs.Inode = (*tasksInode)(nil)
-func newTasksInode(inoGen InoGenerator, k *kernel.Kernel, pidns *kernel.PIDNamespace) (*tasksInode, *kernfs.Dentry) {
+func newTasksInode(inoGen InoGenerator, k *kernel.Kernel, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) (*tasksInode, *kernfs.Dentry) {
root := auth.NewRootCredentials(pidns.UserNamespace())
contents := map[string]*kernfs.Dentry{
- //"cpuinfo": newCPUInfo(ctx, msrc),
- //"filesystems": seqfile.NewSeqFileInode(ctx, &filesystemsData{}, msrc),
- "loadavg": newDentry(root, inoGen.NextIno(), defaultPermission, &loadavgData{}),
- "meminfo": newDentry(root, inoGen.NextIno(), defaultPermission, &meminfoData{k: k}),
- "mounts": kernfs.NewStaticSymlink(root, inoGen.NextIno(), defaultPermission, "self/mounts"),
- "self": newSelfSymlink(root, inoGen.NextIno(), defaultPermission, pidns),
- "stat": newDentry(root, inoGen.NextIno(), defaultPermission, &statData{k: k}),
- "thread-self": newThreadSelfSymlink(root, inoGen.NextIno(), defaultPermission, pidns),
- //"uptime": newUptime(ctx, msrc),
- //"version": newVersionData(root, inoGen.NextIno(), k),
- "version": newDentry(root, inoGen.NextIno(), defaultPermission, &versionData{k: k}),
+ "cpuinfo": newDentry(root, inoGen.NextIno(), 0444, newStaticFile(cpuInfoData(k))),
+ //"filesystems": newDentry(root, inoGen.NextIno(), 0444, &filesystemsData{}),
+ "loadavg": newDentry(root, inoGen.NextIno(), 0444, &loadavgData{}),
+ "sys": newSysDir(root, inoGen),
+ "meminfo": newDentry(root, inoGen.NextIno(), 0444, &meminfoData{}),
+ "mounts": kernfs.NewStaticSymlink(root, inoGen.NextIno(), "self/mounts"),
+ "net": newNetDir(root, inoGen, k),
+ "stat": newDentry(root, inoGen.NextIno(), 0444, &statData{}),
+ "uptime": newDentry(root, inoGen.NextIno(), 0444, &uptimeData{}),
+ "version": newDentry(root, inoGen.NextIno(), 0444, &versionData{}),
}
inode := &tasksInode{
- pidns: pidns,
- inoGen: inoGen,
+ pidns: pidns,
+ inoGen: inoGen,
+ selfSymlink: newSelfSymlink(root, inoGen.NextIno(), 0444, pidns).VFSDentry(),
+ threadSelfSymlink: newThreadSelfSymlink(root, inoGen.NextIno(), 0444, pidns).VFSDentry(),
+ cgroupControllers: cgroupControllers,
}
inode.InodeAttrs.Init(root, inoGen.NextIno(), linux.ModeDirectory|0555)
@@ -86,6 +102,13 @@ func (i *tasksInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, erro
// Try to lookup a corresponding task.
tid, err := strconv.ParseUint(name, 10, 64)
if err != nil {
+ // If it failed to parse, check if it's one of the special handled files.
+ switch name {
+ case selfName:
+ return i.selfSymlink, nil
+ case threadSelfName:
+ return i.threadSelfSymlink, nil
+ }
return nil, syserror.ENOENT
}
@@ -94,7 +117,7 @@ func (i *tasksInode) Lookup(ctx context.Context, name string) (*vfs.Dentry, erro
return nil, syserror.ENOENT
}
- taskDentry := newTaskInode(i.inoGen, task, i.pidns, true)
+ taskDentry := newTaskInode(i.inoGen, task, i.pidns, true, i.cgroupControllers)
return taskDentry.VFSDentry(), nil
}
@@ -104,41 +127,81 @@ func (i *tasksInode) Valid(ctx context.Context) bool {
}
// IterDirents implements kernfs.inodeDynamicLookup.
-//
-// TODO(gvisor.dev/issue/1195): Use tgid N offset = TGID_OFFSET + N.
-func (i *tasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
- var tids []int
+func (i *tasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, _ int64) (int64, error) {
+ // fs/proc/internal.h: #define FIRST_PROCESS_ENTRY 256
+ const FIRST_PROCESS_ENTRY = 256
+
+ // Use maxTaskID to shortcut searches that will result in 0 entries.
+ const maxTaskID = kernel.TasksLimit + 1
+ if offset >= maxTaskID {
+ return offset, nil
+ }
+
+ // According to Linux (fs/proc/base.c:proc_pid_readdir()), process directories
+ // start at offset FIRST_PROCESS_ENTRY with '/proc/self', followed by
+ // '/proc/thread-self' and then '/proc/[pid]'.
+ if offset < FIRST_PROCESS_ENTRY {
+ offset = FIRST_PROCESS_ENTRY
+ }
- // Collect all tasks. Per linux we only include it in directory listings if
- // it's the leader. But for whatever crazy reason, you can still walk to the
- // given node.
+ if offset == FIRST_PROCESS_ENTRY {
+ dirent := vfs.Dirent{
+ Name: selfName,
+ Type: linux.DT_LNK,
+ Ino: i.inoGen.NextIno(),
+ NextOff: offset + 1,
+ }
+ if !cb.Handle(dirent) {
+ return offset, nil
+ }
+ offset++
+ }
+ if offset == FIRST_PROCESS_ENTRY+1 {
+ dirent := vfs.Dirent{
+ Name: threadSelfName,
+ Type: linux.DT_LNK,
+ Ino: i.inoGen.NextIno(),
+ NextOff: offset + 1,
+ }
+ if !cb.Handle(dirent) {
+ return offset, nil
+ }
+ offset++
+ }
+
+ // Collect all tasks that TGIDs are greater than the offset specified. Per
+ // Linux we only include in directory listings if it's the leader. But for
+ // whatever crazy reason, you can still walk to the given node.
+ var tids []int
+ startTid := offset - FIRST_PROCESS_ENTRY - 2
for _, tg := range i.pidns.ThreadGroups() {
+ tid := i.pidns.IDOfThreadGroup(tg)
+ if int64(tid) < startTid {
+ continue
+ }
if leader := tg.Leader(); leader != nil {
- tids = append(tids, int(i.pidns.IDOfThreadGroup(tg)))
+ tids = append(tids, int(tid))
}
}
if len(tids) == 0 {
return offset, nil
}
- if relOffset >= int64(len(tids)) {
- return offset, nil
- }
sort.Ints(tids)
- for _, tid := range tids[relOffset:] {
+ for _, tid := range tids {
dirent := vfs.Dirent{
Name: strconv.FormatUint(uint64(tid), 10),
Type: linux.DT_DIR,
Ino: i.inoGen.NextIno(),
- NextOff: offset + 1,
+ NextOff: FIRST_PROCESS_ENTRY + 2 + int64(tid) + 1,
}
if !cb.Handle(dirent) {
return offset, nil
}
offset++
}
- return offset, nil
+ return maxTaskID, nil
}
// Open implements kernfs.Inode.
@@ -160,3 +223,20 @@ func (i *tasksInode) Stat(vsfs *vfs.Filesystem) linux.Statx {
return stat
}
+
+func cpuInfoData(k *kernel.Kernel) string {
+ features := k.FeatureSet()
+ if features == nil {
+ // Kernel is always initialized with a FeatureSet.
+ panic("cpuinfo read with nil FeatureSet")
+ }
+ var buf bytes.Buffer
+ for i, max := uint(0), k.ApplicationCores(); i < max; i++ {
+ features.WriteCPUInfoTo(i, &buf)
+ }
+ return buf.String()
+}
+
+func shmData(v uint64) dynamicInode {
+ return newStaticFile(strconv.FormatUint(v, 10))
+}