diff options
author | gVisor bot <gvisor-bot@google.com> | 2021-04-03 04:15:14 +0000 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2021-04-03 04:15:14 +0000 |
commit | 0e1d141ffcf7878ff60a3ed1f1c696ecfa8d099e (patch) | |
tree | 9755d118b4801cb9743df8617128888ab06470e4 /pkg/sentry/fsimpl/proc | |
parent | 3dee9c57344555ab56d44c18c2b9f7c667d2d593 (diff) | |
parent | 932c8abd0f739bec295ff62cf8fce3dcb7e2d866 (diff) |
Merge release-20210322.0-38-g932c8abd0 (automated)
Diffstat (limited to 'pkg/sentry/fsimpl/proc')
-rw-r--r-- | pkg/sentry/fsimpl/proc/filesystem.go | 6 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/proc_state_autogen.go | 61 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/task.go | 23 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/task_files.go | 29 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/tasks.go | 19 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/proc/tasks_files.go | 16 |
6 files changed, 132 insertions, 22 deletions
diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go index 254a8b062..ce8f55b1f 100644 --- a/pkg/sentry/fsimpl/proc/filesystem.go +++ b/pkg/sentry/fsimpl/proc/filesystem.go @@ -86,13 +86,13 @@ func (ft FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualF procfs.MaxCachedDentries = maxCachedDentries procfs.VFSFilesystem().Init(vfsObj, &ft, procfs) - var cgroups map[string]string + var fakeCgroupControllers map[string]string if opts.InternalData != nil { data := opts.InternalData.(*InternalData) - cgroups = data.Cgroups + fakeCgroupControllers = data.Cgroups } - inode := procfs.newTasksInode(ctx, k, pidns, cgroups) + inode := procfs.newTasksInode(ctx, k, pidns, fakeCgroupControllers) var dentry kernfs.Dentry dentry.InitRoot(&procfs.Filesystem, inode) return procfs.VFSFilesystem(), dentry.VFSDentry(), nil diff --git a/pkg/sentry/fsimpl/proc/proc_state_autogen.go b/pkg/sentry/fsimpl/proc/proc_state_autogen.go index 60e3a7485..8303e846d 100644 --- a/pkg/sentry/fsimpl/proc/proc_state_autogen.go +++ b/pkg/sentry/fsimpl/proc/proc_state_autogen.go @@ -1233,6 +1233,34 @@ func (fd *namespaceFD) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(3, &fd.inode) } +func (d *taskCgroupData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.taskCgroupData" +} + +func (d *taskCgroupData) StateFields() []string { + return []string{ + "dynamicBytesFileSetAttr", + "task", + } +} + +func (d *taskCgroupData) beforeSave() {} + +// +checklocksignore +func (d *taskCgroupData) StateSave(stateSinkObject state.Sink) { + d.beforeSave() + stateSinkObject.Save(0, &d.dynamicBytesFileSetAttr) + stateSinkObject.Save(1, &d.task) +} + +func (d *taskCgroupData) afterLoad() {} + +// +checklocksignore +func (d *taskCgroupData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &d.dynamicBytesFileSetAttr) + stateSourceObject.Load(1, &d.task) +} + func (r *taskInodeRefs) StateTypeName() string { return "pkg/sentry/fsimpl/proc.taskInodeRefs" } @@ -1554,7 +1582,7 @@ func (i *tasksInode) StateFields() []string { "locks", "fs", "pidns", - "cgroupControllers", + "fakeCgroupControllers", } } @@ -1574,7 +1602,7 @@ func (i *tasksInode) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(8, &i.locks) stateSinkObject.Save(9, &i.fs) stateSinkObject.Save(10, &i.pidns) - stateSinkObject.Save(11, &i.cgroupControllers) + stateSinkObject.Save(11, &i.fakeCgroupControllers) } func (i *tasksInode) afterLoad() {} @@ -1592,7 +1620,7 @@ func (i *tasksInode) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(8, &i.locks) stateSourceObject.Load(9, &i.fs) stateSourceObject.Load(10, &i.pidns) - stateSourceObject.Load(11, &i.cgroupControllers) + stateSourceObject.Load(11, &i.fakeCgroupControllers) } func (s *staticFileSetStat) StateTypeName() string { @@ -1924,6 +1952,31 @@ func (d *filesystemsData) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(0, &d.DynamicBytesFile) } +func (c *cgroupsData) StateTypeName() string { + return "pkg/sentry/fsimpl/proc.cgroupsData" +} + +func (c *cgroupsData) StateFields() []string { + return []string{ + "dynamicBytesFileSetAttr", + } +} + +func (c *cgroupsData) beforeSave() {} + +// +checklocksignore +func (c *cgroupsData) StateSave(stateSinkObject state.Sink) { + c.beforeSave() + stateSinkObject.Save(0, &c.dynamicBytesFileSetAttr) +} + +func (c *cgroupsData) afterLoad() {} + +// +checklocksignore +func (c *cgroupsData) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &c.dynamicBytesFileSetAttr) +} + func (r *tasksInodeRefs) StateTypeName() string { return "pkg/sentry/fsimpl/proc.tasksInodeRefs" } @@ -2231,6 +2284,7 @@ func init() { state.Register((*namespaceSymlink)(nil)) state.Register((*namespaceInode)(nil)) state.Register((*namespaceFD)(nil)) + state.Register((*taskCgroupData)(nil)) state.Register((*taskInodeRefs)(nil)) state.Register((*ifinet6)(nil)) state.Register((*netDevData)(nil)) @@ -2254,6 +2308,7 @@ func init() { state.Register((*uptimeData)(nil)) state.Register((*versionData)(nil)) state.Register((*filesystemsData)(nil)) + state.Register((*cgroupsData)(nil)) state.Register((*tasksInodeRefs)(nil)) state.Register((*tcpMemDir)(nil)) state.Register((*mmapMinAddrData)(nil)) diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index fea138f93..d05cc1508 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -47,7 +47,7 @@ type taskInode struct { var _ kernfs.Inode = (*taskInode)(nil) -func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, cgroupControllers map[string]string) (kernfs.Inode, error) { +func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, fakeCgroupControllers map[string]string) (kernfs.Inode, error) { if task.ExitState() == kernel.TaskExitDead { return nil, syserror.ESRCH } @@ -82,10 +82,12 @@ func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns "uid_map": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0644, &idMapData{task: task, gids: false}), } if isThreadGroup { - contents["task"] = fs.newSubtasks(ctx, task, pidns, cgroupControllers) + contents["task"] = fs.newSubtasks(ctx, task, pidns, fakeCgroupControllers) } - if len(cgroupControllers) > 0 { - contents["cgroup"] = fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, newCgroupData(cgroupControllers)) + if len(fakeCgroupControllers) > 0 { + contents["cgroup"] = fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, newFakeCgroupData(fakeCgroupControllers)) + } else { + contents["cgroup"] = fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &taskCgroupData{task: task}) } taskInode := &taskInode{task: task} @@ -226,11 +228,14 @@ func newIO(t *kernel.Task, isThreadGroup bool) *ioData { return &ioData{ioUsage: t} } -// newCgroupData creates inode that shows cgroup information. -// From man 7 cgroups: "For each cgroup hierarchy of which the process is a -// member, there is one entry containing three colon-separated fields: -// hierarchy-ID:controller-list:cgroup-path" -func newCgroupData(controllers map[string]string) dynamicInode { +// newFakeCgroupData creates an inode that shows fake cgroup +// information passed in as mount options. From man 7 cgroups: "For +// each cgroup hierarchy of which the process is a member, there is +// one entry containing three colon-separated fields: +// hierarchy-ID:controller-list:cgroup-path" +// +// TODO(b/182488796): Remove once all users adopt cgroupfs. +func newFakeCgroupData(controllers map[string]string) dynamicInode { var buf bytes.Buffer // The hierarchy ids must be positive integers (for cgroup v1), but the diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index 85909d551..b294dfd6a 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -1100,3 +1100,32 @@ func (fd *namespaceFD) SetStat(ctx context.Context, opts vfs.SetStatOptions) err func (fd *namespaceFD) Release(ctx context.Context) { fd.inode.DecRef(ctx) } + +// taskCgroupData generates data for /proc/[pid]/cgroup. +// +// +stateify savable +type taskCgroupData struct { + dynamicBytesFileSetAttr + task *kernel.Task +} + +var _ dynamicInode = (*taskCgroupData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (d *taskCgroupData) Generate(ctx context.Context, buf *bytes.Buffer) error { + // When a task is existing on Linux, a task's cgroup set is cleared and + // reset to the initial cgroup set, which is essentially the set of root + // cgroups. Because of this, the /proc/<pid>/cgroup file is always readable + // on Linux throughout a task's lifetime. + // + // The sentry removes tasks from cgroups during the exit process, but + // doesn't move them into an initial cgroup set, so partway through task + // exit this file show a task is in no cgroups, which is incorrect. Instead, + // once a task has left its cgroups, we return an error. + if d.task.ExitState() >= kernel.TaskExitInitiated { + return syserror.ESRCH + } + + d.task.GenerateProcTaskCgroup(buf) + return nil +} diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go index fdc580610..7c7543f14 100644 --- a/pkg/sentry/fsimpl/proc/tasks.go +++ b/pkg/sentry/fsimpl/proc/tasks.go @@ -54,15 +54,15 @@ type tasksInode struct { // '/proc/self' and '/proc/thread-self' have custom directory offsets in // Linux. So handle them outside of OrderedChildren. - // cgroupControllers is a map of controller name to directory in the + // fakeCgroupControllers is a map of controller name to directory in the // cgroup hierarchy. These controllers are immutable and will be listed // in /proc/pid/cgroup if not nil. - cgroupControllers map[string]string + fakeCgroupControllers map[string]string } var _ kernfs.Inode = (*tasksInode)(nil) -func (fs *filesystem) newTasksInode(ctx context.Context, k *kernel.Kernel, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) *tasksInode { +func (fs *filesystem) newTasksInode(ctx context.Context, k *kernel.Kernel, pidns *kernel.PIDNamespace, fakeCgroupControllers map[string]string) *tasksInode { root := auth.NewRootCredentials(pidns.UserNamespace()) contents := map[string]kernfs.Inode{ "cpuinfo": fs.newInode(ctx, root, 0444, newStaticFileSetStat(cpuInfoData(k))), @@ -76,11 +76,16 @@ func (fs *filesystem) newTasksInode(ctx context.Context, k *kernel.Kernel, pidns "uptime": fs.newInode(ctx, root, 0444, &uptimeData{}), "version": fs.newInode(ctx, root, 0444, &versionData{}), } + // If fakeCgroupControllers are provided, don't create a cgroupfs backed + // /proc/cgroup as it will not match the fake controllers. + if len(fakeCgroupControllers) == 0 { + contents["cgroups"] = fs.newInode(ctx, root, 0444, &cgroupsData{}) + } inode := &tasksInode{ - pidns: pidns, - fs: fs, - cgroupControllers: cgroupControllers, + pidns: pidns, + fs: fs, + fakeCgroupControllers: fakeCgroupControllers, } inode.InodeAttrs.Init(ctx, root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555) inode.InitRefs() @@ -118,7 +123,7 @@ func (i *tasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, err return nil, syserror.ENOENT } - return i.fs.newTaskInode(ctx, task, i.pidns, true, i.cgroupControllers) + return i.fs.newTaskInode(ctx, task, i.pidns, true, i.fakeCgroupControllers) } // IterDirents implements kernfs.inodeDirectory.IterDirents. diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go index f0029cda6..e1a8b4409 100644 --- a/pkg/sentry/fsimpl/proc/tasks_files.go +++ b/pkg/sentry/fsimpl/proc/tasks_files.go @@ -384,3 +384,19 @@ func (d *filesystemsData) Generate(ctx context.Context, buf *bytes.Buffer) error k.VFS().GenerateProcFilesystems(buf) return nil } + +// cgroupsData backs /proc/cgroups. +// +// +stateify savable +type cgroupsData struct { + dynamicBytesFileSetAttr +} + +var _ dynamicInode = (*cgroupsData)(nil) + +// Generate implements vfs.DynamicBytesSource.Generate. +func (*cgroupsData) Generate(ctx context.Context, buf *bytes.Buffer) error { + r := kernel.KernelFromContext(ctx).CgroupRegistry() + r.GenerateProcCgroups(buf) + return nil +} |