summary refs log tree commit diff homepage
path: root/pkg/sentry/fsimpl/cgroupfs
diff options
context:
space:
mode:
author gVisor bot <gvisor-bot@google.com> 2021-08-12 00:26:22 +0000
committer gVisor bot <gvisor-bot@google.com> 2021-08-12 00:26:22 +0000
commit 3486648e75a831840219939f41ac89ef64dea0f3 (patch)
tree a7afe5244eed1fdfc9d3ccb550bf526c45e35a83 /pkg/sentry/fsimpl/cgroupfs
parent ddd6507953d90452314553fedce6bbf7f372c635 (diff)
parent a50596874a4971167f97a05181363e91292a2885 (diff)
Merge release-20210726.0-50-ga50596874 (automated)
Diffstat (limited to 'pkg/sentry/fsimpl/cgroupfs')
-rw-r--r-- pkg/sentry/fsimpl/cgroupfs/base.go 9
-rw-r--r-- pkg/sentry/fsimpl/cgroupfs/cgroupfs.go 181
-rw-r--r-- pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go 25
3 files changed, 189 insertions, 26 deletions
diff --git a/pkg/sentry/fsimpl/cgroupfs/base.go b/pkg/sentry/fsimpl/cgroupfs/base.go
index 4290ffe0d..71bb0a9c8 100644
--- a/pkg/sentry/fsimpl/cgroupfs/base.go
+++ b/pkg/sentry/fsimpl/cgroupfs/base.go
@@ -88,7 +88,6 @@ type controller interface {
// +stateify savable
type cgroupInode struct {
dir
- fs *filesystem
// ts is the list of tasks in this cgroup. The kernel is responsible for
// removing tasks from this list before they're destroyed, so any tasks on
@@ -102,9 +101,10 @@ var _ kernel.CgroupImpl = (*cgroupInode)(nil)
func (fs *filesystem) newCgroupInode(ctx context.Context, creds *auth.Credentials) kernfs.Inode {
c := &cgroupInode{
- fs: fs,
- ts: make(map[*kernel.Task]struct{}),
+ dir: dir{fs: fs},
+ ts: make(map[*kernel.Task]struct{}),
}
+ c.dir.cgi = c
contents := make(map[string]kernfs.Inode)
contents["cgroup.procs"] = fs.newControllerFile(ctx, creds, &cgroupProcsData{c})
@@ -115,8 +115,7 @@ func (fs *filesystem) newCgroupInode(ctx context.Context, creds *auth.Credential
}
c.dir.InodeAttrs.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|linux.FileMode(0555))
- c.dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
- c.dir.InitRefs()
+ c.dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{Writable: true})
c.dir.IncLinks(c.dir.OrderedChildren.Populate(contents))
atomic.AddUint64(&fs.numCgroups, 1)
diff --git a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
index 22c8b7fda..edc3b50b9 100644
--- a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
+++ b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go
@@ -32,7 +32,8 @@
// controllers associated with them.
//
// Since cgroupfs doesn't allow hardlinks, there is a unique mapping between
-// cgroupfs dentries and inodes.
+// cgroupfs dentries and inodes. Thus, cgroupfs inodes don't need to be ref
+// counted and exist until they're unlinked once or the FS is destroyed.
//
// # Synchronization
//
@@ -48,10 +49,11 @@
// Lock order:
//
// kernel.CgroupRegistry.mu
-// cgroupfs.filesystem.mu
-// kernel.TaskSet.mu
-// kernel.Task.mu
-// cgroupfs.filesystem.tasksMu.
+// kernfs.filesystem.mu
+// kernel.TaskSet.mu
+// kernel.Task.mu
+// cgroupfs.filesystem.tasksMu.
+// cgroupfs.dir.OrderedChildren.mu
package cgroupfs
import (
@@ -63,6 +65,7 @@ import (
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
+ "gvisor.dev/gvisor/pkg/fspath"
"gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
@@ -108,6 +111,7 @@ type FilesystemType struct{}
// +stateify savable
type InternalData struct {
DefaultControlValues map[string]int64
+ InitialCgroupPath string
}
// filesystem implements vfs.FilesystemImpl and kernel.cgroupFS.
@@ -134,6 +138,11 @@ type filesystem struct {
numCgroups uint64 // Protected by atomic ops.
root *kernfs.Dentry
+ // effectiveRoot is the initial cgroup new tasks are created in. Unless
+ // overwritten by internal mount options, root == effectiveRoot. If
+ // effectiveRoot != root, an extra reference is held on effectiveRoot for
+ // the lifetime of the filesystem.
+ effectiveRoot *kernfs.Dentry
// tasksMu serializes task membership changes across all cgroups within a
// filesystem.
@@ -229,6 +238,9 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
fs := vfsfs.Impl().(*filesystem)
ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: mounting new view to hierarchy %v", fs.hierarchyID)
fs.root.IncRef()
+ if fs.effectiveRoot != fs.root {
+ fs.effectiveRoot.IncRef()
+ }
return vfsfs, fs.root.VFSDentry(), nil
}
@@ -245,8 +257,8 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
var defaults map[string]int64
if opts.InternalData != nil {
- ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: default control values: %v", defaults)
defaults = opts.InternalData.(*InternalData).DefaultControlValues
+ ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: default control values: %v", defaults)
}
for _, ty := range wantControllers {
@@ -286,6 +298,14 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
var rootD kernfs.Dentry
rootD.InitRoot(&fs.Filesystem, root)
fs.root = &rootD
+ fs.effectiveRoot = fs.root
+
+ if err := fs.prepareInitialCgroup(ctx, vfsObj, opts); err != nil {
+ ctx.Warningf("cgroupfs.FilesystemType.GetFilesystem: failed to prepare initial cgroup: %v", err)
+ rootD.DecRef(ctx)
+ fs.VFSFilesystem().DecRef(ctx)
+ return nil, nil, err
+ }
// Register controllers. The registry may be modified concurrently, so if we
// get an error, we raced with someone else who registered the same
@@ -303,10 +323,47 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
return fs.VFSFilesystem(), rootD.VFSDentry(), nil
}
+// prepareInitialCgroup creates the initial cgroup according to opts. An initial
+// cgroup is optional, and if not specified, this function is a no-op.
+func (fs *filesystem) prepareInitialCgroup(ctx context.Context, vfsObj *vfs.VirtualFilesystem, opts vfs.GetFilesystemOptions) error {
+ if opts.InternalData == nil {
+ return nil
+ }
+ initPathStr := opts.InternalData.(*InternalData).InitialCgroupPath
+ if initPathStr == "" {
+ return nil
+ }
+ ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: initial cgroup path: %v", initPathStr)
+ initPath := fspath.Parse(initPathStr)
+ if !initPath.Absolute || !initPath.HasComponents() {
+ ctx.Warningf("cgroupfs.FilesystemType.GetFilesystem: initial cgroup path invalid: %+v", initPath)
+ return linuxerr.EINVAL
+ }
+
+ // Have initial cgroup target, create the tree.
+ cgDir := fs.root.Inode().(*cgroupInode)
+ for pit := initPath.Begin; pit.Ok(); pit = pit.Next() {
+ cgDirI, err := cgDir.NewDir(ctx, pit.String(), vfs.MkdirOptions{})
+ if err != nil {
+ return err
+ }
+ cgDir = cgDirI.(*cgroupInode)
+ }
+
+ // Walk to target dentry.
+ initDentry, err := fs.root.WalkDentryTree(ctx, vfsObj, initPath)
+ if err != nil {
+ ctx.Warningf("cgroupfs.FilesystemType.GetFilesystem: initial cgroup dentry not found: %v", err)
+ return linuxerr.ENOENT
+ }
+ fs.effectiveRoot = initDentry // Reference from WalkDentryTree transferred here.
+ return nil
+}
+
func (fs *filesystem) rootCgroup() kernel.Cgroup {
return kernel.Cgroup{
- Dentry: fs.root,
- CgroupImpl: fs.root.Inode().(kernel.CgroupImpl),
+ Dentry: fs.effectiveRoot,
+ CgroupImpl: fs.effectiveRoot.Inode().(kernel.CgroupImpl),
}
}
@@ -320,6 +377,10 @@ func (fs *filesystem) Release(ctx context.Context) {
r.Unregister(fs.hierarchyID)
}
+ if fs.root != fs.effectiveRoot {
+ fs.effectiveRoot.DecRef(ctx)
+ }
+
fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor)
fs.Filesystem.Release(ctx)
}
@@ -346,15 +407,18 @@ func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error
//
// +stateify savable
type dir struct {
- dirRefs
+ kernfs.InodeNoopRefCount
kernfs.InodeAlwaysValid
kernfs.InodeAttrs
kernfs.InodeNotSymlink
- kernfs.InodeDirectoryNoNewChildren // TODO(b/183137098): Implement mkdir.
+ kernfs.InodeDirectoryNoNewChildren
kernfs.OrderedChildren
implStatFS
locks vfs.FileLocks
+
+ fs *filesystem // Immutable.
+ cgi *cgroupInode // Immutable.
}
// Keep implements kernfs.Inode.Keep.
@@ -378,9 +442,100 @@ func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, kd *kernfs.Dentry
return fd.VFSFileDescription(), nil
}
-// DecRef implements kernfs.Inode.DecRef.
-func (d *dir) DecRef(ctx context.Context) {
- d.dirRefs.DecRef(func() { d.Destroy(ctx) })
+// NewDir implements kernfs.Inode.NewDir.
+func (d *dir) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (kernfs.Inode, error) {
+ // "Do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable."
+ // -- Linux, kernel/cgroup.c:cgroup_mkdir().
+ if strings.Contains(name, "\n") {
+ return nil, linuxerr.EINVAL
+ }
+ return d.OrderedChildren.Inserter(name, func() kernfs.Inode {
+ d.IncLinks(1)
+ return d.fs.newCgroupInode(ctx, auth.CredentialsFromContext(ctx))
+ })
+}
+
+// Rename implements kernfs.Inode.Rename. Cgroupfs only allows renaming of
+// cgroup directories, and the rename may only change the name within the same
+// parent. See linux, kernel/cgroup.c:cgroup_rename().
+func (d *dir) Rename(ctx context.Context, oldname, newname string, child, dst kernfs.Inode) error {
+ if _, ok := child.(*cgroupInode); !ok {
+ // Not a cgroup directory. Control files are backed by different types.
+ return linuxerr.ENOTDIR
+ }
+
+ dstCGInode, ok := dst.(*cgroupInode)
+ if !ok {
+ // Not a cgroup inode, so definitely can't be *this* inode.
+ return linuxerr.EIO
+ }
+ // Note: We're intentionally comparing addresses, since two different dirs
+ // could plausibly have identical contents, but would still occupy
+ // different locations in memory.
+ if d != &dstCGInode.dir {
+ // Destination dir is a different cgroup inode. Cross directory renames
+ // aren't allowed.
+ return linuxerr.EIO
+ }
+
+ // Rename moves oldname to newname within d. Proceed.
+ return d.OrderedChildren.Rename(ctx, oldname, newname, child, dst)
+}
+
+// Unlink implements kernfs.Inode.Unlink. Cgroupfs disallows unlink, as the only
+// files in the filesystem are control files, which can't be deleted.
+func (d *dir) Unlink(ctx context.Context, name string, child kernfs.Inode) error {
+ return linuxerr.EPERM
+}
+
+// hasChildrenLocked returns whether the cgroup dir contains any objects that
+// prevent it from being deleted.
+func (d *dir) hasChildrenLocked() bool {
+ // Subdirs take a link on the parent, so check whether there are any direct
+ // child cgroups. Exclude the dir's self link and the link from ".".
+ if d.InodeAttrs.Links()-2 > 0 {
+ return true
+ }
+ return len(d.cgi.ts) > 0
+}
+
+// HasChildren implements kernfs.Inode.HasChildren.
+//
+// The empty check for a cgroupfs directory is unlike a regular directory since
+// a cgroupfs directory will always have control files. A cgroupfs directory can
+// be deleted if cgroup contains no tasks and has no sub-cgroups.
+func (d *dir) HasChildren() bool {
+ d.fs.tasksMu.RLock()
+ defer d.fs.tasksMu.RUnlock()
+ return d.hasChildrenLocked()
+}
+
+// RmDir implements kernfs.Inode.RmDir.
+func (d *dir) RmDir(ctx context.Context, name string, child kernfs.Inode) error {
+ // Unlike a normal directory, we need to recheck whether child is empty,
+ // since vfs/kernfs can't stop tasks from entering or leaving the cgroup.
+ d.fs.tasksMu.RLock()
+ defer d.fs.tasksMu.RUnlock()
+
+ cgi, ok := child.(*cgroupInode)
+ if !ok {
+ return linuxerr.ENOTDIR
+ }
+ if cgi.dir.hasChildrenLocked() {
+ return linuxerr.ENOTEMPTY
+ }
+
+ // Disallow deletion of the effective root cgroup.
+ if cgi == d.fs.effectiveRoot.Inode().(*cgroupInode) {
+ ctx.Warningf("Cannot delete initial cgroup for new tasks %q", d.fs.effectiveRoot.FSLocalPath())
+ return linuxerr.EBUSY
+ }
+
+ err := d.OrderedChildren.RmDir(ctx, name, child)
+ if err == nil {
+ d.InodeAttrs.DecLinks()
+ }
+ return err
}
// controllerFile represents a generic control file that appears within a cgroup
diff --git a/pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go b/pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go
index 3142ab6f8..aa40bb193 100644
--- a/pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go
+++ b/pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go
@@ -41,7 +41,6 @@ func (c *cgroupInode) StateTypeName() string {
func (c *cgroupInode) StateFields() []string {
return []string{
"dir",
- "fs",
"ts",
}
}
@@ -52,8 +51,7 @@ func (c *cgroupInode) beforeSave() {}
func (c *cgroupInode) StateSave(stateSinkObject state.Sink) {
c.beforeSave()
stateSinkObject.Save(0, &c.dir)
- stateSinkObject.Save(1, &c.fs)
- stateSinkObject.Save(2, &c.ts)
+ stateSinkObject.Save(1, &c.ts)
}
func (c *cgroupInode) afterLoad() {}
@@ -61,8 +59,7 @@ func (c *cgroupInode) afterLoad() {}
// +checklocksignore
func (c *cgroupInode) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(0, &c.dir)
- stateSourceObject.Load(1, &c.fs)
- stateSourceObject.Load(2, &c.ts)
+ stateSourceObject.Load(1, &c.ts)
}
func (d *cgroupProcsData) StateTypeName() string {
@@ -143,6 +140,7 @@ func (i *InternalData) StateTypeName() string {
func (i *InternalData) StateFields() []string {
return []string{
"DefaultControlValues",
+ "InitialCgroupPath",
}
}
@@ -152,6 +150,7 @@ func (i *InternalData) beforeSave() {}
func (i *InternalData) StateSave(stateSinkObject state.Sink) {
i.beforeSave()
stateSinkObject.Save(0, &i.DefaultControlValues)
+ stateSinkObject.Save(1, &i.InitialCgroupPath)
}
func (i *InternalData) afterLoad() {}
@@ -159,6 +158,7 @@ func (i *InternalData) afterLoad() {}
// +checklocksignore
func (i *InternalData) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(0, &i.DefaultControlValues)
+ stateSourceObject.Load(1, &i.InitialCgroupPath)
}
func (fs *filesystem) StateTypeName() string {
@@ -174,6 +174,7 @@ func (fs *filesystem) StateFields() []string {
"kcontrollers",
"numCgroups",
"root",
+ "effectiveRoot",
}
}
@@ -189,6 +190,7 @@ func (fs *filesystem) StateSave(stateSinkObject state.Sink) {
stateSinkObject.Save(4, &fs.kcontrollers)
stateSinkObject.Save(5, &fs.numCgroups)
stateSinkObject.Save(6, &fs.root)
+ stateSinkObject.Save(7, &fs.effectiveRoot)
}
func (fs *filesystem) afterLoad() {}
@@ -202,6 +204,7 @@ func (fs *filesystem) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(4, &fs.kcontrollers)
stateSourceObject.Load(5, &fs.numCgroups)
stateSourceObject.Load(6, &fs.root)
+ stateSourceObject.Load(7, &fs.effectiveRoot)
}
func (i *implStatFS) StateTypeName() string {
@@ -231,7 +234,7 @@ func (d *dir) StateTypeName() string {
func (d *dir) StateFields() []string {
return []string{
- "dirRefs",
+ "InodeNoopRefCount",
"InodeAlwaysValid",
"InodeAttrs",
"InodeNotSymlink",
@@ -239,6 +242,8 @@ func (d *dir) StateFields() []string {
"OrderedChildren",
"implStatFS",
"locks",
+ "fs",
+ "cgi",
}
}
@@ -247,7 +252,7 @@ func (d *dir) beforeSave() {}
// +checklocksignore
func (d *dir) StateSave(stateSinkObject state.Sink) {
d.beforeSave()
- stateSinkObject.Save(0, &d.dirRefs)
+ stateSinkObject.Save(0, &d.InodeNoopRefCount)
stateSinkObject.Save(1, &d.InodeAlwaysValid)
stateSinkObject.Save(2, &d.InodeAttrs)
stateSinkObject.Save(3, &d.InodeNotSymlink)
@@ -255,13 +260,15 @@ func (d *dir) StateSave(stateSinkObject state.Sink) {
stateSinkObject.Save(5, &d.OrderedChildren)
stateSinkObject.Save(6, &d.implStatFS)
stateSinkObject.Save(7, &d.locks)
+ stateSinkObject.Save(8, &d.fs)
+ stateSinkObject.Save(9, &d.cgi)
}
func (d *dir) afterLoad() {}
// +checklocksignore
func (d *dir) StateLoad(stateSourceObject state.Source) {
- stateSourceObject.Load(0, &d.dirRefs)
+ stateSourceObject.Load(0, &d.InodeNoopRefCount)
stateSourceObject.Load(1, &d.InodeAlwaysValid)
stateSourceObject.Load(2, &d.InodeAttrs)
stateSourceObject.Load(3, &d.InodeNotSymlink)
@@ -269,6 +276,8 @@ func (d *dir) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(5, &d.OrderedChildren)
stateSourceObject.Load(6, &d.implStatFS)
stateSourceObject.Load(7, &d.locks)
+ stateSourceObject.Load(8, &d.fs)
+ stateSourceObject.Load(9, &d.cgi)
}
func (c *controllerFile) StateTypeName() string {