diff options
author | gVisor bot <gvisor-bot@google.com> | 2021-08-12 00:26:22 +0000 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2021-08-12 00:26:22 +0000 |
commit | 3486648e75a831840219939f41ac89ef64dea0f3 (patch) | |
tree | a7afe5244eed1fdfc9d3ccb550bf526c45e35a83 /pkg/sentry | |
parent | ddd6507953d90452314553fedce6bbf7f372c635 (diff) | |
parent | a50596874a4971167f97a05181363e91292a2885 (diff) |
Merge release-20210726.0-50-ga50596874 (automated)
Diffstat (limited to 'pkg/sentry')
-rw-r--r-- | pkg/sentry/fsimpl/cgroupfs/base.go | 9 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/cgroupfs/cgroupfs.go | 181 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go | 25 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 38 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/kernfs.go | 67 | ||||
-rw-r--r-- | pkg/sentry/kernel/cgroup.go | 1 |
6 files changed, 287 insertions, 34 deletions
diff --git a/pkg/sentry/fsimpl/cgroupfs/base.go b/pkg/sentry/fsimpl/cgroupfs/base.go index 4290ffe0d..71bb0a9c8 100644 --- a/pkg/sentry/fsimpl/cgroupfs/base.go +++ b/pkg/sentry/fsimpl/cgroupfs/base.go @@ -88,7 +88,6 @@ type controller interface { // +stateify savable type cgroupInode struct { dir - fs *filesystem // ts is the list of tasks in this cgroup. The kernel is responsible for // removing tasks from this list before they're destroyed, so any tasks on @@ -102,9 +101,10 @@ var _ kernel.CgroupImpl = (*cgroupInode)(nil) func (fs *filesystem) newCgroupInode(ctx context.Context, creds *auth.Credentials) kernfs.Inode { c := &cgroupInode{ - fs: fs, - ts: make(map[*kernel.Task]struct{}), + dir: dir{fs: fs}, + ts: make(map[*kernel.Task]struct{}), } + c.dir.cgi = c contents := make(map[string]kernfs.Inode) contents["cgroup.procs"] = fs.newControllerFile(ctx, creds, &cgroupProcsData{c}) @@ -115,8 +115,7 @@ func (fs *filesystem) newCgroupInode(ctx context.Context, creds *auth.Credential } c.dir.InodeAttrs.Init(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|linux.FileMode(0555)) - c.dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) - c.dir.InitRefs() + c.dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{Writable: true}) c.dir.IncLinks(c.dir.OrderedChildren.Populate(contents)) atomic.AddUint64(&fs.numCgroups, 1) diff --git a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go index 22c8b7fda..edc3b50b9 100644 --- a/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go +++ b/pkg/sentry/fsimpl/cgroupfs/cgroupfs.go @@ -32,7 +32,8 @@ // controllers associated with them. // // Since cgroupfs doesn't allow hardlinks, there is a unique mapping between -// cgroupfs dentries and inodes. +// cgroupfs dentries and inodes. Thus, cgroupfs inodes don't need to be ref +// counted and exist until they're unlinked once or the FS is destroyed. // // # Synchronization // @@ -48,10 +49,11 @@ // Lock order: // // kernel.CgroupRegistry.mu -// cgroupfs.filesystem.mu -// kernel.TaskSet.mu -// kernel.Task.mu -// cgroupfs.filesystem.tasksMu. +// kernfs.filesystem.mu +// kernel.TaskSet.mu +// kernel.Task.mu +// cgroupfs.filesystem.tasksMu. +// cgroupfs.dir.OrderedChildren.mu package cgroupfs import ( @@ -63,6 +65,7 @@ import ( "gvisor.dev/gvisor/pkg/abi/linux" "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/errors/linuxerr" + "gvisor.dev/gvisor/pkg/fspath" "gvisor.dev/gvisor/pkg/sentry/fsimpl/kernfs" "gvisor.dev/gvisor/pkg/sentry/kernel" "gvisor.dev/gvisor/pkg/sentry/kernel/auth" @@ -108,6 +111,7 @@ type FilesystemType struct{} // +stateify savable type InternalData struct { DefaultControlValues map[string]int64 + InitialCgroupPath string } // filesystem implements vfs.FilesystemImpl and kernel.cgroupFS. @@ -134,6 +138,11 @@ type filesystem struct { numCgroups uint64 // Protected by atomic ops. root *kernfs.Dentry + // effectiveRoot is the initial cgroup new tasks are created in. Unless + // overwritten by internal mount options, root == effectiveRoot. If + // effectiveRoot != root, an extra reference is held on effectiveRoot for + // the lifetime of the filesystem. + effectiveRoot *kernfs.Dentry // tasksMu serializes task membership changes across all cgroups within a // filesystem. @@ -229,6 +238,9 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt fs := vfsfs.Impl().(*filesystem) ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: mounting new view to hierarchy %v", fs.hierarchyID) fs.root.IncRef() + if fs.effectiveRoot != fs.root { + fs.effectiveRoot.IncRef() + } return vfsfs, fs.root.VFSDentry(), nil } @@ -245,8 +257,8 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt var defaults map[string]int64 if opts.InternalData != nil { - ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: default control values: %v", defaults) defaults = opts.InternalData.(*InternalData).DefaultControlValues + ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: default control values: %v", defaults) } for _, ty := range wantControllers { @@ -286,6 +298,14 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt var rootD kernfs.Dentry rootD.InitRoot(&fs.Filesystem, root) fs.root = &rootD + fs.effectiveRoot = fs.root + + if err := fs.prepareInitialCgroup(ctx, vfsObj, opts); err != nil { + ctx.Warningf("cgroupfs.FilesystemType.GetFilesystem: failed to prepare initial cgroup: %v", err) + rootD.DecRef(ctx) + fs.VFSFilesystem().DecRef(ctx) + return nil, nil, err + } // Register controllers. The registry may be modified concurrently, so if we // get an error, we raced with someone else who registered the same @@ -303,10 +323,47 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt return fs.VFSFilesystem(), rootD.VFSDentry(), nil } +// prepareInitialCgroup creates the initial cgroup according to opts. An initial +// cgroup is optional, and if not specified, this function is a no-op. +func (fs *filesystem) prepareInitialCgroup(ctx context.Context, vfsObj *vfs.VirtualFilesystem, opts vfs.GetFilesystemOptions) error { + if opts.InternalData == nil { + return nil + } + initPathStr := opts.InternalData.(*InternalData).InitialCgroupPath + if initPathStr == "" { + return nil + } + ctx.Debugf("cgroupfs.FilesystemType.GetFilesystem: initial cgroup path: %v", initPathStr) + initPath := fspath.Parse(initPathStr) + if !initPath.Absolute || !initPath.HasComponents() { + ctx.Warningf("cgroupfs.FilesystemType.GetFilesystem: initial cgroup path invalid: %+v", initPath) + return linuxerr.EINVAL + } + + // Have initial cgroup target, create the tree. + cgDir := fs.root.Inode().(*cgroupInode) + for pit := initPath.Begin; pit.Ok(); pit = pit.Next() { + cgDirI, err := cgDir.NewDir(ctx, pit.String(), vfs.MkdirOptions{}) + if err != nil { + return err + } + cgDir = cgDirI.(*cgroupInode) + } + + // Walk to target dentry. + initDentry, err := fs.root.WalkDentryTree(ctx, vfsObj, initPath) + if err != nil { + ctx.Warningf("cgroupfs.FilesystemType.GetFilesystem: initial cgroup dentry not found: %v", err) + return linuxerr.ENOENT + } + fs.effectiveRoot = initDentry // Reference from WalkDentryTree transferred here. + return nil +} + func (fs *filesystem) rootCgroup() kernel.Cgroup { return kernel.Cgroup{ - Dentry: fs.root, - CgroupImpl: fs.root.Inode().(kernel.CgroupImpl), + Dentry: fs.effectiveRoot, + CgroupImpl: fs.effectiveRoot.Inode().(kernel.CgroupImpl), } } @@ -320,6 +377,10 @@ func (fs *filesystem) Release(ctx context.Context) { r.Unregister(fs.hierarchyID) } + if fs.root != fs.effectiveRoot { + fs.effectiveRoot.DecRef(ctx) + } + fs.Filesystem.VFSFilesystem().VirtualFilesystem().PutAnonBlockDevMinor(fs.devMinor) fs.Filesystem.Release(ctx) } @@ -346,15 +407,18 @@ func (*implStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error // // +stateify savable type dir struct { - dirRefs + kernfs.InodeNoopRefCount kernfs.InodeAlwaysValid kernfs.InodeAttrs kernfs.InodeNotSymlink - kernfs.InodeDirectoryNoNewChildren // TODO(b/183137098): Implement mkdir. + kernfs.InodeDirectoryNoNewChildren kernfs.OrderedChildren implStatFS locks vfs.FileLocks + + fs *filesystem // Immutable. + cgi *cgroupInode // Immutable. } // Keep implements kernfs.Inode.Keep. @@ -378,9 +442,100 @@ func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, kd *kernfs.Dentry return fd.VFSFileDescription(), nil } -// DecRef implements kernfs.Inode.DecRef. -func (d *dir) DecRef(ctx context.Context) { - d.dirRefs.DecRef(func() { d.Destroy(ctx) }) +// NewDir implements kernfs.Inode.NewDir. +func (d *dir) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (kernfs.Inode, error) { + // "Do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable." + // -- Linux, kernel/cgroup.c:cgroup_mkdir(). + if strings.Contains(name, "\n") { + return nil, linuxerr.EINVAL + } + return d.OrderedChildren.Inserter(name, func() kernfs.Inode { + d.IncLinks(1) + return d.fs.newCgroupInode(ctx, auth.CredentialsFromContext(ctx)) + }) +} + +// Rename implements kernfs.Inode.Rename. Cgroupfs only allows renaming of +// cgroup directories, and the rename may only change the name within the same +// parent. See linux, kernel/cgroup.c:cgroup_rename(). +func (d *dir) Rename(ctx context.Context, oldname, newname string, child, dst kernfs.Inode) error { + if _, ok := child.(*cgroupInode); !ok { + // Not a cgroup directory. Control files are backed by different types. + return linuxerr.ENOTDIR + } + + dstCGInode, ok := dst.(*cgroupInode) + if !ok { + // Not a cgroup inode, so definitely can't be *this* inode. + return linuxerr.EIO + } + // Note: We're intentionally comparing addresses, since two different dirs + // could plausibly be identical in memory, but would occupy different + // locations in memory. + if d != &dstCGInode.dir { + // Destination dir is a different cgroup inode. Cross directory renames + // aren't allowed. + return linuxerr.EIO + } + + // Rename moves oldname to newname within d. Proceed. + return d.OrderedChildren.Rename(ctx, oldname, newname, child, dst) +} + +// Unlink implements kernfs.Inode.Unlink. Cgroupfs disallows unlink, as the only +// files in the filesystem are control files, which can't be deleted. +func (d *dir) Unlink(ctx context.Context, name string, child kernfs.Inode) error { + return linuxerr.EPERM +} + +// hasChildrenLocked returns whether the cgroup dir contains any objects that +// prevent it from being deleted. +func (d *dir) hasChildrenLocked() bool { + // Subdirs take a link on the parent, so checks if there are any direct + // children cgroups. Exclude the dir's self link and the link from ".". + if d.InodeAttrs.Links()-2 > 0 { + return true + } + return len(d.cgi.ts) > 0 +} + +// HasChildren implements kernfs.Inode.HasChildren. +// +// The empty check for a cgroupfs directory is unlike a regular directory since +// a cgroupfs directory will always have control files. A cgroupfs directory can +// be deleted if cgroup contains no tasks and has no sub-cgroups. +func (d *dir) HasChildren() bool { + d.fs.tasksMu.RLock() + defer d.fs.tasksMu.RUnlock() + return d.hasChildrenLocked() +} + +// RmDir implements kernfs.Inode.RmDir. +func (d *dir) RmDir(ctx context.Context, name string, child kernfs.Inode) error { + // Unlike a normal directory, we need to recheck if d is empty again, since + // vfs/kernfs can't stop tasks from entering or leaving the cgroup. + d.fs.tasksMu.RLock() + defer d.fs.tasksMu.RUnlock() + + cgi, ok := child.(*cgroupInode) + if !ok { + return linuxerr.ENOTDIR + } + if cgi.dir.hasChildrenLocked() { + return linuxerr.ENOTEMPTY + } + + // Disallow deletion of the effective root cgroup. + if cgi == d.fs.effectiveRoot.Inode().(*cgroupInode) { + ctx.Warningf("Cannot delete initial cgroup for new tasks %q", d.fs.effectiveRoot.FSLocalPath()) + return linuxerr.EBUSY + } + + err := d.OrderedChildren.RmDir(ctx, name, child) + if err == nil { + d.InodeAttrs.DecLinks() + } + return err } // controllerFile represents a generic control file that appears within a cgroup diff --git a/pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go b/pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go index 3142ab6f8..aa40bb193 100644 --- a/pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go +++ b/pkg/sentry/fsimpl/cgroupfs/cgroupfs_state_autogen.go @@ -41,7 +41,6 @@ func (c *cgroupInode) StateTypeName() string { func (c *cgroupInode) StateFields() []string { return []string{ "dir", - "fs", "ts", } } @@ -52,8 +51,7 @@ func (c *cgroupInode) beforeSave() {} func (c *cgroupInode) StateSave(stateSinkObject state.Sink) { c.beforeSave() stateSinkObject.Save(0, &c.dir) - stateSinkObject.Save(1, &c.fs) - stateSinkObject.Save(2, &c.ts) + stateSinkObject.Save(1, &c.ts) } func (c *cgroupInode) afterLoad() {} @@ -61,8 +59,7 @@ func (c *cgroupInode) afterLoad() {} // +checklocksignore func (c *cgroupInode) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(0, &c.dir) - stateSourceObject.Load(1, &c.fs) - stateSourceObject.Load(2, &c.ts) + stateSourceObject.Load(1, &c.ts) } func (d *cgroupProcsData) StateTypeName() string { @@ -143,6 +140,7 @@ func (i *InternalData) StateTypeName() string { func (i *InternalData) StateFields() []string { return []string{ "DefaultControlValues", + "InitialCgroupPath", } } @@ -152,6 +150,7 @@ func (i *InternalData) beforeSave() {} func (i *InternalData) StateSave(stateSinkObject state.Sink) { i.beforeSave() stateSinkObject.Save(0, &i.DefaultControlValues) + stateSinkObject.Save(1, &i.InitialCgroupPath) } func (i *InternalData) afterLoad() {} @@ -159,6 +158,7 @@ func (i *InternalData) afterLoad() {} // +checklocksignore func (i *InternalData) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(0, &i.DefaultControlValues) + stateSourceObject.Load(1, &i.InitialCgroupPath) } func (fs *filesystem) StateTypeName() string { @@ -174,6 +174,7 @@ func (fs *filesystem) StateFields() []string { "kcontrollers", "numCgroups", "root", + "effectiveRoot", } } @@ -189,6 +190,7 @@ func (fs *filesystem) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(4, &fs.kcontrollers) stateSinkObject.Save(5, &fs.numCgroups) stateSinkObject.Save(6, &fs.root) + stateSinkObject.Save(7, &fs.effectiveRoot) } func (fs *filesystem) afterLoad() {} @@ -202,6 +204,7 @@ func (fs *filesystem) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(4, &fs.kcontrollers) stateSourceObject.Load(5, &fs.numCgroups) stateSourceObject.Load(6, &fs.root) + stateSourceObject.Load(7, &fs.effectiveRoot) } func (i *implStatFS) StateTypeName() string { @@ -231,7 +234,7 @@ func (d *dir) StateTypeName() string { func (d *dir) StateFields() []string { return []string{ - "dirRefs", + "InodeNoopRefCount", "InodeAlwaysValid", "InodeAttrs", "InodeNotSymlink", @@ -239,6 +242,8 @@ func (d *dir) StateFields() []string { "OrderedChildren", "implStatFS", "locks", + "fs", + "cgi", } } @@ -247,7 +252,7 @@ func (d *dir) beforeSave() {} // +checklocksignore func (d *dir) StateSave(stateSinkObject state.Sink) { d.beforeSave() - stateSinkObject.Save(0, &d.dirRefs) + stateSinkObject.Save(0, &d.InodeNoopRefCount) stateSinkObject.Save(1, &d.InodeAlwaysValid) stateSinkObject.Save(2, &d.InodeAttrs) stateSinkObject.Save(3, &d.InodeNotSymlink) @@ -255,13 +260,15 @@ func (d *dir) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(5, &d.OrderedChildren) stateSinkObject.Save(6, &d.implStatFS) stateSinkObject.Save(7, &d.locks) + stateSinkObject.Save(8, &d.fs) + stateSinkObject.Save(9, &d.cgi) } func (d *dir) afterLoad() {} // +checklocksignore func (d *dir) StateLoad(stateSourceObject state.Source) { - stateSourceObject.Load(0, &d.dirRefs) + stateSourceObject.Load(0, &d.InodeNoopRefCount) stateSourceObject.Load(1, &d.InodeAlwaysValid) stateSourceObject.Load(2, &d.InodeAttrs) stateSourceObject.Load(3, &d.InodeNotSymlink) @@ -269,6 +276,8 @@ func (d *dir) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(5, &d.OrderedChildren) stateSourceObject.Load(6, &d.implStatFS) stateSourceObject.Load(7, &d.locks) + stateSourceObject.Load(8, &d.fs) + stateSourceObject.Load(9, &d.cgi) } func (c *controllerFile) StateTypeName() string { diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index a42fc79b4..b96dc9ef7 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -26,7 +26,6 @@ import ( ktime "gvisor.dev/gvisor/pkg/sentry/kernel/time" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // InodeNoopRefCount partially implements the Inode interface, specifically the @@ -234,6 +233,11 @@ func (a *InodeAttrs) Mode() linux.FileMode { return linux.FileMode(atomic.LoadUint32(&a.mode)) } +// Links returns the link count. +func (a *InodeAttrs) Links() uint32 { + return atomic.LoadUint32(&a.nlink) +} + // TouchAtime updates a.atime to the current time. func (a *InodeAttrs) TouchAtime(ctx context.Context, mnt *vfs.Mount) { if mnt.Flags.NoATime || mnt.ReadOnly() { @@ -289,7 +293,7 @@ func (a *InodeAttrs) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *aut return linuxerr.EPERM } if opts.Stat.Mask&linux.STATX_SIZE != 0 && a.Mode().IsDir() { - return syserror.EISDIR + return linuxerr.EISDIR } if err := vfs.CheckSetStat(ctx, creds, &opts, a.Mode(), auth.KUID(atomic.LoadUint32(&a.uid)), auth.KGID(atomic.LoadUint32(&a.gid))); err != nil { return err @@ -475,7 +479,7 @@ func (o *OrderedChildren) Lookup(ctx context.Context, name string) (Inode, error s, ok := o.set[name] if !ok { - return nil, syserror.ENOENT + return nil, linuxerr.ENOENT } s.inode.IncRef() // This ref is passed to the dentry upon creation via Init. @@ -502,6 +506,30 @@ func (o *OrderedChildren) Insert(name string, child Inode) error { return o.insert(name, child, false) } +// Inserter is like Insert, but obtains the child to insert by calling +// makeChild. makeChild is only called if the insert will succeed. This allows +// the caller to atomically check and insert a child without having to +// clean up the child on failure. +func (o *OrderedChildren) Inserter(name string, makeChild func() Inode) (Inode, error) { + o.mu.Lock() + defer o.mu.Unlock() + if _, ok := o.set[name]; ok { + return nil, linuxerr.EEXIST + } + + // Note: We must not fail after we call makeChild(). + + child := makeChild() + s := &slot{ + name: name, + inode: child, + static: false, + } + o.order.PushBack(s) + o.set[name] = s + return child, nil +} + // insert inserts child into o. // // Precondition: Caller must be holding a ref on child if static is true. @@ -559,7 +587,7 @@ func (o *OrderedChildren) replaceChildLocked(ctx context.Context, name string, n func (o *OrderedChildren) checkExistingLocked(name string, child Inode) error { s, ok := o.set[name] if !ok { - return syserror.ENOENT + return linuxerr.ENOENT } if s.inode != child { panic(fmt.Sprintf("Inode doesn't match what kernfs thinks! OrderedChild: %+v, kernfs: %+v", s.inode, child)) @@ -746,5 +774,5 @@ type InodeNoStatFS struct{} // StatFS implements Inode.StatFS. func (*InodeNoStatFS) StatFS(context.Context, *vfs.Filesystem) (linux.Statfs, error) { - return linux.Statfs{}, syserror.ENOSYS + return linux.Statfs{}, linuxerr.ENOSYS } diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index 0e2867d49..90c8b75d1 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -66,6 +66,7 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" + "gvisor.dev/gvisor/pkg/syserror" ) // Filesystem mostly implements vfs.FilesystemImpl for a generic in-memory @@ -542,6 +543,63 @@ func (d *Dentry) FSLocalPath() string { return b.String() } +// WalkDentryTree traverses p in the dentry tree for this filesystem. Note that +// this only traverses the dentry tree and is not a general path traversal. No +// symlinks and dynamic children are resolved, and no permission checks are +// performed. The caller is responsible for ensuring the returned Dentry exists +// for an appropriate lifetime. +// +// p is interpreted starting at d, and may be absolute or relative (absolute vs +// relative paths both refer to the same target here, since p is absolute from +// d). p may contain "." and "..", but will not allow traversal above d (similar +// to ".." at the root dentry). +// +// This is useful for filesystem internals, where the filesystem may not be +// mounted yet. For a mounted filesystem, use GetDentryAt. +func (d *Dentry) WalkDentryTree(ctx context.Context, vfsObj *vfs.VirtualFilesystem, p fspath.Path) (*Dentry, error) { + d.fs.mu.RLock() + defer d.fs.processDeferredDecRefs(ctx) + defer d.fs.mu.RUnlock() + + target := d + + for pit := p.Begin; pit.Ok(); pit = pit.Next() { + pc := pit.String() + + switch { + case target == nil: + return nil, syserror.ENOENT + case pc == ".": + // No-op, consume component and continue. + case pc == "..": + if target == d { + // Don't let .. traverse above the start point of the walk. + continue + } + target = target.parent + // Parent doesn't need revalidation since we revalidated it on the + // way to the child, and we're still holding fs.mu. + default: + var err error + + d.dirMu.Lock() + target, err = d.fs.revalidateChildLocked(ctx, vfsObj, target, pc, target.children[pc]) + d.dirMu.Unlock() + + if err != nil { + return nil, err + } + } + } + + if target == nil { + return nil, syserror.ENOENT + } + + target.IncRef() + return target, nil +} + // The Inode interface maps filesystem-level operations that operate on paths to // equivalent operations on specific filesystem nodes. // @@ -667,12 +725,15 @@ type inodeDirectory interface { // RmDir removes an empty child directory from this directory // inode. Implementations must update the parent directory's link count, // if required. Implementations are not responsible for checking that child - // is a directory, checking for an empty directory. + // is a directory, or checking for an empty directory. RmDir(ctx context.Context, name string, child Inode) error // Rename is called on the source directory containing an inode being - // renamed. child should point to the resolved child in the source - // directory. + // renamed. child points to the resolved child in the source directory. + // dstDir is guaranteed to be a directory inode. + // + // On a successful call to Rename, the caller updates the dentry tree to + // reflect the name change. // // Precondition: Caller must serialize concurrent calls to Rename. Rename(ctx context.Context, oldname, newname string, child, dstDir Inode) error diff --git a/pkg/sentry/kernel/cgroup.go b/pkg/sentry/kernel/cgroup.go index c93ef6ac1..a0e291f58 100644 --- a/pkg/sentry/kernel/cgroup.go +++ b/pkg/sentry/kernel/cgroup.go @@ -196,6 +196,7 @@ func (r *CgroupRegistry) FindHierarchy(ctypes []CgroupControllerType) *vfs.Files // uniqueness of controllers enforced by Register, drop the // dying hierarchy now. The eventual unregister by the FS // teardown will become a no-op. + r.unregisterLocked(h.id) return nil } return h.fs |