diff options
author | gVisor bot <gvisor-bot@google.com> | 2020-10-12 23:33:20 +0000 |
---|---|---|
committer | gVisor bot <gvisor-bot@google.com> | 2020-10-12 23:33:20 +0000 |
commit | 213e252ab30dc0646d3de409dedd58f5c55e05af (patch) | |
tree | 42375535ec08b69adc388d81f22cba5a92bb1cd2 /pkg/sentry/fsimpl/kernfs | |
parent | 1b455257f0fb4494bfd10802d8011a073a0672e4 (diff) | |
parent | e7bbe70f79aa9308c2eb54b057ee5779b22f478e (diff) |
Merge release-20200928.0-94-ge7bbe70f7 (automated)
Diffstat (limited to 'pkg/sentry/fsimpl/kernfs')
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/fd_impl_util.go | 4 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/filesystem.go | 192 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/inode_impl_util.go | 235 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/kernfs.go | 173 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go | 207 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/symlink.go | 7 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/synthetic_directory.go | 40 | ||||
-rw-r--r-- | pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go | 118 |
8 files changed, 608 insertions, 368 deletions
diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go index 0a4cd4057..abf1905d6 100644 --- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go @@ -201,12 +201,12 @@ func (fd *GenericDirectoryFD) IterDirents(ctx context.Context, cb vfs.IterDirent // these. childIdx := fd.off - 2 for it := fd.children.nthLocked(childIdx); it != nil; it = it.Next() { - stat, err := it.Dentry.inode.Stat(ctx, fd.filesystem(), opts) + stat, err := it.inode.Stat(ctx, fd.filesystem(), opts) if err != nil { return err } dirent := vfs.Dirent{ - Name: it.Name, + Name: it.name, Type: linux.FileMode(stat.Mode).DirentType(), Ino: stat.Ino, NextOff: fd.off + 1, diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 5cc1c4281..6426a55f6 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -89,7 +89,7 @@ afterSymlink: } if targetVD.Ok() { err := rp.HandleJump(targetVD) - targetVD.DecRef(ctx) + fs.deferDecRefVD(ctx, targetVD) if err != nil { return nil, err } @@ -120,22 +120,33 @@ func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.Vir // Cached dentry exists, revalidate. if !child.inode.Valid(ctx) { delete(parent.children, name) - vfsObj.InvalidateDentry(ctx, &child.vfsd) - fs.deferDecRef(child) // Reference from Lookup. + if child.inode.Keep() { + // Drop the ref owned by kernfs. + fs.deferDecRef(child) + } + vfsObj.InvalidateDentry(ctx, child.VFSDentry()) child = nil } } if child == nil { // Dentry isn't cached; it either doesn't exist or failed revalidation. // Attempt to resolve it via Lookup. - c, err := parent.inode.Lookup(ctx, name) + childInode, err := parent.inode.Lookup(ctx, name) if err != nil { return nil, err } - // Reference on c (provided by Lookup) will be dropped when the dentry - // fails validation. - parent.InsertChildLocked(name, c) - child = c + var newChild Dentry + newChild.Init(fs, childInode) // childInode's ref is transferred to newChild. + parent.insertChildLocked(name, &newChild) + child = &newChild + + // Drop the ref on newChild. This will cause the dentry to get pruned + // from the dentry tree by the end of current filesystem operation + // (before returning to the VFS layer) if another ref is not picked on + // this dentry. + if !childInode.Keep() { + fs.deferDecRef(&newChild) + } } return child, nil } @@ -191,7 +202,7 @@ func (fs *Filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.Resolving } // checkCreateLocked checks that a file named rp.Component() may be created in -// directory parentVFSD, then returns rp.Component(). +// directory parent, then returns rp.Component(). // // Preconditions: // * Filesystem.mu must be locked for at least reading. @@ -298,9 +309,9 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. return syserror.EEXIST } fs.mu.Lock() + defer fs.processDeferredDecRefs(ctx) defer fs.mu.Unlock() parent, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -324,11 +335,13 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. return syserror.EPERM } - child, err := parent.inode.NewLink(ctx, pc, d.inode) + childI, err := parent.inode.NewLink(ctx, pc, d.inode) if err != nil { return err } - parent.InsertChildLocked(pc, child) + var child Dentry + child.Init(fs, childI) + parent.insertChildLocked(pc, &child) return nil } @@ -338,9 +351,9 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return syserror.EEXIST } fs.mu.Lock() + defer fs.processDeferredDecRefs(ctx) defer fs.mu.Unlock() parent, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -355,14 +368,16 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return err } defer rp.Mount().EndWrite() - child, err := parent.inode.NewDir(ctx, pc, opts) + childI, err := parent.inode.NewDir(ctx, pc, opts) if err != nil { if !opts.ForSyntheticMountpoint || err == syserror.EEXIST { return err } - child = newSyntheticDirectory(rp.Credentials(), opts.Mode) + childI = newSyntheticDirectory(rp.Credentials(), opts.Mode) } - parent.InsertChildLocked(pc, child) + var child Dentry + child.Init(fs, childI) + parent.insertChildLocked(pc, &child) return nil } @@ -372,9 +387,9 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return syserror.EEXIST } fs.mu.Lock() + defer fs.processDeferredDecRefs(ctx) defer fs.mu.Unlock() parent, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -389,11 +404,13 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return err } defer rp.Mount().EndWrite() - newD, err := parent.inode.NewNode(ctx, pc, opts) + newI, err := parent.inode.NewNode(ctx, pc, opts) if err != nil { return err } - parent.InsertChildLocked(pc, newD) + var newD Dentry + newD.Init(fs, newI) + parent.insertChildLocked(pc, &newD) return nil } @@ -409,22 +426,23 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf // Do not create new file. if opts.Flags&linux.O_CREAT == 0 { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) d, err := fs.walkExistingLocked(ctx, rp) if err != nil { fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) return nil, err } if err := d.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil { fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) return nil, err } - d.inode.IncRef() - defer d.inode.DecRef(ctx) + // Open may block so we need to unlock fs.mu. IncRef d to prevent + // its destruction while fs.mu is unlocked. + d.IncRef() fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) - return d.inode.Open(ctx, rp, d, opts) + fd, err := d.inode.Open(ctx, rp, d, opts) + d.DecRef(ctx) + return fd, err } // May create new file. @@ -438,6 +456,10 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf unlocked = true } } + // Process all to-be-decref'd dentries at the end at once. + // Since we defer unlock() AFTER this, fs.mu is guaranteed to be unlocked + // when this is executed. + defer fs.processDeferredDecRefs(ctx) defer unlock() if rp.Done() { if rp.MustBeDir() { @@ -449,14 +471,16 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf if err := d.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil { return nil, err } - d.inode.IncRef() - defer d.inode.DecRef(ctx) + // Open may block so we need to unlock fs.mu. IncRef d to prevent + // its destruction while fs.mu is unlocked. + d.IncRef() unlock() - return d.inode.Open(ctx, rp, d, opts) + fd, err := d.inode.Open(ctx, rp, d, opts) + d.DecRef(ctx) + return fd, err } afterTrailingSymlink: parent, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked(ctx) if err != nil { return nil, err } @@ -487,18 +511,23 @@ afterTrailingSymlink: } defer rp.Mount().EndWrite() // Create and open the child. - child, err := parent.inode.NewFile(ctx, pc, opts) + childI, err := parent.inode.NewFile(ctx, pc, opts) if err != nil { return nil, err } + var child Dentry + child.Init(fs, childI) // FIXME(gvisor.dev/issue/1193): Race between checking existence with - // fs.stepExistingLocked and parent.InsertChild. If possible, we should hold + // fs.stepExistingLocked and parent.insertChild. If possible, we should hold // dirMu from one to the other. - parent.InsertChild(pc, child) - child.inode.IncRef() - defer child.inode.DecRef(ctx) + parent.insertChild(pc, &child) + // Open may block so we need to unlock fs.mu. IncRef child to prevent + // its destruction while fs.mu is unlocked. + child.IncRef() unlock() - return child.inode.Open(ctx, rp, child, opts) + fd, err := child.inode.Open(ctx, rp, &child, opts) + child.DecRef(ctx) + return fd, err } if err != nil { return nil, err @@ -514,7 +543,7 @@ afterTrailingSymlink: } if targetVD.Ok() { err := rp.HandleJump(targetVD) - targetVD.DecRef(ctx) + fs.deferDecRefVD(ctx, targetVD) if err != nil { return nil, err } @@ -530,18 +559,21 @@ afterTrailingSymlink: if err := child.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil { return nil, err } - child.inode.IncRef() - defer child.inode.DecRef(ctx) + // Open may block so we need to unlock fs.mu. IncRef child to prevent + // its destruction while fs.mu is unlocked. + child.IncRef() unlock() - return child.inode.Open(ctx, rp, child, opts) + fd, err := child.inode.Open(ctx, rp, child, opts) + child.DecRef(ctx) + return fd, err } // ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() d, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return "", err } @@ -560,7 +592,7 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0 fs.mu.Lock() - defer fs.processDeferredDecRefsLocked(ctx) + defer fs.processDeferredDecRefs(ctx) defer fs.mu.Unlock() // Resolve the destination directory first to verify that it's on this @@ -632,24 +664,27 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa if err := virtfs.PrepareRenameDentry(mntns, srcVFSD, dstVFSD); err != nil { return err } - replaced, err := srcDir.inode.Rename(ctx, src.name, pc, src, dstDir) + err = srcDir.inode.Rename(ctx, src.name, pc, src.inode, dstDir.inode) if err != nil { virtfs.AbortRenameDentry(srcVFSD, dstVFSD) return err } delete(srcDir.children, src.name) if srcDir != dstDir { - fs.deferDecRef(srcDir) - dstDir.IncRef() + fs.deferDecRef(srcDir) // child (src) drops ref on old parent. + dstDir.IncRef() // child (src) takes a ref on the new parent. } src.parent = dstDir src.name = pc if dstDir.children == nil { dstDir.children = make(map[string]*Dentry) } + replaced := dstDir.children[pc] dstDir.children[pc] = src var replaceVFSD *vfs.Dentry if replaced != nil { + // deferDecRef so that fs.mu and dstDir.mu are unlocked by then. + fs.deferDecRef(replaced) replaceVFSD = replaced.VFSDentry() } virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaceVFSD) @@ -659,10 +694,10 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa // RmdirAt implements vfs.FilesystemImpl.RmdirAt. func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { fs.mu.Lock() + defer fs.processDeferredDecRefs(ctx) defer fs.mu.Unlock() d, err := fs.walkExistingLocked(ctx, rp) - fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -691,10 +726,13 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error return err } - if err := parentDentry.inode.RmDir(ctx, d.name, d); err != nil { + if err := parentDentry.inode.RmDir(ctx, d.name, d.inode); err != nil { virtfs.AbortDeleteDentry(vfsd) return err } + delete(parentDentry.children, d.name) + // Defer decref so that fs.mu and parentDentry.dirMu are unlocked by then. + fs.deferDecRef(d) virtfs.CommitDeleteDentry(ctx, vfsd) return nil } @@ -702,9 +740,9 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error // SetStatAt implements vfs.FilesystemImpl.SetStatAt. func (fs *Filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() d, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return err } @@ -717,9 +755,9 @@ func (fs *Filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts // StatAt implements vfs.FilesystemImpl.StatAt. func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() d, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return linux.Statx{}, err } @@ -729,9 +767,9 @@ func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf // StatFSAt implements vfs.FilesystemImpl.StatFSAt. func (fs *Filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() d, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return linux.Statfs{}, err } @@ -744,9 +782,9 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ return syserror.EEXIST } fs.mu.Lock() + defer fs.processDeferredDecRefs(ctx) defer fs.mu.Unlock() parent, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -761,21 +799,23 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ return err } defer rp.Mount().EndWrite() - child, err := parent.inode.NewSymlink(ctx, pc, target) + childI, err := parent.inode.NewSymlink(ctx, pc, target) if err != nil { return err } - parent.InsertChildLocked(pc, child) + var child Dentry + child.Init(fs, childI) + parent.insertChildLocked(pc, &child) return nil } // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { fs.mu.Lock() + defer fs.processDeferredDecRefs(ctx) defer fs.mu.Unlock() d, err := fs.walkExistingLocked(ctx, rp) - fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -799,10 +839,13 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil { return err } - if err := parentDentry.inode.Unlink(ctx, d.name, d); err != nil { + if err := parentDentry.inode.Unlink(ctx, d.name, d.inode); err != nil { virtfs.AbortDeleteDentry(vfsd) return err } + delete(parentDentry.children, d.name) + // Defer decref so that fs.mu and parentDentry.dirMu are unlocked by then. + fs.deferDecRef(d) virtfs.CommitDeleteDentry(ctx, vfsd) return nil } @@ -810,9 +853,9 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error // BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt. func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() d, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return nil, err } @@ -825,9 +868,9 @@ func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt. func (fs *Filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() _, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return nil, err } @@ -838,9 +881,9 @@ func (fs *Filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, si // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt. func (fs *Filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() _, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return "", err } @@ -851,9 +894,9 @@ func (fs *Filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt. func (fs *Filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() _, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return err } @@ -864,9 +907,9 @@ func (fs *Filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt. func (fs *Filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() _, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return err } @@ -880,3 +923,16 @@ func (fs *Filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe defer fs.mu.RUnlock() return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*Dentry), b) } + +func (fs *Filesystem) deferDecRefVD(ctx context.Context, vd vfs.VirtualDentry) { + if d, ok := vd.Dentry().Impl().(*Dentry); ok && d.fs == fs { + // The following is equivalent to vd.DecRef(ctx). This is needed + // because if d belongs to this filesystem, we can not DecRef it right + // away as we may be holding fs.mu. d.DecRef may acquire fs.mu. So we + // defer the DecRef to when locks are dropped. + vd.Mount().DecRef(ctx) + fs.deferDecRef(d) + } else { + vd.DecRef(ctx) + } +} diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 49210e748..122b10591 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -34,6 +34,7 @@ import ( // // +stateify savable type InodeNoopRefCount struct { + InodeTemporary } // IncRef implements Inode.IncRef. @@ -57,27 +58,27 @@ func (InodeNoopRefCount) TryIncRef() bool { type InodeDirectoryNoNewChildren struct{} // NewFile implements Inode.NewFile. -func (InodeDirectoryNoNewChildren) NewFile(context.Context, string, vfs.OpenOptions) (*Dentry, error) { +func (InodeDirectoryNoNewChildren) NewFile(context.Context, string, vfs.OpenOptions) (Inode, error) { return nil, syserror.EPERM } // NewDir implements Inode.NewDir. -func (InodeDirectoryNoNewChildren) NewDir(context.Context, string, vfs.MkdirOptions) (*Dentry, error) { +func (InodeDirectoryNoNewChildren) NewDir(context.Context, string, vfs.MkdirOptions) (Inode, error) { return nil, syserror.EPERM } // NewLink implements Inode.NewLink. -func (InodeDirectoryNoNewChildren) NewLink(context.Context, string, Inode) (*Dentry, error) { +func (InodeDirectoryNoNewChildren) NewLink(context.Context, string, Inode) (Inode, error) { return nil, syserror.EPERM } // NewSymlink implements Inode.NewSymlink. -func (InodeDirectoryNoNewChildren) NewSymlink(context.Context, string, string) (*Dentry, error) { +func (InodeDirectoryNoNewChildren) NewSymlink(context.Context, string, string) (Inode, error) { return nil, syserror.EPERM } // NewNode implements Inode.NewNode. -func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOptions) (*Dentry, error) { +func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOptions) (Inode, error) { return nil, syserror.EPERM } @@ -88,6 +89,7 @@ func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOpt // // +stateify savable type InodeNotDirectory struct { + InodeAlwaysValid } // HasChildren implements Inode.HasChildren. @@ -96,47 +98,47 @@ func (InodeNotDirectory) HasChildren() bool { } // NewFile implements Inode.NewFile. -func (InodeNotDirectory) NewFile(context.Context, string, vfs.OpenOptions) (*Dentry, error) { +func (InodeNotDirectory) NewFile(context.Context, string, vfs.OpenOptions) (Inode, error) { panic("NewFile called on non-directory inode") } // NewDir implements Inode.NewDir. -func (InodeNotDirectory) NewDir(context.Context, string, vfs.MkdirOptions) (*Dentry, error) { +func (InodeNotDirectory) NewDir(context.Context, string, vfs.MkdirOptions) (Inode, error) { panic("NewDir called on non-directory inode") } // NewLink implements Inode.NewLinkink. -func (InodeNotDirectory) NewLink(context.Context, string, Inode) (*Dentry, error) { +func (InodeNotDirectory) NewLink(context.Context, string, Inode) (Inode, error) { panic("NewLink called on non-directory inode") } // NewSymlink implements Inode.NewSymlink. -func (InodeNotDirectory) NewSymlink(context.Context, string, string) (*Dentry, error) { +func (InodeNotDirectory) NewSymlink(context.Context, string, string) (Inode, error) { panic("NewSymlink called on non-directory inode") } // NewNode implements Inode.NewNode. -func (InodeNotDirectory) NewNode(context.Context, string, vfs.MknodOptions) (*Dentry, error) { +func (InodeNotDirectory) NewNode(context.Context, string, vfs.MknodOptions) (Inode, error) { panic("NewNode called on non-directory inode") } // Unlink implements Inode.Unlink. -func (InodeNotDirectory) Unlink(context.Context, string, *Dentry) error { +func (InodeNotDirectory) Unlink(context.Context, string, Inode) error { panic("Unlink called on non-directory inode") } // RmDir implements Inode.RmDir. -func (InodeNotDirectory) RmDir(context.Context, string, *Dentry) error { +func (InodeNotDirectory) RmDir(context.Context, string, Inode) error { panic("RmDir called on non-directory inode") } // Rename implements Inode.Rename. -func (InodeNotDirectory) Rename(context.Context, string, string, *Dentry, *Dentry) (*Dentry, error) { +func (InodeNotDirectory) Rename(context.Context, string, string, Inode, Inode) error { panic("Rename called on non-directory inode") } // Lookup implements Inode.Lookup. -func (InodeNotDirectory) Lookup(ctx context.Context, name string) (*Dentry, error) { +func (InodeNotDirectory) Lookup(ctx context.Context, name string) (Inode, error) { panic("Lookup called on non-directory inode") } @@ -145,35 +147,6 @@ func (InodeNotDirectory) IterDirents(ctx context.Context, callback vfs.IterDiren panic("IterDirents called on non-directory inode") } -// Valid implements Inode.Valid. -func (InodeNotDirectory) Valid(context.Context) bool { - return true -} - -// InodeNoDynamicLookup partially implements the Inode interface, specifically -// the inodeDynamicLookup sub interface. Directory inodes that do not support -// dymanic entries (i.e. entries that are not "hashed" into the -// vfs.Dentry.children) can embed this to provide no-op implementations for -// functions related to dynamic entries. -// -// +stateify savable -type InodeNoDynamicLookup struct{} - -// Lookup implements Inode.Lookup. -func (InodeNoDynamicLookup) Lookup(ctx context.Context, name string) (*Dentry, error) { - return nil, syserror.ENOENT -} - -// IterDirents implements Inode.IterDirents. -func (InodeNoDynamicLookup) IterDirents(ctx context.Context, callback vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) { - return offset, nil -} - -// Valid implements Inode.Valid. -func (InodeNoDynamicLookup) Valid(ctx context.Context) bool { - return true -} - // InodeNotSymlink partially implements the Inode interface, specifically the // inodeSymlink sub interface. All inodes that are not symlinks may embed this // to return the appropriate errors from symlink-related functions. @@ -273,7 +246,7 @@ func (a *InodeAttrs) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *aut // SetInodeStat sets the corresponding attributes from opts to InodeAttrs. // This function can be used by other kernfs-based filesystem implementation to -// sets the unexported attributes into kernfs.InodeAttrs. +// sets the unexported attributes into InodeAttrs. func (a *InodeAttrs) SetInodeStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error { if opts.Stat.Mask == 0 { return nil @@ -344,8 +317,9 @@ func (a *InodeAttrs) DecLinks() { // +stateify savable type slot struct { - Name string - Dentry *Dentry + name string + inode Inode + static bool slotEntry } @@ -361,10 +335,18 @@ type OrderedChildrenOptions struct { } // OrderedChildren partially implements the Inode interface. OrderedChildren can -// be embedded in directory inodes to keep track of the children in the +// be embedded in directory inodes to keep track of children in the // directory, and can then be used to implement a generic directory FD -- see -// GenericDirectoryFD. OrderedChildren is not compatible with dynamic -// directories. +// GenericDirectoryFD. +// +// OrderedChildren can represent a node in an Inode tree. The children inodes +// might be directories themselves using OrderedChildren; hence extending the +// tree. The parent inode (OrderedChildren user) holds a ref on all its static +// children. This lets the static inodes outlive their associated dentry. +// While the dentry might have to be regenerated via a Lookup() call, we can +// keep reusing the same static inode. These static children inodes are finally +// DecRef'd when this directory inode is being destroyed. This makes +// OrderedChildren suitable for static directory entries as well. // // Must be initialize with Init before first use. // @@ -388,33 +370,63 @@ func (o *OrderedChildren) Init(opts OrderedChildrenOptions) { // Destroy clears the children stored in o. It should be called by structs // embedding OrderedChildren upon destruction, i.e. when their reference count // reaches zero. -func (o *OrderedChildren) Destroy() { +func (o *OrderedChildren) Destroy(ctx context.Context) { o.mu.Lock() defer o.mu.Unlock() + // Drop the ref that o owns on the static inodes it holds. + for _, s := range o.set { + if s.static { + s.inode.DecRef(ctx) + } + } o.order.Reset() o.set = nil } -// Populate inserts children into this OrderedChildren, and d's dentry -// cache. Populate returns the number of directories inserted, which the caller +// Populate inserts static children into this OrderedChildren. +// Populate returns the number of directories inserted, which the caller // may use to update the link count for the parent directory. // -// Precondition: d must represent a directory inode. children must not contain -// any conflicting entries already in o. -func (o *OrderedChildren) Populate(d *Dentry, children map[string]*Dentry) uint32 { +// Precondition: +// * d must represent a directory inode. +// * children must not contain any conflicting entries already in o. +// * Caller must hold a reference on all inodes passed. +// +// Postcondition: Caller's references on inodes are transferred to o. +func (o *OrderedChildren) Populate(children map[string]Inode) uint32 { var links uint32 for name, child := range children { - if child.isDir() { + if child.Mode().IsDir() { links++ } - if err := o.Insert(name, child); err != nil { - panic(fmt.Sprintf("Collision when attempting to insert child %q (%+v) into %+v", name, child, d)) + if err := o.insert(name, child, true); err != nil { + panic(fmt.Sprintf("Collision when attempting to insert child %q (%+v)", name, child)) } - d.InsertChild(name, child) } return links } +// Lookup implements Inode.Lookup. +func (o *OrderedChildren) Lookup(ctx context.Context, name string) (Inode, error) { + o.mu.RLock() + defer o.mu.RUnlock() + + s, ok := o.set[name] + if !ok { + return nil, syserror.ENOENT + } + + s.inode.IncRef() // This ref is passed to the dentry upon creation via Init. + return s.inode, nil +} + +// IterDirents implements Inode.IterDirents. +func (o *OrderedChildren) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (newOffset int64, err error) { + // All entries from OrderedChildren have already been handled in + // GenericDirectoryFD.IterDirents. + return offset, nil +} + // HasChildren implements Inode.HasChildren. func (o *OrderedChildren) HasChildren() bool { o.mu.RLock() @@ -422,17 +434,27 @@ func (o *OrderedChildren) HasChildren() bool { return len(o.set) > 0 } -// Insert inserts child into o. This ignores the writability of o, as this is -// not part of the vfs.FilesystemImpl interface, and is a lower-level operation. -func (o *OrderedChildren) Insert(name string, child *Dentry) error { +// Insert inserts a dynamic child into o. This ignores the writability of o, as +// this is not part of the vfs.FilesystemImpl interface, and is a lower-level operation. +func (o *OrderedChildren) Insert(name string, child Inode) error { + return o.insert(name, child, false) +} + +// insert inserts child into o. +// +// Precondition: Caller must be holding a ref on child if static is true. +// +// Postcondition: Caller's ref on child is transferred to o if static is true. +func (o *OrderedChildren) insert(name string, child Inode, static bool) error { o.mu.Lock() defer o.mu.Unlock() if _, ok := o.set[name]; ok { return syserror.EEXIST } s := &slot{ - Name: name, - Dentry: child, + name: name, + inode: child, + static: static, } o.order.PushBack(s) o.set[name] = s @@ -442,44 +464,49 @@ func (o *OrderedChildren) Insert(name string, child *Dentry) error { // Precondition: caller must hold o.mu for writing. func (o *OrderedChildren) removeLocked(name string) { if s, ok := o.set[name]; ok { + if s.static { + panic(fmt.Sprintf("removeLocked called on a static inode: %v", s.inode)) + } delete(o.set, name) o.order.Remove(s) } } // Precondition: caller must hold o.mu for writing. -func (o *OrderedChildren) replaceChildLocked(name string, new *Dentry) *Dentry { +func (o *OrderedChildren) replaceChildLocked(ctx context.Context, name string, newI Inode) { if s, ok := o.set[name]; ok { + if s.static { + panic(fmt.Sprintf("replacing a static inode: %v", s.inode)) + } + // Existing slot with given name, simply replace the dentry. - var old *Dentry - old, s.Dentry = s.Dentry, new - return old + s.inode = newI } // No existing slot with given name, create and hash new slot. s := &slot{ - Name: name, - Dentry: new, + name: name, + inode: newI, + static: false, } o.order.PushBack(s) o.set[name] = s - return nil } // Precondition: caller must hold o.mu for reading or writing. -func (o *OrderedChildren) checkExistingLocked(name string, child *Dentry) error { +func (o *OrderedChildren) checkExistingLocked(name string, child Inode) error { s, ok := o.set[name] if !ok { return syserror.ENOENT } - if s.Dentry != child { - panic(fmt.Sprintf("Dentry hashed into inode doesn't match what vfs thinks! OrderedChild: %+v, vfs: %+v", s.Dentry, child)) + if s.inode != child { + panic(fmt.Sprintf("Inode doesn't match what kernfs thinks! OrderedChild: %+v, kernfs: %+v", s.inode, child)) } return nil } // Unlink implements Inode.Unlink. -func (o *OrderedChildren) Unlink(ctx context.Context, name string, child *Dentry) error { +func (o *OrderedChildren) Unlink(ctx context.Context, name string, child Inode) error { if !o.writable { return syserror.EPERM } @@ -494,8 +521,8 @@ func (o *OrderedChildren) Unlink(ctx context.Context, name string, child *Dentry return nil } -// Rmdir implements Inode.Rmdir. -func (o *OrderedChildren) RmDir(ctx context.Context, name string, child *Dentry) error { +// RmDir implements Inode.RmDir. +func (o *OrderedChildren) RmDir(ctx context.Context, name string, child Inode) error { // We're not responsible for checking that child is a directory, that it's // empty, or updating any link counts; so this is the same as unlink. return o.Unlink(ctx, name, child) @@ -517,13 +544,13 @@ func (renameAcrossDifferentImplementationsError) Error() string { // that will support Rename. // // Postcondition: reference on any replaced dentry transferred to caller. -func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, child, dstDir *Dentry) (*Dentry, error) { - dst, ok := dstDir.inode.(interface{}).(*OrderedChildren) +func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, child, dstDir Inode) error { + dst, ok := dstDir.(interface{}).(*OrderedChildren) if !ok { - return nil, renameAcrossDifferentImplementationsError{} + return renameAcrossDifferentImplementationsError{} } if !o.writable || !dst.writable { - return nil, syserror.EPERM + return syserror.EPERM } // Note: There's a potential deadlock below if concurrent calls to Rename // refer to the same src and dst directories in reverse. We avoid any @@ -536,12 +563,12 @@ func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, c defer dst.mu.Unlock() } if err := o.checkExistingLocked(oldname, child); err != nil { - return nil, err + return err } // TODO(gvisor.dev/issue/3027): Check sticky bit before removing. - replaced := dst.replaceChildLocked(newname, child) - return replaced, nil + dst.replaceChildLocked(ctx, newname, child) + return nil } // nthLocked returns an iterator to the nth child tracked by this object. The @@ -576,11 +603,12 @@ func (InodeSymlink) Open(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, // // +stateify savable type StaticDirectory struct { + InodeAlwaysValid InodeAttrs InodeDirectoryNoNewChildren - InodeNoDynamicLookup InodeNoStatFS InodeNotSymlink + InodeTemporary OrderedChildren StaticDirectoryRefs @@ -591,19 +619,16 @@ type StaticDirectory struct { var _ Inode = (*StaticDirectory)(nil) // NewStaticDir creates a new static directory and returns its dentry. -func NewStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode, children map[string]*Dentry, fdOpts GenericDirectoryFDOptions) *Dentry { +func NewStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode, children map[string]Inode, fdOpts GenericDirectoryFDOptions) Inode { inode := &StaticDirectory{} inode.Init(creds, devMajor, devMinor, ino, perm, fdOpts) inode.EnableLeakCheck() - dentry := &Dentry{} - dentry.Init(inode) - inode.OrderedChildren.Init(OrderedChildrenOptions{}) - links := inode.OrderedChildren.Populate(dentry, children) + links := inode.OrderedChildren.Populate(children) inode.IncLinks(links) - return dentry + return inode } // Init initializes StaticDirectory. @@ -615,7 +640,7 @@ func (s *StaticDirectory) Init(creds *auth.Credentials, devMajor, devMinor uint3 s.InodeAttrs.Init(creds, devMajor, devMinor, ino, linux.ModeDirectory|perm) } -// Open implements kernfs.Inode.Open. +// Open implements Inode.Open. func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { fd, err := NewGenericDirectoryFD(rp.Mount(), d, &s.OrderedChildren, &s.locks, &opts, s.fdOpts) if err != nil { @@ -624,26 +649,36 @@ func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, d *De return fd.VFSFileDescription(), nil } -// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed. +// SetStat implements Inode.SetStat not allowing inode attributes to be changed. func (*StaticDirectory) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { return syserror.EPERM } -// DecRef implements kernfs.Inode.DecRef. -func (s *StaticDirectory) DecRef(context.Context) { - s.StaticDirectoryRefs.DecRef(s.Destroy) +// DecRef implements Inode.DecRef. +func (s *StaticDirectory) DecRef(ctx context.Context) { + s.StaticDirectoryRefs.DecRef(func() { s.Destroy(ctx) }) } -// AlwaysValid partially implements kernfs.inodeDynamicLookup. +// InodeAlwaysValid partially implements Inode. // // +stateify savable -type AlwaysValid struct{} +type InodeAlwaysValid struct{} -// Valid implements kernfs.inodeDynamicLookup.Valid. -func (*AlwaysValid) Valid(context.Context) bool { +// Valid implements Inode.Valid. +func (*InodeAlwaysValid) Valid(context.Context) bool { return true } +// InodeTemporary partially implements Inode. +// +// +stateify savable +type InodeTemporary struct{} + +// Keep implements Inode.Keep. +func (*InodeTemporary) Keep() bool { + return false +} + // InodeNoStatFS partially implements the Inode interface, where the client // filesystem doesn't support statfs(2). // diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index 6d3d79333..606081e68 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -29,12 +29,16 @@ // // Reference Model: // -// Kernfs dentries represents named pointers to inodes. Dentries and inodes have +// Kernfs dentries represents named pointers to inodes. Kernfs is solely +// reponsible for maintaining and modifying its dentry tree; inode +// implementations can not access the tree. Dentries and inodes have // independent lifetimes and reference counts. A child dentry unconditionally // holds a reference on its parent directory's dentry. A dentry also holds a -// reference on the inode it points to. Multiple dentries can point to the same -// inode (for example, in the case of hardlinks). File descriptors hold a -// reference to the dentry they're opened on. +// reference on the inode it points to (although that might not be the only +// reference on the inode). Due to this inodes can outlive the dentries that +// point to them. Multiple dentries can point to the same inode (for example, +// in the case of hardlinks). File descriptors hold a reference to the dentry +// they're opened on. // // Dentries are guaranteed to exist while holding Filesystem.mu for // reading. Dropping dentries require holding Filesystem.mu for writing. To @@ -47,8 +51,8 @@ // kernfs.Dentry.dirMu // vfs.VirtualFilesystem.mountMu // vfs.Dentry.mu -// kernfs.Filesystem.droppedDentriesMu // (inode implementation locks, if any) +// kernfs.Filesystem.droppedDentriesMu package kernfs import ( @@ -60,7 +64,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // Filesystem mostly implements vfs.FilesystemImpl for a generic in-memory @@ -95,7 +98,7 @@ type Filesystem struct { // example: // // fs.mu.RLock() - // fs.mu.processDeferredDecRefs() + // defer fs.processDeferredDecRefs() // defer fs.mu.RUnlock() // ... // fs.deferDecRef(dentry) @@ -108,8 +111,7 @@ type Filesystem struct { // deferDecRef defers dropping a dentry ref until the next call to // processDeferredDecRefs{,Locked}. See comment on Filesystem.mu. -// -// Precondition: d must not already be pending destruction. +// This may be called while Filesystem.mu or Dentry.dirMu is locked. func (fs *Filesystem) deferDecRef(d *Dentry) { fs.droppedDentriesMu.Lock() fs.droppedDentries = append(fs.droppedDentries, d) @@ -118,17 +120,14 @@ func (fs *Filesystem) deferDecRef(d *Dentry) { // processDeferredDecRefs calls vfs.Dentry.DecRef on all dentries in the // droppedDentries list. See comment on Filesystem.mu. +// +// Precondition: Filesystem.mu or Dentry.dirMu must NOT be locked. func (fs *Filesystem) processDeferredDecRefs(ctx context.Context) { - fs.mu.Lock() - fs.processDeferredDecRefsLocked(ctx) - fs.mu.Unlock() -} - -// Precondition: fs.mu must be held for writing. -func (fs *Filesystem) processDeferredDecRefsLocked(ctx context.Context) { fs.droppedDentriesMu.Lock() for _, d := range fs.droppedDentries { - d.DecRef(ctx) + // Defer the DecRef call so that we are not holding droppedDentriesMu + // when DecRef is called. + defer d.DecRef(ctx) } fs.droppedDentries = fs.droppedDentries[:0] // Keep slice memory for reuse. fs.droppedDentriesMu.Unlock() @@ -157,17 +156,19 @@ const ( // // A kernfs dentry is similar to a dentry in a traditional filesystem: it's a // named reference to an inode. A dentry generally lives as long as it's part of -// a mounted filesystem tree. Kernfs doesn't cache dentries once all references -// to them are removed. Dentries hold a single reference to the inode they point +// a mounted filesystem tree. Kernfs drops dentries once all references to them +// are dropped. Dentries hold a single reference to the inode they point // to, and child dentries hold a reference on their parent. // // Must be initialized by Init prior to first use. // // +stateify savable type Dentry struct { + vfsd vfs.Dentry DentryRefs - vfsd vfs.Dentry + // fs is the owning filesystem. fs is immutable. + fs *Filesystem // flags caches useful information about the dentry from the inode. See the // dflags* consts above. Must be accessed by atomic ops. @@ -192,8 +193,9 @@ type Dentry struct { // Precondition: Caller must hold a reference on inode. // // Postcondition: Caller's reference on inode is transferred to the dentry. -func (d *Dentry) Init(inode Inode) { +func (d *Dentry) Init(fs *Filesystem, inode Inode) { d.vfsd.Init(d) + d.fs = fs d.inode = inode ftype := inode.Mode().FileType() if ftype == linux.ModeDirectory { @@ -222,14 +224,28 @@ func (d *Dentry) isSymlink() bool { // DecRef implements vfs.DentryImpl.DecRef. func (d *Dentry) DecRef(ctx context.Context) { - // Before the destructor is called, Dentry must be removed from VFS' dentry cache. + decRefParent := false + d.fs.mu.Lock() d.DentryRefs.DecRef(func() { d.inode.DecRef(ctx) // IncRef from Init. d.inode = nil if d.parent != nil { - d.parent.DecRef(ctx) // IncRef from Dentry.InsertChild. + // We will DecRef d.parent once all locks are dropped. + decRefParent = true + d.parent.dirMu.Lock() + // Remove d from parent.children. It might already have been + // removed due to invalidation. + if _, ok := d.parent.children[d.name]; ok { + delete(d.parent.children, d.name) + d.fs.VFSFilesystem().VirtualFilesystem().InvalidateDentry(ctx, d.VFSDentry()) + } + d.parent.dirMu.Unlock() } }) + d.fs.mu.Unlock() + if decRefParent { + d.parent.DecRef(ctx) // IncRef from Dentry.insertChild. + } } // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. @@ -247,26 +263,26 @@ func (d *Dentry) Watches() *vfs.Watches { // OnZeroWatches implements vfs.Dentry.OnZeroWatches. func (d *Dentry) OnZeroWatches(context.Context) {} -// InsertChild inserts child into the vfs dentry cache with the given name under +// insertChild inserts child into the vfs dentry cache with the given name under // this dentry. This does not update the directory inode, so calling this on its // own isn't sufficient to insert a child into a directory. // // Precondition: d must represent a directory inode. -func (d *Dentry) InsertChild(name string, child *Dentry) { +func (d *Dentry) insertChild(name string, child *Dentry) { d.dirMu.Lock() - d.InsertChildLocked(name, child) + d.insertChildLocked(name, child) d.dirMu.Unlock() } -// InsertChildLocked is equivalent to InsertChild, with additional +// insertChildLocked is equivalent to insertChild, with additional // preconditions. // // Preconditions: // * d must represent a directory inode. // * d.dirMu must be locked. -func (d *Dentry) InsertChildLocked(name string, child *Dentry) { +func (d *Dentry) insertChildLocked(name string, child *Dentry) { if !d.isDir() { - panic(fmt.Sprintf("InsertChildLocked called on non-directory Dentry: %+v.", d)) + panic(fmt.Sprintf("insertChildLocked called on non-directory Dentry: %+v.", d)) } d.IncRef() // DecRef in child's Dentry.destroy. child.parent = d @@ -277,36 +293,6 @@ func (d *Dentry) InsertChildLocked(name string, child *Dentry) { d.children[name] = child } -// RemoveChild removes child from the vfs dentry cache. This does not update the -// directory inode or modify the inode to be unlinked. So calling this on its own -// isn't sufficient to remove a child from a directory. -// -// Precondition: d must represent a directory inode. -func (d *Dentry) RemoveChild(name string, child *Dentry) error { - d.dirMu.Lock() - defer d.dirMu.Unlock() - return d.RemoveChildLocked(name, child) -} - -// RemoveChildLocked is equivalent to RemoveChild, with additional -// preconditions. -// -// Precondition: d.dirMu must be locked. -func (d *Dentry) RemoveChildLocked(name string, child *Dentry) error { - if !d.isDir() { - panic(fmt.Sprintf("RemoveChild called on non-directory Dentry: %+v.", d)) - } - c, ok := d.children[name] - if !ok { - return syserror.ENOENT - } - if c != child { - panic(fmt.Sprintf("Dentry hashed into inode doesn't match what vfs thinks! Child: %+v, vfs: %+v", c, child)) - } - delete(d.children, name) - return nil -} - // Inode returns the dentry's inode. func (d *Dentry) Inode() Inode { return d.inode @@ -348,11 +334,6 @@ type Inode interface { // a blanket implementation for all non-directory inodes. inodeDirectory - // Method for inodes that represent dynamic directories and their - // children. InodeNoDynamicLookup provides a blanket implementation for all - // non-dynamic-directory inodes. - inodeDynamicLookup - // Open creates a file description for the filesystem object represented by // this inode. The returned file description should hold a reference on the // dentry for its lifetime. @@ -365,6 +346,14 @@ type Inode interface { // corresponds to vfs.FilesystemImpl.StatFSAt. If the client filesystem // doesn't support statfs(2), this should return ENOSYS. StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) + + // Keep indicates whether the dentry created after Inode.Lookup should be + // kept in the kernfs dentry tree. + Keep() bool + + // Valid should return true if this inode is still valid, or needs to + // be resolved again by a call to Lookup. + Valid(ctx context.Context) bool } type inodeRefs interface { @@ -397,8 +386,8 @@ type inodeMetadata interface { // Precondition: All methods in this interface may only be called on directory // inodes. type inodeDirectory interface { - // The New{File,Dir,Node,Symlink} methods below should return a new inode - // hashed into this inode. + // The New{File,Dir,Node,Link,Symlink} methods below should return a new inode + // that will be hashed into the dentry tree. // // These inode constructors are inode-level operations rather than // filesystem-level operations to allow client filesystems to mix different @@ -409,60 +398,54 @@ type inodeDirectory interface { HasChildren() bool // NewFile creates a new regular file inode. - NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*Dentry, error) + NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (Inode, error) // NewDir creates a new directory inode. - NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*Dentry, error) + NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (Inode, error) // NewLink creates a new hardlink to a specified inode in this // directory. Implementations should create a new kernfs Dentry pointing to // target, and update target's link count. - NewLink(ctx context.Context, name string, target Inode) (*Dentry, error) + NewLink(ctx context.Context, name string, target Inode) (Inode, error) // NewSymlink creates a new symbolic link inode. - NewSymlink(ctx context.Context, name, target string) (*Dentry, error) + NewSymlink(ctx context.Context, name, target string) (Inode, error) // NewNode creates a new filesystem node for a mknod syscall. - NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*Dentry, error) + NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (Inode, error) // Unlink removes a child dentry from this directory inode. - Unlink(ctx context.Context, name string, child *Dentry) error + Unlink(ctx context.Context, name string, child Inode) error // RmDir removes an empty child directory from this directory // inode. Implementations must update the parent directory's link count, // if required. Implementations are not responsible for checking that child // is a directory, checking for an empty directory. - RmDir(ctx context.Context, name string, child *Dentry) error + RmDir(ctx context.Context, name string, child Inode) error // Rename is called on the source directory containing an inode being // renamed. child should point to the resolved child in the source - // directory. If Rename replaces a dentry in the destination directory, it - // should return the replaced dentry or nil otherwise. + // directory. // // Precondition: Caller must serialize concurrent calls to Rename. - Rename(ctx context.Context, oldname, newname string, child, dstDir *Dentry) (replaced *Dentry, err error) -} + Rename(ctx context.Context, oldname, newname string, child, dstDir Inode) error -type inodeDynamicLookup interface { - // Lookup should return an appropriate dentry if name should resolve to a - // child of this dynamic directory inode. This gives the directory an - // opportunity on every lookup to resolve additional entries that aren't - // hashed into the directory. This is only called when the inode is a - // directory. If the inode is not a directory, or if the directory only - // contains a static set of children, the implementer can unconditionally - // return an appropriate error (ENOTDIR and ENOENT respectively). + // Lookup should return an appropriate inode if name should resolve to a + // child of this directory inode. This gives the directory an opportunity + // on every lookup to resolve additional entries. This is only called when + // the inode is a directory. // - // The child returned by Lookup will be hashed into the VFS dentry tree. Its - // lifetime can be controlled by the filesystem implementation with an - // appropriate implementation of Valid. + // The child returned by Lookup will be hashed into the VFS dentry tree, + // atleast for the duration of the current FS operation. // - // Lookup returns the child with an extra reference and the caller owns this - // reference. - Lookup(ctx context.Context, name string) (*Dentry, error) - - // Valid should return true if this inode is still valid, or needs to - // be resolved again by a call to Lookup. - Valid(ctx context.Context) bool + // Lookup must return the child with an extra reference whose ownership is + // transferred to the dentry that is created to point to that inode. If + // Inode.Keep returns false, that new dentry will be dropped at the end of + // the current filesystem operation (before returning back to the VFS + // layer) if no other ref is picked on that dentry. If Inode.Keep returns + // true, then the dentry will be cached into the dentry tree until it is + // Unlink'd or RmDir'd. + Lookup(ctx context.Context, name string) (Inode, error) // IterDirents is used to iterate over dynamically created entries. It invokes // cb on each entry in the directory represented by the Inode. diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go b/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go index 7d9420725..f87782ee1 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go @@ -182,18 +182,22 @@ func (i *InodeNoopRefCount) StateTypeName() string { } func (i *InodeNoopRefCount) StateFields() []string { - return []string{} + return []string{ + "InodeTemporary", + } } func (i *InodeNoopRefCount) beforeSave() {} func (i *InodeNoopRefCount) StateSave(stateSinkObject state.Sink) { i.beforeSave() + stateSinkObject.Save(0, &i.InodeTemporary) } func (i *InodeNoopRefCount) afterLoad() {} func (i *InodeNoopRefCount) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.InodeTemporary) } func (i *InodeDirectoryNoNewChildren) StateTypeName() string { @@ -220,37 +224,22 @@ func (i *InodeNotDirectory) StateTypeName() string { } func (i *InodeNotDirectory) StateFields() []string { - return []string{} + return []string{ + "InodeAlwaysValid", + } } func (i *InodeNotDirectory) beforeSave() {} func (i *InodeNotDirectory) StateSave(stateSinkObject state.Sink) { i.beforeSave() + stateSinkObject.Save(0, &i.InodeAlwaysValid) } func (i *InodeNotDirectory) afterLoad() {} func (i *InodeNotDirectory) StateLoad(stateSourceObject state.Source) { -} - -func (i *InodeNoDynamicLookup) StateTypeName() string { - return "pkg/sentry/fsimpl/kernfs.InodeNoDynamicLookup" -} - -func (i *InodeNoDynamicLookup) StateFields() []string { - return []string{} -} - -func (i *InodeNoDynamicLookup) beforeSave() {} - -func (i *InodeNoDynamicLookup) StateSave(stateSinkObject state.Sink) { - i.beforeSave() -} - -func (i *InodeNoDynamicLookup) afterLoad() {} - -func (i *InodeNoDynamicLookup) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.InodeAlwaysValid) } func (i *InodeNotSymlink) StateTypeName() string { @@ -319,8 +308,9 @@ func (s *slot) StateTypeName() string { func (s *slot) StateFields() []string { return []string{ - "Name", - "Dentry", + "name", + "inode", + "static", "slotEntry", } } @@ -329,17 +319,19 @@ func (s *slot) beforeSave() {} func (s *slot) StateSave(stateSinkObject state.Sink) { s.beforeSave() - stateSinkObject.Save(0, &s.Name) - stateSinkObject.Save(1, &s.Dentry) - stateSinkObject.Save(2, &s.slotEntry) + stateSinkObject.Save(0, &s.name) + stateSinkObject.Save(1, &s.inode) + stateSinkObject.Save(2, &s.static) + stateSinkObject.Save(3, &s.slotEntry) } func (s *slot) afterLoad() {} func (s *slot) StateLoad(stateSourceObject state.Source) { - stateSourceObject.Load(0, &s.Name) - stateSourceObject.Load(1, &s.Dentry) - stateSourceObject.Load(2, &s.slotEntry) + stateSourceObject.Load(0, &s.name) + stateSourceObject.Load(1, &s.inode) + stateSourceObject.Load(2, &s.static) + stateSourceObject.Load(3, &s.slotEntry) } func (o *OrderedChildrenOptions) StateTypeName() string { @@ -442,11 +434,12 @@ func (s *StaticDirectory) StateTypeName() string { func (s *StaticDirectory) StateFields() []string { return []string{ + "InodeAlwaysValid", "InodeAttrs", "InodeDirectoryNoNewChildren", - "InodeNoDynamicLookup", "InodeNoStatFS", "InodeNotSymlink", + "InodeTemporary", "OrderedChildren", "StaticDirectoryRefs", "locks", @@ -458,48 +451,69 @@ func (s *StaticDirectory) beforeSave() {} func (s *StaticDirectory) StateSave(stateSinkObject state.Sink) { s.beforeSave() - stateSinkObject.Save(0, &s.InodeAttrs) - stateSinkObject.Save(1, &s.InodeDirectoryNoNewChildren) - stateSinkObject.Save(2, &s.InodeNoDynamicLookup) + stateSinkObject.Save(0, &s.InodeAlwaysValid) + stateSinkObject.Save(1, &s.InodeAttrs) + stateSinkObject.Save(2, &s.InodeDirectoryNoNewChildren) stateSinkObject.Save(3, &s.InodeNoStatFS) stateSinkObject.Save(4, &s.InodeNotSymlink) - stateSinkObject.Save(5, &s.OrderedChildren) - stateSinkObject.Save(6, &s.StaticDirectoryRefs) - stateSinkObject.Save(7, &s.locks) - stateSinkObject.Save(8, &s.fdOpts) + stateSinkObject.Save(5, &s.InodeTemporary) + stateSinkObject.Save(6, &s.OrderedChildren) + stateSinkObject.Save(7, &s.StaticDirectoryRefs) + stateSinkObject.Save(8, &s.locks) + stateSinkObject.Save(9, &s.fdOpts) } func (s *StaticDirectory) afterLoad() {} func (s *StaticDirectory) StateLoad(stateSourceObject state.Source) { - stateSourceObject.Load(0, &s.InodeAttrs) - stateSourceObject.Load(1, &s.InodeDirectoryNoNewChildren) - stateSourceObject.Load(2, &s.InodeNoDynamicLookup) + stateSourceObject.Load(0, &s.InodeAlwaysValid) + stateSourceObject.Load(1, &s.InodeAttrs) + stateSourceObject.Load(2, &s.InodeDirectoryNoNewChildren) stateSourceObject.Load(3, &s.InodeNoStatFS) stateSourceObject.Load(4, &s.InodeNotSymlink) - stateSourceObject.Load(5, &s.OrderedChildren) - stateSourceObject.Load(6, &s.StaticDirectoryRefs) - stateSourceObject.Load(7, &s.locks) - stateSourceObject.Load(8, &s.fdOpts) + stateSourceObject.Load(5, &s.InodeTemporary) + stateSourceObject.Load(6, &s.OrderedChildren) + stateSourceObject.Load(7, &s.StaticDirectoryRefs) + stateSourceObject.Load(8, &s.locks) + stateSourceObject.Load(9, &s.fdOpts) } -func (a *AlwaysValid) StateTypeName() string { - return "pkg/sentry/fsimpl/kernfs.AlwaysValid" +func (i *InodeAlwaysValid) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.InodeAlwaysValid" } -func (a *AlwaysValid) StateFields() []string { +func (i *InodeAlwaysValid) StateFields() []string { return []string{} } -func (a *AlwaysValid) beforeSave() {} +func (i *InodeAlwaysValid) beforeSave() {} -func (a *AlwaysValid) StateSave(stateSinkObject state.Sink) { - a.beforeSave() +func (i *InodeAlwaysValid) StateSave(stateSinkObject state.Sink) { + i.beforeSave() } -func (a *AlwaysValid) afterLoad() {} +func (i *InodeAlwaysValid) afterLoad() {} -func (a *AlwaysValid) StateLoad(stateSourceObject state.Source) { +func (i *InodeAlwaysValid) StateLoad(stateSourceObject state.Source) { +} + +func (i *InodeTemporary) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.InodeTemporary" +} + +func (i *InodeTemporary) StateFields() []string { + return []string{} +} + +func (i *InodeTemporary) beforeSave() {} + +func (i *InodeTemporary) StateSave(stateSinkObject state.Sink) { + i.beforeSave() +} + +func (i *InodeTemporary) afterLoad() {} + +func (i *InodeTemporary) StateLoad(stateSourceObject state.Source) { } func (i *InodeNoStatFS) StateTypeName() string { @@ -556,8 +570,9 @@ func (d *Dentry) StateTypeName() string { func (d *Dentry) StateFields() []string { return []string{ - "DentryRefs", "vfsd", + "DentryRefs", + "fs", "flags", "parent", "name", @@ -570,25 +585,27 @@ func (d *Dentry) beforeSave() {} func (d *Dentry) StateSave(stateSinkObject state.Sink) { d.beforeSave() - stateSinkObject.Save(0, &d.DentryRefs) - stateSinkObject.Save(1, &d.vfsd) - stateSinkObject.Save(2, &d.flags) - stateSinkObject.Save(3, &d.parent) - stateSinkObject.Save(4, &d.name) - stateSinkObject.Save(5, &d.children) - stateSinkObject.Save(6, &d.inode) + stateSinkObject.Save(0, &d.vfsd) + stateSinkObject.Save(1, &d.DentryRefs) + stateSinkObject.Save(2, &d.fs) + stateSinkObject.Save(3, &d.flags) + stateSinkObject.Save(4, &d.parent) + stateSinkObject.Save(5, &d.name) + stateSinkObject.Save(6, &d.children) + stateSinkObject.Save(7, &d.inode) } func (d *Dentry) afterLoad() {} func (d *Dentry) StateLoad(stateSourceObject state.Source) { - stateSourceObject.Load(0, &d.DentryRefs) - stateSourceObject.Load(1, &d.vfsd) - stateSourceObject.Load(2, &d.flags) - stateSourceObject.Load(3, &d.parent) - stateSourceObject.Load(4, &d.name) - stateSourceObject.Load(5, &d.children) - stateSourceObject.Load(6, &d.inode) + stateSourceObject.Load(0, &d.vfsd) + stateSourceObject.Load(1, &d.DentryRefs) + stateSourceObject.Load(2, &d.fs) + stateSourceObject.Load(3, &d.flags) + stateSourceObject.Load(4, &d.parent) + stateSourceObject.Load(5, &d.name) + stateSourceObject.Load(6, &d.children) + stateSourceObject.Load(7, &d.inode) } func (l *slotList) StateTypeName() string { @@ -707,12 +724,12 @@ func (dir *syntheticDirectory) StateTypeName() string { func (dir *syntheticDirectory) StateFields() []string { return []string{ + "InodeAlwaysValid", "InodeAttrs", "InodeNoStatFS", - "InodeNoopRefCount", - "InodeNoDynamicLookup", "InodeNotSymlink", "OrderedChildren", + "syntheticDirectoryRefs", "locks", } } @@ -721,27 +738,50 @@ func (dir *syntheticDirectory) beforeSave() {} func (dir *syntheticDirectory) StateSave(stateSinkObject state.Sink) { dir.beforeSave() - stateSinkObject.Save(0, &dir.InodeAttrs) - stateSinkObject.Save(1, &dir.InodeNoStatFS) - stateSinkObject.Save(2, &dir.InodeNoopRefCount) - stateSinkObject.Save(3, &dir.InodeNoDynamicLookup) - stateSinkObject.Save(4, &dir.InodeNotSymlink) - stateSinkObject.Save(5, &dir.OrderedChildren) + stateSinkObject.Save(0, &dir.InodeAlwaysValid) + stateSinkObject.Save(1, &dir.InodeAttrs) + stateSinkObject.Save(2, &dir.InodeNoStatFS) + stateSinkObject.Save(3, &dir.InodeNotSymlink) + stateSinkObject.Save(4, &dir.OrderedChildren) + stateSinkObject.Save(5, &dir.syntheticDirectoryRefs) stateSinkObject.Save(6, &dir.locks) } func (dir *syntheticDirectory) afterLoad() {} func (dir *syntheticDirectory) StateLoad(stateSourceObject state.Source) { - stateSourceObject.Load(0, &dir.InodeAttrs) - stateSourceObject.Load(1, &dir.InodeNoStatFS) - stateSourceObject.Load(2, &dir.InodeNoopRefCount) - stateSourceObject.Load(3, &dir.InodeNoDynamicLookup) - stateSourceObject.Load(4, &dir.InodeNotSymlink) - stateSourceObject.Load(5, &dir.OrderedChildren) + stateSourceObject.Load(0, &dir.InodeAlwaysValid) + stateSourceObject.Load(1, &dir.InodeAttrs) + stateSourceObject.Load(2, &dir.InodeNoStatFS) + stateSourceObject.Load(3, &dir.InodeNotSymlink) + stateSourceObject.Load(4, &dir.OrderedChildren) + stateSourceObject.Load(5, &dir.syntheticDirectoryRefs) stateSourceObject.Load(6, &dir.locks) } +func (r *syntheticDirectoryRefs) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.syntheticDirectoryRefs" +} + +func (r *syntheticDirectoryRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *syntheticDirectoryRefs) beforeSave() {} + +func (r *syntheticDirectoryRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +func (r *syntheticDirectoryRefs) afterLoad() {} + +func (r *syntheticDirectoryRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) +} + func init() { state.Register((*DentryRefs)(nil)) state.Register((*DynamicBytesFile)(nil)) @@ -752,7 +792,6 @@ func init() { state.Register((*InodeNoopRefCount)(nil)) state.Register((*InodeDirectoryNoNewChildren)(nil)) state.Register((*InodeNotDirectory)(nil)) - state.Register((*InodeNoDynamicLookup)(nil)) state.Register((*InodeNotSymlink)(nil)) state.Register((*InodeAttrs)(nil)) state.Register((*slot)(nil)) @@ -761,7 +800,8 @@ func init() { state.Register((*renameAcrossDifferentImplementationsError)(nil)) state.Register((*InodeSymlink)(nil)) state.Register((*StaticDirectory)(nil)) - state.Register((*AlwaysValid)(nil)) + state.Register((*InodeAlwaysValid)(nil)) + state.Register((*InodeTemporary)(nil)) state.Register((*InodeNoStatFS)(nil)) state.Register((*Filesystem)(nil)) state.Register((*Dentry)(nil)) @@ -770,4 +810,5 @@ func init() { state.Register((*StaticDirectoryRefs)(nil)) state.Register((*StaticSymlink)(nil)) state.Register((*syntheticDirectory)(nil)) + state.Register((*syntheticDirectoryRefs)(nil)) } diff --git a/pkg/sentry/fsimpl/kernfs/symlink.go b/pkg/sentry/fsimpl/kernfs/symlink.go index 58a93eaac..934cc6c9e 100644 --- a/pkg/sentry/fsimpl/kernfs/symlink.go +++ b/pkg/sentry/fsimpl/kernfs/symlink.go @@ -38,13 +38,10 @@ type StaticSymlink struct { var _ Inode = (*StaticSymlink)(nil) // NewStaticSymlink creates a new symlink file pointing to 'target'. -func NewStaticSymlink(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, target string) *Dentry { +func NewStaticSymlink(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, target string) Inode { inode := &StaticSymlink{} inode.Init(creds, devMajor, devMinor, ino, target) - - d := &Dentry{} - d.Init(inode) - return d + return inode } // Init initializes the instance. diff --git a/pkg/sentry/fsimpl/kernfs/synthetic_directory.go b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go index ea7f073eb..d0ed17b18 100644 --- a/pkg/sentry/fsimpl/kernfs/synthetic_directory.go +++ b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go @@ -29,24 +29,22 @@ import ( // // +stateify savable type syntheticDirectory struct { + InodeAlwaysValid InodeAttrs InodeNoStatFS - InodeNoopRefCount - InodeNoDynamicLookup InodeNotSymlink OrderedChildren + syntheticDirectoryRefs locks vfs.FileLocks } var _ Inode = (*syntheticDirectory)(nil) -func newSyntheticDirectory(creds *auth.Credentials, perm linux.FileMode) *Dentry { +func newSyntheticDirectory(creds *auth.Credentials, perm linux.FileMode) Inode { inode := &syntheticDirectory{} inode.Init(creds, 0 /* devMajor */, 0 /* devMinor */, 0 /* ino */, perm) - d := &Dentry{} - d.Init(inode) - return d + return inode } func (dir *syntheticDirectory) Init(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode) { @@ -69,34 +67,46 @@ func (dir *syntheticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, } // NewFile implements Inode.NewFile. -func (dir *syntheticDirectory) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*Dentry, error) { +func (dir *syntheticDirectory) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (Inode, error) { return nil, syserror.EPERM } // NewDir implements Inode.NewDir. -func (dir *syntheticDirectory) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*Dentry, error) { +func (dir *syntheticDirectory) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (Inode, error) { if !opts.ForSyntheticMountpoint { return nil, syserror.EPERM } - subdird := newSyntheticDirectory(auth.CredentialsFromContext(ctx), opts.Mode&linux.PermissionsMask) - if err := dir.OrderedChildren.Insert(name, subdird); err != nil { - subdird.DecRef(ctx) + subdirI := newSyntheticDirectory(auth.CredentialsFromContext(ctx), opts.Mode&linux.PermissionsMask) + if err := dir.OrderedChildren.Insert(name, subdirI); err != nil { + subdirI.DecRef(ctx) return nil, err } - return subdird, nil + return subdirI, nil } // NewLink implements Inode.NewLink. -func (dir *syntheticDirectory) NewLink(ctx context.Context, name string, target Inode) (*Dentry, error) { +func (dir *syntheticDirectory) NewLink(ctx context.Context, name string, target Inode) (Inode, error) { return nil, syserror.EPERM } // NewSymlink implements Inode.NewSymlink. -func (dir *syntheticDirectory) NewSymlink(ctx context.Context, name, target string) (*Dentry, error) { +func (dir *syntheticDirectory) NewSymlink(ctx context.Context, name, target string) (Inode, error) { return nil, syserror.EPERM } // NewNode implements Inode.NewNode. -func (dir *syntheticDirectory) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*Dentry, error) { +func (dir *syntheticDirectory) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (Inode, error) { return nil, syserror.EPERM } + +// DecRef implements Inode.DecRef. +func (dir *syntheticDirectory) DecRef(ctx context.Context) { + dir.syntheticDirectoryRefs.DecRef(func() { dir.Destroy(ctx) }) +} + +// Keep implements Inode.Keep. This is redundant because inodes will never be +// created via Lookup and inodes are always valid. Makes sense to return true +// because these inodes are not temporary and should only be removed on RmDir. +func (dir *syntheticDirectory) Keep() bool { + return true +} diff --git a/pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go b/pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go new file mode 100644 index 000000000..28d556b42 --- /dev/null +++ b/pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go @@ -0,0 +1,118 @@ +package kernfs + +import ( + "fmt" + "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" +) + +// ownerType is used to customize logging. Note that we use a pointer to T so +// that we do not copy the entire object when passed as a format parameter. +var syntheticDirectoryownerType *syntheticDirectory + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// Note that the number of references is actually refCount + 1 so that a default +// zero-value Refs object contains one reference. +// +// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in +// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount. +// This will allow us to add stack trace information to the leak messages +// without growing the size of Refs. +// +// +stateify savable +type syntheticDirectoryRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. + refCount int64 +} + +func (r *syntheticDirectoryRefs) finalize() { + var note string + switch refs_vfs1.GetLeakMode() { + case refs_vfs1.NoLeakChecking: + return + case refs_vfs1.UninitializedLeakChecking: + note = "(Leak checker uninitialized): " + } + if n := r.ReadRefs(); n != 0 { + log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, syntheticDirectoryownerType, n) + } +} + +// EnableLeakCheck checks for reference leaks when Refs gets garbage collected. +func (r *syntheticDirectoryRefs) EnableLeakCheck() { + if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking { + runtime.SetFinalizer(r, (*syntheticDirectoryRefs).finalize) + } +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *syntheticDirectoryRefs) ReadRefs() int64 { + + return atomic.LoadInt64(&r.refCount) + 1 +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *syntheticDirectoryRefs) IncRef() { + if v := atomic.AddInt64(&r.refCount, 1); v <= 0 { + panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, syntheticDirectoryownerType)) + } +} + +// TryIncRef implements refs.RefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *syntheticDirectoryRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + v := atomic.AddInt64(&r.refCount, speculativeRef) + if int32(v) < 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + atomic.AddInt64(&r.refCount, -speculativeRef+1) + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *syntheticDirectoryRefs) DecRef(destroy func()) { + switch v := atomic.AddInt64(&r.refCount, -1); { + case v < -1: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, syntheticDirectoryownerType)) + + case v == -1: + + if destroy != nil { + destroy() + } + } +} |