summary refs log tree commit diff homepage
path: root/pkg/sentry/fsimpl/kernfs
diff options
context:
space:
mode:
author gVisor bot <gvisor-bot@google.com> 2020-10-12 23:33:20 +0000
committer gVisor bot <gvisor-bot@google.com> 2020-10-12 23:33:20 +0000
commit 213e252ab30dc0646d3de409dedd58f5c55e05af (patch)
tree 42375535ec08b69adc388d81f22cba5a92bb1cd2 /pkg/sentry/fsimpl/kernfs
parent 1b455257f0fb4494bfd10802d8011a073a0672e4 (diff)
parent e7bbe70f79aa9308c2eb54b057ee5779b22f478e (diff)
Merge release-20200928.0-94-ge7bbe70f7 (automated)
Diffstat (limited to 'pkg/sentry/fsimpl/kernfs')
-rw-r--r-- pkg/sentry/fsimpl/kernfs/fd_impl_util.go 4
-rw-r--r-- pkg/sentry/fsimpl/kernfs/filesystem.go 192
-rw-r--r-- pkg/sentry/fsimpl/kernfs/inode_impl_util.go 235
-rw-r--r-- pkg/sentry/fsimpl/kernfs/kernfs.go 173
-rw-r--r-- pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go 207
-rw-r--r-- pkg/sentry/fsimpl/kernfs/symlink.go 7
-rw-r--r-- pkg/sentry/fsimpl/kernfs/synthetic_directory.go 40
-rw-r--r-- pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go 118
8 files changed, 608 insertions, 368 deletions
diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
index 0a4cd4057..abf1905d6 100644
--- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
@@ -201,12 +201,12 @@ func (fd *GenericDirectoryFD) IterDirents(ctx context.Context, cb vfs.IterDirent
// these.
childIdx := fd.off - 2
for it := fd.children.nthLocked(childIdx); it != nil; it = it.Next() {
- stat, err := it.Dentry.inode.Stat(ctx, fd.filesystem(), opts)
+ stat, err := it.inode.Stat(ctx, fd.filesystem(), opts)
if err != nil {
return err
}
dirent := vfs.Dirent{
- Name: it.Name,
+ Name: it.name,
Type: linux.FileMode(stat.Mode).DirentType(),
Ino: stat.Ino,
NextOff: fd.off + 1,
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index 5cc1c4281..6426a55f6 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -89,7 +89,7 @@ afterSymlink:
}
if targetVD.Ok() {
err := rp.HandleJump(targetVD)
- targetVD.DecRef(ctx)
+ fs.deferDecRefVD(ctx, targetVD)
if err != nil {
return nil, err
}
@@ -120,22 +120,33 @@ func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.Vir
// Cached dentry exists, revalidate.
if !child.inode.Valid(ctx) {
delete(parent.children, name)
- vfsObj.InvalidateDentry(ctx, &child.vfsd)
- fs.deferDecRef(child) // Reference from Lookup.
+ if child.inode.Keep() {
+ // Drop the ref owned by kernfs.
+ fs.deferDecRef(child)
+ }
+ vfsObj.InvalidateDentry(ctx, child.VFSDentry())
child = nil
}
}
if child == nil {
// Dentry isn't cached; it either doesn't exist or failed revalidation.
// Attempt to resolve it via Lookup.
- c, err := parent.inode.Lookup(ctx, name)
+ childInode, err := parent.inode.Lookup(ctx, name)
if err != nil {
return nil, err
}
- // Reference on c (provided by Lookup) will be dropped when the dentry
- // fails validation.
- parent.InsertChildLocked(name, c)
- child = c
+ var newChild Dentry
+ newChild.Init(fs, childInode) // childInode's ref is transferred to newChild.
+ parent.insertChildLocked(name, &newChild)
+ child = &newChild
+
+ // Drop the ref on newChild. This will cause the dentry to get pruned
+ // from the dentry tree by the end of current filesystem operation
+ // (before returning to the VFS layer) if another ref is not picked on
+ // this dentry.
+ if !childInode.Keep() {
+ fs.deferDecRef(&newChild)
+ }
}
return child, nil
}
@@ -191,7 +202,7 @@ func (fs *Filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.Resolving
}
// checkCreateLocked checks that a file named rp.Component() may be created in
-// directory parentVFSD, then returns rp.Component().
+// directory parent, then returns rp.Component().
//
// Preconditions:
// * Filesystem.mu must be locked for at least reading.
@@ -298,9 +309,9 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
return syserror.EEXIST
}
fs.mu.Lock()
+ defer fs.processDeferredDecRefs(ctx)
defer fs.mu.Unlock()
parent, err := fs.walkParentDirLocked(ctx, rp)
- fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return err
}
@@ -324,11 +335,13 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
return syserror.EPERM
}
- child, err := parent.inode.NewLink(ctx, pc, d.inode)
+ childI, err := parent.inode.NewLink(ctx, pc, d.inode)
if err != nil {
return err
}
- parent.InsertChildLocked(pc, child)
+ var child Dentry
+ child.Init(fs, childI)
+ parent.insertChildLocked(pc, &child)
return nil
}
@@ -338,9 +351,9 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
return syserror.EEXIST
}
fs.mu.Lock()
+ defer fs.processDeferredDecRefs(ctx)
defer fs.mu.Unlock()
parent, err := fs.walkParentDirLocked(ctx, rp)
- fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return err
}
@@ -355,14 +368,16 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
return err
}
defer rp.Mount().EndWrite()
- child, err := parent.inode.NewDir(ctx, pc, opts)
+ childI, err := parent.inode.NewDir(ctx, pc, opts)
if err != nil {
if !opts.ForSyntheticMountpoint || err == syserror.EEXIST {
return err
}
- child = newSyntheticDirectory(rp.Credentials(), opts.Mode)
+ childI = newSyntheticDirectory(rp.Credentials(), opts.Mode)
}
- parent.InsertChildLocked(pc, child)
+ var child Dentry
+ child.Init(fs, childI)
+ parent.insertChildLocked(pc, &child)
return nil
}
@@ -372,9 +387,9 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
return syserror.EEXIST
}
fs.mu.Lock()
+ defer fs.processDeferredDecRefs(ctx)
defer fs.mu.Unlock()
parent, err := fs.walkParentDirLocked(ctx, rp)
- fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return err
}
@@ -389,11 +404,13 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
return err
}
defer rp.Mount().EndWrite()
- newD, err := parent.inode.NewNode(ctx, pc, opts)
+ newI, err := parent.inode.NewNode(ctx, pc, opts)
if err != nil {
return err
}
- parent.InsertChildLocked(pc, newD)
+ var newD Dentry
+ newD.Init(fs, newI)
+ parent.insertChildLocked(pc, &newD)
return nil
}
@@ -409,22 +426,23 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
// Do not create new file.
if opts.Flags&linux.O_CREAT == 0 {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
d, err := fs.walkExistingLocked(ctx, rp)
if err != nil {
fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
return nil, err
}
if err := d.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
return nil, err
}
- d.inode.IncRef()
- defer d.inode.DecRef(ctx)
+ // Open may block so we need to unlock fs.mu. IncRef d to prevent
+ // its destruction while fs.mu is unlocked.
+ d.IncRef()
fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
- return d.inode.Open(ctx, rp, d, opts)
+ fd, err := d.inode.Open(ctx, rp, d, opts)
+ d.DecRef(ctx)
+ return fd, err
}
// May create new file.
@@ -438,6 +456,10 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
unlocked = true
}
}
+ // Process all to-be-decref'd dentries at the end at once.
+ // Since we defer unlock() AFTER this, fs.mu is guaranteed to be unlocked
+ // when this is executed.
+ defer fs.processDeferredDecRefs(ctx)
defer unlock()
if rp.Done() {
if rp.MustBeDir() {
@@ -449,14 +471,16 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
if err := d.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
return nil, err
}
- d.inode.IncRef()
- defer d.inode.DecRef(ctx)
+ // Open may block so we need to unlock fs.mu. IncRef d to prevent
+ // its destruction while fs.mu is unlocked.
+ d.IncRef()
unlock()
- return d.inode.Open(ctx, rp, d, opts)
+ fd, err := d.inode.Open(ctx, rp, d, opts)
+ d.DecRef(ctx)
+ return fd, err
}
afterTrailingSymlink:
parent, err := fs.walkParentDirLocked(ctx, rp)
- fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return nil, err
}
@@ -487,18 +511,23 @@ afterTrailingSymlink:
}
defer rp.Mount().EndWrite()
// Create and open the child.
- child, err := parent.inode.NewFile(ctx, pc, opts)
+ childI, err := parent.inode.NewFile(ctx, pc, opts)
if err != nil {
return nil, err
}
+ var child Dentry
+ child.Init(fs, childI)
// FIXME(gvisor.dev/issue/1193): Race between checking existence with
- // fs.stepExistingLocked and parent.InsertChild. If possible, we should hold
+ // fs.stepExistingLocked and parent.insertChild. If possible, we should hold
// dirMu from one to the other.
- parent.InsertChild(pc, child)
- child.inode.IncRef()
- defer child.inode.DecRef(ctx)
+ parent.insertChild(pc, &child)
+ // Open may block so we need to unlock fs.mu. IncRef child to prevent
+ // its destruction while fs.mu is unlocked.
+ child.IncRef()
unlock()
- return child.inode.Open(ctx, rp, child, opts)
+ fd, err := child.inode.Open(ctx, rp, &child, opts)
+ child.DecRef(ctx)
+ return fd, err
}
if err != nil {
return nil, err
@@ -514,7 +543,7 @@ afterTrailingSymlink:
}
if targetVD.Ok() {
err := rp.HandleJump(targetVD)
- targetVD.DecRef(ctx)
+ fs.deferDecRefVD(ctx, targetVD)
if err != nil {
return nil, err
}
@@ -530,18 +559,21 @@ afterTrailingSymlink:
if err := child.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
return nil, err
}
- child.inode.IncRef()
- defer child.inode.DecRef(ctx)
+ // Open may block so we need to unlock fs.mu. IncRef child to prevent
+ // its destruction while fs.mu is unlocked.
+ child.IncRef()
unlock()
- return child.inode.Open(ctx, rp, child, opts)
+ fd, err := child.inode.Open(ctx, rp, child, opts)
+ child.DecRef(ctx)
+ return fd, err
}
// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
d, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return "", err
}
@@ -560,7 +592,7 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0
fs.mu.Lock()
- defer fs.processDeferredDecRefsLocked(ctx)
+ defer fs.processDeferredDecRefs(ctx)
defer fs.mu.Unlock()
// Resolve the destination directory first to verify that it's on this
@@ -632,24 +664,27 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
if err := virtfs.PrepareRenameDentry(mntns, srcVFSD, dstVFSD); err != nil {
return err
}
- replaced, err := srcDir.inode.Rename(ctx, src.name, pc, src, dstDir)
+ err = srcDir.inode.Rename(ctx, src.name, pc, src.inode, dstDir.inode)
if err != nil {
virtfs.AbortRenameDentry(srcVFSD, dstVFSD)
return err
}
delete(srcDir.children, src.name)
if srcDir != dstDir {
- fs.deferDecRef(srcDir)
- dstDir.IncRef()
+ fs.deferDecRef(srcDir) // child (src) drops ref on old parent.
+ dstDir.IncRef() // child (src) takes a ref on the new parent.
}
src.parent = dstDir
src.name = pc
if dstDir.children == nil {
dstDir.children = make(map[string]*Dentry)
}
+ replaced := dstDir.children[pc]
dstDir.children[pc] = src
var replaceVFSD *vfs.Dentry
if replaced != nil {
+ // deferDecRef so that fs.mu and dstDir.mu are unlocked by then.
+ fs.deferDecRef(replaced)
replaceVFSD = replaced.VFSDentry()
}
virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaceVFSD)
@@ -659,10 +694,10 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
fs.mu.Lock()
+ defer fs.processDeferredDecRefs(ctx)
defer fs.mu.Unlock()
d, err := fs.walkExistingLocked(ctx, rp)
- fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return err
}
@@ -691,10 +726,13 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
return err
}
- if err := parentDentry.inode.RmDir(ctx, d.name, d); err != nil {
+ if err := parentDentry.inode.RmDir(ctx, d.name, d.inode); err != nil {
virtfs.AbortDeleteDentry(vfsd)
return err
}
+ delete(parentDentry.children, d.name)
+ // Defer decref so that fs.mu and parentDentry.dirMu are unlocked by then.
+ fs.deferDecRef(d)
virtfs.CommitDeleteDentry(ctx, vfsd)
return nil
}
@@ -702,9 +740,9 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
func (fs *Filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
d, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return err
}
@@ -717,9 +755,9 @@ func (fs *Filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts
// StatAt implements vfs.FilesystemImpl.StatAt.
func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
d, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return linux.Statx{}, err
}
@@ -729,9 +767,9 @@ func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
// StatFSAt implements vfs.FilesystemImpl.StatFSAt.
func (fs *Filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
d, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return linux.Statfs{}, err
}
@@ -744,9 +782,9 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ
return syserror.EEXIST
}
fs.mu.Lock()
+ defer fs.processDeferredDecRefs(ctx)
defer fs.mu.Unlock()
parent, err := fs.walkParentDirLocked(ctx, rp)
- fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return err
}
@@ -761,21 +799,23 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ
return err
}
defer rp.Mount().EndWrite()
- child, err := parent.inode.NewSymlink(ctx, pc, target)
+ childI, err := parent.inode.NewSymlink(ctx, pc, target)
if err != nil {
return err
}
- parent.InsertChildLocked(pc, child)
+ var child Dentry
+ child.Init(fs, childI)
+ parent.insertChildLocked(pc, &child)
return nil
}
// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
fs.mu.Lock()
+ defer fs.processDeferredDecRefs(ctx)
defer fs.mu.Unlock()
d, err := fs.walkExistingLocked(ctx, rp)
- fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return err
}
@@ -799,10 +839,13 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil {
return err
}
- if err := parentDentry.inode.Unlink(ctx, d.name, d); err != nil {
+ if err := parentDentry.inode.Unlink(ctx, d.name, d.inode); err != nil {
virtfs.AbortDeleteDentry(vfsd)
return err
}
+ delete(parentDentry.children, d.name)
+ // Defer decref so that fs.mu and parentDentry.dirMu are unlocked by then.
+ fs.deferDecRef(d)
virtfs.CommitDeleteDentry(ctx, vfsd)
return nil
}
@@ -810,9 +853,9 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
// BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt.
func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
d, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return nil, err
}
@@ -825,9 +868,9 @@ func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath
// ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
func (fs *Filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
_, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return nil, err
}
@@ -838,9 +881,9 @@ func (fs *Filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, si
// GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
func (fs *Filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
_, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return "", err
}
@@ -851,9 +894,9 @@ func (fs *Filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
// SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
func (fs *Filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
_, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return err
}
@@ -864,9 +907,9 @@ func (fs *Filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
// RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
func (fs *Filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
_, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return err
}
@@ -880,3 +923,16 @@ func (fs *Filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe
defer fs.mu.RUnlock()
return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*Dentry), b)
}
+
+func (fs *Filesystem) deferDecRefVD(ctx context.Context, vd vfs.VirtualDentry) {
+ if d, ok := vd.Dentry().Impl().(*Dentry); ok && d.fs == fs {
+ // The following is equivalent to vd.DecRef(ctx). This is needed
+ // because if d belongs to this filesystem, we can not DecRef it right
+ // away as we may be holding fs.mu. d.DecRef may acquire fs.mu. So we
+ // defer the DecRef to when locks are dropped.
+ vd.Mount().DecRef(ctx)
+ fs.deferDecRef(d)
+ } else {
+ vd.DecRef(ctx)
+ }
+}
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index 49210e748..122b10591 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -34,6 +34,7 @@ import (
//
// +stateify savable
type InodeNoopRefCount struct {
+ InodeTemporary
}
// IncRef implements Inode.IncRef.
@@ -57,27 +58,27 @@ func (InodeNoopRefCount) TryIncRef() bool {
type InodeDirectoryNoNewChildren struct{}
// NewFile implements Inode.NewFile.
-func (InodeDirectoryNoNewChildren) NewFile(context.Context, string, vfs.OpenOptions) (*Dentry, error) {
+func (InodeDirectoryNoNewChildren) NewFile(context.Context, string, vfs.OpenOptions) (Inode, error) {
return nil, syserror.EPERM
}
// NewDir implements Inode.NewDir.
-func (InodeDirectoryNoNewChildren) NewDir(context.Context, string, vfs.MkdirOptions) (*Dentry, error) {
+func (InodeDirectoryNoNewChildren) NewDir(context.Context, string, vfs.MkdirOptions) (Inode, error) {
return nil, syserror.EPERM
}
// NewLink implements Inode.NewLink.
-func (InodeDirectoryNoNewChildren) NewLink(context.Context, string, Inode) (*Dentry, error) {
+func (InodeDirectoryNoNewChildren) NewLink(context.Context, string, Inode) (Inode, error) {
return nil, syserror.EPERM
}
// NewSymlink implements Inode.NewSymlink.
-func (InodeDirectoryNoNewChildren) NewSymlink(context.Context, string, string) (*Dentry, error) {
+func (InodeDirectoryNoNewChildren) NewSymlink(context.Context, string, string) (Inode, error) {
return nil, syserror.EPERM
}
// NewNode implements Inode.NewNode.
-func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOptions) (*Dentry, error) {
+func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOptions) (Inode, error) {
return nil, syserror.EPERM
}
@@ -88,6 +89,7 @@ func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOpt
//
// +stateify savable
type InodeNotDirectory struct {
+ InodeAlwaysValid
}
// HasChildren implements Inode.HasChildren.
@@ -96,47 +98,47 @@ func (InodeNotDirectory) HasChildren() bool {
}
// NewFile implements Inode.NewFile.
-func (InodeNotDirectory) NewFile(context.Context, string, vfs.OpenOptions) (*Dentry, error) {
+func (InodeNotDirectory) NewFile(context.Context, string, vfs.OpenOptions) (Inode, error) {
panic("NewFile called on non-directory inode")
}
// NewDir implements Inode.NewDir.
-func (InodeNotDirectory) NewDir(context.Context, string, vfs.MkdirOptions) (*Dentry, error) {
+func (InodeNotDirectory) NewDir(context.Context, string, vfs.MkdirOptions) (Inode, error) {
panic("NewDir called on non-directory inode")
}
// NewLink implements Inode.NewLink.
-func (InodeNotDirectory) NewLink(context.Context, string, Inode) (*Dentry, error) {
+func (InodeNotDirectory) NewLink(context.Context, string, Inode) (Inode, error) {
panic("NewLink called on non-directory inode")
}
// NewSymlink implements Inode.NewSymlink.
-func (InodeNotDirectory) NewSymlink(context.Context, string, string) (*Dentry, error) {
+func (InodeNotDirectory) NewSymlink(context.Context, string, string) (Inode, error) {
panic("NewSymlink called on non-directory inode")
}
// NewNode implements Inode.NewNode.
-func (InodeNotDirectory) NewNode(context.Context, string, vfs.MknodOptions) (*Dentry, error) {
+func (InodeNotDirectory) NewNode(context.Context, string, vfs.MknodOptions) (Inode, error) {
panic("NewNode called on non-directory inode")
}
// Unlink implements Inode.Unlink.
-func (InodeNotDirectory) Unlink(context.Context, string, *Dentry) error {
+func (InodeNotDirectory) Unlink(context.Context, string, Inode) error {
panic("Unlink called on non-directory inode")
}
// RmDir implements Inode.RmDir.
-func (InodeNotDirectory) RmDir(context.Context, string, *Dentry) error {
+func (InodeNotDirectory) RmDir(context.Context, string, Inode) error {
panic("RmDir called on non-directory inode")
}
// Rename implements Inode.Rename.
-func (InodeNotDirectory) Rename(context.Context, string, string, *Dentry, *Dentry) (*Dentry, error) {
+func (InodeNotDirectory) Rename(context.Context, string, string, Inode, Inode) error {
panic("Rename called on non-directory inode")
}
// Lookup implements Inode.Lookup.
-func (InodeNotDirectory) Lookup(ctx context.Context, name string) (*Dentry, error) {
+func (InodeNotDirectory) Lookup(ctx context.Context, name string) (Inode, error) {
panic("Lookup called on non-directory inode")
}
@@ -145,35 +147,6 @@ func (InodeNotDirectory) IterDirents(ctx context.Context, callback vfs.IterDiren
panic("IterDirents called on non-directory inode")
}
-// Valid implements Inode.Valid.
-func (InodeNotDirectory) Valid(context.Context) bool {
- return true
-}
-
-// InodeNoDynamicLookup partially implements the Inode interface, specifically
-// the inodeDynamicLookup sub interface. Directory inodes that do not support
-// dymanic entries (i.e. entries that are not "hashed" into the
-// vfs.Dentry.children) can embed this to provide no-op implementations for
-// functions related to dynamic entries.
-//
-// +stateify savable
-type InodeNoDynamicLookup struct{}
-
-// Lookup implements Inode.Lookup.
-func (InodeNoDynamicLookup) Lookup(ctx context.Context, name string) (*Dentry, error) {
- return nil, syserror.ENOENT
-}
-
-// IterDirents implements Inode.IterDirents.
-func (InodeNoDynamicLookup) IterDirents(ctx context.Context, callback vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
- return offset, nil
-}
-
-// Valid implements Inode.Valid.
-func (InodeNoDynamicLookup) Valid(ctx context.Context) bool {
- return true
-}
-
// InodeNotSymlink partially implements the Inode interface, specifically the
// inodeSymlink sub interface. All inodes that are not symlinks may embed this
// to return the appropriate errors from symlink-related functions.
@@ -273,7 +246,7 @@ func (a *InodeAttrs) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *aut
// SetInodeStat sets the corresponding attributes from opts to InodeAttrs.
// This function can be used by other kernfs-based filesystem implementation to
-// sets the unexported attributes into kernfs.InodeAttrs.
+// set the unexported attributes into InodeAttrs.
func (a *InodeAttrs) SetInodeStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
if opts.Stat.Mask == 0 {
return nil
@@ -344,8 +317,9 @@ func (a *InodeAttrs) DecLinks() {
// +stateify savable
type slot struct {
- Name string
- Dentry *Dentry
+ name string
+ inode Inode
+ static bool
slotEntry
}
@@ -361,10 +335,18 @@ type OrderedChildrenOptions struct {
}
// OrderedChildren partially implements the Inode interface. OrderedChildren can
-// be embedded in directory inodes to keep track of the children in the
+// be embedded in directory inodes to keep track of children in the
// directory, and can then be used to implement a generic directory FD -- see
-// GenericDirectoryFD. OrderedChildren is not compatible with dynamic
-// directories.
+// GenericDirectoryFD.
+//
+// OrderedChildren can represent a node in an Inode tree. The children inodes
+// might be directories themselves using OrderedChildren; hence extending the
+// tree. The parent inode (OrderedChildren user) holds a ref on all its static
+// children. This lets the static inodes outlive their associated dentry.
+// While the dentry might have to be regenerated via a Lookup() call, we can
+// keep reusing the same static inode. These static children inodes are finally
+// DecRef'd when this directory inode is being destroyed. This makes
+// OrderedChildren suitable for static directory entries as well.
//
// Must be initialized with Init before first use.
//
@@ -388,33 +370,63 @@ func (o *OrderedChildren) Init(opts OrderedChildrenOptions) {
// Destroy clears the children stored in o. It should be called by structs
// embedding OrderedChildren upon destruction, i.e. when their reference count
// reaches zero.
-func (o *OrderedChildren) Destroy() {
+func (o *OrderedChildren) Destroy(ctx context.Context) {
o.mu.Lock()
defer o.mu.Unlock()
+ // Drop the ref that o owns on the static inodes it holds.
+ for _, s := range o.set {
+ if s.static {
+ s.inode.DecRef(ctx)
+ }
+ }
o.order.Reset()
o.set = nil
}
-// Populate inserts children into this OrderedChildren, and d's dentry
-// cache. Populate returns the number of directories inserted, which the caller
+// Populate inserts static children into this OrderedChildren.
+// Populate returns the number of directories inserted, which the caller
// may use to update the link count for the parent directory.
//
-// Precondition: d must represent a directory inode. children must not contain
-// any conflicting entries already in o.
-func (o *OrderedChildren) Populate(d *Dentry, children map[string]*Dentry) uint32 {
+// Precondition:
+// * d must represent a directory inode.
+// * children must not contain any conflicting entries already in o.
+// * Caller must hold a reference on all inodes passed.
+//
+// Postcondition: Caller's references on inodes are transferred to o.
+func (o *OrderedChildren) Populate(children map[string]Inode) uint32 {
var links uint32
for name, child := range children {
- if child.isDir() {
+ if child.Mode().IsDir() {
links++
}
- if err := o.Insert(name, child); err != nil {
- panic(fmt.Sprintf("Collision when attempting to insert child %q (%+v) into %+v", name, child, d))
+ if err := o.insert(name, child, true); err != nil {
+ panic(fmt.Sprintf("Collision when attempting to insert child %q (%+v)", name, child))
}
- d.InsertChild(name, child)
}
return links
}
+// Lookup implements Inode.Lookup.
+func (o *OrderedChildren) Lookup(ctx context.Context, name string) (Inode, error) {
+ o.mu.RLock()
+ defer o.mu.RUnlock()
+
+ s, ok := o.set[name]
+ if !ok {
+ return nil, syserror.ENOENT
+ }
+
+ s.inode.IncRef() // This ref is passed to the dentry upon creation via Init.
+ return s.inode, nil
+}
+
+// IterDirents implements Inode.IterDirents.
+func (o *OrderedChildren) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (newOffset int64, err error) {
+ // All entries from OrderedChildren have already been handled in
+ // GenericDirectoryFD.IterDirents.
+ return offset, nil
+}
+
// HasChildren implements Inode.HasChildren.
func (o *OrderedChildren) HasChildren() bool {
o.mu.RLock()
@@ -422,17 +434,27 @@ func (o *OrderedChildren) HasChildren() bool {
return len(o.set) > 0
}
-// Insert inserts child into o. This ignores the writability of o, as this is
-// not part of the vfs.FilesystemImpl interface, and is a lower-level operation.
-func (o *OrderedChildren) Insert(name string, child *Dentry) error {
+// Insert inserts a dynamic child into o. This ignores the writability of o, as
+// this is not part of the vfs.FilesystemImpl interface, and is a lower-level operation.
+func (o *OrderedChildren) Insert(name string, child Inode) error {
+ return o.insert(name, child, false)
+}
+
+// insert inserts child into o.
+//
+// Precondition: Caller must be holding a ref on child if static is true.
+//
+// Postcondition: Caller's ref on child is transferred to o if static is true.
+func (o *OrderedChildren) insert(name string, child Inode, static bool) error {
o.mu.Lock()
defer o.mu.Unlock()
if _, ok := o.set[name]; ok {
return syserror.EEXIST
}
s := &slot{
- Name: name,
- Dentry: child,
+ name: name,
+ inode: child,
+ static: static,
}
o.order.PushBack(s)
o.set[name] = s
@@ -442,44 +464,49 @@ func (o *OrderedChildren) Insert(name string, child *Dentry) error {
// Precondition: caller must hold o.mu for writing.
func (o *OrderedChildren) removeLocked(name string) {
if s, ok := o.set[name]; ok {
+ if s.static {
+ panic(fmt.Sprintf("removeLocked called on a static inode: %v", s.inode))
+ }
delete(o.set, name)
o.order.Remove(s)
}
}
// Precondition: caller must hold o.mu for writing.
-func (o *OrderedChildren) replaceChildLocked(name string, new *Dentry) *Dentry {
+func (o *OrderedChildren) replaceChildLocked(ctx context.Context, name string, newI Inode) {
if s, ok := o.set[name]; ok {
+ if s.static {
+ panic(fmt.Sprintf("replacing a static inode: %v", s.inode))
+ }
+
// Existing slot with given name, simply replace the dentry.
- var old *Dentry
- old, s.Dentry = s.Dentry, new
- return old
+ s.inode = newI
}
// No existing slot with given name, create and hash new slot.
s := &slot{
- Name: name,
- Dentry: new,
+ name: name,
+ inode: newI,
+ static: false,
}
o.order.PushBack(s)
o.set[name] = s
- return nil
}
// Precondition: caller must hold o.mu for reading or writing.
-func (o *OrderedChildren) checkExistingLocked(name string, child *Dentry) error {
+func (o *OrderedChildren) checkExistingLocked(name string, child Inode) error {
s, ok := o.set[name]
if !ok {
return syserror.ENOENT
}
- if s.Dentry != child {
- panic(fmt.Sprintf("Dentry hashed into inode doesn't match what vfs thinks! OrderedChild: %+v, vfs: %+v", s.Dentry, child))
+ if s.inode != child {
+ panic(fmt.Sprintf("Inode doesn't match what kernfs thinks! OrderedChild: %+v, kernfs: %+v", s.inode, child))
}
return nil
}
// Unlink implements Inode.Unlink.
-func (o *OrderedChildren) Unlink(ctx context.Context, name string, child *Dentry) error {
+func (o *OrderedChildren) Unlink(ctx context.Context, name string, child Inode) error {
if !o.writable {
return syserror.EPERM
}
@@ -494,8 +521,8 @@ func (o *OrderedChildren) Unlink(ctx context.Context, name string, child *Dentry
return nil
}
-// Rmdir implements Inode.Rmdir.
-func (o *OrderedChildren) RmDir(ctx context.Context, name string, child *Dentry) error {
+// RmDir implements Inode.RmDir.
+func (o *OrderedChildren) RmDir(ctx context.Context, name string, child Inode) error {
// We're not responsible for checking that child is a directory, that it's
// empty, or updating any link counts; so this is the same as unlink.
return o.Unlink(ctx, name, child)
@@ -517,13 +544,13 @@ func (renameAcrossDifferentImplementationsError) Error() string {
// that will support Rename.
//
// Postcondition: reference on any replaced dentry transferred to caller.
-func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, child, dstDir *Dentry) (*Dentry, error) {
- dst, ok := dstDir.inode.(interface{}).(*OrderedChildren)
+func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, child, dstDir Inode) error {
+ dst, ok := dstDir.(interface{}).(*OrderedChildren)
if !ok {
- return nil, renameAcrossDifferentImplementationsError{}
+ return renameAcrossDifferentImplementationsError{}
}
if !o.writable || !dst.writable {
- return nil, syserror.EPERM
+ return syserror.EPERM
}
// Note: There's a potential deadlock below if concurrent calls to Rename
// refer to the same src and dst directories in reverse. We avoid any
@@ -536,12 +563,12 @@ func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, c
defer dst.mu.Unlock()
}
if err := o.checkExistingLocked(oldname, child); err != nil {
- return nil, err
+ return err
}
// TODO(gvisor.dev/issue/3027): Check sticky bit before removing.
- replaced := dst.replaceChildLocked(newname, child)
- return replaced, nil
+ dst.replaceChildLocked(ctx, newname, child)
+ return nil
}
// nthLocked returns an iterator to the nth child tracked by this object. The
@@ -576,11 +603,12 @@ func (InodeSymlink) Open(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry,
//
// +stateify savable
type StaticDirectory struct {
+ InodeAlwaysValid
InodeAttrs
InodeDirectoryNoNewChildren
- InodeNoDynamicLookup
InodeNoStatFS
InodeNotSymlink
+ InodeTemporary
OrderedChildren
StaticDirectoryRefs
@@ -591,19 +619,16 @@ type StaticDirectory struct {
var _ Inode = (*StaticDirectory)(nil)
// NewStaticDir creates a new static directory and returns its dentry.
-func NewStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode, children map[string]*Dentry, fdOpts GenericDirectoryFDOptions) *Dentry {
+func NewStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode, children map[string]Inode, fdOpts GenericDirectoryFDOptions) Inode {
inode := &StaticDirectory{}
inode.Init(creds, devMajor, devMinor, ino, perm, fdOpts)
inode.EnableLeakCheck()
- dentry := &Dentry{}
- dentry.Init(inode)
-
inode.OrderedChildren.Init(OrderedChildrenOptions{})
- links := inode.OrderedChildren.Populate(dentry, children)
+ links := inode.OrderedChildren.Populate(children)
inode.IncLinks(links)
- return dentry
+ return inode
}
// Init initializes StaticDirectory.
@@ -615,7 +640,7 @@ func (s *StaticDirectory) Init(creds *auth.Credentials, devMajor, devMinor uint3
s.InodeAttrs.Init(creds, devMajor, devMinor, ino, linux.ModeDirectory|perm)
}
-// Open implements kernfs.Inode.Open.
+// Open implements Inode.Open.
func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
fd, err := NewGenericDirectoryFD(rp.Mount(), d, &s.OrderedChildren, &s.locks, &opts, s.fdOpts)
if err != nil {
@@ -624,26 +649,36 @@ func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, d *De
return fd.VFSFileDescription(), nil
}
-// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
+// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
func (*StaticDirectory) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
return syserror.EPERM
}
-// DecRef implements kernfs.Inode.DecRef.
-func (s *StaticDirectory) DecRef(context.Context) {
- s.StaticDirectoryRefs.DecRef(s.Destroy)
+// DecRef implements Inode.DecRef.
+func (s *StaticDirectory) DecRef(ctx context.Context) {
+ s.StaticDirectoryRefs.DecRef(func() { s.Destroy(ctx) })
}
-// AlwaysValid partially implements kernfs.inodeDynamicLookup.
+// InodeAlwaysValid partially implements Inode.
//
// +stateify savable
-type AlwaysValid struct{}
+type InodeAlwaysValid struct{}
-// Valid implements kernfs.inodeDynamicLookup.Valid.
-func (*AlwaysValid) Valid(context.Context) bool {
+// Valid implements Inode.Valid.
+func (*InodeAlwaysValid) Valid(context.Context) bool {
return true
}
+// InodeTemporary partially implements Inode.
+//
+// +stateify savable
+type InodeTemporary struct{}
+
+// Keep implements Inode.Keep.
+func (*InodeTemporary) Keep() bool {
+ return false
+}
+
// InodeNoStatFS partially implements the Inode interface, where the client
// filesystem doesn't support statfs(2).
//
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index 6d3d79333..606081e68 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -29,12 +29,16 @@
//
// Reference Model:
//
-// Kernfs dentries represents named pointers to inodes. Dentries and inodes have
+// Kernfs dentries represent named pointers to inodes. Kernfs is solely
+// responsible for maintaining and modifying its dentry tree; inode
+// implementations cannot access the tree. Dentries and inodes have
// independent lifetimes and reference counts. A child dentry unconditionally
// holds a reference on its parent directory's dentry. A dentry also holds a
-// reference on the inode it points to. Multiple dentries can point to the same
-// inode (for example, in the case of hardlinks). File descriptors hold a
-// reference to the dentry they're opened on.
+// reference on the inode it points to (although that might not be the only
+// reference on the inode). Due to this, inodes can outlive the dentries that
+// point to them. Multiple dentries can point to the same inode (for example,
+// in the case of hardlinks). File descriptors hold a reference to the dentry
+// they're opened on.
//
// Dentries are guaranteed to exist while holding Filesystem.mu for
// reading. Dropping dentries require holding Filesystem.mu for writing. To
@@ -47,8 +51,8 @@
// kernfs.Dentry.dirMu
// vfs.VirtualFilesystem.mountMu
// vfs.Dentry.mu
-// kernfs.Filesystem.droppedDentriesMu
// (inode implementation locks, if any)
+// kernfs.Filesystem.droppedDentriesMu
package kernfs
import (
@@ -60,7 +64,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Filesystem mostly implements vfs.FilesystemImpl for a generic in-memory
@@ -95,7 +98,7 @@ type Filesystem struct {
// example:
//
// fs.mu.RLock()
- // fs.mu.processDeferredDecRefs()
+ // defer fs.processDeferredDecRefs()
// defer fs.mu.RUnlock()
// ...
// fs.deferDecRef(dentry)
@@ -108,8 +111,7 @@ type Filesystem struct {
// deferDecRef defers dropping a dentry ref until the next call to
// processDeferredDecRefs{,Locked}. See comment on Filesystem.mu.
-//
-// Precondition: d must not already be pending destruction.
+// This may be called while Filesystem.mu or Dentry.dirMu is locked.
func (fs *Filesystem) deferDecRef(d *Dentry) {
fs.droppedDentriesMu.Lock()
fs.droppedDentries = append(fs.droppedDentries, d)
@@ -118,17 +120,14 @@ func (fs *Filesystem) deferDecRef(d *Dentry) {
// processDeferredDecRefs calls vfs.Dentry.DecRef on all dentries in the
// droppedDentries list. See comment on Filesystem.mu.
+//
+// Precondition: Filesystem.mu or Dentry.dirMu must NOT be locked.
func (fs *Filesystem) processDeferredDecRefs(ctx context.Context) {
- fs.mu.Lock()
- fs.processDeferredDecRefsLocked(ctx)
- fs.mu.Unlock()
-}
-
-// Precondition: fs.mu must be held for writing.
-func (fs *Filesystem) processDeferredDecRefsLocked(ctx context.Context) {
fs.droppedDentriesMu.Lock()
for _, d := range fs.droppedDentries {
- d.DecRef(ctx)
+ // Defer the DecRef call so that we are not holding droppedDentriesMu
+ // when DecRef is called.
+ defer d.DecRef(ctx)
}
fs.droppedDentries = fs.droppedDentries[:0] // Keep slice memory for reuse.
fs.droppedDentriesMu.Unlock()
@@ -157,17 +156,19 @@ const (
//
// A kernfs dentry is similar to a dentry in a traditional filesystem: it's a
// named reference to an inode. A dentry generally lives as long as it's part of
-// a mounted filesystem tree. Kernfs doesn't cache dentries once all references
-// to them are removed. Dentries hold a single reference to the inode they point
+// a mounted filesystem tree. Kernfs drops dentries once all references to them
+// are dropped. Dentries hold a single reference to the inode they point
// to, and child dentries hold a reference on their parent.
//
// Must be initialized by Init prior to first use.
//
// +stateify savable
type Dentry struct {
+ vfsd vfs.Dentry
DentryRefs
- vfsd vfs.Dentry
+ // fs is the owning filesystem. fs is immutable.
+ fs *Filesystem
// flags caches useful information about the dentry from the inode. See the
// dflags* consts above. Must be accessed by atomic ops.
@@ -192,8 +193,9 @@ type Dentry struct {
// Precondition: Caller must hold a reference on inode.
//
// Postcondition: Caller's reference on inode is transferred to the dentry.
-func (d *Dentry) Init(inode Inode) {
+func (d *Dentry) Init(fs *Filesystem, inode Inode) {
d.vfsd.Init(d)
+ d.fs = fs
d.inode = inode
ftype := inode.Mode().FileType()
if ftype == linux.ModeDirectory {
@@ -222,14 +224,28 @@ func (d *Dentry) isSymlink() bool {
// DecRef implements vfs.DentryImpl.DecRef.
func (d *Dentry) DecRef(ctx context.Context) {
- // Before the destructor is called, Dentry must be removed from VFS' dentry cache.
+ decRefParent := false
+ d.fs.mu.Lock()
d.DentryRefs.DecRef(func() {
d.inode.DecRef(ctx) // IncRef from Init.
d.inode = nil
if d.parent != nil {
- d.parent.DecRef(ctx) // IncRef from Dentry.InsertChild.
+ // We will DecRef d.parent once all locks are dropped.
+ decRefParent = true
+ d.parent.dirMu.Lock()
+ // Remove d from parent.children. It might already have been
+ // removed due to invalidation.
+ if _, ok := d.parent.children[d.name]; ok {
+ delete(d.parent.children, d.name)
+ d.fs.VFSFilesystem().VirtualFilesystem().InvalidateDentry(ctx, d.VFSDentry())
+ }
+ d.parent.dirMu.Unlock()
}
})
+ d.fs.mu.Unlock()
+ if decRefParent {
+ d.parent.DecRef(ctx) // IncRef from Dentry.insertChild.
+ }
}
// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent.
@@ -247,26 +263,26 @@ func (d *Dentry) Watches() *vfs.Watches {
// OnZeroWatches implements vfs.Dentry.OnZeroWatches.
func (d *Dentry) OnZeroWatches(context.Context) {}
-// InsertChild inserts child into the vfs dentry cache with the given name under
+// insertChild inserts child into the vfs dentry cache with the given name under
// this dentry. This does not update the directory inode, so calling this on its
// own isn't sufficient to insert a child into a directory.
//
// Precondition: d must represent a directory inode.
-func (d *Dentry) InsertChild(name string, child *Dentry) {
+func (d *Dentry) insertChild(name string, child *Dentry) {
d.dirMu.Lock()
- d.InsertChildLocked(name, child)
+ d.insertChildLocked(name, child)
d.dirMu.Unlock()
}
-// InsertChildLocked is equivalent to InsertChild, with additional
+// insertChildLocked is equivalent to insertChild, with additional
// preconditions.
//
// Preconditions:
// * d must represent a directory inode.
// * d.dirMu must be locked.
-func (d *Dentry) InsertChildLocked(name string, child *Dentry) {
+func (d *Dentry) insertChildLocked(name string, child *Dentry) {
if !d.isDir() {
- panic(fmt.Sprintf("InsertChildLocked called on non-directory Dentry: %+v.", d))
+ panic(fmt.Sprintf("insertChildLocked called on non-directory Dentry: %+v.", d))
}
d.IncRef() // DecRef in child's Dentry.destroy.
child.parent = d
@@ -277,36 +293,6 @@ func (d *Dentry) InsertChildLocked(name string, child *Dentry) {
d.children[name] = child
}
-// RemoveChild removes child from the vfs dentry cache. This does not update the
-// directory inode or modify the inode to be unlinked. So calling this on its own
-// isn't sufficient to remove a child from a directory.
-//
-// Precondition: d must represent a directory inode.
-func (d *Dentry) RemoveChild(name string, child *Dentry) error {
- d.dirMu.Lock()
- defer d.dirMu.Unlock()
- return d.RemoveChildLocked(name, child)
-}
-
-// RemoveChildLocked is equivalent to RemoveChild, with additional
-// preconditions.
-//
-// Precondition: d.dirMu must be locked.
-func (d *Dentry) RemoveChildLocked(name string, child *Dentry) error {
- if !d.isDir() {
- panic(fmt.Sprintf("RemoveChild called on non-directory Dentry: %+v.", d))
- }
- c, ok := d.children[name]
- if !ok {
- return syserror.ENOENT
- }
- if c != child {
- panic(fmt.Sprintf("Dentry hashed into inode doesn't match what vfs thinks! Child: %+v, vfs: %+v", c, child))
- }
- delete(d.children, name)
- return nil
-}
-
// Inode returns the dentry's inode.
func (d *Dentry) Inode() Inode {
return d.inode
@@ -348,11 +334,6 @@ type Inode interface {
// a blanket implementation for all non-directory inodes.
inodeDirectory
- // Method for inodes that represent dynamic directories and their
- // children. InodeNoDynamicLookup provides a blanket implementation for all
- // non-dynamic-directory inodes.
- inodeDynamicLookup
-
// Open creates a file description for the filesystem object represented by
// this inode. The returned file description should hold a reference on the
// dentry for its lifetime.
@@ -365,6 +346,14 @@ type Inode interface {
// corresponds to vfs.FilesystemImpl.StatFSAt. If the client filesystem
// doesn't support statfs(2), this should return ENOSYS.
StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error)
+
+ // Keep indicates whether the dentry created after Inode.Lookup should be
+ // kept in the kernfs dentry tree.
+ Keep() bool
+
+ // Valid should return true if this inode is still valid, or false if it
+ // needs to be resolved again by a call to Lookup.
+ Valid(ctx context.Context) bool
}
type inodeRefs interface {
@@ -397,8 +386,8 @@ type inodeMetadata interface {
// Precondition: All methods in this interface may only be called on directory
// inodes.
type inodeDirectory interface {
- // The New{File,Dir,Node,Symlink} methods below should return a new inode
- // hashed into this inode.
+ // The New{File,Dir,Node,Link,Symlink} methods below should return a new inode
+ // that will be hashed into the dentry tree.
//
// These inode constructors are inode-level operations rather than
// filesystem-level operations to allow client filesystems to mix different
@@ -409,60 +398,54 @@ type inodeDirectory interface {
HasChildren() bool
// NewFile creates a new regular file inode.
- NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*Dentry, error)
+ NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (Inode, error)
// NewDir creates a new directory inode.
- NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*Dentry, error)
+ NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (Inode, error)
// NewLink creates a new hardlink to a specified inode in this
// directory. Implementations should create a new kernfs Dentry pointing to
// target, and update target's link count.
- NewLink(ctx context.Context, name string, target Inode) (*Dentry, error)
+ NewLink(ctx context.Context, name string, target Inode) (Inode, error)
// NewSymlink creates a new symbolic link inode.
- NewSymlink(ctx context.Context, name, target string) (*Dentry, error)
+ NewSymlink(ctx context.Context, name, target string) (Inode, error)
// NewNode creates a new filesystem node for a mknod syscall.
- NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*Dentry, error)
+ NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (Inode, error)
// Unlink removes a child dentry from this directory inode.
- Unlink(ctx context.Context, name string, child *Dentry) error
+ Unlink(ctx context.Context, name string, child Inode) error
// RmDir removes an empty child directory from this directory
// inode. Implementations must update the parent directory's link count,
// if required. Implementations are not responsible for checking that child
// is a directory, checking for an empty directory.
- RmDir(ctx context.Context, name string, child *Dentry) error
+ RmDir(ctx context.Context, name string, child Inode) error
// Rename is called on the source directory containing an inode being
// renamed. child should point to the resolved child in the source
- // directory. If Rename replaces a dentry in the destination directory, it
- // should return the replaced dentry or nil otherwise.
+ // directory.
//
// Precondition: Caller must serialize concurrent calls to Rename.
- Rename(ctx context.Context, oldname, newname string, child, dstDir *Dentry) (replaced *Dentry, err error)
-}
+ Rename(ctx context.Context, oldname, newname string, child, dstDir Inode) error
-type inodeDynamicLookup interface {
- // Lookup should return an appropriate dentry if name should resolve to a
- // child of this dynamic directory inode. This gives the directory an
- // opportunity on every lookup to resolve additional entries that aren't
- // hashed into the directory. This is only called when the inode is a
- // directory. If the inode is not a directory, or if the directory only
- // contains a static set of children, the implementer can unconditionally
- // return an appropriate error (ENOTDIR and ENOENT respectively).
+ // Lookup should return an appropriate inode if name should resolve to a
+ // child of this directory inode. This gives the directory an opportunity
+ // on every lookup to resolve additional entries. This is only called when
+ // the inode is a directory.
//
- // The child returned by Lookup will be hashed into the VFS dentry tree. Its
- // lifetime can be controlled by the filesystem implementation with an
- // appropriate implementation of Valid.
+ // The child returned by Lookup will be hashed into the VFS dentry tree,
+ // at least for the duration of the current FS operation.
//
- // Lookup returns the child with an extra reference and the caller owns this
- // reference.
- Lookup(ctx context.Context, name string) (*Dentry, error)
-
- // Valid should return true if this inode is still valid, or needs to
- // be resolved again by a call to Lookup.
- Valid(ctx context.Context) bool
+ // Lookup must return the child with an extra reference whose ownership is
+ // transferred to the dentry that is created to point to that inode. If
+ // Inode.Keep returns false, that new dentry will be dropped at the end of
+ // the current filesystem operation (before returning back to the VFS
+ // layer) if no other ref is picked on that dentry. If Inode.Keep returns
+ // true, then the dentry will be cached into the dentry tree until it is
+ // Unlink'd or RmDir'd.
+ Lookup(ctx context.Context, name string) (Inode, error)
// IterDirents is used to iterate over dynamically created entries. It invokes
// cb on each entry in the directory represented by the Inode.
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go b/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go
index 7d9420725..f87782ee1 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go
@@ -182,18 +182,22 @@ func (i *InodeNoopRefCount) StateTypeName() string {
}
func (i *InodeNoopRefCount) StateFields() []string {
- return []string{}
+ return []string{
+ "InodeTemporary",
+ }
}
func (i *InodeNoopRefCount) beforeSave() {}
func (i *InodeNoopRefCount) StateSave(stateSinkObject state.Sink) {
i.beforeSave()
+ stateSinkObject.Save(0, &i.InodeTemporary)
}
func (i *InodeNoopRefCount) afterLoad() {}
func (i *InodeNoopRefCount) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &i.InodeTemporary)
}
func (i *InodeDirectoryNoNewChildren) StateTypeName() string {
@@ -220,37 +224,22 @@ func (i *InodeNotDirectory) StateTypeName() string {
}
func (i *InodeNotDirectory) StateFields() []string {
- return []string{}
+ return []string{
+ "InodeAlwaysValid",
+ }
}
func (i *InodeNotDirectory) beforeSave() {}
func (i *InodeNotDirectory) StateSave(stateSinkObject state.Sink) {
i.beforeSave()
+ stateSinkObject.Save(0, &i.InodeAlwaysValid)
}
func (i *InodeNotDirectory) afterLoad() {}
func (i *InodeNotDirectory) StateLoad(stateSourceObject state.Source) {
-}
-
-func (i *InodeNoDynamicLookup) StateTypeName() string {
- return "pkg/sentry/fsimpl/kernfs.InodeNoDynamicLookup"
-}
-
-func (i *InodeNoDynamicLookup) StateFields() []string {
- return []string{}
-}
-
-func (i *InodeNoDynamicLookup) beforeSave() {}
-
-func (i *InodeNoDynamicLookup) StateSave(stateSinkObject state.Sink) {
- i.beforeSave()
-}
-
-func (i *InodeNoDynamicLookup) afterLoad() {}
-
-func (i *InodeNoDynamicLookup) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &i.InodeAlwaysValid)
}
func (i *InodeNotSymlink) StateTypeName() string {
@@ -319,8 +308,9 @@ func (s *slot) StateTypeName() string {
func (s *slot) StateFields() []string {
return []string{
- "Name",
- "Dentry",
+ "name",
+ "inode",
+ "static",
"slotEntry",
}
}
@@ -329,17 +319,19 @@ func (s *slot) beforeSave() {}
func (s *slot) StateSave(stateSinkObject state.Sink) {
s.beforeSave()
- stateSinkObject.Save(0, &s.Name)
- stateSinkObject.Save(1, &s.Dentry)
- stateSinkObject.Save(2, &s.slotEntry)
+ stateSinkObject.Save(0, &s.name)
+ stateSinkObject.Save(1, &s.inode)
+ stateSinkObject.Save(2, &s.static)
+ stateSinkObject.Save(3, &s.slotEntry)
}
func (s *slot) afterLoad() {}
func (s *slot) StateLoad(stateSourceObject state.Source) {
- stateSourceObject.Load(0, &s.Name)
- stateSourceObject.Load(1, &s.Dentry)
- stateSourceObject.Load(2, &s.slotEntry)
+ stateSourceObject.Load(0, &s.name)
+ stateSourceObject.Load(1, &s.inode)
+ stateSourceObject.Load(2, &s.static)
+ stateSourceObject.Load(3, &s.slotEntry)
}
func (o *OrderedChildrenOptions) StateTypeName() string {
@@ -442,11 +434,12 @@ func (s *StaticDirectory) StateTypeName() string {
func (s *StaticDirectory) StateFields() []string {
return []string{
+ "InodeAlwaysValid",
"InodeAttrs",
"InodeDirectoryNoNewChildren",
- "InodeNoDynamicLookup",
"InodeNoStatFS",
"InodeNotSymlink",
+ "InodeTemporary",
"OrderedChildren",
"StaticDirectoryRefs",
"locks",
@@ -458,48 +451,69 @@ func (s *StaticDirectory) beforeSave() {}
func (s *StaticDirectory) StateSave(stateSinkObject state.Sink) {
s.beforeSave()
- stateSinkObject.Save(0, &s.InodeAttrs)
- stateSinkObject.Save(1, &s.InodeDirectoryNoNewChildren)
- stateSinkObject.Save(2, &s.InodeNoDynamicLookup)
+ stateSinkObject.Save(0, &s.InodeAlwaysValid)
+ stateSinkObject.Save(1, &s.InodeAttrs)
+ stateSinkObject.Save(2, &s.InodeDirectoryNoNewChildren)
stateSinkObject.Save(3, &s.InodeNoStatFS)
stateSinkObject.Save(4, &s.InodeNotSymlink)
- stateSinkObject.Save(5, &s.OrderedChildren)
- stateSinkObject.Save(6, &s.StaticDirectoryRefs)
- stateSinkObject.Save(7, &s.locks)
- stateSinkObject.Save(8, &s.fdOpts)
+ stateSinkObject.Save(5, &s.InodeTemporary)
+ stateSinkObject.Save(6, &s.OrderedChildren)
+ stateSinkObject.Save(7, &s.StaticDirectoryRefs)
+ stateSinkObject.Save(8, &s.locks)
+ stateSinkObject.Save(9, &s.fdOpts)
}
func (s *StaticDirectory) afterLoad() {}
func (s *StaticDirectory) StateLoad(stateSourceObject state.Source) {
- stateSourceObject.Load(0, &s.InodeAttrs)
- stateSourceObject.Load(1, &s.InodeDirectoryNoNewChildren)
- stateSourceObject.Load(2, &s.InodeNoDynamicLookup)
+ stateSourceObject.Load(0, &s.InodeAlwaysValid)
+ stateSourceObject.Load(1, &s.InodeAttrs)
+ stateSourceObject.Load(2, &s.InodeDirectoryNoNewChildren)
stateSourceObject.Load(3, &s.InodeNoStatFS)
stateSourceObject.Load(4, &s.InodeNotSymlink)
- stateSourceObject.Load(5, &s.OrderedChildren)
- stateSourceObject.Load(6, &s.StaticDirectoryRefs)
- stateSourceObject.Load(7, &s.locks)
- stateSourceObject.Load(8, &s.fdOpts)
+ stateSourceObject.Load(5, &s.InodeTemporary)
+ stateSourceObject.Load(6, &s.OrderedChildren)
+ stateSourceObject.Load(7, &s.StaticDirectoryRefs)
+ stateSourceObject.Load(8, &s.locks)
+ stateSourceObject.Load(9, &s.fdOpts)
}
-func (a *AlwaysValid) StateTypeName() string {
- return "pkg/sentry/fsimpl/kernfs.AlwaysValid"
+func (i *InodeAlwaysValid) StateTypeName() string {
+ return "pkg/sentry/fsimpl/kernfs.InodeAlwaysValid"
}
-func (a *AlwaysValid) StateFields() []string {
+func (i *InodeAlwaysValid) StateFields() []string {
return []string{}
}
-func (a *AlwaysValid) beforeSave() {}
+func (i *InodeAlwaysValid) beforeSave() {}
-func (a *AlwaysValid) StateSave(stateSinkObject state.Sink) {
- a.beforeSave()
+func (i *InodeAlwaysValid) StateSave(stateSinkObject state.Sink) {
+ i.beforeSave()
}
-func (a *AlwaysValid) afterLoad() {}
+func (i *InodeAlwaysValid) afterLoad() {}
-func (a *AlwaysValid) StateLoad(stateSourceObject state.Source) {
+func (i *InodeAlwaysValid) StateLoad(stateSourceObject state.Source) {
+}
+
+func (i *InodeTemporary) StateTypeName() string {
+ return "pkg/sentry/fsimpl/kernfs.InodeTemporary"
+}
+
+func (i *InodeTemporary) StateFields() []string {
+ return []string{}
+}
+
+func (i *InodeTemporary) beforeSave() {}
+
+func (i *InodeTemporary) StateSave(stateSinkObject state.Sink) {
+ i.beforeSave()
+}
+
+func (i *InodeTemporary) afterLoad() {}
+
+func (i *InodeTemporary) StateLoad(stateSourceObject state.Source) {
}
func (i *InodeNoStatFS) StateTypeName() string {
@@ -556,8 +570,9 @@ func (d *Dentry) StateTypeName() string {
func (d *Dentry) StateFields() []string {
return []string{
- "DentryRefs",
"vfsd",
+ "DentryRefs",
+ "fs",
"flags",
"parent",
"name",
@@ -570,25 +585,27 @@ func (d *Dentry) beforeSave() {}
func (d *Dentry) StateSave(stateSinkObject state.Sink) {
d.beforeSave()
- stateSinkObject.Save(0, &d.DentryRefs)
- stateSinkObject.Save(1, &d.vfsd)
- stateSinkObject.Save(2, &d.flags)
- stateSinkObject.Save(3, &d.parent)
- stateSinkObject.Save(4, &d.name)
- stateSinkObject.Save(5, &d.children)
- stateSinkObject.Save(6, &d.inode)
+ stateSinkObject.Save(0, &d.vfsd)
+ stateSinkObject.Save(1, &d.DentryRefs)
+ stateSinkObject.Save(2, &d.fs)
+ stateSinkObject.Save(3, &d.flags)
+ stateSinkObject.Save(4, &d.parent)
+ stateSinkObject.Save(5, &d.name)
+ stateSinkObject.Save(6, &d.children)
+ stateSinkObject.Save(7, &d.inode)
}
func (d *Dentry) afterLoad() {}
func (d *Dentry) StateLoad(stateSourceObject state.Source) {
- stateSourceObject.Load(0, &d.DentryRefs)
- stateSourceObject.Load(1, &d.vfsd)
- stateSourceObject.Load(2, &d.flags)
- stateSourceObject.Load(3, &d.parent)
- stateSourceObject.Load(4, &d.name)
- stateSourceObject.Load(5, &d.children)
- stateSourceObject.Load(6, &d.inode)
+ stateSourceObject.Load(0, &d.vfsd)
+ stateSourceObject.Load(1, &d.DentryRefs)
+ stateSourceObject.Load(2, &d.fs)
+ stateSourceObject.Load(3, &d.flags)
+ stateSourceObject.Load(4, &d.parent)
+ stateSourceObject.Load(5, &d.name)
+ stateSourceObject.Load(6, &d.children)
+ stateSourceObject.Load(7, &d.inode)
}
func (l *slotList) StateTypeName() string {
@@ -707,12 +724,12 @@ func (dir *syntheticDirectory) StateTypeName() string {
func (dir *syntheticDirectory) StateFields() []string {
return []string{
+ "InodeAlwaysValid",
"InodeAttrs",
"InodeNoStatFS",
- "InodeNoopRefCount",
- "InodeNoDynamicLookup",
"InodeNotSymlink",
"OrderedChildren",
+ "syntheticDirectoryRefs",
"locks",
}
}
@@ -721,27 +738,50 @@ func (dir *syntheticDirectory) beforeSave() {}
func (dir *syntheticDirectory) StateSave(stateSinkObject state.Sink) {
dir.beforeSave()
- stateSinkObject.Save(0, &dir.InodeAttrs)
- stateSinkObject.Save(1, &dir.InodeNoStatFS)
- stateSinkObject.Save(2, &dir.InodeNoopRefCount)
- stateSinkObject.Save(3, &dir.InodeNoDynamicLookup)
- stateSinkObject.Save(4, &dir.InodeNotSymlink)
- stateSinkObject.Save(5, &dir.OrderedChildren)
+ stateSinkObject.Save(0, &dir.InodeAlwaysValid)
+ stateSinkObject.Save(1, &dir.InodeAttrs)
+ stateSinkObject.Save(2, &dir.InodeNoStatFS)
+ stateSinkObject.Save(3, &dir.InodeNotSymlink)
+ stateSinkObject.Save(4, &dir.OrderedChildren)
+ stateSinkObject.Save(5, &dir.syntheticDirectoryRefs)
stateSinkObject.Save(6, &dir.locks)
}
func (dir *syntheticDirectory) afterLoad() {}
func (dir *syntheticDirectory) StateLoad(stateSourceObject state.Source) {
- stateSourceObject.Load(0, &dir.InodeAttrs)
- stateSourceObject.Load(1, &dir.InodeNoStatFS)
- stateSourceObject.Load(2, &dir.InodeNoopRefCount)
- stateSourceObject.Load(3, &dir.InodeNoDynamicLookup)
- stateSourceObject.Load(4, &dir.InodeNotSymlink)
- stateSourceObject.Load(5, &dir.OrderedChildren)
+ stateSourceObject.Load(0, &dir.InodeAlwaysValid)
+ stateSourceObject.Load(1, &dir.InodeAttrs)
+ stateSourceObject.Load(2, &dir.InodeNoStatFS)
+ stateSourceObject.Load(3, &dir.InodeNotSymlink)
+ stateSourceObject.Load(4, &dir.OrderedChildren)
+ stateSourceObject.Load(5, &dir.syntheticDirectoryRefs)
stateSourceObject.Load(6, &dir.locks)
}
+func (r *syntheticDirectoryRefs) StateTypeName() string {
+ return "pkg/sentry/fsimpl/kernfs.syntheticDirectoryRefs"
+}
+
+func (r *syntheticDirectoryRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (r *syntheticDirectoryRefs) beforeSave() {}
+
+func (r *syntheticDirectoryRefs) StateSave(stateSinkObject state.Sink) {
+ r.beforeSave()
+ stateSinkObject.Save(0, &r.refCount)
+}
+
+func (r *syntheticDirectoryRefs) afterLoad() {}
+
+func (r *syntheticDirectoryRefs) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &r.refCount)
+}
+
func init() {
state.Register((*DentryRefs)(nil))
state.Register((*DynamicBytesFile)(nil))
@@ -752,7 +792,6 @@ func init() {
state.Register((*InodeNoopRefCount)(nil))
state.Register((*InodeDirectoryNoNewChildren)(nil))
state.Register((*InodeNotDirectory)(nil))
- state.Register((*InodeNoDynamicLookup)(nil))
state.Register((*InodeNotSymlink)(nil))
state.Register((*InodeAttrs)(nil))
state.Register((*slot)(nil))
@@ -761,7 +800,8 @@ func init() {
state.Register((*renameAcrossDifferentImplementationsError)(nil))
state.Register((*InodeSymlink)(nil))
state.Register((*StaticDirectory)(nil))
- state.Register((*AlwaysValid)(nil))
+ state.Register((*InodeAlwaysValid)(nil))
+ state.Register((*InodeTemporary)(nil))
state.Register((*InodeNoStatFS)(nil))
state.Register((*Filesystem)(nil))
state.Register((*Dentry)(nil))
@@ -770,4 +810,5 @@ func init() {
state.Register((*StaticDirectoryRefs)(nil))
state.Register((*StaticSymlink)(nil))
state.Register((*syntheticDirectory)(nil))
+ state.Register((*syntheticDirectoryRefs)(nil))
}
diff --git a/pkg/sentry/fsimpl/kernfs/symlink.go b/pkg/sentry/fsimpl/kernfs/symlink.go
index 58a93eaac..934cc6c9e 100644
--- a/pkg/sentry/fsimpl/kernfs/symlink.go
+++ b/pkg/sentry/fsimpl/kernfs/symlink.go
@@ -38,13 +38,10 @@ type StaticSymlink struct {
var _ Inode = (*StaticSymlink)(nil)
// NewStaticSymlink creates a new symlink file pointing to 'target'.
-func NewStaticSymlink(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, target string) *Dentry {
+func NewStaticSymlink(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, target string) Inode {
inode := &StaticSymlink{}
inode.Init(creds, devMajor, devMinor, ino, target)
-
- d := &Dentry{}
- d.Init(inode)
- return d
+ return inode
}
// Init initializes the instance.
diff --git a/pkg/sentry/fsimpl/kernfs/synthetic_directory.go b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go
index ea7f073eb..d0ed17b18 100644
--- a/pkg/sentry/fsimpl/kernfs/synthetic_directory.go
+++ b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go
@@ -29,24 +29,22 @@ import (
//
// +stateify savable
type syntheticDirectory struct {
+ InodeAlwaysValid
InodeAttrs
InodeNoStatFS
- InodeNoopRefCount
- InodeNoDynamicLookup
InodeNotSymlink
OrderedChildren
+ syntheticDirectoryRefs
locks vfs.FileLocks
}
var _ Inode = (*syntheticDirectory)(nil)
-func newSyntheticDirectory(creds *auth.Credentials, perm linux.FileMode) *Dentry {
+func newSyntheticDirectory(creds *auth.Credentials, perm linux.FileMode) Inode {
inode := &syntheticDirectory{}
inode.Init(creds, 0 /* devMajor */, 0 /* devMinor */, 0 /* ino */, perm)
- d := &Dentry{}
- d.Init(inode)
- return d
+ return inode
}
func (dir *syntheticDirectory) Init(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode) {
@@ -69,34 +67,46 @@ func (dir *syntheticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath,
}
// NewFile implements Inode.NewFile.
-func (dir *syntheticDirectory) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*Dentry, error) {
+func (dir *syntheticDirectory) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (Inode, error) {
return nil, syserror.EPERM
}
// NewDir implements Inode.NewDir.
-func (dir *syntheticDirectory) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*Dentry, error) {
+func (dir *syntheticDirectory) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (Inode, error) {
if !opts.ForSyntheticMountpoint {
return nil, syserror.EPERM
}
- subdird := newSyntheticDirectory(auth.CredentialsFromContext(ctx), opts.Mode&linux.PermissionsMask)
- if err := dir.OrderedChildren.Insert(name, subdird); err != nil {
- subdird.DecRef(ctx)
+ subdirI := newSyntheticDirectory(auth.CredentialsFromContext(ctx), opts.Mode&linux.PermissionsMask)
+ if err := dir.OrderedChildren.Insert(name, subdirI); err != nil {
+ subdirI.DecRef(ctx)
return nil, err
}
- return subdird, nil
+ return subdirI, nil
}
// NewLink implements Inode.NewLink.
-func (dir *syntheticDirectory) NewLink(ctx context.Context, name string, target Inode) (*Dentry, error) {
+func (dir *syntheticDirectory) NewLink(ctx context.Context, name string, target Inode) (Inode, error) {
return nil, syserror.EPERM
}
// NewSymlink implements Inode.NewSymlink.
-func (dir *syntheticDirectory) NewSymlink(ctx context.Context, name, target string) (*Dentry, error) {
+func (dir *syntheticDirectory) NewSymlink(ctx context.Context, name, target string) (Inode, error) {
return nil, syserror.EPERM
}
// NewNode implements Inode.NewNode.
-func (dir *syntheticDirectory) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*Dentry, error) {
+func (dir *syntheticDirectory) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (Inode, error) {
return nil, syserror.EPERM
}
+
+// DecRef implements Inode.DecRef. It drops one reference and, if that was the last one, destroys dir via dir.Destroy.
+func (dir *syntheticDirectory) DecRef(ctx context.Context) {
+ dir.syntheticDirectoryRefs.DecRef(func() { dir.Destroy(ctx) })
+}
+
+// Keep implements Inode.Keep. The result is redundant here because synthetic
+// directory inodes are never created via Lookup and are always valid. It returns
+// true since these inodes are not temporary and should only be removed on RmDir.
+func (dir *syntheticDirectory) Keep() bool {
+ return true
+}
diff --git a/pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go b/pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go
new file mode 100644
index 000000000..28d556b42
--- /dev/null
+++ b/pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go
@@ -0,0 +1,118 @@
+package kernfs
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// syntheticDirectoryownerType is passed to %T in log and panic messages to name
+// the owning type. It is a pointer so that no syntheticDirectory value is copied.
+var syntheticDirectoryownerType *syntheticDirectory
+
+// syntheticDirectoryRefs implements refs.RefCounter. It keeps a reference count
+// using atomic operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value syntheticDirectoryRefs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of syntheticDirectoryRefs.
+//
+// +stateify savable
+type syntheticDirectoryRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+// finalize logs a warning if r still holds references; it does nothing when leak checking is disabled.
+func (r *syntheticDirectoryRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, syntheticDirectoryownerType, n)
+ }
+}
+
+// EnableLeakCheck installs finalize as r's finalizer, unless leak checking is off, so leaks are reported when r is garbage collected.
+func (r *syntheticDirectoryRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*syntheticDirectoryRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *syntheticDirectoryRefs) ReadRefs() int64 {
+ // refCount stores one less than the number of references, so add the implicit one back.
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef. It panics if the count is not positive after the increment.
+//
+//go:nosplit
+func (r *syntheticDirectoryRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, syntheticDirectoryownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef. It reports whether a reference was acquired.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *syntheticDirectoryRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+ // The real (low 32-bit) count is negative, so the last reference is already gone; undo the speculative reference.
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+ // Convert the speculative reference into a real one in a single atomic add.
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *syntheticDirectoryRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, syntheticDirectoryownerType))
+
+ case v == -1:
+ // The last reference was dropped; run the destructor if one was supplied.
+ if destroy != nil {
+ destroy()
+ }
+ }
+}