From fa995da840f129d6ced03d66f4f7a747ef4df0bf Mon Sep 17 00:00:00 2001 From: Dean Deng Date: Sun, 27 Sep 2020 15:32:29 -0700 Subject: Fix kernfs race condition. Do not release dirMu between checking whether to create a child and actually inserting it. Also fixes a bug in fusefs which was causing it to deadlock under the new lock ordering. We do not need to call kernfs.Dentry.InsertChild from newEntry because it will always be called at the kernfs filesystem layer. Updates #1193. PiperOrigin-RevId: 334049264 --- pkg/sentry/fsimpl/kernfs/filesystem.go | 24 +++++++++++++++++++----- pkg/sentry/fsimpl/kernfs/kernfs.go | 21 +++++++++++++-------- 2 files changed, 32 insertions(+), 13 deletions(-) (limited to 'pkg/sentry') diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 11dc24b07..5cc1c4281 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -207,7 +207,6 @@ func checkCreateLocked(ctx context.Context, rp *vfs.ResolvingPath, parent *Dentr if len(pc) > linux.NAME_MAX { return "", syserror.ENAMETOOLONG } - // FIXME(gvisor.dev/issue/1193): Data race due to not holding dirMu. if _, ok := parent.children[pc]; ok { return "", syserror.EEXIST } @@ -305,6 +304,9 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. if err != nil { return err } + + parent.dirMu.Lock() + defer parent.dirMu.Unlock() pc, err := checkCreateLocked(ctx, rp, parent) if err != nil { return err @@ -326,7 +328,7 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. if err != nil { return err } - parent.InsertChild(pc, child) + parent.InsertChildLocked(pc, child) return nil } @@ -342,6 +344,9 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v if err != nil { return err } + + parent.dirMu.Lock() + defer parent.dirMu.Unlock() pc, err := checkCreateLocked(ctx, rp, parent) if err != nil { return err @@ -357,7 +362,7 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v } child = newSyntheticDirectory(rp.Credentials(), opts.Mode) } - parent.InsertChild(pc, child) + parent.InsertChildLocked(pc, child) return nil } @@ -373,6 +378,9 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v if err != nil { return err } + + parent.dirMu.Lock() + defer parent.dirMu.Unlock() pc, err := checkCreateLocked(ctx, rp, parent) if err != nil { return err @@ -385,7 +393,7 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v if err != nil { return err } - parent.InsertChild(pc, newD) + parent.InsertChildLocked(pc, newD) return nil } @@ -483,6 +491,9 @@ afterTrailingSymlink: if err != nil { return nil, err } + // FIXME(gvisor.dev/issue/1193): Race between checking existence with + // fs.stepExistingLocked and parent.InsertChild. If possible, we should hold + // dirMu from one to the other. parent.InsertChild(pc, child) child.inode.IncRef() defer child.inode.DecRef(ctx) @@ -739,6 +750,9 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ if err != nil { return err } + parent.dirMu.Lock() + defer parent.dirMu.Unlock() + pc, err := checkCreateLocked(ctx, rp, parent) if err != nil { return err @@ -751,7 +765,7 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ if err != nil { return err } - parent.InsertChild(pc, child) + parent.InsertChildLocked(pc, child) return nil } diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index f543a2065..6d3d79333 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -29,7 +29,7 @@ // // Reference Model: // -// Kernfs dentries represents named pointers to inodes. Dentries and inode have +// Kernfs dentries represents named pointers to inodes. Dentries and inodes have // independent lifetimes and reference counts. A child dentry unconditionally // holds a reference on its parent directory's dentry. A dentry also holds a // reference on the inode it points to. Multiple dentries can point to the same @@ -177,6 +177,10 @@ type Dentry struct { name string // dirMu protects children and the names of child Dentries. + // + // Note that holding fs.mu for writing is not sufficient; + // revalidateChildLocked(), which is a very hot path, may modify children with + // fs.mu acquired for reading only. dirMu sync.Mutex `state:"nosave"` children map[string]*Dentry @@ -244,9 +248,8 @@ func (d *Dentry) Watches() *vfs.Watches { func (d *Dentry) OnZeroWatches(context.Context) {} // InsertChild inserts child into the vfs dentry cache with the given name under -// this dentry. This does not update the directory inode, so calling this on -// its own isn't sufficient to insert a child into a directory. InsertChild -// updates the link count on d if required. +// this dentry. This does not update the directory inode, so calling this on its +// own isn't sufficient to insert a child into a directory. // // Precondition: d must represent a directory inode. func (d *Dentry) InsertChild(name string, child *Dentry) { @@ -258,10 +261,12 @@ func (d *Dentry) InsertChild(name string, child *Dentry) { // InsertChildLocked is equivalent to InsertChild, with additional // preconditions. // -// Precondition: d.dirMu must be locked. +// Preconditions: +// * d must represent a directory inode. +// * d.dirMu must be locked. func (d *Dentry) InsertChildLocked(name string, child *Dentry) { if !d.isDir() { - panic(fmt.Sprintf("InsertChild called on non-directory Dentry: %+v.", d)) + panic(fmt.Sprintf("InsertChildLocked called on non-directory Dentry: %+v.", d)) } d.IncRef() // DecRef in child's Dentry.destroy. child.parent = d @@ -322,7 +327,6 @@ func (d *Dentry) Inode() Inode { // // - Checking that dentries passed to methods are of the appropriate file type. // - Checking permissions. -// - Updating link and reference counts. // // Specific responsibilities of implementations are documented below. type Inode interface { @@ -332,7 +336,8 @@ type Inode interface { inodeRefs // Methods related to node metadata. A generic implementation is provided by - // InodeAttrs. + // InodeAttrs. Note that a concrete filesystem using kernfs is responsible for + // managing link counts. inodeMetadata // Method for inodes that represent symlink. InodeNotSymlink provides a -- cgit v1.2.3