diff options
Diffstat (limited to 'pkg/sentry')
31 files changed, 1056 insertions, 850 deletions
diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go index 903135fae..8e34e26df 100644 --- a/pkg/sentry/fsimpl/devpts/devpts.go +++ b/pkg/sentry/fsimpl/devpts/devpts.go @@ -87,7 +87,9 @@ func (fstype FilesystemType) newFilesystem(vfsObj *vfs.VirtualFilesystem, creds root.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, devMinor, 1, linux.ModeDirectory|0555) root.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) root.EnableLeakCheck() - root.dentry.Init(root) + + var rootD kernfs.Dentry + rootD.Init(&fs.Filesystem, root) // Construct the pts master inode and dentry. Linux always uses inode // id 2 for ptmx. See fs/devpts/inode.c:mknod_ptmx. @@ -95,15 +97,14 @@ func (fstype FilesystemType) newFilesystem(vfsObj *vfs.VirtualFilesystem, creds root: root, } master.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, devMinor, 2, linux.ModeCharacterDevice|0666) - master.dentry.Init(master) // Add the master as a child of the root. - links := root.OrderedChildren.Populate(&root.dentry, map[string]*kernfs.Dentry{ - "ptmx": &master.dentry, + links := root.OrderedChildren.Populate(map[string]kernfs.Inode{ + "ptmx": master, }) root.IncLinks(links) - return fs, &root.dentry, nil + return fs, &rootD, nil } // Release implements vfs.FilesystemImpl.Release. @@ -117,24 +118,19 @@ func (fs *filesystem) Release(ctx context.Context) { // +stateify savable type rootInode struct { implStatFS - kernfs.AlwaysValid + kernfs.InodeAlwaysValid kernfs.InodeAttrs kernfs.InodeDirectoryNoNewChildren kernfs.InodeNotSymlink + kernfs.InodeTemporary // This holds no meaning as this inode can't be Looked up and is always valid. kernfs.OrderedChildren rootInodeRefs locks vfs.FileLocks - // Keep a reference to this inode's dentry. - dentry kernfs.Dentry - // master is the master pty inode. Immutable. master *masterInode - // root is the root directory inode for this filesystem. Immutable. - root *rootInode - // mu protects the fields below. 
mu sync.Mutex `state:"nosave"` @@ -173,21 +169,24 @@ func (i *rootInode) allocateTerminal(creds *auth.Credentials) (*Terminal, error) // Linux always uses pty index + 3 as the inode id. See // fs/devpts/inode.c:devpts_pty_new(). replica.InodeAttrs.Init(creds, i.InodeAttrs.DevMajor(), i.InodeAttrs.DevMinor(), uint64(idx+3), linux.ModeCharacterDevice|0600) - replica.dentry.Init(replica) i.replicas[idx] = replica return t, nil } // masterClose is called when the master end of t is closed. -func (i *rootInode) masterClose(t *Terminal) { +func (i *rootInode) masterClose(ctx context.Context, t *Terminal) { i.mu.Lock() defer i.mu.Unlock() // Sanity check that replica with idx exists. - if _, ok := i.replicas[t.n]; !ok { + ri, ok := i.replicas[t.n] + if !ok { panic(fmt.Sprintf("pty with index %d does not exist", t.n)) } + + // Drop the ref on replica inode taken during rootInode.allocateTerminal. + ri.DecRef(ctx) delete(i.replicas, t.n) } @@ -203,16 +202,22 @@ func (i *rootInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.D } // Lookup implements kernfs.Inode.Lookup. -func (i *rootInode) Lookup(ctx context.Context, name string) (*kernfs.Dentry, error) { +func (i *rootInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) { + // Check if a static entry was looked up. + if d, err := i.OrderedChildren.Lookup(ctx, name); err == nil { + return d, nil + } + + // Not a static entry. idx, err := strconv.ParseUint(name, 10, 32) if err != nil { return nil, syserror.ENOENT } i.mu.Lock() defer i.mu.Unlock() - if si, ok := i.replicas[uint32(idx)]; ok { - si.dentry.IncRef() - return &si.dentry, nil + if ri, ok := i.replicas[uint32(idx)]; ok { + ri.IncRef() // This ref is passed to the dentry upon creation via Init. + return ri, nil } return nil, syserror.ENOENT @@ -243,8 +248,8 @@ func (i *rootInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, } // DecRef implements kernfs.Inode.DecRef. 
-func (i *rootInode) DecRef(context.Context) { - i.rootInodeRefs.DecRef(i.Destroy) +func (i *rootInode) DecRef(ctx context.Context) { + i.rootInodeRefs.DecRef(func() { i.Destroy(ctx) }) } // +stateify savable diff --git a/pkg/sentry/fsimpl/devpts/devpts_state_autogen.go b/pkg/sentry/fsimpl/devpts/devpts_state_autogen.go index d2c9ffa7d..12bb996cb 100644 --- a/pkg/sentry/fsimpl/devpts/devpts_state_autogen.go +++ b/pkg/sentry/fsimpl/devpts/devpts_state_autogen.go @@ -58,16 +58,15 @@ func (i *rootInode) StateTypeName() string { func (i *rootInode) StateFields() []string { return []string{ "implStatFS", - "AlwaysValid", + "InodeAlwaysValid", "InodeAttrs", "InodeDirectoryNoNewChildren", "InodeNotSymlink", + "InodeTemporary", "OrderedChildren", "rootInodeRefs", "locks", - "dentry", "master", - "root", "replicas", "nextIdx", } @@ -78,36 +77,34 @@ func (i *rootInode) beforeSave() {} func (i *rootInode) StateSave(stateSinkObject state.Sink) { i.beforeSave() stateSinkObject.Save(0, &i.implStatFS) - stateSinkObject.Save(1, &i.AlwaysValid) + stateSinkObject.Save(1, &i.InodeAlwaysValid) stateSinkObject.Save(2, &i.InodeAttrs) stateSinkObject.Save(3, &i.InodeDirectoryNoNewChildren) stateSinkObject.Save(4, &i.InodeNotSymlink) - stateSinkObject.Save(5, &i.OrderedChildren) - stateSinkObject.Save(6, &i.rootInodeRefs) - stateSinkObject.Save(7, &i.locks) - stateSinkObject.Save(8, &i.dentry) + stateSinkObject.Save(5, &i.InodeTemporary) + stateSinkObject.Save(6, &i.OrderedChildren) + stateSinkObject.Save(7, &i.rootInodeRefs) + stateSinkObject.Save(8, &i.locks) stateSinkObject.Save(9, &i.master) - stateSinkObject.Save(10, &i.root) - stateSinkObject.Save(11, &i.replicas) - stateSinkObject.Save(12, &i.nextIdx) + stateSinkObject.Save(10, &i.replicas) + stateSinkObject.Save(11, &i.nextIdx) } func (i *rootInode) afterLoad() {} func (i *rootInode) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(0, &i.implStatFS) - stateSourceObject.Load(1, &i.AlwaysValid) + 
stateSourceObject.Load(1, &i.InodeAlwaysValid) stateSourceObject.Load(2, &i.InodeAttrs) stateSourceObject.Load(3, &i.InodeDirectoryNoNewChildren) stateSourceObject.Load(4, &i.InodeNotSymlink) - stateSourceObject.Load(5, &i.OrderedChildren) - stateSourceObject.Load(6, &i.rootInodeRefs) - stateSourceObject.Load(7, &i.locks) - stateSourceObject.Load(8, &i.dentry) + stateSourceObject.Load(5, &i.InodeTemporary) + stateSourceObject.Load(6, &i.OrderedChildren) + stateSourceObject.Load(7, &i.rootInodeRefs) + stateSourceObject.Load(8, &i.locks) stateSourceObject.Load(9, &i.master) - stateSourceObject.Load(10, &i.root) - stateSourceObject.Load(11, &i.replicas) - stateSourceObject.Load(12, &i.nextIdx) + stateSourceObject.Load(10, &i.replicas) + stateSourceObject.Load(11, &i.nextIdx) } func (i *implStatFS) StateTypeName() string { @@ -220,7 +217,6 @@ func (mi *masterInode) StateFields() []string { "InodeNotDirectory", "InodeNotSymlink", "locks", - "dentry", "root", } } @@ -235,8 +231,7 @@ func (mi *masterInode) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(3, &mi.InodeNotDirectory) stateSinkObject.Save(4, &mi.InodeNotSymlink) stateSinkObject.Save(5, &mi.locks) - stateSinkObject.Save(6, &mi.dentry) - stateSinkObject.Save(7, &mi.root) + stateSinkObject.Save(6, &mi.root) } func (mi *masterInode) afterLoad() {} @@ -248,8 +243,7 @@ func (mi *masterInode) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(3, &mi.InodeNotDirectory) stateSourceObject.Load(4, &mi.InodeNotSymlink) stateSourceObject.Load(5, &mi.locks) - stateSourceObject.Load(6, &mi.dentry) - stateSourceObject.Load(7, &mi.root) + stateSourceObject.Load(6, &mi.root) } func (mfd *masterFileDescription) StateTypeName() string { @@ -334,7 +328,6 @@ func (ri *replicaInode) StateFields() []string { "InodeNotDirectory", "InodeNotSymlink", "locks", - "dentry", "root", "t", } @@ -350,9 +343,8 @@ func (ri *replicaInode) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(3, 
&ri.InodeNotDirectory) stateSinkObject.Save(4, &ri.InodeNotSymlink) stateSinkObject.Save(5, &ri.locks) - stateSinkObject.Save(6, &ri.dentry) - stateSinkObject.Save(7, &ri.root) - stateSinkObject.Save(8, &ri.t) + stateSinkObject.Save(6, &ri.root) + stateSinkObject.Save(7, &ri.t) } func (ri *replicaInode) afterLoad() {} @@ -364,9 +356,8 @@ func (ri *replicaInode) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(3, &ri.InodeNotDirectory) stateSourceObject.Load(4, &ri.InodeNotSymlink) stateSourceObject.Load(5, &ri.locks) - stateSourceObject.Load(6, &ri.dentry) - stateSourceObject.Load(7, &ri.root) - stateSourceObject.Load(8, &ri.t) + stateSourceObject.Load(6, &ri.root) + stateSourceObject.Load(7, &ri.t) } func (rfd *replicaFileDescription) StateTypeName() string { diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go index 69c2fe951..fda30fb93 100644 --- a/pkg/sentry/fsimpl/devpts/master.go +++ b/pkg/sentry/fsimpl/devpts/master.go @@ -42,9 +42,6 @@ type masterInode struct { locks vfs.FileLocks - // Keep a reference to this inode's dentry. - dentry kernfs.Dentry - // root is the devpts root inode. root *rootInode } @@ -103,7 +100,7 @@ var _ vfs.FileDescriptionImpl = (*masterFileDescription)(nil) // Release implements vfs.FileDescriptionImpl.Release. func (mfd *masterFileDescription) Release(ctx context.Context) { - mfd.inode.root.masterClose(mfd.t) + mfd.inode.root.masterClose(ctx, mfd.t) } // EventRegister implements waiter.Waitable.EventRegister. diff --git a/pkg/sentry/fsimpl/devpts/replica.go b/pkg/sentry/fsimpl/devpts/replica.go index 6515c5536..70c68cf0a 100644 --- a/pkg/sentry/fsimpl/devpts/replica.go +++ b/pkg/sentry/fsimpl/devpts/replica.go @@ -41,9 +41,6 @@ type replicaInode struct { locks vfs.FileLocks - // Keep a reference to this inode's dentry. - dentry kernfs.Dentry - // root is the devpts root inode. 
root *rootInode diff --git a/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go b/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go index 3c189317f..4c8bc4410 100644 --- a/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go +++ b/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go @@ -271,12 +271,11 @@ func (i *inode) StateTypeName() string { func (i *inode) StateFields() []string { return []string{ "inodeRefs", + "InodeAlwaysValid", "InodeAttrs", "InodeDirectoryNoNewChildren", - "InodeNoDynamicLookup", "InodeNotSymlink", "OrderedChildren", - "dentry", "fs", "metadataMu", "nodeID", @@ -294,42 +293,40 @@ func (i *inode) beforeSave() {} func (i *inode) StateSave(stateSinkObject state.Sink) { i.beforeSave() stateSinkObject.Save(0, &i.inodeRefs) - stateSinkObject.Save(1, &i.InodeAttrs) - stateSinkObject.Save(2, &i.InodeDirectoryNoNewChildren) - stateSinkObject.Save(3, &i.InodeNoDynamicLookup) + stateSinkObject.Save(1, &i.InodeAlwaysValid) + stateSinkObject.Save(2, &i.InodeAttrs) + stateSinkObject.Save(3, &i.InodeDirectoryNoNewChildren) stateSinkObject.Save(4, &i.InodeNotSymlink) stateSinkObject.Save(5, &i.OrderedChildren) - stateSinkObject.Save(6, &i.dentry) - stateSinkObject.Save(7, &i.fs) - stateSinkObject.Save(8, &i.metadataMu) - stateSinkObject.Save(9, &i.nodeID) - stateSinkObject.Save(10, &i.locks) - stateSinkObject.Save(11, &i.size) - stateSinkObject.Save(12, &i.attributeVersion) - stateSinkObject.Save(13, &i.attributeTime) - stateSinkObject.Save(14, &i.version) - stateSinkObject.Save(15, &i.link) + stateSinkObject.Save(6, &i.fs) + stateSinkObject.Save(7, &i.metadataMu) + stateSinkObject.Save(8, &i.nodeID) + stateSinkObject.Save(9, &i.locks) + stateSinkObject.Save(10, &i.size) + stateSinkObject.Save(11, &i.attributeVersion) + stateSinkObject.Save(12, &i.attributeTime) + stateSinkObject.Save(13, &i.version) + stateSinkObject.Save(14, &i.link) } func (i *inode) afterLoad() {} func (i *inode) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(0, &i.inodeRefs) - 
stateSourceObject.Load(1, &i.InodeAttrs) - stateSourceObject.Load(2, &i.InodeDirectoryNoNewChildren) - stateSourceObject.Load(3, &i.InodeNoDynamicLookup) + stateSourceObject.Load(1, &i.InodeAlwaysValid) + stateSourceObject.Load(2, &i.InodeAttrs) + stateSourceObject.Load(3, &i.InodeDirectoryNoNewChildren) stateSourceObject.Load(4, &i.InodeNotSymlink) stateSourceObject.Load(5, &i.OrderedChildren) - stateSourceObject.Load(6, &i.dentry) - stateSourceObject.Load(7, &i.fs) - stateSourceObject.Load(8, &i.metadataMu) - stateSourceObject.Load(9, &i.nodeID) - stateSourceObject.Load(10, &i.locks) - stateSourceObject.Load(11, &i.size) - stateSourceObject.Load(12, &i.attributeVersion) - stateSourceObject.Load(13, &i.attributeTime) - stateSourceObject.Load(14, &i.version) - stateSourceObject.Load(15, &i.link) + stateSourceObject.Load(6, &i.fs) + stateSourceObject.Load(7, &i.metadataMu) + stateSourceObject.Load(8, &i.nodeID) + stateSourceObject.Load(9, &i.locks) + stateSourceObject.Load(10, &i.size) + stateSourceObject.Load(11, &i.attributeVersion) + stateSourceObject.Load(12, &i.attributeTime) + stateSourceObject.Load(13, &i.version) + stateSourceObject.Load(14, &i.link) } func (r *inodeRefs) StateTypeName() string { diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go index 65786e42a..1a388f54f 100644 --- a/pkg/sentry/fsimpl/fuse/fusefs.go +++ b/pkg/sentry/fsimpl/fuse/fusefs.go @@ -249,14 +249,12 @@ func (fs *filesystem) Release(ctx context.Context) { // +stateify savable type inode struct { inodeRefs + kernfs.InodeAlwaysValid kernfs.InodeAttrs kernfs.InodeDirectoryNoNewChildren - kernfs.InodeNoDynamicLookup kernfs.InodeNotSymlink kernfs.OrderedChildren - dentry kernfs.Dentry - // the owning filesystem. fs is immutable. 
fs *filesystem @@ -284,26 +282,24 @@ type inode struct { } func (fs *filesystem) newRootInode(creds *auth.Credentials, mode linux.FileMode) *kernfs.Dentry { - i := &inode{fs: fs} + i := &inode{fs: fs, nodeID: 1} i.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, 1, linux.ModeDirectory|0755) i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) i.EnableLeakCheck() - i.dentry.Init(i) - i.nodeID = 1 - return &i.dentry + var d kernfs.Dentry + d.Init(&fs.Filesystem, i) + return &d } -func (fs *filesystem) newInode(nodeID uint64, attr linux.FUSEAttr) *kernfs.Dentry { +func (fs *filesystem) newInode(nodeID uint64, attr linux.FUSEAttr) kernfs.Inode { i := &inode{fs: fs, nodeID: nodeID} creds := auth.Credentials{EffectiveKGID: auth.KGID(attr.UID), EffectiveKUID: auth.KUID(attr.UID)} i.InodeAttrs.Init(&creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.FileMode(attr.Mode)) atomic.StoreUint64(&i.size, attr.Size) i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) i.EnableLeakCheck() - i.dentry.Init(i) - - return &i.dentry + return i } // Open implements kernfs.Inode.Open. @@ -410,23 +406,27 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentr } // Lookup implements kernfs.Inode.Lookup. -func (i *inode) Lookup(ctx context.Context, name string) (*kernfs.Dentry, error) { +func (i *inode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) { in := linux.FUSELookupIn{Name: name} return i.newEntry(ctx, name, 0, linux.FUSE_LOOKUP, &in) } +// Keep implements kernfs.Inode.Keep. +func (i *inode) Keep() bool { + // Return true so that kernfs keeps the new dentry pointing to this + // inode in the dentry tree. This is needed because inodes created via + // Lookup are not temporary. They might refer to existing files on server + // that can be Unlink'd/Rmdir'd. + return true +} + // IterDirents implements kernfs.Inode.IterDirents. 
func (*inode) IterDirents(ctx context.Context, callback vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) { return offset, nil } -// Valid implements kernfs.Inode.Valid. -func (*inode) Valid(ctx context.Context) bool { - return true -} - // NewFile implements kernfs.Inode.NewFile. -func (i *inode) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*kernfs.Dentry, error) { +func (i *inode) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (kernfs.Inode, error) { kernelTask := kernel.TaskFromContext(ctx) if kernelTask == nil { log.Warningf("fusefs.Inode.NewFile: couldn't get kernel task from context", i.nodeID) @@ -444,7 +444,7 @@ func (i *inode) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) } // NewNode implements kernfs.Inode.NewNode. -func (i *inode) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*kernfs.Dentry, error) { +func (i *inode) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (kernfs.Inode, error) { in := linux.FUSEMknodIn{ MknodMeta: linux.FUSEMknodMeta{ Mode: uint32(opts.Mode), @@ -457,7 +457,7 @@ func (i *inode) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) } // NewSymlink implements kernfs.Inode.NewSymlink. -func (i *inode) NewSymlink(ctx context.Context, name, target string) (*kernfs.Dentry, error) { +func (i *inode) NewSymlink(ctx context.Context, name, target string) (kernfs.Inode, error) { in := linux.FUSESymLinkIn{ Name: name, Target: target, @@ -466,7 +466,7 @@ func (i *inode) NewSymlink(ctx context.Context, name, target string) (*kernfs.De } // Unlink implements kernfs.Inode.Unlink. 
-func (i *inode) Unlink(ctx context.Context, name string, child *kernfs.Dentry) error { +func (i *inode) Unlink(ctx context.Context, name string, child kernfs.Inode) error { kernelTask := kernel.TaskFromContext(ctx) if kernelTask == nil { log.Warningf("fusefs.Inode.newEntry: couldn't get kernel task from context", i.nodeID) @@ -482,14 +482,11 @@ func (i *inode) Unlink(ctx context.Context, name string, child *kernfs.Dentry) e return err } // only return error, discard res. - if err := res.Error(); err != nil { - return err - } - return i.dentry.RemoveChildLocked(name, child) + return res.Error() } // NewDir implements kernfs.Inode.NewDir. -func (i *inode) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*kernfs.Dentry, error) { +func (i *inode) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (kernfs.Inode, error) { in := linux.FUSEMkdirIn{ MkdirMeta: linux.FUSEMkdirMeta{ Mode: uint32(opts.Mode), @@ -501,7 +498,7 @@ func (i *inode) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) } // RmDir implements kernfs.Inode.RmDir. -func (i *inode) RmDir(ctx context.Context, name string, child *kernfs.Dentry) error { +func (i *inode) RmDir(ctx context.Context, name string, child kernfs.Inode) error { fusefs := i.fs task, creds := kernel.TaskFromContext(ctx), auth.CredentialsFromContext(ctx) @@ -515,16 +512,12 @@ func (i *inode) RmDir(ctx context.Context, name string, child *kernfs.Dentry) er if err != nil { return err } - if err := res.Error(); err != nil { - return err - } - - return i.dentry.RemoveChildLocked(name, child) + return res.Error() } // newEntry calls FUSE server for entry creation and allocates corresponding entry according to response. // Shared by FUSE_MKNOD, FUSE_MKDIR, FUSE_SYMLINK, FUSE_LINK and FUSE_LOOKUP. 
-func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMode, opcode linux.FUSEOpcode, payload marshal.Marshallable) (*kernfs.Dentry, error) { +func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMode, opcode linux.FUSEOpcode, payload marshal.Marshallable) (kernfs.Inode, error) { kernelTask := kernel.TaskFromContext(ctx) if kernelTask == nil { log.Warningf("fusefs.Inode.newEntry: couldn't get kernel task from context", i.nodeID) @@ -734,8 +727,8 @@ func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptio } // DecRef implements kernfs.Inode.DecRef. -func (i *inode) DecRef(context.Context) { - i.inodeRefs.DecRef(i.Destroy) +func (i *inode) DecRef(ctx context.Context) { + i.inodeRefs.DecRef(func() { i.Destroy(ctx) }) } // StatFS implements kernfs.Inode.StatFS. diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go index ffe4ddb32..da1e3bf4b 100644 --- a/pkg/sentry/fsimpl/host/host.go +++ b/pkg/sentry/fsimpl/host/host.go @@ -118,7 +118,7 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions) if err != nil { return nil, err } - d.Init(i) + d.Init(&fs.Filesystem, i) // i.open will take a reference on d. defer d.DecRef(ctx) @@ -195,6 +195,7 @@ type inode struct { kernfs.InodeNoStatFS kernfs.InodeNotDirectory kernfs.InodeNotSymlink + kernfs.InodeTemporary // This holds no meaning as this inode can't be Looked up and is always valid. 
locks vfs.FileLocks diff --git a/pkg/sentry/fsimpl/host/host_state_autogen.go b/pkg/sentry/fsimpl/host/host_state_autogen.go index c18cef145..5aaee37c3 100644 --- a/pkg/sentry/fsimpl/host/host_state_autogen.go +++ b/pkg/sentry/fsimpl/host/host_state_autogen.go @@ -83,6 +83,7 @@ func (i *inode) StateFields() []string { "InodeNoStatFS", "InodeNotDirectory", "InodeNotSymlink", + "InodeTemporary", "locks", "inodeRefs", "hostFD", @@ -104,17 +105,18 @@ func (i *inode) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(0, &i.InodeNoStatFS) stateSinkObject.Save(1, &i.InodeNotDirectory) stateSinkObject.Save(2, &i.InodeNotSymlink) - stateSinkObject.Save(3, &i.locks) - stateSinkObject.Save(4, &i.inodeRefs) - stateSinkObject.Save(5, &i.hostFD) - stateSinkObject.Save(6, &i.ino) - stateSinkObject.Save(7, &i.isTTY) - stateSinkObject.Save(8, &i.seekable) - stateSinkObject.Save(9, &i.wouldBlock) - stateSinkObject.Save(10, &i.queue) - stateSinkObject.Save(11, &i.canMap) - stateSinkObject.Save(12, &i.mappings) - stateSinkObject.Save(13, &i.pf) + stateSinkObject.Save(3, &i.InodeTemporary) + stateSinkObject.Save(4, &i.locks) + stateSinkObject.Save(5, &i.inodeRefs) + stateSinkObject.Save(6, &i.hostFD) + stateSinkObject.Save(7, &i.ino) + stateSinkObject.Save(8, &i.isTTY) + stateSinkObject.Save(9, &i.seekable) + stateSinkObject.Save(10, &i.wouldBlock) + stateSinkObject.Save(11, &i.queue) + stateSinkObject.Save(12, &i.canMap) + stateSinkObject.Save(13, &i.mappings) + stateSinkObject.Save(14, &i.pf) } func (i *inode) afterLoad() {} @@ -123,17 +125,18 @@ func (i *inode) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(0, &i.InodeNoStatFS) stateSourceObject.Load(1, &i.InodeNotDirectory) stateSourceObject.Load(2, &i.InodeNotSymlink) - stateSourceObject.Load(3, &i.locks) - stateSourceObject.Load(4, &i.inodeRefs) - stateSourceObject.Load(5, &i.hostFD) - stateSourceObject.Load(6, &i.ino) - stateSourceObject.Load(7, &i.isTTY) - stateSourceObject.Load(8, &i.seekable) - 
stateSourceObject.Load(9, &i.wouldBlock) - stateSourceObject.Load(10, &i.queue) - stateSourceObject.Load(11, &i.canMap) - stateSourceObject.Load(12, &i.mappings) - stateSourceObject.Load(13, &i.pf) + stateSourceObject.Load(3, &i.InodeTemporary) + stateSourceObject.Load(4, &i.locks) + stateSourceObject.Load(5, &i.inodeRefs) + stateSourceObject.Load(6, &i.hostFD) + stateSourceObject.Load(7, &i.ino) + stateSourceObject.Load(8, &i.isTTY) + stateSourceObject.Load(9, &i.seekable) + stateSourceObject.Load(10, &i.wouldBlock) + stateSourceObject.Load(11, &i.queue) + stateSourceObject.Load(12, &i.canMap) + stateSourceObject.Load(13, &i.mappings) + stateSourceObject.Load(14, &i.pf) } func (f *fileDescription) StateTypeName() string { diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go index 0a4cd4057..abf1905d6 100644 --- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go @@ -201,12 +201,12 @@ func (fd *GenericDirectoryFD) IterDirents(ctx context.Context, cb vfs.IterDirent // these. childIdx := fd.off - 2 for it := fd.children.nthLocked(childIdx); it != nil; it = it.Next() { - stat, err := it.Dentry.inode.Stat(ctx, fd.filesystem(), opts) + stat, err := it.inode.Stat(ctx, fd.filesystem(), opts) if err != nil { return err } dirent := vfs.Dirent{ - Name: it.Name, + Name: it.name, Type: linux.FileMode(stat.Mode).DirentType(), Ino: stat.Ino, NextOff: fd.off + 1, diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go index 5cc1c4281..6426a55f6 100644 --- a/pkg/sentry/fsimpl/kernfs/filesystem.go +++ b/pkg/sentry/fsimpl/kernfs/filesystem.go @@ -89,7 +89,7 @@ afterSymlink: } if targetVD.Ok() { err := rp.HandleJump(targetVD) - targetVD.DecRef(ctx) + fs.deferDecRefVD(ctx, targetVD) if err != nil { return nil, err } @@ -120,22 +120,33 @@ func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.Vir // Cached dentry exists, revalidate. 
if !child.inode.Valid(ctx) { delete(parent.children, name) - vfsObj.InvalidateDentry(ctx, &child.vfsd) - fs.deferDecRef(child) // Reference from Lookup. + if child.inode.Keep() { + // Drop the ref owned by kernfs. + fs.deferDecRef(child) + } + vfsObj.InvalidateDentry(ctx, child.VFSDentry()) child = nil } } if child == nil { // Dentry isn't cached; it either doesn't exist or failed revalidation. // Attempt to resolve it via Lookup. - c, err := parent.inode.Lookup(ctx, name) + childInode, err := parent.inode.Lookup(ctx, name) if err != nil { return nil, err } - // Reference on c (provided by Lookup) will be dropped when the dentry - // fails validation. - parent.InsertChildLocked(name, c) - child = c + var newChild Dentry + newChild.Init(fs, childInode) // childInode's ref is transferred to newChild. + parent.insertChildLocked(name, &newChild) + child = &newChild + + // Drop the ref on newChild. This will cause the dentry to get pruned + // from the dentry tree by the end of current filesystem operation + // (before returning to the VFS layer) if another ref is not picked on + // this dentry. + if !childInode.Keep() { + fs.deferDecRef(&newChild) + } } return child, nil } @@ -191,7 +202,7 @@ func (fs *Filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.Resolving } // checkCreateLocked checks that a file named rp.Component() may be created in -// directory parentVFSD, then returns rp.Component(). +// directory parent, then returns rp.Component(). // // Preconditions: // * Filesystem.mu must be locked for at least reading. @@ -298,9 +309,9 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. return syserror.EEXIST } fs.mu.Lock() + defer fs.processDeferredDecRefs(ctx) defer fs.mu.Unlock() parent, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -324,11 +335,13 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs. 
return syserror.EPERM } - child, err := parent.inode.NewLink(ctx, pc, d.inode) + childI, err := parent.inode.NewLink(ctx, pc, d.inode) if err != nil { return err } - parent.InsertChildLocked(pc, child) + var child Dentry + child.Init(fs, childI) + parent.insertChildLocked(pc, &child) return nil } @@ -338,9 +351,9 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return syserror.EEXIST } fs.mu.Lock() + defer fs.processDeferredDecRefs(ctx) defer fs.mu.Unlock() parent, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -355,14 +368,16 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return err } defer rp.Mount().EndWrite() - child, err := parent.inode.NewDir(ctx, pc, opts) + childI, err := parent.inode.NewDir(ctx, pc, opts) if err != nil { if !opts.ForSyntheticMountpoint || err == syserror.EEXIST { return err } - child = newSyntheticDirectory(rp.Credentials(), opts.Mode) + childI = newSyntheticDirectory(rp.Credentials(), opts.Mode) } - parent.InsertChildLocked(pc, child) + var child Dentry + child.Init(fs, childI) + parent.insertChildLocked(pc, &child) return nil } @@ -372,9 +387,9 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return syserror.EEXIST } fs.mu.Lock() + defer fs.processDeferredDecRefs(ctx) defer fs.mu.Unlock() parent, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -389,11 +404,13 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v return err } defer rp.Mount().EndWrite() - newD, err := parent.inode.NewNode(ctx, pc, opts) + newI, err := parent.inode.NewNode(ctx, pc, opts) if err != nil { return err } - parent.InsertChildLocked(pc, newD) + var newD Dentry + newD.Init(fs, newI) + parent.insertChildLocked(pc, &newD) return nil } @@ -409,22 +426,23 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp 
*vfs.ResolvingPath, opts vf // Do not create new file. if opts.Flags&linux.O_CREAT == 0 { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) d, err := fs.walkExistingLocked(ctx, rp) if err != nil { fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) return nil, err } if err := d.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil { fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) return nil, err } - d.inode.IncRef() - defer d.inode.DecRef(ctx) + // Open may block so we need to unlock fs.mu. IncRef d to prevent + // its destruction while fs.mu is unlocked. + d.IncRef() fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) - return d.inode.Open(ctx, rp, d, opts) + fd, err := d.inode.Open(ctx, rp, d, opts) + d.DecRef(ctx) + return fd, err } // May create new file. @@ -438,6 +456,10 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf unlocked = true } } + // Process all to-be-decref'd dentries at the end at once. + // Since we defer unlock() AFTER this, fs.mu is guaranteed to be unlocked + // when this is executed. + defer fs.processDeferredDecRefs(ctx) defer unlock() if rp.Done() { if rp.MustBeDir() { @@ -449,14 +471,16 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf if err := d.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil { return nil, err } - d.inode.IncRef() - defer d.inode.DecRef(ctx) + // Open may block so we need to unlock fs.mu. IncRef d to prevent + // its destruction while fs.mu is unlocked. + d.IncRef() unlock() - return d.inode.Open(ctx, rp, d, opts) + fd, err := d.inode.Open(ctx, rp, d, opts) + d.DecRef(ctx) + return fd, err } afterTrailingSymlink: parent, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked(ctx) if err != nil { return nil, err } @@ -487,18 +511,23 @@ afterTrailingSymlink: } defer rp.Mount().EndWrite() // Create and open the child. 
- child, err := parent.inode.NewFile(ctx, pc, opts) + childI, err := parent.inode.NewFile(ctx, pc, opts) if err != nil { return nil, err } + var child Dentry + child.Init(fs, childI) // FIXME(gvisor.dev/issue/1193): Race between checking existence with - // fs.stepExistingLocked and parent.InsertChild. If possible, we should hold + // fs.stepExistingLocked and parent.insertChild. If possible, we should hold // dirMu from one to the other. - parent.InsertChild(pc, child) - child.inode.IncRef() - defer child.inode.DecRef(ctx) + parent.insertChild(pc, &child) + // Open may block so we need to unlock fs.mu. IncRef child to prevent + // its destruction while fs.mu is unlocked. + child.IncRef() unlock() - return child.inode.Open(ctx, rp, child, opts) + fd, err := child.inode.Open(ctx, rp, &child, opts) + child.DecRef(ctx) + return fd, err } if err != nil { return nil, err @@ -514,7 +543,7 @@ afterTrailingSymlink: } if targetVD.Ok() { err := rp.HandleJump(targetVD) - targetVD.DecRef(ctx) + fs.deferDecRefVD(ctx, targetVD) if err != nil { return nil, err } @@ -530,18 +559,21 @@ afterTrailingSymlink: if err := child.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil { return nil, err } - child.inode.IncRef() - defer child.inode.DecRef(ctx) + // Open may block so we need to unlock fs.mu. IncRef child to prevent + // its destruction while fs.mu is unlocked. + child.IncRef() unlock() - return child.inode.Open(ctx, rp, child, opts) + fd, err := child.inode.Open(ctx, rp, child, opts) + child.DecRef(ctx) + return fd, err } // ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt. 
func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() d, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return "", err } @@ -560,7 +592,7 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0 fs.mu.Lock() - defer fs.processDeferredDecRefsLocked(ctx) + defer fs.processDeferredDecRefs(ctx) defer fs.mu.Unlock() // Resolve the destination directory first to verify that it's on this @@ -632,24 +664,27 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa if err := virtfs.PrepareRenameDentry(mntns, srcVFSD, dstVFSD); err != nil { return err } - replaced, err := srcDir.inode.Rename(ctx, src.name, pc, src, dstDir) + err = srcDir.inode.Rename(ctx, src.name, pc, src.inode, dstDir.inode) if err != nil { virtfs.AbortRenameDentry(srcVFSD, dstVFSD) return err } delete(srcDir.children, src.name) if srcDir != dstDir { - fs.deferDecRef(srcDir) - dstDir.IncRef() + fs.deferDecRef(srcDir) // child (src) drops ref on old parent. + dstDir.IncRef() // child (src) takes a ref on the new parent. } src.parent = dstDir src.name = pc if dstDir.children == nil { dstDir.children = make(map[string]*Dentry) } + replaced := dstDir.children[pc] dstDir.children[pc] = src var replaceVFSD *vfs.Dentry if replaced != nil { + // deferDecRef so that fs.mu and dstDir.mu are unlocked by then. + fs.deferDecRef(replaced) replaceVFSD = replaced.VFSDentry() } virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaceVFSD) @@ -659,10 +694,10 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa // RmdirAt implements vfs.FilesystemImpl.RmdirAt. 
func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error { fs.mu.Lock() + defer fs.processDeferredDecRefs(ctx) defer fs.mu.Unlock() d, err := fs.walkExistingLocked(ctx, rp) - fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -691,10 +726,13 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error return err } - if err := parentDentry.inode.RmDir(ctx, d.name, d); err != nil { + if err := parentDentry.inode.RmDir(ctx, d.name, d.inode); err != nil { virtfs.AbortDeleteDentry(vfsd) return err } + delete(parentDentry.children, d.name) + // Defer decref so that fs.mu and parentDentry.dirMu are unlocked by then. + fs.deferDecRef(d) virtfs.CommitDeleteDentry(ctx, vfsd) return nil } @@ -702,9 +740,9 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error // SetStatAt implements vfs.FilesystemImpl.SetStatAt. func (fs *Filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() d, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return err } @@ -717,9 +755,9 @@ func (fs *Filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts // StatAt implements vfs.FilesystemImpl.StatAt. func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() d, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return linux.Statx{}, err } @@ -729,9 +767,9 @@ func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf // StatFSAt implements vfs.FilesystemImpl.StatFSAt. 
func (fs *Filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() d, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return linux.Statfs{}, err } @@ -744,9 +782,9 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ return syserror.EEXIST } fs.mu.Lock() + defer fs.processDeferredDecRefs(ctx) defer fs.mu.Unlock() parent, err := fs.walkParentDirLocked(ctx, rp) - fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -761,21 +799,23 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ return err } defer rp.Mount().EndWrite() - child, err := parent.inode.NewSymlink(ctx, pc, target) + childI, err := parent.inode.NewSymlink(ctx, pc, target) if err != nil { return err } - parent.InsertChildLocked(pc, child) + var child Dentry + child.Init(fs, childI) + parent.insertChildLocked(pc, &child) return nil } // UnlinkAt implements vfs.FilesystemImpl.UnlinkAt. func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error { fs.mu.Lock() + defer fs.processDeferredDecRefs(ctx) defer fs.mu.Unlock() d, err := fs.walkExistingLocked(ctx, rp) - fs.processDeferredDecRefsLocked(ctx) if err != nil { return err } @@ -799,10 +839,13 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil { return err } - if err := parentDentry.inode.Unlink(ctx, d.name, d); err != nil { + if err := parentDentry.inode.Unlink(ctx, d.name, d.inode); err != nil { virtfs.AbortDeleteDentry(vfsd) return err } + delete(parentDentry.children, d.name) + // Defer decref so that fs.mu and parentDentry.dirMu are unlocked by then. 
+ fs.deferDecRef(d) virtfs.CommitDeleteDentry(ctx, vfsd) return nil } @@ -810,9 +853,9 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error // BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt. func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() d, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return nil, err } @@ -825,9 +868,9 @@ func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath // ListXattrAt implements vfs.FilesystemImpl.ListXattrAt. func (fs *Filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() _, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return nil, err } @@ -838,9 +881,9 @@ func (fs *Filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, si // GetXattrAt implements vfs.FilesystemImpl.GetXattrAt. func (fs *Filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() _, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return "", err } @@ -851,9 +894,9 @@ func (fs *Filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt // SetXattrAt implements vfs.FilesystemImpl.SetXattrAt. 
func (fs *Filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() _, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return err } @@ -864,9 +907,9 @@ func (fs *Filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt // RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt. func (fs *Filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error { fs.mu.RLock() + defer fs.processDeferredDecRefs(ctx) + defer fs.mu.RUnlock() _, err := fs.walkExistingLocked(ctx, rp) - fs.mu.RUnlock() - fs.processDeferredDecRefs(ctx) if err != nil { return err } @@ -880,3 +923,16 @@ func (fs *Filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe defer fs.mu.RUnlock() return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*Dentry), b) } + +func (fs *Filesystem) deferDecRefVD(ctx context.Context, vd vfs.VirtualDentry) { + if d, ok := vd.Dentry().Impl().(*Dentry); ok && d.fs == fs { + // The following is equivalent to vd.DecRef(ctx). This is needed + // because if d belongs to this filesystem, we can not DecRef it right + // away as we may be holding fs.mu. d.DecRef may acquire fs.mu. So we + // defer the DecRef to when locks are dropped. + vd.Mount().DecRef(ctx) + fs.deferDecRef(d) + } else { + vd.DecRef(ctx) + } +} diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go index 49210e748..122b10591 100644 --- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go +++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go @@ -34,6 +34,7 @@ import ( // // +stateify savable type InodeNoopRefCount struct { + InodeTemporary } // IncRef implements Inode.IncRef. @@ -57,27 +58,27 @@ func (InodeNoopRefCount) TryIncRef() bool { type InodeDirectoryNoNewChildren struct{} // NewFile implements Inode.NewFile. 
-func (InodeDirectoryNoNewChildren) NewFile(context.Context, string, vfs.OpenOptions) (*Dentry, error) { +func (InodeDirectoryNoNewChildren) NewFile(context.Context, string, vfs.OpenOptions) (Inode, error) { return nil, syserror.EPERM } // NewDir implements Inode.NewDir. -func (InodeDirectoryNoNewChildren) NewDir(context.Context, string, vfs.MkdirOptions) (*Dentry, error) { +func (InodeDirectoryNoNewChildren) NewDir(context.Context, string, vfs.MkdirOptions) (Inode, error) { return nil, syserror.EPERM } // NewLink implements Inode.NewLink. -func (InodeDirectoryNoNewChildren) NewLink(context.Context, string, Inode) (*Dentry, error) { +func (InodeDirectoryNoNewChildren) NewLink(context.Context, string, Inode) (Inode, error) { return nil, syserror.EPERM } // NewSymlink implements Inode.NewSymlink. -func (InodeDirectoryNoNewChildren) NewSymlink(context.Context, string, string) (*Dentry, error) { +func (InodeDirectoryNoNewChildren) NewSymlink(context.Context, string, string) (Inode, error) { return nil, syserror.EPERM } // NewNode implements Inode.NewNode. -func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOptions) (*Dentry, error) { +func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOptions) (Inode, error) { return nil, syserror.EPERM } @@ -88,6 +89,7 @@ func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOpt // // +stateify savable type InodeNotDirectory struct { + InodeAlwaysValid } // HasChildren implements Inode.HasChildren. @@ -96,47 +98,47 @@ func (InodeNotDirectory) HasChildren() bool { } // NewFile implements Inode.NewFile. -func (InodeNotDirectory) NewFile(context.Context, string, vfs.OpenOptions) (*Dentry, error) { +func (InodeNotDirectory) NewFile(context.Context, string, vfs.OpenOptions) (Inode, error) { panic("NewFile called on non-directory inode") } // NewDir implements Inode.NewDir. 
-func (InodeNotDirectory) NewDir(context.Context, string, vfs.MkdirOptions) (*Dentry, error) { +func (InodeNotDirectory) NewDir(context.Context, string, vfs.MkdirOptions) (Inode, error) { panic("NewDir called on non-directory inode") } // NewLink implements Inode.NewLinkink. -func (InodeNotDirectory) NewLink(context.Context, string, Inode) (*Dentry, error) { +func (InodeNotDirectory) NewLink(context.Context, string, Inode) (Inode, error) { panic("NewLink called on non-directory inode") } // NewSymlink implements Inode.NewSymlink. -func (InodeNotDirectory) NewSymlink(context.Context, string, string) (*Dentry, error) { +func (InodeNotDirectory) NewSymlink(context.Context, string, string) (Inode, error) { panic("NewSymlink called on non-directory inode") } // NewNode implements Inode.NewNode. -func (InodeNotDirectory) NewNode(context.Context, string, vfs.MknodOptions) (*Dentry, error) { +func (InodeNotDirectory) NewNode(context.Context, string, vfs.MknodOptions) (Inode, error) { panic("NewNode called on non-directory inode") } // Unlink implements Inode.Unlink. -func (InodeNotDirectory) Unlink(context.Context, string, *Dentry) error { +func (InodeNotDirectory) Unlink(context.Context, string, Inode) error { panic("Unlink called on non-directory inode") } // RmDir implements Inode.RmDir. -func (InodeNotDirectory) RmDir(context.Context, string, *Dentry) error { +func (InodeNotDirectory) RmDir(context.Context, string, Inode) error { panic("RmDir called on non-directory inode") } // Rename implements Inode.Rename. -func (InodeNotDirectory) Rename(context.Context, string, string, *Dentry, *Dentry) (*Dentry, error) { +func (InodeNotDirectory) Rename(context.Context, string, string, Inode, Inode) error { panic("Rename called on non-directory inode") } // Lookup implements Inode.Lookup. 
-func (InodeNotDirectory) Lookup(ctx context.Context, name string) (*Dentry, error) { +func (InodeNotDirectory) Lookup(ctx context.Context, name string) (Inode, error) { panic("Lookup called on non-directory inode") } @@ -145,35 +147,6 @@ func (InodeNotDirectory) IterDirents(ctx context.Context, callback vfs.IterDiren panic("IterDirents called on non-directory inode") } -// Valid implements Inode.Valid. -func (InodeNotDirectory) Valid(context.Context) bool { - return true -} - -// InodeNoDynamicLookup partially implements the Inode interface, specifically -// the inodeDynamicLookup sub interface. Directory inodes that do not support -// dymanic entries (i.e. entries that are not "hashed" into the -// vfs.Dentry.children) can embed this to provide no-op implementations for -// functions related to dynamic entries. -// -// +stateify savable -type InodeNoDynamicLookup struct{} - -// Lookup implements Inode.Lookup. -func (InodeNoDynamicLookup) Lookup(ctx context.Context, name string) (*Dentry, error) { - return nil, syserror.ENOENT -} - -// IterDirents implements Inode.IterDirents. -func (InodeNoDynamicLookup) IterDirents(ctx context.Context, callback vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) { - return offset, nil -} - -// Valid implements Inode.Valid. -func (InodeNoDynamicLookup) Valid(ctx context.Context) bool { - return true -} - // InodeNotSymlink partially implements the Inode interface, specifically the // inodeSymlink sub interface. All inodes that are not symlinks may embed this // to return the appropriate errors from symlink-related functions. @@ -273,7 +246,7 @@ func (a *InodeAttrs) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *aut // SetInodeStat sets the corresponding attributes from opts to InodeAttrs. // This function can be used by other kernfs-based filesystem implementation to -// sets the unexported attributes into kernfs.InodeAttrs. +// set the unexported attributes into InodeAttrs. 
func (a *InodeAttrs) SetInodeStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error { if opts.Stat.Mask == 0 { return nil @@ -344,8 +317,9 @@ func (a *InodeAttrs) DecLinks() { // +stateify savable type slot struct { - Name string - Dentry *Dentry + name string + inode Inode + static bool slotEntry } @@ -361,10 +335,18 @@ type OrderedChildrenOptions struct { } // OrderedChildren partially implements the Inode interface. OrderedChildren can -// be embedded in directory inodes to keep track of the children in the +// be embedded in directory inodes to keep track of children in the // directory, and can then be used to implement a generic directory FD -- see -// GenericDirectoryFD. OrderedChildren is not compatible with dynamic -// directories. +// GenericDirectoryFD. +// +// OrderedChildren can represent a node in an Inode tree. The children inodes +// might be directories themselves using OrderedChildren; hence extending the +// tree. The parent inode (OrderedChildren user) holds a ref on all its static +// children. This lets the static inodes outlive their associated dentry. +// While the dentry might have to be regenerated via a Lookup() call, we can +// keep reusing the same static inode. These static children inodes are finally +// DecRef'd when this directory inode is being destroyed. This makes +// OrderedChildren suitable for static directory entries as well. // // Must be initialize with Init before first use. // @@ -388,33 +370,63 @@ func (o *OrderedChildren) Init(opts OrderedChildrenOptions) { // Destroy clears the children stored in o. It should be called by structs // embedding OrderedChildren upon destruction, i.e. when their reference count // reaches zero. -func (o *OrderedChildren) Destroy() { +func (o *OrderedChildren) Destroy(ctx context.Context) { o.mu.Lock() defer o.mu.Unlock() + // Drop the ref that o owns on the static inodes it holds. 
+ for _, s := range o.set { + if s.static { + s.inode.DecRef(ctx) + } + } o.order.Reset() o.set = nil } -// Populate inserts children into this OrderedChildren, and d's dentry -// cache. Populate returns the number of directories inserted, which the caller +// Populate inserts static children into this OrderedChildren. +// Populate returns the number of directories inserted, which the caller // may use to update the link count for the parent directory. // -// Precondition: d must represent a directory inode. children must not contain -// any conflicting entries already in o. -func (o *OrderedChildren) Populate(d *Dentry, children map[string]*Dentry) uint32 { +// Precondition: +// * d must represent a directory inode. +// * children must not contain any conflicting entries already in o. +// * Caller must hold a reference on all inodes passed. +// +// Postcondition: Caller's references on inodes are transferred to o. +func (o *OrderedChildren) Populate(children map[string]Inode) uint32 { var links uint32 for name, child := range children { - if child.isDir() { + if child.Mode().IsDir() { links++ } - if err := o.Insert(name, child); err != nil { - panic(fmt.Sprintf("Collision when attempting to insert child %q (%+v) into %+v", name, child, d)) + if err := o.insert(name, child, true); err != nil { + panic(fmt.Sprintf("Collision when attempting to insert child %q (%+v)", name, child)) } - d.InsertChild(name, child) } return links } +// Lookup implements Inode.Lookup. +func (o *OrderedChildren) Lookup(ctx context.Context, name string) (Inode, error) { + o.mu.RLock() + defer o.mu.RUnlock() + + s, ok := o.set[name] + if !ok { + return nil, syserror.ENOENT + } + + s.inode.IncRef() // This ref is passed to the dentry upon creation via Init. + return s.inode, nil +} + +// IterDirents implements Inode.IterDirents. 
+func (o *OrderedChildren) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (newOffset int64, err error) { + // All entries from OrderedChildren have already been handled in + // GenericDirectoryFD.IterDirents. + return offset, nil +} + // HasChildren implements Inode.HasChildren. func (o *OrderedChildren) HasChildren() bool { o.mu.RLock() @@ -422,17 +434,27 @@ func (o *OrderedChildren) HasChildren() bool { return len(o.set) > 0 } -// Insert inserts child into o. This ignores the writability of o, as this is -// not part of the vfs.FilesystemImpl interface, and is a lower-level operation. -func (o *OrderedChildren) Insert(name string, child *Dentry) error { +// Insert inserts a dynamic child into o. This ignores the writability of o, as +// this is not part of the vfs.FilesystemImpl interface, and is a lower-level operation. +func (o *OrderedChildren) Insert(name string, child Inode) error { + return o.insert(name, child, false) +} + +// insert inserts child into o. +// +// Precondition: Caller must be holding a ref on child if static is true. +// +// Postcondition: Caller's ref on child is transferred to o if static is true. +func (o *OrderedChildren) insert(name string, child Inode, static bool) error { o.mu.Lock() defer o.mu.Unlock() if _, ok := o.set[name]; ok { return syserror.EEXIST } s := &slot{ - Name: name, - Dentry: child, + name: name, + inode: child, + static: static, } o.order.PushBack(s) o.set[name] = s @@ -442,44 +464,49 @@ func (o *OrderedChildren) Insert(name string, child *Dentry) error { // Precondition: caller must hold o.mu for writing. func (o *OrderedChildren) removeLocked(name string) { if s, ok := o.set[name]; ok { + if s.static { + panic(fmt.Sprintf("removeLocked called on a static inode: %v", s.inode)) + } delete(o.set, name) o.order.Remove(s) } } // Precondition: caller must hold o.mu for writing. 
-func (o *OrderedChildren) replaceChildLocked(name string, new *Dentry) *Dentry { +func (o *OrderedChildren) replaceChildLocked(ctx context.Context, name string, newI Inode) { if s, ok := o.set[name]; ok { + if s.static { + panic(fmt.Sprintf("replacing a static inode: %v", s.inode)) + } + // Existing slot with given name, simply replace the dentry. - var old *Dentry - old, s.Dentry = s.Dentry, new - return old + s.inode = newI } // No existing slot with given name, create and hash new slot. s := &slot{ - Name: name, - Dentry: new, + name: name, + inode: newI, + static: false, } o.order.PushBack(s) o.set[name] = s - return nil } // Precondition: caller must hold o.mu for reading or writing. -func (o *OrderedChildren) checkExistingLocked(name string, child *Dentry) error { +func (o *OrderedChildren) checkExistingLocked(name string, child Inode) error { s, ok := o.set[name] if !ok { return syserror.ENOENT } - if s.Dentry != child { - panic(fmt.Sprintf("Dentry hashed into inode doesn't match what vfs thinks! OrderedChild: %+v, vfs: %+v", s.Dentry, child)) + if s.inode != child { + panic(fmt.Sprintf("Inode doesn't match what kernfs thinks! OrderedChild: %+v, kernfs: %+v", s.inode, child)) } return nil } // Unlink implements Inode.Unlink. -func (o *OrderedChildren) Unlink(ctx context.Context, name string, child *Dentry) error { +func (o *OrderedChildren) Unlink(ctx context.Context, name string, child Inode) error { if !o.writable { return syserror.EPERM } @@ -494,8 +521,8 @@ func (o *OrderedChildren) Unlink(ctx context.Context, name string, child *Dentry return nil } -// Rmdir implements Inode.Rmdir. -func (o *OrderedChildren) RmDir(ctx context.Context, name string, child *Dentry) error { +// RmDir implements Inode.RmDir. +func (o *OrderedChildren) RmDir(ctx context.Context, name string, child Inode) error { // We're not responsible for checking that child is a directory, that it's // empty, or updating any link counts; so this is the same as unlink. 
return o.Unlink(ctx, name, child) @@ -517,13 +544,13 @@ func (renameAcrossDifferentImplementationsError) Error() string { // that will support Rename. // // Postcondition: reference on any replaced dentry transferred to caller. -func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, child, dstDir *Dentry) (*Dentry, error) { - dst, ok := dstDir.inode.(interface{}).(*OrderedChildren) +func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, child, dstDir Inode) error { + dst, ok := dstDir.(interface{}).(*OrderedChildren) if !ok { - return nil, renameAcrossDifferentImplementationsError{} + return renameAcrossDifferentImplementationsError{} } if !o.writable || !dst.writable { - return nil, syserror.EPERM + return syserror.EPERM } // Note: There's a potential deadlock below if concurrent calls to Rename // refer to the same src and dst directories in reverse. We avoid any @@ -536,12 +563,12 @@ func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, c defer dst.mu.Unlock() } if err := o.checkExistingLocked(oldname, child); err != nil { - return nil, err + return err } // TODO(gvisor.dev/issue/3027): Check sticky bit before removing. - replaced := dst.replaceChildLocked(newname, child) - return replaced, nil + dst.replaceChildLocked(ctx, newname, child) + return nil } // nthLocked returns an iterator to the nth child tracked by this object. The @@ -576,11 +603,12 @@ func (InodeSymlink) Open(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, // // +stateify savable type StaticDirectory struct { + InodeAlwaysValid InodeAttrs InodeDirectoryNoNewChildren - InodeNoDynamicLookup InodeNoStatFS InodeNotSymlink + InodeTemporary OrderedChildren StaticDirectoryRefs @@ -591,19 +619,16 @@ type StaticDirectory struct { var _ Inode = (*StaticDirectory)(nil) // NewStaticDir creates a new static directory and returns its dentry. 
-func NewStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode, children map[string]*Dentry, fdOpts GenericDirectoryFDOptions) *Dentry { +func NewStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode, children map[string]Inode, fdOpts GenericDirectoryFDOptions) Inode { inode := &StaticDirectory{} inode.Init(creds, devMajor, devMinor, ino, perm, fdOpts) inode.EnableLeakCheck() - dentry := &Dentry{} - dentry.Init(inode) - inode.OrderedChildren.Init(OrderedChildrenOptions{}) - links := inode.OrderedChildren.Populate(dentry, children) + links := inode.OrderedChildren.Populate(children) inode.IncLinks(links) - return dentry + return inode } // Init initializes StaticDirectory. @@ -615,7 +640,7 @@ func (s *StaticDirectory) Init(creds *auth.Credentials, devMajor, devMinor uint3 s.InodeAttrs.Init(creds, devMajor, devMinor, ino, linux.ModeDirectory|perm) } -// Open implements kernfs.Inode.Open. +// Open implements Inode.Open. func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) { fd, err := NewGenericDirectoryFD(rp.Mount(), d, &s.OrderedChildren, &s.locks, &opts, s.fdOpts) if err != nil { @@ -624,26 +649,36 @@ func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, d *De return fd.VFSFileDescription(), nil } -// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed. +// SetStat implements Inode.SetStat not allowing inode attributes to be changed. func (*StaticDirectory) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error { return syserror.EPERM } -// DecRef implements kernfs.Inode.DecRef. -func (s *StaticDirectory) DecRef(context.Context) { - s.StaticDirectoryRefs.DecRef(s.Destroy) +// DecRef implements Inode.DecRef. 
+func (s *StaticDirectory) DecRef(ctx context.Context) { + s.StaticDirectoryRefs.DecRef(func() { s.Destroy(ctx) }) } -// AlwaysValid partially implements kernfs.inodeDynamicLookup. +// InodeAlwaysValid partially implements Inode. // // +stateify savable -type AlwaysValid struct{} +type InodeAlwaysValid struct{} -// Valid implements kernfs.inodeDynamicLookup.Valid. -func (*AlwaysValid) Valid(context.Context) bool { +// Valid implements Inode.Valid. +func (*InodeAlwaysValid) Valid(context.Context) bool { return true } +// InodeTemporary partially implements Inode. +// +// +stateify savable +type InodeTemporary struct{} + +// Keep implements Inode.Keep. +func (*InodeTemporary) Keep() bool { + return false +} + // InodeNoStatFS partially implements the Inode interface, where the client // filesystem doesn't support statfs(2). // diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go index 6d3d79333..606081e68 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs.go @@ -29,12 +29,16 @@ // // Reference Model: // -// Kernfs dentries represents named pointers to inodes. Dentries and inodes have +// Kernfs dentries represents named pointers to inodes. Kernfs is solely +// responsible for maintaining and modifying its dentry tree; inode +// implementations can not access the tree. Dentries and inodes have // independent lifetimes and reference counts. A child dentry unconditionally // holds a reference on its parent directory's dentry. A dentry also holds a -// reference on the inode it points to. Multiple dentries can point to the same -// inode (for example, in the case of hardlinks). File descriptors hold a -// reference to the dentry they're opened on. +// reference on the inode it points to (although that might not be the only +// reference on the inode). Due to this inodes can outlive the dentries that +// point to them. 
Multiple dentries can point to the same inode (for example, +// in the case of hardlinks). File descriptors hold a reference to the dentry +// they're opened on. // // Dentries are guaranteed to exist while holding Filesystem.mu for // reading. Dropping dentries require holding Filesystem.mu for writing. To @@ -47,8 +51,8 @@ // kernfs.Dentry.dirMu // vfs.VirtualFilesystem.mountMu // vfs.Dentry.mu -// kernfs.Filesystem.droppedDentriesMu // (inode implementation locks, if any) +// kernfs.Filesystem.droppedDentriesMu package kernfs import ( @@ -60,7 +64,6 @@ import ( "gvisor.dev/gvisor/pkg/sentry/kernel/auth" "gvisor.dev/gvisor/pkg/sentry/vfs" "gvisor.dev/gvisor/pkg/sync" - "gvisor.dev/gvisor/pkg/syserror" ) // Filesystem mostly implements vfs.FilesystemImpl for a generic in-memory @@ -95,7 +98,7 @@ type Filesystem struct { // example: // // fs.mu.RLock() - // fs.mu.processDeferredDecRefs() + // defer fs.processDeferredDecRefs() // defer fs.mu.RUnlock() // ... // fs.deferDecRef(dentry) @@ -108,8 +111,7 @@ type Filesystem struct { // deferDecRef defers dropping a dentry ref until the next call to // processDeferredDecRefs{,Locked}. See comment on Filesystem.mu. -// -// Precondition: d must not already be pending destruction. +// This may be called while Filesystem.mu or Dentry.dirMu is locked. func (fs *Filesystem) deferDecRef(d *Dentry) { fs.droppedDentriesMu.Lock() fs.droppedDentries = append(fs.droppedDentries, d) @@ -118,17 +120,14 @@ func (fs *Filesystem) deferDecRef(d *Dentry) { // processDeferredDecRefs calls vfs.Dentry.DecRef on all dentries in the // droppedDentries list. See comment on Filesystem.mu. +// +// Precondition: Filesystem.mu or Dentry.dirMu must NOT be locked. func (fs *Filesystem) processDeferredDecRefs(ctx context.Context) { - fs.mu.Lock() - fs.processDeferredDecRefsLocked(ctx) - fs.mu.Unlock() -} - -// Precondition: fs.mu must be held for writing. 
-func (fs *Filesystem) processDeferredDecRefsLocked(ctx context.Context) { fs.droppedDentriesMu.Lock() for _, d := range fs.droppedDentries { - d.DecRef(ctx) + // Defer the DecRef call so that we are not holding droppedDentriesMu + // when DecRef is called. + defer d.DecRef(ctx) } fs.droppedDentries = fs.droppedDentries[:0] // Keep slice memory for reuse. fs.droppedDentriesMu.Unlock() @@ -157,17 +156,19 @@ const ( // // A kernfs dentry is similar to a dentry in a traditional filesystem: it's a // named reference to an inode. A dentry generally lives as long as it's part of -// a mounted filesystem tree. Kernfs doesn't cache dentries once all references -// to them are removed. Dentries hold a single reference to the inode they point +// a mounted filesystem tree. Kernfs drops dentries once all references to them +// are dropped. Dentries hold a single reference to the inode they point // to, and child dentries hold a reference on their parent. // // Must be initialized by Init prior to first use. // // +stateify savable type Dentry struct { + vfsd vfs.Dentry DentryRefs - vfsd vfs.Dentry + // fs is the owning filesystem. fs is immutable. + fs *Filesystem // flags caches useful information about the dentry from the inode. See the // dflags* consts above. Must be accessed by atomic ops. @@ -192,8 +193,9 @@ type Dentry struct { // Precondition: Caller must hold a reference on inode. // // Postcondition: Caller's reference on inode is transferred to the dentry. -func (d *Dentry) Init(inode Inode) { +func (d *Dentry) Init(fs *Filesystem, inode Inode) { d.vfsd.Init(d) + d.fs = fs d.inode = inode ftype := inode.Mode().FileType() if ftype == linux.ModeDirectory { @@ -222,14 +224,28 @@ func (d *Dentry) isSymlink() bool { // DecRef implements vfs.DentryImpl.DecRef. func (d *Dentry) DecRef(ctx context.Context) { - // Before the destructor is called, Dentry must be removed from VFS' dentry cache. 
+ decRefParent := false + d.fs.mu.Lock() d.DentryRefs.DecRef(func() { d.inode.DecRef(ctx) // IncRef from Init. d.inode = nil if d.parent != nil { - d.parent.DecRef(ctx) // IncRef from Dentry.InsertChild. + // We will DecRef d.parent once all locks are dropped. + decRefParent = true + d.parent.dirMu.Lock() + // Remove d from parent.children. It might already have been + // removed due to invalidation. + if _, ok := d.parent.children[d.name]; ok { + delete(d.parent.children, d.name) + d.fs.VFSFilesystem().VirtualFilesystem().InvalidateDentry(ctx, d.VFSDentry()) + } + d.parent.dirMu.Unlock() } }) + d.fs.mu.Unlock() + if decRefParent { + d.parent.DecRef(ctx) // IncRef from Dentry.insertChild. + } } // InotifyWithParent implements vfs.DentryImpl.InotifyWithParent. @@ -247,26 +263,26 @@ func (d *Dentry) Watches() *vfs.Watches { // OnZeroWatches implements vfs.Dentry.OnZeroWatches. func (d *Dentry) OnZeroWatches(context.Context) {} -// InsertChild inserts child into the vfs dentry cache with the given name under +// insertChild inserts child into the vfs dentry cache with the given name under // this dentry. This does not update the directory inode, so calling this on its // own isn't sufficient to insert a child into a directory. // // Precondition: d must represent a directory inode. -func (d *Dentry) InsertChild(name string, child *Dentry) { +func (d *Dentry) insertChild(name string, child *Dentry) { d.dirMu.Lock() - d.InsertChildLocked(name, child) + d.insertChildLocked(name, child) d.dirMu.Unlock() } -// InsertChildLocked is equivalent to InsertChild, with additional +// insertChildLocked is equivalent to insertChild, with additional // preconditions. // // Preconditions: // * d must represent a directory inode. // * d.dirMu must be locked. 
-func (d *Dentry) InsertChildLocked(name string, child *Dentry) { +func (d *Dentry) insertChildLocked(name string, child *Dentry) { if !d.isDir() { - panic(fmt.Sprintf("InsertChildLocked called on non-directory Dentry: %+v.", d)) + panic(fmt.Sprintf("insertChildLocked called on non-directory Dentry: %+v.", d)) } d.IncRef() // DecRef in child's Dentry.destroy. child.parent = d @@ -277,36 +293,6 @@ func (d *Dentry) InsertChildLocked(name string, child *Dentry) { d.children[name] = child } -// RemoveChild removes child from the vfs dentry cache. This does not update the -// directory inode or modify the inode to be unlinked. So calling this on its own -// isn't sufficient to remove a child from a directory. -// -// Precondition: d must represent a directory inode. -func (d *Dentry) RemoveChild(name string, child *Dentry) error { - d.dirMu.Lock() - defer d.dirMu.Unlock() - return d.RemoveChildLocked(name, child) -} - -// RemoveChildLocked is equivalent to RemoveChild, with additional -// preconditions. -// -// Precondition: d.dirMu must be locked. -func (d *Dentry) RemoveChildLocked(name string, child *Dentry) error { - if !d.isDir() { - panic(fmt.Sprintf("RemoveChild called on non-directory Dentry: %+v.", d)) - } - c, ok := d.children[name] - if !ok { - return syserror.ENOENT - } - if c != child { - panic(fmt.Sprintf("Dentry hashed into inode doesn't match what vfs thinks! Child: %+v, vfs: %+v", c, child)) - } - delete(d.children, name) - return nil -} - // Inode returns the dentry's inode. func (d *Dentry) Inode() Inode { return d.inode @@ -348,11 +334,6 @@ type Inode interface { // a blanket implementation for all non-directory inodes. inodeDirectory - // Method for inodes that represent dynamic directories and their - // children. InodeNoDynamicLookup provides a blanket implementation for all - // non-dynamic-directory inodes. - inodeDynamicLookup - // Open creates a file description for the filesystem object represented by // this inode. 
The returned file description should hold a reference on the // dentry for its lifetime. @@ -365,6 +346,14 @@ type Inode interface { // corresponds to vfs.FilesystemImpl.StatFSAt. If the client filesystem // doesn't support statfs(2), this should return ENOSYS. StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error) + + // Keep indicates whether the dentry created after Inode.Lookup should be + // kept in the kernfs dentry tree. + Keep() bool + + // Valid should return true if this inode is still valid, or needs to + // be resolved again by a call to Lookup. + Valid(ctx context.Context) bool } type inodeRefs interface { @@ -397,8 +386,8 @@ type inodeMetadata interface { // Precondition: All methods in this interface may only be called on directory // inodes. type inodeDirectory interface { - // The New{File,Dir,Node,Symlink} methods below should return a new inode - // hashed into this inode. + // The New{File,Dir,Node,Link,Symlink} methods below should return a new inode + // that will be hashed into the dentry tree. // // These inode constructors are inode-level operations rather than // filesystem-level operations to allow client filesystems to mix different @@ -409,60 +398,54 @@ type inodeDirectory interface { HasChildren() bool // NewFile creates a new regular file inode. - NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*Dentry, error) + NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (Inode, error) // NewDir creates a new directory inode. - NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*Dentry, error) + NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (Inode, error) // NewLink creates a new hardlink to a specified inode in this // directory. Implementations should create a new kernfs Dentry pointing to // target, and update target's link count. 
- NewLink(ctx context.Context, name string, target Inode) (*Dentry, error) + NewLink(ctx context.Context, name string, target Inode) (Inode, error) // NewSymlink creates a new symbolic link inode. - NewSymlink(ctx context.Context, name, target string) (*Dentry, error) + NewSymlink(ctx context.Context, name, target string) (Inode, error) // NewNode creates a new filesystem node for a mknod syscall. - NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*Dentry, error) + NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (Inode, error) // Unlink removes a child dentry from this directory inode. - Unlink(ctx context.Context, name string, child *Dentry) error + Unlink(ctx context.Context, name string, child Inode) error // RmDir removes an empty child directory from this directory // inode. Implementations must update the parent directory's link count, // if required. Implementations are not responsible for checking that child // is a directory, checking for an empty directory. - RmDir(ctx context.Context, name string, child *Dentry) error + RmDir(ctx context.Context, name string, child Inode) error // Rename is called on the source directory containing an inode being // renamed. child should point to the resolved child in the source - // directory. If Rename replaces a dentry in the destination directory, it - // should return the replaced dentry or nil otherwise. + // directory. // // Precondition: Caller must serialize concurrent calls to Rename. - Rename(ctx context.Context, oldname, newname string, child, dstDir *Dentry) (replaced *Dentry, err error) -} + Rename(ctx context.Context, oldname, newname string, child, dstDir Inode) error -type inodeDynamicLookup interface { - // Lookup should return an appropriate dentry if name should resolve to a - // child of this dynamic directory inode. This gives the directory an - // opportunity on every lookup to resolve additional entries that aren't - // hashed into the directory. 
This is only called when the inode is a - // directory. If the inode is not a directory, or if the directory only - // contains a static set of children, the implementer can unconditionally - // return an appropriate error (ENOTDIR and ENOENT respectively). + // Lookup should return an appropriate inode if name should resolve to a + // child of this directory inode. This gives the directory an opportunity + // on every lookup to resolve additional entries. This is only called when + // the inode is a directory. // - // The child returned by Lookup will be hashed into the VFS dentry tree. Its - // lifetime can be controlled by the filesystem implementation with an - // appropriate implementation of Valid. + // The child returned by Lookup will be hashed into the VFS dentry tree, + // atleast for the duration of the current FS operation. // - // Lookup returns the child with an extra reference and the caller owns this - // reference. - Lookup(ctx context.Context, name string) (*Dentry, error) - - // Valid should return true if this inode is still valid, or needs to - // be resolved again by a call to Lookup. - Valid(ctx context.Context) bool + // Lookup must return the child with an extra reference whose ownership is + // transferred to the dentry that is created to point to that inode. If + // Inode.Keep returns false, that new dentry will be dropped at the end of + // the current filesystem operation (before returning back to the VFS + // layer) if no other ref is picked on that dentry. If Inode.Keep returns + // true, then the dentry will be cached into the dentry tree until it is + // Unlink'd or RmDir'd. + Lookup(ctx context.Context, name string) (Inode, error) // IterDirents is used to iterate over dynamically created entries. It invokes // cb on each entry in the directory represented by the Inode. 
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go b/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go index 7d9420725..f87782ee1 100644 --- a/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go +++ b/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go @@ -182,18 +182,22 @@ func (i *InodeNoopRefCount) StateTypeName() string { } func (i *InodeNoopRefCount) StateFields() []string { - return []string{} + return []string{ + "InodeTemporary", + } } func (i *InodeNoopRefCount) beforeSave() {} func (i *InodeNoopRefCount) StateSave(stateSinkObject state.Sink) { i.beforeSave() + stateSinkObject.Save(0, &i.InodeTemporary) } func (i *InodeNoopRefCount) afterLoad() {} func (i *InodeNoopRefCount) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.InodeTemporary) } func (i *InodeDirectoryNoNewChildren) StateTypeName() string { @@ -220,37 +224,22 @@ func (i *InodeNotDirectory) StateTypeName() string { } func (i *InodeNotDirectory) StateFields() []string { - return []string{} + return []string{ + "InodeAlwaysValid", + } } func (i *InodeNotDirectory) beforeSave() {} func (i *InodeNotDirectory) StateSave(stateSinkObject state.Sink) { i.beforeSave() + stateSinkObject.Save(0, &i.InodeAlwaysValid) } func (i *InodeNotDirectory) afterLoad() {} func (i *InodeNotDirectory) StateLoad(stateSourceObject state.Source) { -} - -func (i *InodeNoDynamicLookup) StateTypeName() string { - return "pkg/sentry/fsimpl/kernfs.InodeNoDynamicLookup" -} - -func (i *InodeNoDynamicLookup) StateFields() []string { - return []string{} -} - -func (i *InodeNoDynamicLookup) beforeSave() {} - -func (i *InodeNoDynamicLookup) StateSave(stateSinkObject state.Sink) { - i.beforeSave() -} - -func (i *InodeNoDynamicLookup) afterLoad() {} - -func (i *InodeNoDynamicLookup) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &i.InodeAlwaysValid) } func (i *InodeNotSymlink) StateTypeName() string { @@ -319,8 +308,9 @@ func (s *slot) StateTypeName() string { func (s *slot) 
StateFields() []string { return []string{ - "Name", - "Dentry", + "name", + "inode", + "static", "slotEntry", } } @@ -329,17 +319,19 @@ func (s *slot) beforeSave() {} func (s *slot) StateSave(stateSinkObject state.Sink) { s.beforeSave() - stateSinkObject.Save(0, &s.Name) - stateSinkObject.Save(1, &s.Dentry) - stateSinkObject.Save(2, &s.slotEntry) + stateSinkObject.Save(0, &s.name) + stateSinkObject.Save(1, &s.inode) + stateSinkObject.Save(2, &s.static) + stateSinkObject.Save(3, &s.slotEntry) } func (s *slot) afterLoad() {} func (s *slot) StateLoad(stateSourceObject state.Source) { - stateSourceObject.Load(0, &s.Name) - stateSourceObject.Load(1, &s.Dentry) - stateSourceObject.Load(2, &s.slotEntry) + stateSourceObject.Load(0, &s.name) + stateSourceObject.Load(1, &s.inode) + stateSourceObject.Load(2, &s.static) + stateSourceObject.Load(3, &s.slotEntry) } func (o *OrderedChildrenOptions) StateTypeName() string { @@ -442,11 +434,12 @@ func (s *StaticDirectory) StateTypeName() string { func (s *StaticDirectory) StateFields() []string { return []string{ + "InodeAlwaysValid", "InodeAttrs", "InodeDirectoryNoNewChildren", - "InodeNoDynamicLookup", "InodeNoStatFS", "InodeNotSymlink", + "InodeTemporary", "OrderedChildren", "StaticDirectoryRefs", "locks", @@ -458,48 +451,69 @@ func (s *StaticDirectory) beforeSave() {} func (s *StaticDirectory) StateSave(stateSinkObject state.Sink) { s.beforeSave() - stateSinkObject.Save(0, &s.InodeAttrs) - stateSinkObject.Save(1, &s.InodeDirectoryNoNewChildren) - stateSinkObject.Save(2, &s.InodeNoDynamicLookup) + stateSinkObject.Save(0, &s.InodeAlwaysValid) + stateSinkObject.Save(1, &s.InodeAttrs) + stateSinkObject.Save(2, &s.InodeDirectoryNoNewChildren) stateSinkObject.Save(3, &s.InodeNoStatFS) stateSinkObject.Save(4, &s.InodeNotSymlink) - stateSinkObject.Save(5, &s.OrderedChildren) - stateSinkObject.Save(6, &s.StaticDirectoryRefs) - stateSinkObject.Save(7, &s.locks) - stateSinkObject.Save(8, &s.fdOpts) + stateSinkObject.Save(5, 
&s.InodeTemporary) + stateSinkObject.Save(6, &s.OrderedChildren) + stateSinkObject.Save(7, &s.StaticDirectoryRefs) + stateSinkObject.Save(8, &s.locks) + stateSinkObject.Save(9, &s.fdOpts) } func (s *StaticDirectory) afterLoad() {} func (s *StaticDirectory) StateLoad(stateSourceObject state.Source) { - stateSourceObject.Load(0, &s.InodeAttrs) - stateSourceObject.Load(1, &s.InodeDirectoryNoNewChildren) - stateSourceObject.Load(2, &s.InodeNoDynamicLookup) + stateSourceObject.Load(0, &s.InodeAlwaysValid) + stateSourceObject.Load(1, &s.InodeAttrs) + stateSourceObject.Load(2, &s.InodeDirectoryNoNewChildren) stateSourceObject.Load(3, &s.InodeNoStatFS) stateSourceObject.Load(4, &s.InodeNotSymlink) - stateSourceObject.Load(5, &s.OrderedChildren) - stateSourceObject.Load(6, &s.StaticDirectoryRefs) - stateSourceObject.Load(7, &s.locks) - stateSourceObject.Load(8, &s.fdOpts) + stateSourceObject.Load(5, &s.InodeTemporary) + stateSourceObject.Load(6, &s.OrderedChildren) + stateSourceObject.Load(7, &s.StaticDirectoryRefs) + stateSourceObject.Load(8, &s.locks) + stateSourceObject.Load(9, &s.fdOpts) } -func (a *AlwaysValid) StateTypeName() string { - return "pkg/sentry/fsimpl/kernfs.AlwaysValid" +func (i *InodeAlwaysValid) StateTypeName() string { + return "pkg/sentry/fsimpl/kernfs.InodeAlwaysValid" } -func (a *AlwaysValid) StateFields() []string { +func (i *InodeAlwaysValid) StateFields() []string { return []string{} } -func (a *AlwaysValid) beforeSave() {} +func (i *InodeAlwaysValid) beforeSave() {} -func (a *AlwaysValid) StateSave(stateSinkObject state.Sink) { - a.beforeSave() +func (i *InodeAlwaysValid) StateSave(stateSinkObject state.Sink) { + i.beforeSave() } -func (a *AlwaysValid) afterLoad() {} +func (i *InodeAlwaysValid) afterLoad() {} -func (a *AlwaysValid) StateLoad(stateSourceObject state.Source) { +func (i *InodeAlwaysValid) StateLoad(stateSourceObject state.Source) { +} + +func (i *InodeTemporary) StateTypeName() string { + return 
"pkg/sentry/fsimpl/kernfs.InodeTemporary" +} + +func (i *InodeTemporary) StateFields() []string { + return []string{} +} + +func (i *InodeTemporary) beforeSave() {} + +func (i *InodeTemporary) StateSave(stateSinkObject state.Sink) { + i.beforeSave() +} + +func (i *InodeTemporary) afterLoad() {} + +func (i *InodeTemporary) StateLoad(stateSourceObject state.Source) { } func (i *InodeNoStatFS) StateTypeName() string { @@ -556,8 +570,9 @@ func (d *Dentry) StateTypeName() string { func (d *Dentry) StateFields() []string { return []string{ - "DentryRefs", "vfsd", + "DentryRefs", + "fs", "flags", "parent", "name", @@ -570,25 +585,27 @@ func (d *Dentry) beforeSave() {} func (d *Dentry) StateSave(stateSinkObject state.Sink) { d.beforeSave() - stateSinkObject.Save(0, &d.DentryRefs) - stateSinkObject.Save(1, &d.vfsd) - stateSinkObject.Save(2, &d.flags) - stateSinkObject.Save(3, &d.parent) - stateSinkObject.Save(4, &d.name) - stateSinkObject.Save(5, &d.children) - stateSinkObject.Save(6, &d.inode) + stateSinkObject.Save(0, &d.vfsd) + stateSinkObject.Save(1, &d.DentryRefs) + stateSinkObject.Save(2, &d.fs) + stateSinkObject.Save(3, &d.flags) + stateSinkObject.Save(4, &d.parent) + stateSinkObject.Save(5, &d.name) + stateSinkObject.Save(6, &d.children) + stateSinkObject.Save(7, &d.inode) } func (d *Dentry) afterLoad() {} func (d *Dentry) StateLoad(stateSourceObject state.Source) { - stateSourceObject.Load(0, &d.DentryRefs) - stateSourceObject.Load(1, &d.vfsd) - stateSourceObject.Load(2, &d.flags) - stateSourceObject.Load(3, &d.parent) - stateSourceObject.Load(4, &d.name) - stateSourceObject.Load(5, &d.children) - stateSourceObject.Load(6, &d.inode) + stateSourceObject.Load(0, &d.vfsd) + stateSourceObject.Load(1, &d.DentryRefs) + stateSourceObject.Load(2, &d.fs) + stateSourceObject.Load(3, &d.flags) + stateSourceObject.Load(4, &d.parent) + stateSourceObject.Load(5, &d.name) + stateSourceObject.Load(6, &d.children) + stateSourceObject.Load(7, &d.inode) } func (l *slotList) 
StateTypeName() string { @@ -707,12 +724,12 @@ func (dir *syntheticDirectory) StateTypeName() string { func (dir *syntheticDirectory) StateFields() []string { return []string{ + "InodeAlwaysValid", "InodeAttrs", "InodeNoStatFS", - "InodeNoopRefCount", - "InodeNoDynamicLookup", "InodeNotSymlink", "OrderedChildren", + "syntheticDirectoryRefs", "locks", } } @@ -721,27 +738,50 @@ func (dir *syntheticDirectory) beforeSave() {} func (dir *syntheticDirectory) StateSave(stateSinkObject state.Sink) { dir.beforeSave() - stateSinkObject.Save(0, &dir.InodeAttrs) - stateSinkObject.Save(1, &dir.InodeNoStatFS) - stateSinkObject.Save(2, &dir.InodeNoopRefCount) - stateSinkObject.Save(3, &dir.InodeNoDynamicLookup) - stateSinkObject.Save(4, &dir.InodeNotSymlink) - stateSinkObject.Save(5, &dir.OrderedChildren) + stateSinkObject.Save(0, &dir.InodeAlwaysValid) + stateSinkObject.Save(1, &dir.InodeAttrs) + stateSinkObject.Save(2, &dir.InodeNoStatFS) + stateSinkObject.Save(3, &dir.InodeNotSymlink) + stateSinkObject.Save(4, &dir.OrderedChildren) + stateSinkObject.Save(5, &dir.syntheticDirectoryRefs) stateSinkObject.Save(6, &dir.locks) } func (dir *syntheticDirectory) afterLoad() {} func (dir *syntheticDirectory) StateLoad(stateSourceObject state.Source) { - stateSourceObject.Load(0, &dir.InodeAttrs) - stateSourceObject.Load(1, &dir.InodeNoStatFS) - stateSourceObject.Load(2, &dir.InodeNoopRefCount) - stateSourceObject.Load(3, &dir.InodeNoDynamicLookup) - stateSourceObject.Load(4, &dir.InodeNotSymlink) - stateSourceObject.Load(5, &dir.OrderedChildren) + stateSourceObject.Load(0, &dir.InodeAlwaysValid) + stateSourceObject.Load(1, &dir.InodeAttrs) + stateSourceObject.Load(2, &dir.InodeNoStatFS) + stateSourceObject.Load(3, &dir.InodeNotSymlink) + stateSourceObject.Load(4, &dir.OrderedChildren) + stateSourceObject.Load(5, &dir.syntheticDirectoryRefs) stateSourceObject.Load(6, &dir.locks) } +func (r *syntheticDirectoryRefs) StateTypeName() string { + return 
"pkg/sentry/fsimpl/kernfs.syntheticDirectoryRefs" +} + +func (r *syntheticDirectoryRefs) StateFields() []string { + return []string{ + "refCount", + } +} + +func (r *syntheticDirectoryRefs) beforeSave() {} + +func (r *syntheticDirectoryRefs) StateSave(stateSinkObject state.Sink) { + r.beforeSave() + stateSinkObject.Save(0, &r.refCount) +} + +func (r *syntheticDirectoryRefs) afterLoad() {} + +func (r *syntheticDirectoryRefs) StateLoad(stateSourceObject state.Source) { + stateSourceObject.Load(0, &r.refCount) +} + func init() { state.Register((*DentryRefs)(nil)) state.Register((*DynamicBytesFile)(nil)) @@ -752,7 +792,6 @@ func init() { state.Register((*InodeNoopRefCount)(nil)) state.Register((*InodeDirectoryNoNewChildren)(nil)) state.Register((*InodeNotDirectory)(nil)) - state.Register((*InodeNoDynamicLookup)(nil)) state.Register((*InodeNotSymlink)(nil)) state.Register((*InodeAttrs)(nil)) state.Register((*slot)(nil)) @@ -761,7 +800,8 @@ func init() { state.Register((*renameAcrossDifferentImplementationsError)(nil)) state.Register((*InodeSymlink)(nil)) state.Register((*StaticDirectory)(nil)) - state.Register((*AlwaysValid)(nil)) + state.Register((*InodeAlwaysValid)(nil)) + state.Register((*InodeTemporary)(nil)) state.Register((*InodeNoStatFS)(nil)) state.Register((*Filesystem)(nil)) state.Register((*Dentry)(nil)) @@ -770,4 +810,5 @@ func init() { state.Register((*StaticDirectoryRefs)(nil)) state.Register((*StaticSymlink)(nil)) state.Register((*syntheticDirectory)(nil)) + state.Register((*syntheticDirectoryRefs)(nil)) } diff --git a/pkg/sentry/fsimpl/kernfs/symlink.go b/pkg/sentry/fsimpl/kernfs/symlink.go index 58a93eaac..934cc6c9e 100644 --- a/pkg/sentry/fsimpl/kernfs/symlink.go +++ b/pkg/sentry/fsimpl/kernfs/symlink.go @@ -38,13 +38,10 @@ type StaticSymlink struct { var _ Inode = (*StaticSymlink)(nil) // NewStaticSymlink creates a new symlink file pointing to 'target'. 
-func NewStaticSymlink(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, target string) *Dentry { +func NewStaticSymlink(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, target string) Inode { inode := &StaticSymlink{} inode.Init(creds, devMajor, devMinor, ino, target) - - d := &Dentry{} - d.Init(inode) - return d + return inode } // Init initializes the instance. diff --git a/pkg/sentry/fsimpl/kernfs/synthetic_directory.go b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go index ea7f073eb..d0ed17b18 100644 --- a/pkg/sentry/fsimpl/kernfs/synthetic_directory.go +++ b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go @@ -29,24 +29,22 @@ import ( // // +stateify savable type syntheticDirectory struct { + InodeAlwaysValid InodeAttrs InodeNoStatFS - InodeNoopRefCount - InodeNoDynamicLookup InodeNotSymlink OrderedChildren + syntheticDirectoryRefs locks vfs.FileLocks } var _ Inode = (*syntheticDirectory)(nil) -func newSyntheticDirectory(creds *auth.Credentials, perm linux.FileMode) *Dentry { +func newSyntheticDirectory(creds *auth.Credentials, perm linux.FileMode) Inode { inode := &syntheticDirectory{} inode.Init(creds, 0 /* devMajor */, 0 /* devMinor */, 0 /* ino */, perm) - d := &Dentry{} - d.Init(inode) - return d + return inode } func (dir *syntheticDirectory) Init(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode) { @@ -69,34 +67,46 @@ func (dir *syntheticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, } // NewFile implements Inode.NewFile. -func (dir *syntheticDirectory) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*Dentry, error) { +func (dir *syntheticDirectory) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (Inode, error) { return nil, syserror.EPERM } // NewDir implements Inode.NewDir. 
-func (dir *syntheticDirectory) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*Dentry, error) { +func (dir *syntheticDirectory) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (Inode, error) { if !opts.ForSyntheticMountpoint { return nil, syserror.EPERM } - subdird := newSyntheticDirectory(auth.CredentialsFromContext(ctx), opts.Mode&linux.PermissionsMask) - if err := dir.OrderedChildren.Insert(name, subdird); err != nil { - subdird.DecRef(ctx) + subdirI := newSyntheticDirectory(auth.CredentialsFromContext(ctx), opts.Mode&linux.PermissionsMask) + if err := dir.OrderedChildren.Insert(name, subdirI); err != nil { + subdirI.DecRef(ctx) return nil, err } - return subdird, nil + return subdirI, nil } // NewLink implements Inode.NewLink. -func (dir *syntheticDirectory) NewLink(ctx context.Context, name string, target Inode) (*Dentry, error) { +func (dir *syntheticDirectory) NewLink(ctx context.Context, name string, target Inode) (Inode, error) { return nil, syserror.EPERM } // NewSymlink implements Inode.NewSymlink. -func (dir *syntheticDirectory) NewSymlink(ctx context.Context, name, target string) (*Dentry, error) { +func (dir *syntheticDirectory) NewSymlink(ctx context.Context, name, target string) (Inode, error) { return nil, syserror.EPERM } // NewNode implements Inode.NewNode. -func (dir *syntheticDirectory) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*Dentry, error) { +func (dir *syntheticDirectory) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (Inode, error) { return nil, syserror.EPERM } + +// DecRef implements Inode.DecRef. +func (dir *syntheticDirectory) DecRef(ctx context.Context) { + dir.syntheticDirectoryRefs.DecRef(func() { dir.Destroy(ctx) }) +} + +// Keep implements Inode.Keep. This is redundant because inodes will never be +// created via Lookup and inodes are always valid. Makes sense to return true +// because these inodes are not temporary and should only be removed on RmDir. 
+func (dir *syntheticDirectory) Keep() bool { + return true +} diff --git a/pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go b/pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go new file mode 100644 index 000000000..28d556b42 --- /dev/null +++ b/pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go @@ -0,0 +1,118 @@ +package kernfs + +import ( + "fmt" + "runtime" + "sync/atomic" + + "gvisor.dev/gvisor/pkg/log" + refs_vfs1 "gvisor.dev/gvisor/pkg/refs" +) + +// ownerType is used to customize logging. Note that we use a pointer to T so +// that we do not copy the entire object when passed as a format parameter. +var syntheticDirectoryownerType *syntheticDirectory + +// Refs implements refs.RefCounter. It keeps a reference count using atomic +// operations and calls the destructor when the count reaches zero. +// +// Note that the number of references is actually refCount + 1 so that a default +// zero-value Refs object contains one reference. +// +// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in +// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount. +// This will allow us to add stack trace information to the leak messages +// without growing the size of Refs. +// +// +stateify savable +type syntheticDirectoryRefs struct { + // refCount is composed of two fields: + // + // [32-bit speculative references]:[32-bit real references] + // + // Speculative references are used for TryIncRef, to avoid a CompareAndSwap + // loop. See IncRef, DecRef and TryIncRef for details of how these fields are + // used. 
+ refCount int64 +} + +func (r *syntheticDirectoryRefs) finalize() { + var note string + switch refs_vfs1.GetLeakMode() { + case refs_vfs1.NoLeakChecking: + return + case refs_vfs1.UninitializedLeakChecking: + note = "(Leak checker uninitialized): " + } + if n := r.ReadRefs(); n != 0 { + log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, syntheticDirectoryownerType, n) + } +} + +// EnableLeakCheck checks for reference leaks when Refs gets garbage collected. +func (r *syntheticDirectoryRefs) EnableLeakCheck() { + if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking { + runtime.SetFinalizer(r, (*syntheticDirectoryRefs).finalize) + } +} + +// ReadRefs returns the current number of references. The returned count is +// inherently racy and is unsafe to use without external synchronization. +func (r *syntheticDirectoryRefs) ReadRefs() int64 { + + return atomic.LoadInt64(&r.refCount) + 1 +} + +// IncRef implements refs.RefCounter.IncRef. +// +//go:nosplit +func (r *syntheticDirectoryRefs) IncRef() { + if v := atomic.AddInt64(&r.refCount, 1); v <= 0 { + panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, syntheticDirectoryownerType)) + } +} + +// TryIncRef implements refs.RefCounter.TryIncRef. +// +// To do this safely without a loop, a speculative reference is first acquired +// on the object. This allows multiple concurrent TryIncRef calls to distinguish +// other TryIncRef calls from genuine references held. +// +//go:nosplit +func (r *syntheticDirectoryRefs) TryIncRef() bool { + const speculativeRef = 1 << 32 + v := atomic.AddInt64(&r.refCount, speculativeRef) + if int32(v) < 0 { + + atomic.AddInt64(&r.refCount, -speculativeRef) + return false + } + + atomic.AddInt64(&r.refCount, -speculativeRef+1) + return true +} + +// DecRef implements refs.RefCounter.DecRef. +// +// Note that speculative references are counted here. 
Since they were added +// prior to real references reaching zero, they will successfully convert to +// real references. In other words, we see speculative references only in the +// following case: +// +// A: TryIncRef [speculative increase => sees non-negative references] +// B: DecRef [real decrease] +// A: TryIncRef [transform speculative to real] +// +//go:nosplit +func (r *syntheticDirectoryRefs) DecRef(destroy func()) { + switch v := atomic.AddInt64(&r.refCount, -1); { + case v < -1: + panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, syntheticDirectoryownerType)) + + case v == -1: + + if destroy != nil { + destroy() + } + } +} diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go index 4e2da4810..903bd8cdf 100644 --- a/pkg/sentry/fsimpl/pipefs/pipefs.go +++ b/pkg/sentry/fsimpl/pipefs/pipefs.go @@ -165,7 +165,7 @@ func NewConnectedPipeFDs(ctx context.Context, mnt *vfs.Mount, flags uint32) (*vf fs := mnt.Filesystem().Impl().(*filesystem) inode := newInode(ctx, fs) var d kernfs.Dentry - d.Init(inode) + d.Init(&fs.Filesystem, inode) defer d.DecRef(ctx) return inode.pipe.ReaderWriterPair(mnt, d.VFSDentry(), flags) } diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go index 05d7948ea..bea669906 100644 --- a/pkg/sentry/fsimpl/proc/filesystem.go +++ b/pkg/sentry/fsimpl/proc/filesystem.go @@ -73,7 +73,9 @@ func (ft FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualF cgroups = data.Cgroups } - _, dentry := procfs.newTasksInode(k, pidns, cgroups) + inode := procfs.newTasksInode(k, pidns, cgroups) + var dentry kernfs.Dentry + dentry.Init(&procfs.Filesystem, inode) return procfs.VFSFilesystem(), dentry.VFSDentry(), nil } @@ -94,12 +96,9 @@ type dynamicInode interface { Init(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, data vfs.DynamicBytesSource, perm linux.FileMode) } -func (fs *filesystem) newDentry(creds *auth.Credentials, ino uint64, 
perm linux.FileMode, inode dynamicInode) *kernfs.Dentry { - inode.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, ino, inode, perm) - - d := &kernfs.Dentry{} - d.Init(inode) - return d +func (fs *filesystem) newInode(creds *auth.Credentials, perm linux.FileMode, inode dynamicInode) dynamicInode { + inode.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), inode, perm) + return inode } // +stateify savable @@ -114,8 +113,8 @@ func newStaticFile(data string) *staticFile { return &staticFile{StaticData: vfs.StaticData{Data: data}} } -func newStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode, children map[string]*kernfs.Dentry) *kernfs.Dentry { - return kernfs.NewStaticDir(creds, devMajor, devMinor, ino, perm, children, kernfs.GenericDirectoryFDOptions{ +func (fs *filesystem) newStaticDir(creds *auth.Credentials, children map[string]kernfs.Inode) kernfs.Inode { + return kernfs.NewStaticDir(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, children, kernfs.GenericDirectoryFDOptions{ SeekEnd: kernfs.SeekEndZero, }) } diff --git a/pkg/sentry/fsimpl/proc/proc_state_autogen.go b/pkg/sentry/fsimpl/proc/proc_state_autogen.go index 0bbbd5761..e17a2a13c 100644 --- a/pkg/sentry/fsimpl/proc/proc_state_autogen.go +++ b/pkg/sentry/fsimpl/proc/proc_state_autogen.go @@ -172,10 +172,11 @@ func (i *subtasksInode) StateTypeName() string { func (i *subtasksInode) StateFields() []string { return []string{ "implStatFS", - "AlwaysValid", + "InodeAlwaysValid", "InodeAttrs", "InodeDirectoryNoNewChildren", "InodeNotSymlink", + "InodeTemporary", "OrderedChildren", "subtasksInodeRefs", "locks", @@ -191,34 +192,36 @@ func (i *subtasksInode) beforeSave() {} func (i *subtasksInode) StateSave(stateSinkObject state.Sink) { i.beforeSave() stateSinkObject.Save(0, &i.implStatFS) - stateSinkObject.Save(1, &i.AlwaysValid) + stateSinkObject.Save(1, &i.InodeAlwaysValid) stateSinkObject.Save(2, &i.InodeAttrs) stateSinkObject.Save(3, 
&i.InodeDirectoryNoNewChildren) stateSinkObject.Save(4, &i.InodeNotSymlink) - stateSinkObject.Save(5, &i.OrderedChildren) - stateSinkObject.Save(6, &i.subtasksInodeRefs) - stateSinkObject.Save(7, &i.locks) - stateSinkObject.Save(8, &i.fs) - stateSinkObject.Save(9, &i.task) - stateSinkObject.Save(10, &i.pidns) - stateSinkObject.Save(11, &i.cgroupControllers) + stateSinkObject.Save(5, &i.InodeTemporary) + stateSinkObject.Save(6, &i.OrderedChildren) + stateSinkObject.Save(7, &i.subtasksInodeRefs) + stateSinkObject.Save(8, &i.locks) + stateSinkObject.Save(9, &i.fs) + stateSinkObject.Save(10, &i.task) + stateSinkObject.Save(11, &i.pidns) + stateSinkObject.Save(12, &i.cgroupControllers) } func (i *subtasksInode) afterLoad() {} func (i *subtasksInode) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(0, &i.implStatFS) - stateSourceObject.Load(1, &i.AlwaysValid) + stateSourceObject.Load(1, &i.InodeAlwaysValid) stateSourceObject.Load(2, &i.InodeAttrs) stateSourceObject.Load(3, &i.InodeDirectoryNoNewChildren) stateSourceObject.Load(4, &i.InodeNotSymlink) - stateSourceObject.Load(5, &i.OrderedChildren) - stateSourceObject.Load(6, &i.subtasksInodeRefs) - stateSourceObject.Load(7, &i.locks) - stateSourceObject.Load(8, &i.fs) - stateSourceObject.Load(9, &i.task) - stateSourceObject.Load(10, &i.pidns) - stateSourceObject.Load(11, &i.cgroupControllers) + stateSourceObject.Load(5, &i.InodeTemporary) + stateSourceObject.Load(6, &i.OrderedChildren) + stateSourceObject.Load(7, &i.subtasksInodeRefs) + stateSourceObject.Load(8, &i.locks) + stateSourceObject.Load(9, &i.fs) + stateSourceObject.Load(10, &i.task) + stateSourceObject.Load(11, &i.pidns) + stateSourceObject.Load(12, &i.cgroupControllers) } func (fd *subtasksFD) StateTypeName() string { @@ -279,8 +282,8 @@ func (i *taskInode) StateFields() []string { "implStatFS", "InodeAttrs", "InodeDirectoryNoNewChildren", - "InodeNoDynamicLookup", "InodeNotSymlink", + "InodeTemporary", "OrderedChildren", "taskInodeRefs", 
"locks", @@ -295,8 +298,8 @@ func (i *taskInode) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(0, &i.implStatFS) stateSinkObject.Save(1, &i.InodeAttrs) stateSinkObject.Save(2, &i.InodeDirectoryNoNewChildren) - stateSinkObject.Save(3, &i.InodeNoDynamicLookup) - stateSinkObject.Save(4, &i.InodeNotSymlink) + stateSinkObject.Save(3, &i.InodeNotSymlink) + stateSinkObject.Save(4, &i.InodeTemporary) stateSinkObject.Save(5, &i.OrderedChildren) stateSinkObject.Save(6, &i.taskInodeRefs) stateSinkObject.Save(7, &i.locks) @@ -309,8 +312,8 @@ func (i *taskInode) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(0, &i.implStatFS) stateSourceObject.Load(1, &i.InodeAttrs) stateSourceObject.Load(2, &i.InodeDirectoryNoNewChildren) - stateSourceObject.Load(3, &i.InodeNoDynamicLookup) - stateSourceObject.Load(4, &i.InodeNotSymlink) + stateSourceObject.Load(3, &i.InodeNotSymlink) + stateSourceObject.Load(4, &i.InodeTemporary) stateSourceObject.Load(5, &i.OrderedChildren) stateSourceObject.Load(6, &i.taskInodeRefs) stateSourceObject.Load(7, &i.locks) @@ -384,10 +387,11 @@ func (i *fdDirInode) StateFields() []string { "fdDir", "fdDirInodeRefs", "implStatFS", - "AlwaysValid", + "InodeAlwaysValid", "InodeAttrs", "InodeDirectoryNoNewChildren", "InodeNotSymlink", + "InodeTemporary", "OrderedChildren", } } @@ -399,11 +403,12 @@ func (i *fdDirInode) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(0, &i.fdDir) stateSinkObject.Save(1, &i.fdDirInodeRefs) stateSinkObject.Save(2, &i.implStatFS) - stateSinkObject.Save(3, &i.AlwaysValid) + stateSinkObject.Save(3, &i.InodeAlwaysValid) stateSinkObject.Save(4, &i.InodeAttrs) stateSinkObject.Save(5, &i.InodeDirectoryNoNewChildren) stateSinkObject.Save(6, &i.InodeNotSymlink) - stateSinkObject.Save(7, &i.OrderedChildren) + stateSinkObject.Save(7, &i.InodeTemporary) + stateSinkObject.Save(8, &i.OrderedChildren) } func (i *fdDirInode) afterLoad() {} @@ -412,11 +417,12 @@ func (i *fdDirInode) 
StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(0, &i.fdDir) stateSourceObject.Load(1, &i.fdDirInodeRefs) stateSourceObject.Load(2, &i.implStatFS) - stateSourceObject.Load(3, &i.AlwaysValid) + stateSourceObject.Load(3, &i.InodeAlwaysValid) stateSourceObject.Load(4, &i.InodeAttrs) stateSourceObject.Load(5, &i.InodeDirectoryNoNewChildren) stateSourceObject.Load(6, &i.InodeNotSymlink) - stateSourceObject.Load(7, &i.OrderedChildren) + stateSourceObject.Load(7, &i.InodeTemporary) + stateSourceObject.Load(8, &i.OrderedChildren) } func (s *fdSymlink) StateTypeName() string { @@ -466,10 +472,11 @@ func (i *fdInfoDirInode) StateFields() []string { "fdDir", "fdInfoDirInodeRefs", "implStatFS", - "AlwaysValid", + "InodeAlwaysValid", "InodeAttrs", "InodeDirectoryNoNewChildren", "InodeNotSymlink", + "InodeTemporary", "OrderedChildren", } } @@ -481,11 +488,12 @@ func (i *fdInfoDirInode) StateSave(stateSinkObject state.Sink) { stateSinkObject.Save(0, &i.fdDir) stateSinkObject.Save(1, &i.fdInfoDirInodeRefs) stateSinkObject.Save(2, &i.implStatFS) - stateSinkObject.Save(3, &i.AlwaysValid) + stateSinkObject.Save(3, &i.InodeAlwaysValid) stateSinkObject.Save(4, &i.InodeAttrs) stateSinkObject.Save(5, &i.InodeDirectoryNoNewChildren) stateSinkObject.Save(6, &i.InodeNotSymlink) - stateSinkObject.Save(7, &i.OrderedChildren) + stateSinkObject.Save(7, &i.InodeTemporary) + stateSinkObject.Save(8, &i.OrderedChildren) } func (i *fdInfoDirInode) afterLoad() {} @@ -494,11 +502,12 @@ func (i *fdInfoDirInode) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(0, &i.fdDir) stateSourceObject.Load(1, &i.fdInfoDirInodeRefs) stateSourceObject.Load(2, &i.implStatFS) - stateSourceObject.Load(3, &i.AlwaysValid) + stateSourceObject.Load(3, &i.InodeAlwaysValid) stateSourceObject.Load(4, &i.InodeAttrs) stateSourceObject.Load(5, &i.InodeDirectoryNoNewChildren) stateSourceObject.Load(6, &i.InodeNotSymlink) - stateSourceObject.Load(7, &i.OrderedChildren) + 
stateSourceObject.Load(7, &i.InodeTemporary) + stateSourceObject.Load(8, &i.OrderedChildren) } func (d *fdInfoData) StateTypeName() string { @@ -1365,17 +1374,16 @@ func (i *tasksInode) StateTypeName() string { func (i *tasksInode) StateFields() []string { return []string{ "implStatFS", - "AlwaysValid", + "InodeAlwaysValid", "InodeAttrs", "InodeDirectoryNoNewChildren", "InodeNotSymlink", + "InodeTemporary", "OrderedChildren", "tasksInodeRefs", "locks", "fs", "pidns", - "selfSymlink", - "threadSelfSymlink", "cgroupControllers", } } @@ -1385,36 +1393,34 @@ func (i *tasksInode) beforeSave() {} func (i *tasksInode) StateSave(stateSinkObject state.Sink) { i.beforeSave() stateSinkObject.Save(0, &i.implStatFS) - stateSinkObject.Save(1, &i.AlwaysValid) + stateSinkObject.Save(1, &i.InodeAlwaysValid) stateSinkObject.Save(2, &i.InodeAttrs) stateSinkObject.Save(3, &i.InodeDirectoryNoNewChildren) stateSinkObject.Save(4, &i.InodeNotSymlink) - stateSinkObject.Save(5, &i.OrderedChildren) - stateSinkObject.Save(6, &i.tasksInodeRefs) - stateSinkObject.Save(7, &i.locks) - stateSinkObject.Save(8, &i.fs) - stateSinkObject.Save(9, &i.pidns) - stateSinkObject.Save(10, &i.selfSymlink) - stateSinkObject.Save(11, &i.threadSelfSymlink) - stateSinkObject.Save(12, &i.cgroupControllers) + stateSinkObject.Save(5, &i.InodeTemporary) + stateSinkObject.Save(6, &i.OrderedChildren) + stateSinkObject.Save(7, &i.tasksInodeRefs) + stateSinkObject.Save(8, &i.locks) + stateSinkObject.Save(9, &i.fs) + stateSinkObject.Save(10, &i.pidns) + stateSinkObject.Save(11, &i.cgroupControllers) } func (i *tasksInode) afterLoad() {} func (i *tasksInode) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(0, &i.implStatFS) - stateSourceObject.Load(1, &i.AlwaysValid) + stateSourceObject.Load(1, &i.InodeAlwaysValid) stateSourceObject.Load(2, &i.InodeAttrs) stateSourceObject.Load(3, &i.InodeDirectoryNoNewChildren) stateSourceObject.Load(4, &i.InodeNotSymlink) - stateSourceObject.Load(5, &i.OrderedChildren) 
- stateSourceObject.Load(6, &i.tasksInodeRefs) - stateSourceObject.Load(7, &i.locks) - stateSourceObject.Load(8, &i.fs) - stateSourceObject.Load(9, &i.pidns) - stateSourceObject.Load(10, &i.selfSymlink) - stateSourceObject.Load(11, &i.threadSelfSymlink) - stateSourceObject.Load(12, &i.cgroupControllers) + stateSourceObject.Load(5, &i.InodeTemporary) + stateSourceObject.Load(6, &i.OrderedChildren) + stateSourceObject.Load(7, &i.tasksInodeRefs) + stateSourceObject.Load(8, &i.locks) + stateSourceObject.Load(9, &i.fs) + stateSourceObject.Load(10, &i.pidns) + stateSourceObject.Load(11, &i.cgroupControllers) } func (s *staticFileSetStat) StateTypeName() string { diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go index 47ecd941c..bad2fab4f 100644 --- a/pkg/sentry/fsimpl/proc/subtasks.go +++ b/pkg/sentry/fsimpl/proc/subtasks.go @@ -32,10 +32,11 @@ import ( // +stateify savable type subtasksInode struct { implStatFS - kernfs.AlwaysValid + kernfs.InodeAlwaysValid kernfs.InodeAttrs kernfs.InodeDirectoryNoNewChildren kernfs.InodeNotSymlink + kernfs.InodeTemporary kernfs.OrderedChildren subtasksInodeRefs @@ -49,7 +50,7 @@ type subtasksInode struct { var _ kernfs.Inode = (*subtasksInode)(nil) -func (fs *filesystem) newSubtasks(task *kernel.Task, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) *kernfs.Dentry { +func (fs *filesystem) newSubtasks(task *kernel.Task, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) kernfs.Inode { subInode := &subtasksInode{ fs: fs, task: task, @@ -62,14 +63,11 @@ func (fs *filesystem) newSubtasks(task *kernel.Task, pidns *kernel.PIDNamespace, subInode.EnableLeakCheck() inode := &taskOwnedInode{Inode: subInode, owner: task} - dentry := &kernfs.Dentry{} - dentry.Init(inode) - - return dentry + return inode } -// Lookup implements kernfs.inodeDynamicLookup.Lookup. 
-func (i *subtasksInode) Lookup(ctx context.Context, name string) (*kernfs.Dentry, error) { +// Lookup implements kernfs.inodeDirectory.Lookup. +func (i *subtasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) { tid, err := strconv.ParseUint(name, 10, 32) if err != nil { return nil, syserror.ENOENT @@ -82,10 +80,10 @@ func (i *subtasksInode) Lookup(ctx context.Context, name string) (*kernfs.Dentry if subTask.ThreadGroup() != i.task.ThreadGroup() { return nil, syserror.ENOENT } - return i.fs.newTaskInode(subTask, i.pidns, false, i.cgroupControllers), nil + return i.fs.newTaskInode(subTask, i.pidns, false, i.cgroupControllers) } -// IterDirents implements kernfs.inodeDynamicLookup.IterDirents. +// IterDirents implements kernfs.inodeDirectory.IterDirents. func (i *subtasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) { tasks := i.task.ThreadGroup().MemberIDs(i.pidns) if len(tasks) == 0 { @@ -186,6 +184,6 @@ func (*subtasksInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credential } // DecRef implements kernfs.Inode.DecRef. 
-func (i *subtasksInode) DecRef(context.Context) { - i.subtasksInodeRefs.DecRef(i.Destroy) +func (i *subtasksInode) DecRef(ctx context.Context) { + i.subtasksInodeRefs.DecRef(func() { i.Destroy(ctx) }) } diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index a7cd6f57e..b63a4eca0 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -35,8 +35,8 @@ type taskInode struct { implStatFS kernfs.InodeAttrs kernfs.InodeDirectoryNoNewChildren - kernfs.InodeNoDynamicLookup kernfs.InodeNotSymlink + kernfs.InodeTemporary kernfs.OrderedChildren taskInodeRefs @@ -47,41 +47,44 @@ type taskInode struct { var _ kernfs.Inode = (*taskInode)(nil) -func (fs *filesystem) newTaskInode(task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, cgroupControllers map[string]string) *kernfs.Dentry { - // TODO(gvisor.dev/issue/164): Fail with ESRCH if task exited. - contents := map[string]*kernfs.Dentry{ - "auxv": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &auxvData{task: task}), - "cmdline": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &cmdlineData{task: task, arg: cmdlineDataArg}), +func (fs *filesystem) newTaskInode(task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, cgroupControllers map[string]string) (kernfs.Inode, error) { + if task.ExitState() == kernel.TaskExitDead { + return nil, syserror.ESRCH + } + + contents := map[string]kernfs.Inode{ + "auxv": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &auxvData{task: task}), + "cmdline": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &cmdlineData{task: task, arg: cmdlineDataArg}), "comm": fs.newComm(task, fs.NextIno(), 0444), "cwd": fs.newCwdSymlink(task, fs.NextIno()), - "environ": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &cmdlineData{task: task, arg: environDataArg}), + "environ": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &cmdlineData{task: task, arg: environDataArg}), "exe": fs.newExeSymlink(task, fs.NextIno()), "fd": fs.newFDDirInode(task), 
"fdinfo": fs.newFDInfoDirInode(task), - "gid_map": fs.newTaskOwnedFile(task, fs.NextIno(), 0644, &idMapData{task: task, gids: true}), - "io": fs.newTaskOwnedFile(task, fs.NextIno(), 0400, newIO(task, isThreadGroup)), - "maps": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &mapsData{task: task}), - "mountinfo": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &mountInfoData{task: task}), - "mounts": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &mountsData{task: task}), + "gid_map": fs.newTaskOwnedInode(task, fs.NextIno(), 0644, &idMapData{task: task, gids: true}), + "io": fs.newTaskOwnedInode(task, fs.NextIno(), 0400, newIO(task, isThreadGroup)), + "maps": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &mapsData{task: task}), + "mountinfo": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &mountInfoData{task: task}), + "mounts": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &mountsData{task: task}), "net": fs.newTaskNetDir(task), - "ns": fs.newTaskOwnedDir(task, fs.NextIno(), 0511, map[string]*kernfs.Dentry{ + "ns": fs.newTaskOwnedDir(task, fs.NextIno(), 0511, map[string]kernfs.Inode{ "net": fs.newNamespaceSymlink(task, fs.NextIno(), "net"), "pid": fs.newNamespaceSymlink(task, fs.NextIno(), "pid"), "user": fs.newNamespaceSymlink(task, fs.NextIno(), "user"), }), - "oom_score": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, newStaticFile("0\n")), - "oom_score_adj": fs.newTaskOwnedFile(task, fs.NextIno(), 0644, &oomScoreAdj{task: task}), - "smaps": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &smapsData{task: task}), - "stat": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}), - "statm": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &statmData{task: task}), - "status": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &statusData{task: task, pidns: pidns}), - "uid_map": fs.newTaskOwnedFile(task, fs.NextIno(), 0644, &idMapData{task: task, gids: false}), + "oom_score": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, 
newStaticFile("0\n")), + "oom_score_adj": fs.newTaskOwnedInode(task, fs.NextIno(), 0644, &oomScoreAdj{task: task}), + "smaps": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &smapsData{task: task}), + "stat": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}), + "statm": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &statmData{task: task}), + "status": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &statusData{task: task, pidns: pidns}), + "uid_map": fs.newTaskOwnedInode(task, fs.NextIno(), 0644, &idMapData{task: task, gids: false}), } if isThreadGroup { contents["task"] = fs.newSubtasks(task, pidns, cgroupControllers) } if len(cgroupControllers) > 0 { - contents["cgroup"] = fs.newTaskOwnedFile(task, fs.NextIno(), 0444, newCgroupData(cgroupControllers)) + contents["cgroup"] = fs.newTaskOwnedInode(task, fs.NextIno(), 0444, newCgroupData(cgroupControllers)) } taskInode := &taskInode{task: task} @@ -90,17 +93,15 @@ func (fs *filesystem) newTaskInode(task *kernel.Task, pidns *kernel.PIDNamespace taskInode.EnableLeakCheck() inode := &taskOwnedInode{Inode: taskInode, owner: task} - dentry := &kernfs.Dentry{} - dentry.Init(inode) taskInode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) - links := taskInode.OrderedChildren.Populate(dentry, contents) + links := taskInode.OrderedChildren.Populate(contents) taskInode.IncLinks(links) - return dentry + return inode, nil } -// Valid implements kernfs.inodeDynamicLookup. This inode remains valid as long +// Valid implements kernfs.Inode.Valid. This inode remains valid as long // as the task is still running. When it's dead, another tasks with the same // PID could replace it. func (i *taskInode) Valid(ctx context.Context) bool { @@ -124,8 +125,8 @@ func (*taskInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, v } // DecRef implements kernfs.Inode.DecRef. 
-func (i *taskInode) DecRef(context.Context) { - i.taskInodeRefs.DecRef(i.Destroy) +func (i *taskInode) DecRef(ctx context.Context) { + i.taskInodeRefs.DecRef(func() { i.Destroy(ctx) }) } // taskOwnedInode implements kernfs.Inode and overrides inode owner with task @@ -141,34 +142,23 @@ type taskOwnedInode struct { var _ kernfs.Inode = (*taskOwnedInode)(nil) -func (fs *filesystem) newTaskOwnedFile(task *kernel.Task, ino uint64, perm linux.FileMode, inode dynamicInode) *kernfs.Dentry { +func (fs *filesystem) newTaskOwnedInode(task *kernel.Task, ino uint64, perm linux.FileMode, inode dynamicInode) kernfs.Inode { // Note: credentials are overridden by taskOwnedInode. inode.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, inode, perm) - taskInode := &taskOwnedInode{Inode: inode, owner: task} - d := &kernfs.Dentry{} - d.Init(taskInode) - return d + return &taskOwnedInode{Inode: inode, owner: task} } -func (fs *filesystem) newTaskOwnedDir(task *kernel.Task, ino uint64, perm linux.FileMode, children map[string]*kernfs.Dentry) *kernfs.Dentry { - dir := &kernfs.StaticDirectory{} - +func (fs *filesystem) newTaskOwnedDir(task *kernel.Task, ino uint64, perm linux.FileMode, children map[string]kernfs.Inode) kernfs.Inode { // Note: credentials are overridden by taskOwnedInode. 
- dir.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, perm, kernfs.GenericDirectoryFDOptions{ - SeekEnd: kernfs.SeekEndZero, - }) - dir.EnableLeakCheck() - - inode := &taskOwnedInode{Inode: dir, owner: task} - d := &kernfs.Dentry{} - d.Init(inode) + fdOpts := kernfs.GenericDirectoryFDOptions{SeekEnd: kernfs.SeekEndZero} + dir := kernfs.NewStaticDir(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, perm, children, fdOpts) - dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) - links := dir.OrderedChildren.Populate(d, children) - dir.IncLinks(links) + return &taskOwnedInode{Inode: dir, owner: task} +} - return d +func (i *taskOwnedInode) Valid(ctx context.Context) bool { + return i.owner.ExitState() != kernel.TaskExitDead && i.Inode.Valid(ctx) } // Stat implements kernfs.Inode.Stat. diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go index 0866cea2b..2c80ac5c2 100644 --- a/pkg/sentry/fsimpl/proc/task_fds.go +++ b/pkg/sentry/fsimpl/proc/task_fds.go @@ -63,7 +63,7 @@ type fdDir struct { produceSymlink bool } -// IterDirents implements kernfs.inodeDynamicLookup.IterDirents. +// IterDirents implements kernfs.inodeDirectory.IterDirents. 
func (i *fdDir) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) { var fds []int32 i.task.WithMuLocked(func(t *kernel.Task) { @@ -109,16 +109,17 @@ type fdDirInode struct { fdDir fdDirInodeRefs implStatFS - kernfs.AlwaysValid + kernfs.InodeAlwaysValid kernfs.InodeAttrs kernfs.InodeDirectoryNoNewChildren kernfs.InodeNotSymlink + kernfs.InodeTemporary kernfs.OrderedChildren } var _ kernfs.Inode = (*fdDirInode)(nil) -func (fs *filesystem) newFDDirInode(task *kernel.Task) *kernfs.Dentry { +func (fs *filesystem) newFDDirInode(task *kernel.Task) kernfs.Inode { inode := &fdDirInode{ fdDir: fdDir{ fs: fs, @@ -128,16 +129,17 @@ func (fs *filesystem) newFDDirInode(task *kernel.Task) *kernfs.Dentry { } inode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555) inode.EnableLeakCheck() - - dentry := &kernfs.Dentry{} - dentry.Init(inode) inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) + return inode +} - return dentry +// IterDirents implements kernfs.inodeDirectory.IterDirents. +func (i *fdDirInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) { + return i.fdDir.IterDirents(ctx, cb, offset, relOffset) } -// Lookup implements kernfs.inodeDynamicLookup.Lookup. -func (i *fdDirInode) Lookup(ctx context.Context, name string) (*kernfs.Dentry, error) { +// Lookup implements kernfs.inodeDirectory.Lookup. +func (i *fdDirInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) { fdInt, err := strconv.ParseInt(name, 10, 32) if err != nil { return nil, syserror.ENOENT @@ -183,8 +185,8 @@ func (i *fdDirInode) CheckPermissions(ctx context.Context, creds *auth.Credentia } // DecRef implements kernfs.Inode.DecRef. 
-func (i *fdDirInode) DecRef(context.Context) { - i.fdDirInodeRefs.DecRef(i.Destroy) +func (i *fdDirInode) DecRef(ctx context.Context) { + i.fdDirInodeRefs.DecRef(func() { i.Destroy(ctx) }) } // fdSymlink is an symlink for the /proc/[pid]/fd/[fd] file. @@ -202,16 +204,13 @@ type fdSymlink struct { var _ kernfs.Inode = (*fdSymlink)(nil) -func (fs *filesystem) newFDSymlink(task *kernel.Task, fd int32, ino uint64) *kernfs.Dentry { +func (fs *filesystem) newFDSymlink(task *kernel.Task, fd int32, ino uint64) kernfs.Inode { inode := &fdSymlink{ task: task, fd: fd, } inode.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777) - - d := &kernfs.Dentry{} - d.Init(inode) - return d + return inode } func (s *fdSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) { @@ -236,6 +235,11 @@ func (s *fdSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDen return vd, "", nil } +// Valid implements kernfs.Inode.Valid. +func (s *fdSymlink) Valid(ctx context.Context) bool { + return taskFDExists(ctx, s.task, s.fd) +} + // fdInfoDirInode represents the inode for /proc/[pid]/fdinfo directory. 
// // +stateify savable @@ -243,16 +247,17 @@ type fdInfoDirInode struct { fdDir fdInfoDirInodeRefs implStatFS - kernfs.AlwaysValid + kernfs.InodeAlwaysValid kernfs.InodeAttrs kernfs.InodeDirectoryNoNewChildren kernfs.InodeNotSymlink + kernfs.InodeTemporary kernfs.OrderedChildren } var _ kernfs.Inode = (*fdInfoDirInode)(nil) -func (fs *filesystem) newFDInfoDirInode(task *kernel.Task) *kernfs.Dentry { +func (fs *filesystem) newFDInfoDirInode(task *kernel.Task) kernfs.Inode { inode := &fdInfoDirInode{ fdDir: fdDir{ fs: fs, @@ -261,16 +266,12 @@ func (fs *filesystem) newFDInfoDirInode(task *kernel.Task) *kernfs.Dentry { } inode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555) inode.EnableLeakCheck() - - dentry := &kernfs.Dentry{} - dentry.Init(inode) inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) - - return dentry + return inode } -// Lookup implements kernfs.inodeDynamicLookup.Lookup. -func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (*kernfs.Dentry, error) { +// Lookup implements kernfs.inodeDirectory.Lookup. +func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) { fdInt, err := strconv.ParseInt(name, 10, 32) if err != nil { return nil, syserror.ENOENT @@ -283,7 +284,12 @@ func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (*kernfs.Dentr task: i.task, fd: fd, } - return i.fs.newTaskOwnedFile(i.task, i.fs.NextIno(), 0444, data), nil + return i.fs.newTaskOwnedInode(i.task, i.fs.NextIno(), 0444, data), nil +} + +// IterDirents implements Inode.IterDirents. +func (i *fdInfoDirInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (newOffset int64, err error) { + return i.fdDir.IterDirents(ctx, cb, offset, relOffset) } // Open implements kernfs.Inode.Open. 
@@ -298,8 +304,8 @@ func (i *fdInfoDirInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *ker } // DecRef implements kernfs.Inode.DecRef. -func (i *fdInfoDirInode) DecRef(context.Context) { - i.fdInfoDirInodeRefs.DecRef(i.Destroy) +func (i *fdInfoDirInode) DecRef(ctx context.Context) { + i.fdInfoDirInodeRefs.DecRef(func() { i.Destroy(ctx) }) } // fdInfoData implements vfs.DynamicBytesSource for /proc/[pid]/fdinfo/[fd]. @@ -328,3 +334,8 @@ func (d *fdInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error { fmt.Fprintf(buf, "flags:\t0%o\n", flags) return nil } + +// Valid implements kernfs.Inode.Valid. +func (d *fdInfoData) Valid(ctx context.Context) bool { + return taskFDExists(ctx, d.task, d.fd) +} diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index 3fbf081a6..79f8b7e9f 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -247,13 +247,10 @@ type commInode struct { task *kernel.Task } -func (fs *filesystem) newComm(task *kernel.Task, ino uint64, perm linux.FileMode) *kernfs.Dentry { +func (fs *filesystem) newComm(task *kernel.Task, ino uint64, perm linux.FileMode) kernfs.Inode { inode := &commInode{task: task} inode.DynamicBytesFile.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, &commData{task: task}, perm) - - d := &kernfs.Dentry{} - d.Init(inode) - return d + return inode } func (i *commInode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error { @@ -658,13 +655,10 @@ type exeSymlink struct { var _ kernfs.Inode = (*exeSymlink)(nil) -func (fs *filesystem) newExeSymlink(task *kernel.Task, ino uint64) *kernfs.Dentry { +func (fs *filesystem) newExeSymlink(task *kernel.Task, ino uint64) kernfs.Inode { inode := &exeSymlink{task: task} inode.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777) - - d := &kernfs.Dentry{} - d.Init(inode) - return d + return inode } // Readlink 
implements kernfs.Inode.Readlink. @@ -737,13 +731,10 @@ type cwdSymlink struct { var _ kernfs.Inode = (*cwdSymlink)(nil) -func (fs *filesystem) newCwdSymlink(task *kernel.Task, ino uint64) *kernfs.Dentry { +func (fs *filesystem) newCwdSymlink(task *kernel.Task, ino uint64) kernfs.Inode { inode := &cwdSymlink{task: task} inode.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777) - - d := &kernfs.Dentry{} - d.Init(inode) - return d + return inode } // Readlink implements kernfs.Inode.Readlink. @@ -851,7 +842,7 @@ type namespaceSymlink struct { task *kernel.Task } -func (fs *filesystem) newNamespaceSymlink(task *kernel.Task, ino uint64, ns string) *kernfs.Dentry { +func (fs *filesystem) newNamespaceSymlink(task *kernel.Task, ino uint64, ns string) kernfs.Inode { // Namespace symlinks should contain the namespace name and the inode number // for the namespace instance, so for example user:[123456]. We currently fake // the inode number by sticking the symlink inode in its place. @@ -862,9 +853,7 @@ func (fs *filesystem) newNamespaceSymlink(task *kernel.Task, ino uint64, ns stri inode.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, target) taskInode := &taskOwnedInode{Inode: inode, owner: task} - d := &kernfs.Dentry{} - d.Init(taskInode) - return d + return taskInode } // Readlink implements kernfs.Inode.Readlink. @@ -882,11 +871,12 @@ func (s *namespaceSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.Vir } // Create a synthetic inode to represent the namespace. + fs := mnt.Filesystem().Impl().(*filesystem) dentry := &kernfs.Dentry{} - dentry.Init(&namespaceInode{}) + dentry.Init(&fs.Filesystem, &namespaceInode{}) vd := vfs.MakeVirtualDentry(mnt, dentry.VFSDentry()) - vd.IncRef() - dentry.DecRef(ctx) + // Only IncRef vd.Mount() because vd.Dentry() already holds a ref of 1. 
+ mnt.IncRef() return vd, "", nil } diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go index e7f748655..3425e8698 100644 --- a/pkg/sentry/fsimpl/proc/task_net.go +++ b/pkg/sentry/fsimpl/proc/task_net.go @@ -37,12 +37,12 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) -func (fs *filesystem) newTaskNetDir(task *kernel.Task) *kernfs.Dentry { +func (fs *filesystem) newTaskNetDir(task *kernel.Task) kernfs.Inode { k := task.Kernel() pidns := task.PIDNamespace() root := auth.NewRootCredentials(pidns.UserNamespace()) - var contents map[string]*kernfs.Dentry + var contents map[string]kernfs.Inode if stack := task.NetworkNamespace().Stack(); stack != nil { const ( arp = "IP address HW type Flags HW address Mask Device\n" @@ -56,34 +56,34 @@ func (fs *filesystem) newTaskNetDir(task *kernel.Task) *kernfs.Dentry { // TODO(gvisor.dev/issue/1833): Make sure file contents reflect the task // network namespace. - contents = map[string]*kernfs.Dentry{ - "dev": fs.newDentry(root, fs.NextIno(), 0444, &netDevData{stack: stack}), - "snmp": fs.newDentry(root, fs.NextIno(), 0444, &netSnmpData{stack: stack}), + contents = map[string]kernfs.Inode{ + "dev": fs.newInode(root, 0444, &netDevData{stack: stack}), + "snmp": fs.newInode(root, 0444, &netSnmpData{stack: stack}), // The following files are simple stubs until they are implemented in // netstack, if the file contains a header the stub is just the header // otherwise it is an empty file. 
- "arp": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(arp)), - "netlink": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(netlink)), - "netstat": fs.newDentry(root, fs.NextIno(), 0444, &netStatData{}), - "packet": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(packet)), - "protocols": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(protocols)), + "arp": fs.newInode(root, 0444, newStaticFile(arp)), + "netlink": fs.newInode(root, 0444, newStaticFile(netlink)), + "netstat": fs.newInode(root, 0444, &netStatData{}), + "packet": fs.newInode(root, 0444, newStaticFile(packet)), + "protocols": fs.newInode(root, 0444, newStaticFile(protocols)), // Linux sets psched values to: nsec per usec, psched tick in ns, 1000000, // high res timer ticks per sec (ClockGetres returns 1ns resolution). - "psched": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(psched)), - "ptype": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(ptype)), - "route": fs.newDentry(root, fs.NextIno(), 0444, &netRouteData{stack: stack}), - "tcp": fs.newDentry(root, fs.NextIno(), 0444, &netTCPData{kernel: k}), - "udp": fs.newDentry(root, fs.NextIno(), 0444, &netUDPData{kernel: k}), - "unix": fs.newDentry(root, fs.NextIno(), 0444, &netUnixData{kernel: k}), + "psched": fs.newInode(root, 0444, newStaticFile(psched)), + "ptype": fs.newInode(root, 0444, newStaticFile(ptype)), + "route": fs.newInode(root, 0444, &netRouteData{stack: stack}), + "tcp": fs.newInode(root, 0444, &netTCPData{kernel: k}), + "udp": fs.newInode(root, 0444, &netUDPData{kernel: k}), + "unix": fs.newInode(root, 0444, &netUnixData{kernel: k}), } if stack.SupportsIPv6() { - contents["if_inet6"] = fs.newDentry(root, fs.NextIno(), 0444, &ifinet6{stack: stack}) - contents["ipv6_route"] = fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("")) - contents["tcp6"] = fs.newDentry(root, fs.NextIno(), 0444, &netTCP6Data{kernel: k}) - contents["udp6"] = fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(upd6)) + 
contents["if_inet6"] = fs.newInode(root, 0444, &ifinet6{stack: stack}) + contents["ipv6_route"] = fs.newInode(root, 0444, newStaticFile("")) + contents["tcp6"] = fs.newInode(root, 0444, &netTCP6Data{kernel: k}) + contents["udp6"] = fs.newInode(root, 0444, newStaticFile(upd6)) } } diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go index d8f5dd509..3259c3732 100644 --- a/pkg/sentry/fsimpl/proc/tasks.go +++ b/pkg/sentry/fsimpl/proc/tasks.go @@ -38,10 +38,11 @@ const ( // +stateify savable type tasksInode struct { implStatFS - kernfs.AlwaysValid + kernfs.InodeAlwaysValid kernfs.InodeAttrs kernfs.InodeDirectoryNoNewChildren kernfs.InodeNotSymlink + kernfs.InodeTemporary // This holds no meaning as this inode can't be Looked up and is always valid. kernfs.OrderedChildren tasksInodeRefs @@ -52,8 +53,6 @@ type tasksInode struct { // '/proc/self' and '/proc/thread-self' have custom directory offsets in // Linux. So handle them outside of OrderedChildren. - selfSymlink *kernfs.Dentry - threadSelfSymlink *kernfs.Dentry // cgroupControllers is a map of controller name to directory in the // cgroup hierarchy. 
These controllers are immutable and will be listed @@ -63,52 +62,53 @@ type tasksInode struct { var _ kernfs.Inode = (*tasksInode)(nil) -func (fs *filesystem) newTasksInode(k *kernel.Kernel, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) (*tasksInode, *kernfs.Dentry) { +func (fs *filesystem) newTasksInode(k *kernel.Kernel, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) *tasksInode { root := auth.NewRootCredentials(pidns.UserNamespace()) - contents := map[string]*kernfs.Dentry{ - "cpuinfo": fs.newDentry(root, fs.NextIno(), 0444, newStaticFileSetStat(cpuInfoData(k))), - "filesystems": fs.newDentry(root, fs.NextIno(), 0444, &filesystemsData{}), - "loadavg": fs.newDentry(root, fs.NextIno(), 0444, &loadavgData{}), + contents := map[string]kernfs.Inode{ + "cpuinfo": fs.newInode(root, 0444, newStaticFileSetStat(cpuInfoData(k))), + "filesystems": fs.newInode(root, 0444, &filesystemsData{}), + "loadavg": fs.newInode(root, 0444, &loadavgData{}), "sys": fs.newSysDir(root, k), - "meminfo": fs.newDentry(root, fs.NextIno(), 0444, &meminfoData{}), + "meminfo": fs.newInode(root, 0444, &meminfoData{}), "mounts": kernfs.NewStaticSymlink(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), "self/mounts"), "net": kernfs.NewStaticSymlink(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), "self/net"), - "stat": fs.newDentry(root, fs.NextIno(), 0444, &statData{}), - "uptime": fs.newDentry(root, fs.NextIno(), 0444, &uptimeData{}), - "version": fs.newDentry(root, fs.NextIno(), 0444, &versionData{}), + "stat": fs.newInode(root, 0444, &statData{}), + "uptime": fs.newInode(root, 0444, &uptimeData{}), + "version": fs.newInode(root, 0444, &versionData{}), } inode := &tasksInode{ pidns: pidns, fs: fs, - selfSymlink: fs.newSelfSymlink(root, fs.NextIno(), pidns), - threadSelfSymlink: fs.newThreadSelfSymlink(root, fs.NextIno(), pidns), cgroupControllers: cgroupControllers, } inode.InodeAttrs.Init(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 
linux.ModeDirectory|0555) inode.EnableLeakCheck() - dentry := &kernfs.Dentry{} - dentry.Init(inode) - inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) - links := inode.OrderedChildren.Populate(dentry, contents) + links := inode.OrderedChildren.Populate(contents) inode.IncLinks(links) - return inode, dentry + return inode } -// Lookup implements kernfs.inodeDynamicLookup.Lookup. -func (i *tasksInode) Lookup(ctx context.Context, name string) (*kernfs.Dentry, error) { - // Try to lookup a corresponding task. +// Lookup implements kernfs.inodeDirectory.Lookup. +func (i *tasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) { + // Check if a static entry was looked up. + if d, err := i.OrderedChildren.Lookup(ctx, name); err == nil { + return d, nil + } + + // Not a static entry. Try to lookup a corresponding task. tid, err := strconv.ParseUint(name, 10, 64) if err != nil { + root := auth.NewRootCredentials(i.pidns.UserNamespace()) // If it failed to parse, check if it's one of the special handled files. switch name { case selfName: - return i.selfSymlink, nil + return i.newSelfSymlink(root), nil case threadSelfName: - return i.threadSelfSymlink, nil + return i.newThreadSelfSymlink(root), nil } return nil, syserror.ENOENT } @@ -118,10 +118,10 @@ func (i *tasksInode) Lookup(ctx context.Context, name string) (*kernfs.Dentry, e return nil, syserror.ENOENT } - return i.fs.newTaskInode(task, i.pidns, true, i.cgroupControllers), nil + return i.fs.newTaskInode(task, i.pidns, true, i.cgroupControllers) } -// IterDirents implements kernfs.inodeDynamicLookup.IterDirents. +// IterDirents implements kernfs.inodeDirectory.IterDirents. 
func (i *tasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, _ int64) (int64, error) { // fs/proc/internal.h: #define FIRST_PROCESS_ENTRY 256 const FIRST_PROCESS_ENTRY = 256 @@ -229,8 +229,8 @@ func (i *tasksInode) Stat(ctx context.Context, vsfs *vfs.Filesystem, opts vfs.St } // DecRef implements kernfs.Inode.DecRef. -func (i *tasksInode) DecRef(context.Context) { - i.tasksInodeRefs.DecRef(i.Destroy) +func (i *tasksInode) DecRef(ctx context.Context) { + i.tasksInodeRefs.DecRef(func() { i.Destroy(ctx) }) } // staticFileSetStat implements a special static file that allows inode diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go index f268c59b0..07c27cdd9 100644 --- a/pkg/sentry/fsimpl/proc/tasks_files.go +++ b/pkg/sentry/fsimpl/proc/tasks_files.go @@ -43,13 +43,10 @@ type selfSymlink struct { var _ kernfs.Inode = (*selfSymlink)(nil) -func (fs *filesystem) newSelfSymlink(creds *auth.Credentials, ino uint64, pidns *kernel.PIDNamespace) *kernfs.Dentry { - inode := &selfSymlink{pidns: pidns} - inode.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777) - - d := &kernfs.Dentry{} - d.Init(inode) - return d +func (i *tasksInode) newSelfSymlink(creds *auth.Credentials) kernfs.Inode { + inode := &selfSymlink{pidns: i.pidns} + inode.Init(creds, linux.UNNAMED_MAJOR, i.fs.devMinor, i.fs.NextIno(), linux.ModeSymlink|0777) + return inode } func (s *selfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) { @@ -87,13 +84,10 @@ type threadSelfSymlink struct { var _ kernfs.Inode = (*threadSelfSymlink)(nil) -func (fs *filesystem) newThreadSelfSymlink(creds *auth.Credentials, ino uint64, pidns *kernel.PIDNamespace) *kernfs.Dentry { - inode := &threadSelfSymlink{pidns: pidns} - inode.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777) - - d := &kernfs.Dentry{} - d.Init(inode) - return d +func (i *tasksInode) newThreadSelfSymlink(creds *auth.Credentials) 
kernfs.Inode { + inode := &threadSelfSymlink{pidns: i.pidns} + inode.Init(creds, linux.UNNAMED_MAJOR, i.fs.devMinor, i.fs.NextIno(), linux.ModeSymlink|0777) + return inode } func (s *threadSelfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) { diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go index 3312b0418..95420368d 100644 --- a/pkg/sentry/fsimpl/proc/tasks_sys.go +++ b/pkg/sentry/fsimpl/proc/tasks_sys.go @@ -40,93 +40,93 @@ const ( ) // newSysDir returns the dentry corresponding to /proc/sys directory. -func (fs *filesystem) newSysDir(root *auth.Credentials, k *kernel.Kernel) *kernfs.Dentry { - return newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{ - "kernel": newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{ - "hostname": fs.newDentry(root, fs.NextIno(), 0444, &hostnameData{}), - "shmall": fs.newDentry(root, fs.NextIno(), 0444, shmData(linux.SHMALL)), - "shmmax": fs.newDentry(root, fs.NextIno(), 0444, shmData(linux.SHMMAX)), - "shmmni": fs.newDentry(root, fs.NextIno(), 0444, shmData(linux.SHMMNI)), +func (fs *filesystem) newSysDir(root *auth.Credentials, k *kernel.Kernel) kernfs.Inode { + return fs.newStaticDir(root, map[string]kernfs.Inode{ + "kernel": fs.newStaticDir(root, map[string]kernfs.Inode{ + "hostname": fs.newInode(root, 0444, &hostnameData{}), + "shmall": fs.newInode(root, 0444, shmData(linux.SHMALL)), + "shmmax": fs.newInode(root, 0444, shmData(linux.SHMMAX)), + "shmmni": fs.newInode(root, 0444, shmData(linux.SHMMNI)), }), - "vm": newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{ - "mmap_min_addr": fs.newDentry(root, fs.NextIno(), 0444, &mmapMinAddrData{k: k}), - "overcommit_memory": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0\n")), + "vm": fs.newStaticDir(root, map[string]kernfs.Inode{ + "mmap_min_addr": fs.newInode(root, 0444, 
&mmapMinAddrData{k: k}), + "overcommit_memory": fs.newInode(root, 0444, newStaticFile("0\n")), }), "net": fs.newSysNetDir(root, k), }) } // newSysNetDir returns the dentry corresponding to /proc/sys/net directory. -func (fs *filesystem) newSysNetDir(root *auth.Credentials, k *kernel.Kernel) *kernfs.Dentry { - var contents map[string]*kernfs.Dentry +func (fs *filesystem) newSysNetDir(root *auth.Credentials, k *kernel.Kernel) kernfs.Inode { + var contents map[string]kernfs.Inode // TODO(gvisor.dev/issue/1833): Support for using the network stack in the // network namespace of the calling process. if stack := k.RootNetworkNamespace().Stack(); stack != nil { - contents = map[string]*kernfs.Dentry{ - "ipv4": newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{ - "tcp_recovery": fs.newDentry(root, fs.NextIno(), 0644, &tcpRecoveryData{stack: stack}), - "tcp_rmem": fs.newDentry(root, fs.NextIno(), 0644, &tcpMemData{stack: stack, dir: tcpRMem}), - "tcp_sack": fs.newDentry(root, fs.NextIno(), 0644, &tcpSackData{stack: stack}), - "tcp_wmem": fs.newDentry(root, fs.NextIno(), 0644, &tcpMemData{stack: stack, dir: tcpWMem}), - "ip_forward": fs.newDentry(root, fs.NextIno(), 0444, &ipForwarding{stack: stack}), + contents = map[string]kernfs.Inode{ + "ipv4": fs.newStaticDir(root, map[string]kernfs.Inode{ + "tcp_recovery": fs.newInode(root, 0644, &tcpRecoveryData{stack: stack}), + "tcp_rmem": fs.newInode(root, 0644, &tcpMemData{stack: stack, dir: tcpRMem}), + "tcp_sack": fs.newInode(root, 0644, &tcpSackData{stack: stack}), + "tcp_wmem": fs.newInode(root, 0644, &tcpMemData{stack: stack, dir: tcpWMem}), + "ip_forward": fs.newInode(root, 0444, &ipForwarding{stack: stack}), // The following files are simple stubs until they are implemented in // netstack, most of these files are configuration related. 
We use the // value closest to the actual netstack behavior or any empty file, all // of these files will have mode 0444 (read-only for all users). - "ip_local_port_range": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("16000 65535")), - "ip_local_reserved_ports": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("")), - "ipfrag_time": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("30")), - "ip_nonlocal_bind": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")), - "ip_no_pmtu_disc": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1")), + "ip_local_port_range": fs.newInode(root, 0444, newStaticFile("16000 65535")), + "ip_local_reserved_ports": fs.newInode(root, 0444, newStaticFile("")), + "ipfrag_time": fs.newInode(root, 0444, newStaticFile("30")), + "ip_nonlocal_bind": fs.newInode(root, 0444, newStaticFile("0")), + "ip_no_pmtu_disc": fs.newInode(root, 0444, newStaticFile("1")), // tcp_allowed_congestion_control tell the user what they are able to // do as an unprivledged process so we leave it empty. - "tcp_allowed_congestion_control": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("")), - "tcp_available_congestion_control": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("reno")), - "tcp_congestion_control": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("reno")), + "tcp_allowed_congestion_control": fs.newInode(root, 0444, newStaticFile("")), + "tcp_available_congestion_control": fs.newInode(root, 0444, newStaticFile("reno")), + "tcp_congestion_control": fs.newInode(root, 0444, newStaticFile("reno")), // Many of the following stub files are features netstack doesn't // support. The unsupported features return "0" to indicate they are // disabled. 
- "tcp_base_mss": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1280")), - "tcp_dsack": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")), - "tcp_early_retrans": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")), - "tcp_fack": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")), - "tcp_fastopen": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")), - "tcp_fastopen_key": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("")), - "tcp_invalid_ratelimit": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")), - "tcp_keepalive_intvl": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")), - "tcp_keepalive_probes": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")), - "tcp_keepalive_time": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("7200")), - "tcp_mtu_probing": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")), - "tcp_no_metrics_save": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1")), - "tcp_probe_interval": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")), - "tcp_probe_threshold": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")), - "tcp_retries1": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("3")), - "tcp_retries2": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("15")), - "tcp_rfc1337": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1")), - "tcp_slow_start_after_idle": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1")), - "tcp_synack_retries": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("5")), - "tcp_syn_retries": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("3")), - "tcp_timestamps": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1")), + "tcp_base_mss": fs.newInode(root, 0444, newStaticFile("1280")), + "tcp_dsack": fs.newInode(root, 0444, newStaticFile("0")), + "tcp_early_retrans": fs.newInode(root, 0444, newStaticFile("0")), + "tcp_fack": fs.newInode(root, 0444, newStaticFile("0")), + "tcp_fastopen": 
fs.newInode(root, 0444, newStaticFile("0")), + "tcp_fastopen_key": fs.newInode(root, 0444, newStaticFile("")), + "tcp_invalid_ratelimit": fs.newInode(root, 0444, newStaticFile("0")), + "tcp_keepalive_intvl": fs.newInode(root, 0444, newStaticFile("0")), + "tcp_keepalive_probes": fs.newInode(root, 0444, newStaticFile("0")), + "tcp_keepalive_time": fs.newInode(root, 0444, newStaticFile("7200")), + "tcp_mtu_probing": fs.newInode(root, 0444, newStaticFile("0")), + "tcp_no_metrics_save": fs.newInode(root, 0444, newStaticFile("1")), + "tcp_probe_interval": fs.newInode(root, 0444, newStaticFile("0")), + "tcp_probe_threshold": fs.newInode(root, 0444, newStaticFile("0")), + "tcp_retries1": fs.newInode(root, 0444, newStaticFile("3")), + "tcp_retries2": fs.newInode(root, 0444, newStaticFile("15")), + "tcp_rfc1337": fs.newInode(root, 0444, newStaticFile("1")), + "tcp_slow_start_after_idle": fs.newInode(root, 0444, newStaticFile("1")), + "tcp_synack_retries": fs.newInode(root, 0444, newStaticFile("5")), + "tcp_syn_retries": fs.newInode(root, 0444, newStaticFile("3")), + "tcp_timestamps": fs.newInode(root, 0444, newStaticFile("1")), }), - "core": newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{ - "default_qdisc": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("pfifo_fast")), - "message_burst": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("10")), - "message_cost": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("5")), - "optmem_max": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")), - "rmem_default": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("212992")), - "rmem_max": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("212992")), - "somaxconn": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("128")), - "wmem_default": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("212992")), - "wmem_max": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("212992")), + "core": 
fs.newStaticDir(root, map[string]kernfs.Inode{ + "default_qdisc": fs.newInode(root, 0444, newStaticFile("pfifo_fast")), + "message_burst": fs.newInode(root, 0444, newStaticFile("10")), + "message_cost": fs.newInode(root, 0444, newStaticFile("5")), + "optmem_max": fs.newInode(root, 0444, newStaticFile("0")), + "rmem_default": fs.newInode(root, 0444, newStaticFile("212992")), + "rmem_max": fs.newInode(root, 0444, newStaticFile("212992")), + "somaxconn": fs.newInode(root, 0444, newStaticFile("128")), + "wmem_default": fs.newInode(root, 0444, newStaticFile("212992")), + "wmem_max": fs.newInode(root, 0444, newStaticFile("212992")), }), } } - return newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, contents) + return fs.newStaticDir(root, contents) } // mmapMinAddrData implements vfs.DynamicBytesSource for diff --git a/pkg/sentry/fsimpl/sockfs/sockfs.go b/pkg/sentry/fsimpl/sockfs/sockfs.go index 29e5371d6..9eef16cc6 100644 --- a/pkg/sentry/fsimpl/sockfs/sockfs.go +++ b/pkg/sentry/fsimpl/sockfs/sockfs.go @@ -114,6 +114,6 @@ func NewDentry(creds *auth.Credentials, mnt *vfs.Mount) *vfs.Dentry { i.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.Filesystem.NextIno(), filemode) d := &kernfs.Dentry{} - d.Init(i) + d.Init(&fs.Filesystem, i) return d.VFSDentry() } diff --git a/pkg/sentry/fsimpl/sys/kcov.go b/pkg/sentry/fsimpl/sys/kcov.go index 1a6749e53..94366d429 100644 --- a/pkg/sentry/fsimpl/sys/kcov.go +++ b/pkg/sentry/fsimpl/sys/kcov.go @@ -27,12 +27,10 @@ import ( "gvisor.dev/gvisor/pkg/usermem" ) -func (fs *filesystem) newKcovFile(ctx context.Context, creds *auth.Credentials) *kernfs.Dentry { +func (fs *filesystem) newKcovFile(ctx context.Context, creds *auth.Credentials) kernfs.Inode { k := &kcovInode{} k.InodeAttrs.Init(creds, 0, 0, fs.NextIno(), linux.S_IFREG|0600) - d := &kernfs.Dentry{} - d.Init(k) - return d + return k } // kcovInode implements kernfs.Inode. 
diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go index 1568c581f..5a06f4e1c 100644 --- a/pkg/sentry/fsimpl/sys/sys.go +++ b/pkg/sentry/fsimpl/sys/sys.go @@ -64,15 +64,15 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt } fs.VFSFilesystem().Init(vfsObj, &fsType, fs) - root := fs.newDir(creds, defaultSysDirMode, map[string]*kernfs.Dentry{ + root := fs.newDir(creds, defaultSysDirMode, map[string]kernfs.Inode{ "block": fs.newDir(creds, defaultSysDirMode, nil), "bus": fs.newDir(creds, defaultSysDirMode, nil), - "class": fs.newDir(creds, defaultSysDirMode, map[string]*kernfs.Dentry{ + "class": fs.newDir(creds, defaultSysDirMode, map[string]kernfs.Inode{ "power_supply": fs.newDir(creds, defaultSysDirMode, nil), }), "dev": fs.newDir(creds, defaultSysDirMode, nil), - "devices": fs.newDir(creds, defaultSysDirMode, map[string]*kernfs.Dentry{ - "system": fs.newDir(creds, defaultSysDirMode, map[string]*kernfs.Dentry{ + "devices": fs.newDir(creds, defaultSysDirMode, map[string]kernfs.Inode{ + "system": fs.newDir(creds, defaultSysDirMode, map[string]kernfs.Inode{ "cpu": cpuDir(ctx, fs, creds), }), }), @@ -82,13 +82,15 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt "module": fs.newDir(creds, defaultSysDirMode, nil), "power": fs.newDir(creds, defaultSysDirMode, nil), }) - return fs.VFSFilesystem(), root.VFSDentry(), nil + var rootD kernfs.Dentry + rootD.Init(&fs.Filesystem, root) + return fs.VFSFilesystem(), rootD.VFSDentry(), nil } -func cpuDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) *kernfs.Dentry { +func cpuDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) kernfs.Inode { k := kernel.KernelFromContext(ctx) maxCPUCores := k.ApplicationCores() - children := map[string]*kernfs.Dentry{ + children := map[string]kernfs.Inode{ "online": fs.newCPUFile(creds, maxCPUCores, linux.FileMode(0444)), "possible": fs.newCPUFile(creds, maxCPUCores, 
linux.FileMode(0444)), "present": fs.newCPUFile(creds, maxCPUCores, linux.FileMode(0444)), @@ -99,14 +101,14 @@ func cpuDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) *kernf return fs.newDir(creds, defaultSysDirMode, children) } -func kernelDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) *kernfs.Dentry { +func kernelDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) kernfs.Inode { // If kcov is available, set up /sys/kernel/debug/kcov. Technically, debugfs // should be mounted at debug/, but for our purposes, it is sufficient to // keep it in sys. - var children map[string]*kernfs.Dentry + var children map[string]kernfs.Inode if coverage.KcovAvailable() { - children = map[string]*kernfs.Dentry{ - "debug": fs.newDir(creds, linux.FileMode(0700), map[string]*kernfs.Dentry{ + children = map[string]kernfs.Inode{ + "debug": fs.newDir(creds, linux.FileMode(0700), map[string]kernfs.Inode{ "kcov": fs.newKcovFile(ctx, creds), }), } @@ -125,27 +127,23 @@ func (fs *filesystem) Release(ctx context.Context) { // +stateify savable type dir struct { dirRefs + kernfs.InodeAlwaysValid kernfs.InodeAttrs - kernfs.InodeNoDynamicLookup kernfs.InodeNotSymlink kernfs.InodeDirectoryNoNewChildren + kernfs.InodeTemporary kernfs.OrderedChildren locks vfs.FileLocks - - dentry kernfs.Dentry } -func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, contents map[string]*kernfs.Dentry) *kernfs.Dentry { +func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, contents map[string]kernfs.Inode) kernfs.Inode { d := &dir{} d.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0755) d.OrderedChildren.Init(kernfs.OrderedChildrenOptions{}) d.EnableLeakCheck() - d.dentry.Init(d) - - d.IncLinks(d.OrderedChildren.Populate(&d.dentry, contents)) - - return &d.dentry + d.IncLinks(d.OrderedChildren.Populate(contents)) + return d } // SetStat implements kernfs.Inode.SetStat not 
allowing inode attributes to be changed. @@ -165,8 +163,8 @@ func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, kd *kernfs.Dentry } // DecRef implements kernfs.Inode.DecRef. -func (d *dir) DecRef(context.Context) { - d.dirRefs.DecRef(d.Destroy) +func (d *dir) DecRef(ctx context.Context) { + d.dirRefs.DecRef(func() { d.Destroy(ctx) }) } // StatFS implements kernfs.Inode.StatFS. @@ -190,12 +188,10 @@ func (c *cpuFile) Generate(ctx context.Context, buf *bytes.Buffer) error { return nil } -func (fs *filesystem) newCPUFile(creds *auth.Credentials, maxCores uint, mode linux.FileMode) *kernfs.Dentry { +func (fs *filesystem) newCPUFile(creds *auth.Credentials, maxCores uint, mode linux.FileMode) kernfs.Inode { c := &cpuFile{maxCores: maxCores} c.DynamicBytesFile.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), c, mode) - d := &kernfs.Dentry{} - d.Init(c) - return d + return c } // +stateify savable diff --git a/pkg/sentry/fsimpl/sys/sys_state_autogen.go b/pkg/sentry/fsimpl/sys/sys_state_autogen.go index 410e5a7d0..64c9c9d1f 100644 --- a/pkg/sentry/fsimpl/sys/sys_state_autogen.go +++ b/pkg/sentry/fsimpl/sys/sys_state_autogen.go @@ -151,13 +151,13 @@ func (d *dir) StateTypeName() string { func (d *dir) StateFields() []string { return []string{ "dirRefs", + "InodeAlwaysValid", "InodeAttrs", - "InodeNoDynamicLookup", "InodeNotSymlink", "InodeDirectoryNoNewChildren", + "InodeTemporary", "OrderedChildren", "locks", - "dentry", } } @@ -166,26 +166,26 @@ func (d *dir) beforeSave() {} func (d *dir) StateSave(stateSinkObject state.Sink) { d.beforeSave() stateSinkObject.Save(0, &d.dirRefs) - stateSinkObject.Save(1, &d.InodeAttrs) - stateSinkObject.Save(2, &d.InodeNoDynamicLookup) + stateSinkObject.Save(1, &d.InodeAlwaysValid) + stateSinkObject.Save(2, &d.InodeAttrs) stateSinkObject.Save(3, &d.InodeNotSymlink) stateSinkObject.Save(4, &d.InodeDirectoryNoNewChildren) - stateSinkObject.Save(5, &d.OrderedChildren) - stateSinkObject.Save(6, &d.locks) - 
stateSinkObject.Save(7, &d.dentry) + stateSinkObject.Save(5, &d.InodeTemporary) + stateSinkObject.Save(6, &d.OrderedChildren) + stateSinkObject.Save(7, &d.locks) } func (d *dir) afterLoad() {} func (d *dir) StateLoad(stateSourceObject state.Source) { stateSourceObject.Load(0, &d.dirRefs) - stateSourceObject.Load(1, &d.InodeAttrs) - stateSourceObject.Load(2, &d.InodeNoDynamicLookup) + stateSourceObject.Load(1, &d.InodeAlwaysValid) + stateSourceObject.Load(2, &d.InodeAttrs) stateSourceObject.Load(3, &d.InodeNotSymlink) stateSourceObject.Load(4, &d.InodeDirectoryNoNewChildren) - stateSourceObject.Load(5, &d.OrderedChildren) - stateSourceObject.Load(6, &d.locks) - stateSourceObject.Load(7, &d.dentry) + stateSourceObject.Load(5, &d.InodeTemporary) + stateSourceObject.Load(6, &d.OrderedChildren) + stateSourceObject.Load(7, &d.locks) } func (c *cpuFile) StateTypeName() string { |