summaryrefslogtreecommitdiffhomepage
path: root/pkg/sentry
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/sentry')
-rw-r--r--pkg/sentry/fsimpl/devpts/devpts.go47
-rw-r--r--pkg/sentry/fsimpl/devpts/devpts_state_autogen.go53
-rw-r--r--pkg/sentry/fsimpl/devpts/master.go5
-rw-r--r--pkg/sentry/fsimpl/devpts/replica.go3
-rw-r--r--pkg/sentry/fsimpl/fuse/fuse_state_autogen.go53
-rw-r--r--pkg/sentry/fsimpl/fuse/fusefs.go63
-rw-r--r--pkg/sentry/fsimpl/host/host.go3
-rw-r--r--pkg/sentry/fsimpl/host/host_state_autogen.go47
-rw-r--r--pkg/sentry/fsimpl/kernfs/fd_impl_util.go4
-rw-r--r--pkg/sentry/fsimpl/kernfs/filesystem.go192
-rw-r--r--pkg/sentry/fsimpl/kernfs/inode_impl_util.go235
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs.go173
-rw-r--r--pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go207
-rw-r--r--pkg/sentry/fsimpl/kernfs/symlink.go7
-rw-r--r--pkg/sentry/fsimpl/kernfs/synthetic_directory.go40
-rw-r--r--pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go118
-rw-r--r--pkg/sentry/fsimpl/pipefs/pipefs.go2
-rw-r--r--pkg/sentry/fsimpl/proc/filesystem.go17
-rw-r--r--pkg/sentry/fsimpl/proc/proc_state_autogen.go112
-rw-r--r--pkg/sentry/fsimpl/proc/subtasks.go22
-rw-r--r--pkg/sentry/fsimpl/proc/task.go86
-rw-r--r--pkg/sentry/fsimpl/proc/task_fds.go67
-rw-r--r--pkg/sentry/fsimpl/proc/task_files.go34
-rw-r--r--pkg/sentry/fsimpl/proc/task_net.go40
-rw-r--r--pkg/sentry/fsimpl/proc/tasks.go56
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_files.go22
-rw-r--r--pkg/sentry/fsimpl/proc/tasks_sys.go118
-rw-r--r--pkg/sentry/fsimpl/sockfs/sockfs.go2
-rw-r--r--pkg/sentry/fsimpl/sys/kcov.go6
-rw-r--r--pkg/sentry/fsimpl/sys/sys.go48
-rw-r--r--pkg/sentry/fsimpl/sys/sys_state_autogen.go24
31 files changed, 1056 insertions, 850 deletions
diff --git a/pkg/sentry/fsimpl/devpts/devpts.go b/pkg/sentry/fsimpl/devpts/devpts.go
index 903135fae..8e34e26df 100644
--- a/pkg/sentry/fsimpl/devpts/devpts.go
+++ b/pkg/sentry/fsimpl/devpts/devpts.go
@@ -87,7 +87,9 @@ func (fstype FilesystemType) newFilesystem(vfsObj *vfs.VirtualFilesystem, creds
root.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, devMinor, 1, linux.ModeDirectory|0555)
root.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
root.EnableLeakCheck()
- root.dentry.Init(root)
+
+ var rootD kernfs.Dentry
+ rootD.Init(&fs.Filesystem, root)
// Construct the pts master inode and dentry. Linux always uses inode
// id 2 for ptmx. See fs/devpts/inode.c:mknod_ptmx.
@@ -95,15 +97,14 @@ func (fstype FilesystemType) newFilesystem(vfsObj *vfs.VirtualFilesystem, creds
root: root,
}
master.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, devMinor, 2, linux.ModeCharacterDevice|0666)
- master.dentry.Init(master)
// Add the master as a child of the root.
- links := root.OrderedChildren.Populate(&root.dentry, map[string]*kernfs.Dentry{
- "ptmx": &master.dentry,
+ links := root.OrderedChildren.Populate(map[string]kernfs.Inode{
+ "ptmx": master,
})
root.IncLinks(links)
- return fs, &root.dentry, nil
+ return fs, &rootD, nil
}
// Release implements vfs.FilesystemImpl.Release.
@@ -117,24 +118,19 @@ func (fs *filesystem) Release(ctx context.Context) {
// +stateify savable
type rootInode struct {
implStatFS
- kernfs.AlwaysValid
+ kernfs.InodeAlwaysValid
kernfs.InodeAttrs
kernfs.InodeDirectoryNoNewChildren
kernfs.InodeNotSymlink
+ kernfs.InodeTemporary // This holds no meaning as this inode can't be Looked up and is always valid.
kernfs.OrderedChildren
rootInodeRefs
locks vfs.FileLocks
- // Keep a reference to this inode's dentry.
- dentry kernfs.Dentry
-
// master is the master pty inode. Immutable.
master *masterInode
- // root is the root directory inode for this filesystem. Immutable.
- root *rootInode
-
// mu protects the fields below.
mu sync.Mutex `state:"nosave"`
@@ -173,21 +169,24 @@ func (i *rootInode) allocateTerminal(creds *auth.Credentials) (*Terminal, error)
// Linux always uses pty index + 3 as the inode id. See
// fs/devpts/inode.c:devpts_pty_new().
replica.InodeAttrs.Init(creds, i.InodeAttrs.DevMajor(), i.InodeAttrs.DevMinor(), uint64(idx+3), linux.ModeCharacterDevice|0600)
- replica.dentry.Init(replica)
i.replicas[idx] = replica
return t, nil
}
// masterClose is called when the master end of t is closed.
-func (i *rootInode) masterClose(t *Terminal) {
+func (i *rootInode) masterClose(ctx context.Context, t *Terminal) {
i.mu.Lock()
defer i.mu.Unlock()
// Sanity check that replica with idx exists.
- if _, ok := i.replicas[t.n]; !ok {
+ ri, ok := i.replicas[t.n]
+ if !ok {
panic(fmt.Sprintf("pty with index %d does not exist", t.n))
}
+
+ // Drop the ref on replica inode taken during rootInode.allocateTerminal.
+ ri.DecRef(ctx)
delete(i.replicas, t.n)
}
@@ -203,16 +202,22 @@ func (i *rootInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.D
}
// Lookup implements kernfs.Inode.Lookup.
-func (i *rootInode) Lookup(ctx context.Context, name string) (*kernfs.Dentry, error) {
+func (i *rootInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
+ // Check if a static entry was looked up.
+ if d, err := i.OrderedChildren.Lookup(ctx, name); err == nil {
+ return d, nil
+ }
+
+ // Not a static entry.
idx, err := strconv.ParseUint(name, 10, 32)
if err != nil {
return nil, syserror.ENOENT
}
i.mu.Lock()
defer i.mu.Unlock()
- if si, ok := i.replicas[uint32(idx)]; ok {
- si.dentry.IncRef()
- return &si.dentry, nil
+ if ri, ok := i.replicas[uint32(idx)]; ok {
+ ri.IncRef() // This ref is passed to the dentry upon creation via Init.
+ return ri, nil
}
return nil, syserror.ENOENT
@@ -243,8 +248,8 @@ func (i *rootInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback,
}
// DecRef implements kernfs.Inode.DecRef.
-func (i *rootInode) DecRef(context.Context) {
- i.rootInodeRefs.DecRef(i.Destroy)
+func (i *rootInode) DecRef(ctx context.Context) {
+ i.rootInodeRefs.DecRef(func() { i.Destroy(ctx) })
}
// +stateify savable
diff --git a/pkg/sentry/fsimpl/devpts/devpts_state_autogen.go b/pkg/sentry/fsimpl/devpts/devpts_state_autogen.go
index d2c9ffa7d..12bb996cb 100644
--- a/pkg/sentry/fsimpl/devpts/devpts_state_autogen.go
+++ b/pkg/sentry/fsimpl/devpts/devpts_state_autogen.go
@@ -58,16 +58,15 @@ func (i *rootInode) StateTypeName() string {
func (i *rootInode) StateFields() []string {
return []string{
"implStatFS",
- "AlwaysValid",
+ "InodeAlwaysValid",
"InodeAttrs",
"InodeDirectoryNoNewChildren",
"InodeNotSymlink",
+ "InodeTemporary",
"OrderedChildren",
"rootInodeRefs",
"locks",
- "dentry",
"master",
- "root",
"replicas",
"nextIdx",
}
@@ -78,36 +77,34 @@ func (i *rootInode) beforeSave() {}
func (i *rootInode) StateSave(stateSinkObject state.Sink) {
i.beforeSave()
stateSinkObject.Save(0, &i.implStatFS)
- stateSinkObject.Save(1, &i.AlwaysValid)
+ stateSinkObject.Save(1, &i.InodeAlwaysValid)
stateSinkObject.Save(2, &i.InodeAttrs)
stateSinkObject.Save(3, &i.InodeDirectoryNoNewChildren)
stateSinkObject.Save(4, &i.InodeNotSymlink)
- stateSinkObject.Save(5, &i.OrderedChildren)
- stateSinkObject.Save(6, &i.rootInodeRefs)
- stateSinkObject.Save(7, &i.locks)
- stateSinkObject.Save(8, &i.dentry)
+ stateSinkObject.Save(5, &i.InodeTemporary)
+ stateSinkObject.Save(6, &i.OrderedChildren)
+ stateSinkObject.Save(7, &i.rootInodeRefs)
+ stateSinkObject.Save(8, &i.locks)
stateSinkObject.Save(9, &i.master)
- stateSinkObject.Save(10, &i.root)
- stateSinkObject.Save(11, &i.replicas)
- stateSinkObject.Save(12, &i.nextIdx)
+ stateSinkObject.Save(10, &i.replicas)
+ stateSinkObject.Save(11, &i.nextIdx)
}
func (i *rootInode) afterLoad() {}
func (i *rootInode) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(0, &i.implStatFS)
- stateSourceObject.Load(1, &i.AlwaysValid)
+ stateSourceObject.Load(1, &i.InodeAlwaysValid)
stateSourceObject.Load(2, &i.InodeAttrs)
stateSourceObject.Load(3, &i.InodeDirectoryNoNewChildren)
stateSourceObject.Load(4, &i.InodeNotSymlink)
- stateSourceObject.Load(5, &i.OrderedChildren)
- stateSourceObject.Load(6, &i.rootInodeRefs)
- stateSourceObject.Load(7, &i.locks)
- stateSourceObject.Load(8, &i.dentry)
+ stateSourceObject.Load(5, &i.InodeTemporary)
+ stateSourceObject.Load(6, &i.OrderedChildren)
+ stateSourceObject.Load(7, &i.rootInodeRefs)
+ stateSourceObject.Load(8, &i.locks)
stateSourceObject.Load(9, &i.master)
- stateSourceObject.Load(10, &i.root)
- stateSourceObject.Load(11, &i.replicas)
- stateSourceObject.Load(12, &i.nextIdx)
+ stateSourceObject.Load(10, &i.replicas)
+ stateSourceObject.Load(11, &i.nextIdx)
}
func (i *implStatFS) StateTypeName() string {
@@ -220,7 +217,6 @@ func (mi *masterInode) StateFields() []string {
"InodeNotDirectory",
"InodeNotSymlink",
"locks",
- "dentry",
"root",
}
}
@@ -235,8 +231,7 @@ func (mi *masterInode) StateSave(stateSinkObject state.Sink) {
stateSinkObject.Save(3, &mi.InodeNotDirectory)
stateSinkObject.Save(4, &mi.InodeNotSymlink)
stateSinkObject.Save(5, &mi.locks)
- stateSinkObject.Save(6, &mi.dentry)
- stateSinkObject.Save(7, &mi.root)
+ stateSinkObject.Save(6, &mi.root)
}
func (mi *masterInode) afterLoad() {}
@@ -248,8 +243,7 @@ func (mi *masterInode) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(3, &mi.InodeNotDirectory)
stateSourceObject.Load(4, &mi.InodeNotSymlink)
stateSourceObject.Load(5, &mi.locks)
- stateSourceObject.Load(6, &mi.dentry)
- stateSourceObject.Load(7, &mi.root)
+ stateSourceObject.Load(6, &mi.root)
}
func (mfd *masterFileDescription) StateTypeName() string {
@@ -334,7 +328,6 @@ func (ri *replicaInode) StateFields() []string {
"InodeNotDirectory",
"InodeNotSymlink",
"locks",
- "dentry",
"root",
"t",
}
@@ -350,9 +343,8 @@ func (ri *replicaInode) StateSave(stateSinkObject state.Sink) {
stateSinkObject.Save(3, &ri.InodeNotDirectory)
stateSinkObject.Save(4, &ri.InodeNotSymlink)
stateSinkObject.Save(5, &ri.locks)
- stateSinkObject.Save(6, &ri.dentry)
- stateSinkObject.Save(7, &ri.root)
- stateSinkObject.Save(8, &ri.t)
+ stateSinkObject.Save(6, &ri.root)
+ stateSinkObject.Save(7, &ri.t)
}
func (ri *replicaInode) afterLoad() {}
@@ -364,9 +356,8 @@ func (ri *replicaInode) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(3, &ri.InodeNotDirectory)
stateSourceObject.Load(4, &ri.InodeNotSymlink)
stateSourceObject.Load(5, &ri.locks)
- stateSourceObject.Load(6, &ri.dentry)
- stateSourceObject.Load(7, &ri.root)
- stateSourceObject.Load(8, &ri.t)
+ stateSourceObject.Load(6, &ri.root)
+ stateSourceObject.Load(7, &ri.t)
}
func (rfd *replicaFileDescription) StateTypeName() string {
diff --git a/pkg/sentry/fsimpl/devpts/master.go b/pkg/sentry/fsimpl/devpts/master.go
index 69c2fe951..fda30fb93 100644
--- a/pkg/sentry/fsimpl/devpts/master.go
+++ b/pkg/sentry/fsimpl/devpts/master.go
@@ -42,9 +42,6 @@ type masterInode struct {
locks vfs.FileLocks
- // Keep a reference to this inode's dentry.
- dentry kernfs.Dentry
-
// root is the devpts root inode.
root *rootInode
}
@@ -103,7 +100,7 @@ var _ vfs.FileDescriptionImpl = (*masterFileDescription)(nil)
// Release implements vfs.FileDescriptionImpl.Release.
func (mfd *masterFileDescription) Release(ctx context.Context) {
- mfd.inode.root.masterClose(mfd.t)
+ mfd.inode.root.masterClose(ctx, mfd.t)
}
// EventRegister implements waiter.Waitable.EventRegister.
diff --git a/pkg/sentry/fsimpl/devpts/replica.go b/pkg/sentry/fsimpl/devpts/replica.go
index 6515c5536..70c68cf0a 100644
--- a/pkg/sentry/fsimpl/devpts/replica.go
+++ b/pkg/sentry/fsimpl/devpts/replica.go
@@ -41,9 +41,6 @@ type replicaInode struct {
locks vfs.FileLocks
- // Keep a reference to this inode's dentry.
- dentry kernfs.Dentry
-
// root is the devpts root inode.
root *rootInode
diff --git a/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go b/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go
index 3c189317f..4c8bc4410 100644
--- a/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go
+++ b/pkg/sentry/fsimpl/fuse/fuse_state_autogen.go
@@ -271,12 +271,11 @@ func (i *inode) StateTypeName() string {
func (i *inode) StateFields() []string {
return []string{
"inodeRefs",
+ "InodeAlwaysValid",
"InodeAttrs",
"InodeDirectoryNoNewChildren",
- "InodeNoDynamicLookup",
"InodeNotSymlink",
"OrderedChildren",
- "dentry",
"fs",
"metadataMu",
"nodeID",
@@ -294,42 +293,40 @@ func (i *inode) beforeSave() {}
func (i *inode) StateSave(stateSinkObject state.Sink) {
i.beforeSave()
stateSinkObject.Save(0, &i.inodeRefs)
- stateSinkObject.Save(1, &i.InodeAttrs)
- stateSinkObject.Save(2, &i.InodeDirectoryNoNewChildren)
- stateSinkObject.Save(3, &i.InodeNoDynamicLookup)
+ stateSinkObject.Save(1, &i.InodeAlwaysValid)
+ stateSinkObject.Save(2, &i.InodeAttrs)
+ stateSinkObject.Save(3, &i.InodeDirectoryNoNewChildren)
stateSinkObject.Save(4, &i.InodeNotSymlink)
stateSinkObject.Save(5, &i.OrderedChildren)
- stateSinkObject.Save(6, &i.dentry)
- stateSinkObject.Save(7, &i.fs)
- stateSinkObject.Save(8, &i.metadataMu)
- stateSinkObject.Save(9, &i.nodeID)
- stateSinkObject.Save(10, &i.locks)
- stateSinkObject.Save(11, &i.size)
- stateSinkObject.Save(12, &i.attributeVersion)
- stateSinkObject.Save(13, &i.attributeTime)
- stateSinkObject.Save(14, &i.version)
- stateSinkObject.Save(15, &i.link)
+ stateSinkObject.Save(6, &i.fs)
+ stateSinkObject.Save(7, &i.metadataMu)
+ stateSinkObject.Save(8, &i.nodeID)
+ stateSinkObject.Save(9, &i.locks)
+ stateSinkObject.Save(10, &i.size)
+ stateSinkObject.Save(11, &i.attributeVersion)
+ stateSinkObject.Save(12, &i.attributeTime)
+ stateSinkObject.Save(13, &i.version)
+ stateSinkObject.Save(14, &i.link)
}
func (i *inode) afterLoad() {}
func (i *inode) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(0, &i.inodeRefs)
- stateSourceObject.Load(1, &i.InodeAttrs)
- stateSourceObject.Load(2, &i.InodeDirectoryNoNewChildren)
- stateSourceObject.Load(3, &i.InodeNoDynamicLookup)
+ stateSourceObject.Load(1, &i.InodeAlwaysValid)
+ stateSourceObject.Load(2, &i.InodeAttrs)
+ stateSourceObject.Load(3, &i.InodeDirectoryNoNewChildren)
stateSourceObject.Load(4, &i.InodeNotSymlink)
stateSourceObject.Load(5, &i.OrderedChildren)
- stateSourceObject.Load(6, &i.dentry)
- stateSourceObject.Load(7, &i.fs)
- stateSourceObject.Load(8, &i.metadataMu)
- stateSourceObject.Load(9, &i.nodeID)
- stateSourceObject.Load(10, &i.locks)
- stateSourceObject.Load(11, &i.size)
- stateSourceObject.Load(12, &i.attributeVersion)
- stateSourceObject.Load(13, &i.attributeTime)
- stateSourceObject.Load(14, &i.version)
- stateSourceObject.Load(15, &i.link)
+ stateSourceObject.Load(6, &i.fs)
+ stateSourceObject.Load(7, &i.metadataMu)
+ stateSourceObject.Load(8, &i.nodeID)
+ stateSourceObject.Load(9, &i.locks)
+ stateSourceObject.Load(10, &i.size)
+ stateSourceObject.Load(11, &i.attributeVersion)
+ stateSourceObject.Load(12, &i.attributeTime)
+ stateSourceObject.Load(13, &i.version)
+ stateSourceObject.Load(14, &i.link)
}
func (r *inodeRefs) StateTypeName() string {
diff --git a/pkg/sentry/fsimpl/fuse/fusefs.go b/pkg/sentry/fsimpl/fuse/fusefs.go
index 65786e42a..1a388f54f 100644
--- a/pkg/sentry/fsimpl/fuse/fusefs.go
+++ b/pkg/sentry/fsimpl/fuse/fusefs.go
@@ -249,14 +249,12 @@ func (fs *filesystem) Release(ctx context.Context) {
// +stateify savable
type inode struct {
inodeRefs
+ kernfs.InodeAlwaysValid
kernfs.InodeAttrs
kernfs.InodeDirectoryNoNewChildren
- kernfs.InodeNoDynamicLookup
kernfs.InodeNotSymlink
kernfs.OrderedChildren
- dentry kernfs.Dentry
-
// the owning filesystem. fs is immutable.
fs *filesystem
@@ -284,26 +282,24 @@ type inode struct {
}
func (fs *filesystem) newRootInode(creds *auth.Credentials, mode linux.FileMode) *kernfs.Dentry {
- i := &inode{fs: fs}
+ i := &inode{fs: fs, nodeID: 1}
i.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, 1, linux.ModeDirectory|0755)
i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
i.EnableLeakCheck()
- i.dentry.Init(i)
- i.nodeID = 1
- return &i.dentry
+ var d kernfs.Dentry
+ d.Init(&fs.Filesystem, i)
+ return &d
}
-func (fs *filesystem) newInode(nodeID uint64, attr linux.FUSEAttr) *kernfs.Dentry {
+func (fs *filesystem) newInode(nodeID uint64, attr linux.FUSEAttr) kernfs.Inode {
i := &inode{fs: fs, nodeID: nodeID}
creds := auth.Credentials{EffectiveKGID: auth.KGID(attr.UID), EffectiveKUID: auth.KUID(attr.UID)}
i.InodeAttrs.Init(&creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.FileMode(attr.Mode))
atomic.StoreUint64(&i.size, attr.Size)
i.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
i.EnableLeakCheck()
- i.dentry.Init(i)
-
- return &i.dentry
+ return i
}
// Open implements kernfs.Inode.Open.
@@ -410,23 +406,27 @@ func (i *inode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.Dentr
}
// Lookup implements kernfs.Inode.Lookup.
-func (i *inode) Lookup(ctx context.Context, name string) (*kernfs.Dentry, error) {
+func (i *inode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
in := linux.FUSELookupIn{Name: name}
return i.newEntry(ctx, name, 0, linux.FUSE_LOOKUP, &in)
}
+// Keep implements kernfs.Inode.Keep.
+func (i *inode) Keep() bool {
+ // Return true so that kernfs keeps the new dentry pointing to this
+ // inode in the dentry tree. This is needed because inodes created via
+ // Lookup are not temporary. They might refer to existing files on server
+ // that can be Unlink'd/Rmdir'd.
+ return true
+}
+
// IterDirents implements kernfs.Inode.IterDirents.
func (*inode) IterDirents(ctx context.Context, callback vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
return offset, nil
}
-// Valid implements kernfs.Inode.Valid.
-func (*inode) Valid(ctx context.Context) bool {
- return true
-}
-
// NewFile implements kernfs.Inode.NewFile.
-func (i *inode) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*kernfs.Dentry, error) {
+func (i *inode) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (kernfs.Inode, error) {
kernelTask := kernel.TaskFromContext(ctx)
if kernelTask == nil {
log.Warningf("fusefs.Inode.NewFile: couldn't get kernel task from context", i.nodeID)
@@ -444,7 +444,7 @@ func (i *inode) NewFile(ctx context.Context, name string, opts vfs.OpenOptions)
}
// NewNode implements kernfs.Inode.NewNode.
-func (i *inode) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*kernfs.Dentry, error) {
+func (i *inode) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (kernfs.Inode, error) {
in := linux.FUSEMknodIn{
MknodMeta: linux.FUSEMknodMeta{
Mode: uint32(opts.Mode),
@@ -457,7 +457,7 @@ func (i *inode) NewNode(ctx context.Context, name string, opts vfs.MknodOptions)
}
// NewSymlink implements kernfs.Inode.NewSymlink.
-func (i *inode) NewSymlink(ctx context.Context, name, target string) (*kernfs.Dentry, error) {
+func (i *inode) NewSymlink(ctx context.Context, name, target string) (kernfs.Inode, error) {
in := linux.FUSESymLinkIn{
Name: name,
Target: target,
@@ -466,7 +466,7 @@ func (i *inode) NewSymlink(ctx context.Context, name, target string) (*kernfs.De
}
// Unlink implements kernfs.Inode.Unlink.
-func (i *inode) Unlink(ctx context.Context, name string, child *kernfs.Dentry) error {
+func (i *inode) Unlink(ctx context.Context, name string, child kernfs.Inode) error {
kernelTask := kernel.TaskFromContext(ctx)
if kernelTask == nil {
log.Warningf("fusefs.Inode.newEntry: couldn't get kernel task from context", i.nodeID)
@@ -482,14 +482,11 @@ func (i *inode) Unlink(ctx context.Context, name string, child *kernfs.Dentry) e
return err
}
// only return error, discard res.
- if err := res.Error(); err != nil {
- return err
- }
- return i.dentry.RemoveChildLocked(name, child)
+ return res.Error()
}
// NewDir implements kernfs.Inode.NewDir.
-func (i *inode) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*kernfs.Dentry, error) {
+func (i *inode) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (kernfs.Inode, error) {
in := linux.FUSEMkdirIn{
MkdirMeta: linux.FUSEMkdirMeta{
Mode: uint32(opts.Mode),
@@ -501,7 +498,7 @@ func (i *inode) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions)
}
// RmDir implements kernfs.Inode.RmDir.
-func (i *inode) RmDir(ctx context.Context, name string, child *kernfs.Dentry) error {
+func (i *inode) RmDir(ctx context.Context, name string, child kernfs.Inode) error {
fusefs := i.fs
task, creds := kernel.TaskFromContext(ctx), auth.CredentialsFromContext(ctx)
@@ -515,16 +512,12 @@ func (i *inode) RmDir(ctx context.Context, name string, child *kernfs.Dentry) er
if err != nil {
return err
}
- if err := res.Error(); err != nil {
- return err
- }
-
- return i.dentry.RemoveChildLocked(name, child)
+ return res.Error()
}
// newEntry calls FUSE server for entry creation and allocates corresponding entry according to response.
// Shared by FUSE_MKNOD, FUSE_MKDIR, FUSE_SYMLINK, FUSE_LINK and FUSE_LOOKUP.
-func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMode, opcode linux.FUSEOpcode, payload marshal.Marshallable) (*kernfs.Dentry, error) {
+func (i *inode) newEntry(ctx context.Context, name string, fileType linux.FileMode, opcode linux.FUSEOpcode, payload marshal.Marshallable) (kernfs.Inode, error) {
kernelTask := kernel.TaskFromContext(ctx)
if kernelTask == nil {
log.Warningf("fusefs.Inode.newEntry: couldn't get kernel task from context", i.nodeID)
@@ -734,8 +727,8 @@ func (i *inode) Stat(ctx context.Context, fs *vfs.Filesystem, opts vfs.StatOptio
}
// DecRef implements kernfs.Inode.DecRef.
-func (i *inode) DecRef(context.Context) {
- i.inodeRefs.DecRef(i.Destroy)
+func (i *inode) DecRef(ctx context.Context) {
+ i.inodeRefs.DecRef(func() { i.Destroy(ctx) })
}
// StatFS implements kernfs.Inode.StatFS.
diff --git a/pkg/sentry/fsimpl/host/host.go b/pkg/sentry/fsimpl/host/host.go
index ffe4ddb32..da1e3bf4b 100644
--- a/pkg/sentry/fsimpl/host/host.go
+++ b/pkg/sentry/fsimpl/host/host.go
@@ -118,7 +118,7 @@ func NewFD(ctx context.Context, mnt *vfs.Mount, hostFD int, opts *NewFDOptions)
if err != nil {
return nil, err
}
- d.Init(i)
+ d.Init(&fs.Filesystem, i)
// i.open will take a reference on d.
defer d.DecRef(ctx)
@@ -195,6 +195,7 @@ type inode struct {
kernfs.InodeNoStatFS
kernfs.InodeNotDirectory
kernfs.InodeNotSymlink
+ kernfs.InodeTemporary // This holds no meaning as this inode can't be Looked up and is always valid.
locks vfs.FileLocks
diff --git a/pkg/sentry/fsimpl/host/host_state_autogen.go b/pkg/sentry/fsimpl/host/host_state_autogen.go
index c18cef145..5aaee37c3 100644
--- a/pkg/sentry/fsimpl/host/host_state_autogen.go
+++ b/pkg/sentry/fsimpl/host/host_state_autogen.go
@@ -83,6 +83,7 @@ func (i *inode) StateFields() []string {
"InodeNoStatFS",
"InodeNotDirectory",
"InodeNotSymlink",
+ "InodeTemporary",
"locks",
"inodeRefs",
"hostFD",
@@ -104,17 +105,18 @@ func (i *inode) StateSave(stateSinkObject state.Sink) {
stateSinkObject.Save(0, &i.InodeNoStatFS)
stateSinkObject.Save(1, &i.InodeNotDirectory)
stateSinkObject.Save(2, &i.InodeNotSymlink)
- stateSinkObject.Save(3, &i.locks)
- stateSinkObject.Save(4, &i.inodeRefs)
- stateSinkObject.Save(5, &i.hostFD)
- stateSinkObject.Save(6, &i.ino)
- stateSinkObject.Save(7, &i.isTTY)
- stateSinkObject.Save(8, &i.seekable)
- stateSinkObject.Save(9, &i.wouldBlock)
- stateSinkObject.Save(10, &i.queue)
- stateSinkObject.Save(11, &i.canMap)
- stateSinkObject.Save(12, &i.mappings)
- stateSinkObject.Save(13, &i.pf)
+ stateSinkObject.Save(3, &i.InodeTemporary)
+ stateSinkObject.Save(4, &i.locks)
+ stateSinkObject.Save(5, &i.inodeRefs)
+ stateSinkObject.Save(6, &i.hostFD)
+ stateSinkObject.Save(7, &i.ino)
+ stateSinkObject.Save(8, &i.isTTY)
+ stateSinkObject.Save(9, &i.seekable)
+ stateSinkObject.Save(10, &i.wouldBlock)
+ stateSinkObject.Save(11, &i.queue)
+ stateSinkObject.Save(12, &i.canMap)
+ stateSinkObject.Save(13, &i.mappings)
+ stateSinkObject.Save(14, &i.pf)
}
func (i *inode) afterLoad() {}
@@ -123,17 +125,18 @@ func (i *inode) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(0, &i.InodeNoStatFS)
stateSourceObject.Load(1, &i.InodeNotDirectory)
stateSourceObject.Load(2, &i.InodeNotSymlink)
- stateSourceObject.Load(3, &i.locks)
- stateSourceObject.Load(4, &i.inodeRefs)
- stateSourceObject.Load(5, &i.hostFD)
- stateSourceObject.Load(6, &i.ino)
- stateSourceObject.Load(7, &i.isTTY)
- stateSourceObject.Load(8, &i.seekable)
- stateSourceObject.Load(9, &i.wouldBlock)
- stateSourceObject.Load(10, &i.queue)
- stateSourceObject.Load(11, &i.canMap)
- stateSourceObject.Load(12, &i.mappings)
- stateSourceObject.Load(13, &i.pf)
+ stateSourceObject.Load(3, &i.InodeTemporary)
+ stateSourceObject.Load(4, &i.locks)
+ stateSourceObject.Load(5, &i.inodeRefs)
+ stateSourceObject.Load(6, &i.hostFD)
+ stateSourceObject.Load(7, &i.ino)
+ stateSourceObject.Load(8, &i.isTTY)
+ stateSourceObject.Load(9, &i.seekable)
+ stateSourceObject.Load(10, &i.wouldBlock)
+ stateSourceObject.Load(11, &i.queue)
+ stateSourceObject.Load(12, &i.canMap)
+ stateSourceObject.Load(13, &i.mappings)
+ stateSourceObject.Load(14, &i.pf)
}
func (f *fileDescription) StateTypeName() string {
diff --git a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
index 0a4cd4057..abf1905d6 100644
--- a/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/fd_impl_util.go
@@ -201,12 +201,12 @@ func (fd *GenericDirectoryFD) IterDirents(ctx context.Context, cb vfs.IterDirent
// these.
childIdx := fd.off - 2
for it := fd.children.nthLocked(childIdx); it != nil; it = it.Next() {
- stat, err := it.Dentry.inode.Stat(ctx, fd.filesystem(), opts)
+ stat, err := it.inode.Stat(ctx, fd.filesystem(), opts)
if err != nil {
return err
}
dirent := vfs.Dirent{
- Name: it.Name,
+ Name: it.name,
Type: linux.FileMode(stat.Mode).DirentType(),
Ino: stat.Ino,
NextOff: fd.off + 1,
diff --git a/pkg/sentry/fsimpl/kernfs/filesystem.go b/pkg/sentry/fsimpl/kernfs/filesystem.go
index 5cc1c4281..6426a55f6 100644
--- a/pkg/sentry/fsimpl/kernfs/filesystem.go
+++ b/pkg/sentry/fsimpl/kernfs/filesystem.go
@@ -89,7 +89,7 @@ afterSymlink:
}
if targetVD.Ok() {
err := rp.HandleJump(targetVD)
- targetVD.DecRef(ctx)
+ fs.deferDecRefVD(ctx, targetVD)
if err != nil {
return nil, err
}
@@ -120,22 +120,33 @@ func (fs *Filesystem) revalidateChildLocked(ctx context.Context, vfsObj *vfs.Vir
// Cached dentry exists, revalidate.
if !child.inode.Valid(ctx) {
delete(parent.children, name)
- vfsObj.InvalidateDentry(ctx, &child.vfsd)
- fs.deferDecRef(child) // Reference from Lookup.
+ if child.inode.Keep() {
+ // Drop the ref owned by kernfs.
+ fs.deferDecRef(child)
+ }
+ vfsObj.InvalidateDentry(ctx, child.VFSDentry())
child = nil
}
}
if child == nil {
// Dentry isn't cached; it either doesn't exist or failed revalidation.
// Attempt to resolve it via Lookup.
- c, err := parent.inode.Lookup(ctx, name)
+ childInode, err := parent.inode.Lookup(ctx, name)
if err != nil {
return nil, err
}
- // Reference on c (provided by Lookup) will be dropped when the dentry
- // fails validation.
- parent.InsertChildLocked(name, c)
- child = c
+ var newChild Dentry
+ newChild.Init(fs, childInode) // childInode's ref is transferred to newChild.
+ parent.insertChildLocked(name, &newChild)
+ child = &newChild
+
+ // Drop the ref on newChild. This will cause the dentry to get pruned
+ // from the dentry tree by the end of current filesystem operation
+ // (before returning to the VFS layer) if another ref is not picked on
+ // this dentry.
+ if !childInode.Keep() {
+ fs.deferDecRef(&newChild)
+ }
}
return child, nil
}
@@ -191,7 +202,7 @@ func (fs *Filesystem) walkParentDirLocked(ctx context.Context, rp *vfs.Resolving
}
// checkCreateLocked checks that a file named rp.Component() may be created in
-// directory parentVFSD, then returns rp.Component().
+// directory parent, then returns rp.Component().
//
// Preconditions:
// * Filesystem.mu must be locked for at least reading.
@@ -298,9 +309,9 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
return syserror.EEXIST
}
fs.mu.Lock()
+ defer fs.processDeferredDecRefs(ctx)
defer fs.mu.Unlock()
parent, err := fs.walkParentDirLocked(ctx, rp)
- fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return err
}
@@ -324,11 +335,13 @@ func (fs *Filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
return syserror.EPERM
}
- child, err := parent.inode.NewLink(ctx, pc, d.inode)
+ childI, err := parent.inode.NewLink(ctx, pc, d.inode)
if err != nil {
return err
}
- parent.InsertChildLocked(pc, child)
+ var child Dentry
+ child.Init(fs, childI)
+ parent.insertChildLocked(pc, &child)
return nil
}
@@ -338,9 +351,9 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
return syserror.EEXIST
}
fs.mu.Lock()
+ defer fs.processDeferredDecRefs(ctx)
defer fs.mu.Unlock()
parent, err := fs.walkParentDirLocked(ctx, rp)
- fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return err
}
@@ -355,14 +368,16 @@ func (fs *Filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
return err
}
defer rp.Mount().EndWrite()
- child, err := parent.inode.NewDir(ctx, pc, opts)
+ childI, err := parent.inode.NewDir(ctx, pc, opts)
if err != nil {
if !opts.ForSyntheticMountpoint || err == syserror.EEXIST {
return err
}
- child = newSyntheticDirectory(rp.Credentials(), opts.Mode)
+ childI = newSyntheticDirectory(rp.Credentials(), opts.Mode)
}
- parent.InsertChildLocked(pc, child)
+ var child Dentry
+ child.Init(fs, childI)
+ parent.insertChildLocked(pc, &child)
return nil
}
@@ -372,9 +387,9 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
return syserror.EEXIST
}
fs.mu.Lock()
+ defer fs.processDeferredDecRefs(ctx)
defer fs.mu.Unlock()
parent, err := fs.walkParentDirLocked(ctx, rp)
- fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return err
}
@@ -389,11 +404,13 @@ func (fs *Filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
return err
}
defer rp.Mount().EndWrite()
- newD, err := parent.inode.NewNode(ctx, pc, opts)
+ newI, err := parent.inode.NewNode(ctx, pc, opts)
if err != nil {
return err
}
- parent.InsertChildLocked(pc, newD)
+ var newD Dentry
+ newD.Init(fs, newI)
+ parent.insertChildLocked(pc, &newD)
return nil
}
@@ -409,22 +426,23 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
// Do not create new file.
if opts.Flags&linux.O_CREAT == 0 {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
d, err := fs.walkExistingLocked(ctx, rp)
if err != nil {
fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
return nil, err
}
if err := d.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
return nil, err
}
- d.inode.IncRef()
- defer d.inode.DecRef(ctx)
+ // Open may block so we need to unlock fs.mu. IncRef d to prevent
+ // its destruction while fs.mu is unlocked.
+ d.IncRef()
fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
- return d.inode.Open(ctx, rp, d, opts)
+ fd, err := d.inode.Open(ctx, rp, d, opts)
+ d.DecRef(ctx)
+ return fd, err
}
// May create new file.
@@ -438,6 +456,10 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
unlocked = true
}
}
+ // Process all to-be-decref'd dentries at the end at once.
+ // Since we defer unlock() AFTER this, fs.mu is guaranteed to be unlocked
+ // when this is executed.
+ defer fs.processDeferredDecRefs(ctx)
defer unlock()
if rp.Done() {
if rp.MustBeDir() {
@@ -449,14 +471,16 @@ func (fs *Filesystem) OpenAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
if err := d.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
return nil, err
}
- d.inode.IncRef()
- defer d.inode.DecRef(ctx)
+ // Open may block so we need to unlock fs.mu. IncRef d to prevent
+ // its destruction while fs.mu is unlocked.
+ d.IncRef()
unlock()
- return d.inode.Open(ctx, rp, d, opts)
+ fd, err := d.inode.Open(ctx, rp, d, opts)
+ d.DecRef(ctx)
+ return fd, err
}
afterTrailingSymlink:
parent, err := fs.walkParentDirLocked(ctx, rp)
- fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return nil, err
}
@@ -487,18 +511,23 @@ afterTrailingSymlink:
}
defer rp.Mount().EndWrite()
// Create and open the child.
- child, err := parent.inode.NewFile(ctx, pc, opts)
+ childI, err := parent.inode.NewFile(ctx, pc, opts)
if err != nil {
return nil, err
}
+ var child Dentry
+ child.Init(fs, childI)
// FIXME(gvisor.dev/issue/1193): Race between checking existence with
- // fs.stepExistingLocked and parent.InsertChild. If possible, we should hold
+ // fs.stepExistingLocked and parent.insertChild. If possible, we should hold
// dirMu from one to the other.
- parent.InsertChild(pc, child)
- child.inode.IncRef()
- defer child.inode.DecRef(ctx)
+ parent.insertChild(pc, &child)
+ // Open may block so we need to unlock fs.mu. IncRef child to prevent
+ // its destruction while fs.mu is unlocked.
+ child.IncRef()
unlock()
- return child.inode.Open(ctx, rp, child, opts)
+ fd, err := child.inode.Open(ctx, rp, &child, opts)
+ child.DecRef(ctx)
+ return fd, err
}
if err != nil {
return nil, err
@@ -514,7 +543,7 @@ afterTrailingSymlink:
}
if targetVD.Ok() {
err := rp.HandleJump(targetVD)
- targetVD.DecRef(ctx)
+ fs.deferDecRefVD(ctx, targetVD)
if err != nil {
return nil, err
}
@@ -530,18 +559,21 @@ afterTrailingSymlink:
if err := child.inode.CheckPermissions(ctx, rp.Credentials(), ats); err != nil {
return nil, err
}
- child.inode.IncRef()
- defer child.inode.DecRef(ctx)
+ // Open may block so we need to unlock fs.mu. IncRef child to prevent
+ // its destruction while fs.mu is unlocked.
+ child.IncRef()
unlock()
- return child.inode.Open(ctx, rp, child, opts)
+ fd, err := child.inode.Open(ctx, rp, child, opts)
+ child.DecRef(ctx)
+ return fd, err
}
// ReadlinkAt implements vfs.FilesystemImpl.ReadlinkAt.
func (fs *Filesystem) ReadlinkAt(ctx context.Context, rp *vfs.ResolvingPath) (string, error) {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
d, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return "", err
}
@@ -560,7 +592,7 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
noReplace := opts.Flags&linux.RENAME_NOREPLACE != 0
fs.mu.Lock()
- defer fs.processDeferredDecRefsLocked(ctx)
+ defer fs.processDeferredDecRefs(ctx)
defer fs.mu.Unlock()
// Resolve the destination directory first to verify that it's on this
@@ -632,24 +664,27 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
if err := virtfs.PrepareRenameDentry(mntns, srcVFSD, dstVFSD); err != nil {
return err
}
- replaced, err := srcDir.inode.Rename(ctx, src.name, pc, src, dstDir)
+ err = srcDir.inode.Rename(ctx, src.name, pc, src.inode, dstDir.inode)
if err != nil {
virtfs.AbortRenameDentry(srcVFSD, dstVFSD)
return err
}
delete(srcDir.children, src.name)
if srcDir != dstDir {
- fs.deferDecRef(srcDir)
- dstDir.IncRef()
+ fs.deferDecRef(srcDir) // child (src) drops ref on old parent.
+ dstDir.IncRef() // child (src) takes a ref on the new parent.
}
src.parent = dstDir
src.name = pc
if dstDir.children == nil {
dstDir.children = make(map[string]*Dentry)
}
+ replaced := dstDir.children[pc]
dstDir.children[pc] = src
var replaceVFSD *vfs.Dentry
if replaced != nil {
+ // deferDecRef so that fs.mu and dstDir.mu are unlocked by then.
+ fs.deferDecRef(replaced)
replaceVFSD = replaced.VFSDentry()
}
virtfs.CommitRenameReplaceDentry(ctx, srcVFSD, replaceVFSD)
@@ -659,10 +694,10 @@ func (fs *Filesystem) RenameAt(ctx context.Context, rp *vfs.ResolvingPath, oldPa
// RmdirAt implements vfs.FilesystemImpl.RmdirAt.
func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error {
fs.mu.Lock()
+ defer fs.processDeferredDecRefs(ctx)
defer fs.mu.Unlock()
d, err := fs.walkExistingLocked(ctx, rp)
- fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return err
}
@@ -691,10 +726,13 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
return err
}
- if err := parentDentry.inode.RmDir(ctx, d.name, d); err != nil {
+ if err := parentDentry.inode.RmDir(ctx, d.name, d.inode); err != nil {
virtfs.AbortDeleteDentry(vfsd)
return err
}
+ delete(parentDentry.children, d.name)
+ // Defer decref so that fs.mu and parentDentry.dirMu are unlocked by then.
+ fs.deferDecRef(d)
virtfs.CommitDeleteDentry(ctx, vfsd)
return nil
}
@@ -702,9 +740,9 @@ func (fs *Filesystem) RmdirAt(ctx context.Context, rp *vfs.ResolvingPath) error
// SetStatAt implements vfs.FilesystemImpl.SetStatAt.
func (fs *Filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetStatOptions) error {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
d, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return err
}
@@ -717,9 +755,9 @@ func (fs *Filesystem) SetStatAt(ctx context.Context, rp *vfs.ResolvingPath, opts
// StatAt implements vfs.FilesystemImpl.StatAt.
func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.StatOptions) (linux.Statx, error) {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
d, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return linux.Statx{}, err
}
@@ -729,9 +767,9 @@ func (fs *Filesystem) StatAt(ctx context.Context, rp *vfs.ResolvingPath, opts vf
// StatFSAt implements vfs.FilesystemImpl.StatFSAt.
func (fs *Filesystem) StatFSAt(ctx context.Context, rp *vfs.ResolvingPath) (linux.Statfs, error) {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
d, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return linux.Statfs{}, err
}
@@ -744,9 +782,9 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ
return syserror.EEXIST
}
fs.mu.Lock()
+ defer fs.processDeferredDecRefs(ctx)
defer fs.mu.Unlock()
parent, err := fs.walkParentDirLocked(ctx, rp)
- fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return err
}
@@ -761,21 +799,23 @@ func (fs *Filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ
return err
}
defer rp.Mount().EndWrite()
- child, err := parent.inode.NewSymlink(ctx, pc, target)
+ childI, err := parent.inode.NewSymlink(ctx, pc, target)
if err != nil {
return err
}
- parent.InsertChildLocked(pc, child)
+ var child Dentry
+ child.Init(fs, childI)
+ parent.insertChildLocked(pc, &child)
return nil
}
// UnlinkAt implements vfs.FilesystemImpl.UnlinkAt.
func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error {
fs.mu.Lock()
+ defer fs.processDeferredDecRefs(ctx)
defer fs.mu.Unlock()
d, err := fs.walkExistingLocked(ctx, rp)
- fs.processDeferredDecRefsLocked(ctx)
if err != nil {
return err
}
@@ -799,10 +839,13 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
if err := virtfs.PrepareDeleteDentry(mntns, vfsd); err != nil {
return err
}
- if err := parentDentry.inode.Unlink(ctx, d.name, d); err != nil {
+ if err := parentDentry.inode.Unlink(ctx, d.name, d.inode); err != nil {
virtfs.AbortDeleteDentry(vfsd)
return err
}
+ delete(parentDentry.children, d.name)
+ // Defer decref so that fs.mu and parentDentry.dirMu are unlocked by then.
+ fs.deferDecRef(d)
virtfs.CommitDeleteDentry(ctx, vfsd)
return nil
}
@@ -810,9 +853,9 @@ func (fs *Filesystem) UnlinkAt(ctx context.Context, rp *vfs.ResolvingPath) error
// BoundEndpointAt implements vfs.FilesystemImpl.BoundEndpointAt.
func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.BoundEndpointOptions) (transport.BoundEndpoint, error) {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
d, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return nil, err
}
@@ -825,9 +868,9 @@ func (fs *Filesystem) BoundEndpointAt(ctx context.Context, rp *vfs.ResolvingPath
// ListXattrAt implements vfs.FilesystemImpl.ListXattrAt.
func (fs *Filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, size uint64) ([]string, error) {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
_, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return nil, err
}
@@ -838,9 +881,9 @@ func (fs *Filesystem) ListXattrAt(ctx context.Context, rp *vfs.ResolvingPath, si
// GetXattrAt implements vfs.FilesystemImpl.GetXattrAt.
func (fs *Filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.GetXattrOptions) (string, error) {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
_, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return "", err
}
@@ -851,9 +894,9 @@ func (fs *Filesystem) GetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
// SetXattrAt implements vfs.FilesystemImpl.SetXattrAt.
func (fs *Filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opts vfs.SetXattrOptions) error {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
_, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return err
}
@@ -864,9 +907,9 @@ func (fs *Filesystem) SetXattrAt(ctx context.Context, rp *vfs.ResolvingPath, opt
// RemoveXattrAt implements vfs.FilesystemImpl.RemoveXattrAt.
func (fs *Filesystem) RemoveXattrAt(ctx context.Context, rp *vfs.ResolvingPath, name string) error {
fs.mu.RLock()
+ defer fs.processDeferredDecRefs(ctx)
+ defer fs.mu.RUnlock()
_, err := fs.walkExistingLocked(ctx, rp)
- fs.mu.RUnlock()
- fs.processDeferredDecRefs(ctx)
if err != nil {
return err
}
@@ -880,3 +923,16 @@ func (fs *Filesystem) PrependPath(ctx context.Context, vfsroot, vd vfs.VirtualDe
defer fs.mu.RUnlock()
return genericPrependPath(vfsroot, vd.Mount(), vd.Dentry().Impl().(*Dentry), b)
}
+
+func (fs *Filesystem) deferDecRefVD(ctx context.Context, vd vfs.VirtualDentry) {
+ if d, ok := vd.Dentry().Impl().(*Dentry); ok && d.fs == fs {
+ // The following is equivalent to vd.DecRef(ctx). This is needed
+ // because if d belongs to this filesystem, we can not DecRef it right
+ // away as we may be holding fs.mu. d.DecRef may acquire fs.mu. So we
+ // defer the DecRef to when locks are dropped.
+ vd.Mount().DecRef(ctx)
+ fs.deferDecRef(d)
+ } else {
+ vd.DecRef(ctx)
+ }
+}
diff --git a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
index 49210e748..122b10591 100644
--- a/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
+++ b/pkg/sentry/fsimpl/kernfs/inode_impl_util.go
@@ -34,6 +34,7 @@ import (
//
// +stateify savable
type InodeNoopRefCount struct {
+ InodeTemporary
}
// IncRef implements Inode.IncRef.
@@ -57,27 +58,27 @@ func (InodeNoopRefCount) TryIncRef() bool {
type InodeDirectoryNoNewChildren struct{}
// NewFile implements Inode.NewFile.
-func (InodeDirectoryNoNewChildren) NewFile(context.Context, string, vfs.OpenOptions) (*Dentry, error) {
+func (InodeDirectoryNoNewChildren) NewFile(context.Context, string, vfs.OpenOptions) (Inode, error) {
return nil, syserror.EPERM
}
// NewDir implements Inode.NewDir.
-func (InodeDirectoryNoNewChildren) NewDir(context.Context, string, vfs.MkdirOptions) (*Dentry, error) {
+func (InodeDirectoryNoNewChildren) NewDir(context.Context, string, vfs.MkdirOptions) (Inode, error) {
return nil, syserror.EPERM
}
// NewLink implements Inode.NewLink.
-func (InodeDirectoryNoNewChildren) NewLink(context.Context, string, Inode) (*Dentry, error) {
+func (InodeDirectoryNoNewChildren) NewLink(context.Context, string, Inode) (Inode, error) {
return nil, syserror.EPERM
}
// NewSymlink implements Inode.NewSymlink.
-func (InodeDirectoryNoNewChildren) NewSymlink(context.Context, string, string) (*Dentry, error) {
+func (InodeDirectoryNoNewChildren) NewSymlink(context.Context, string, string) (Inode, error) {
return nil, syserror.EPERM
}
// NewNode implements Inode.NewNode.
-func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOptions) (*Dentry, error) {
+func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOptions) (Inode, error) {
return nil, syserror.EPERM
}
@@ -88,6 +89,7 @@ func (InodeDirectoryNoNewChildren) NewNode(context.Context, string, vfs.MknodOpt
//
// +stateify savable
type InodeNotDirectory struct {
+ InodeAlwaysValid
}
// HasChildren implements Inode.HasChildren.
@@ -96,47 +98,47 @@ func (InodeNotDirectory) HasChildren() bool {
}
// NewFile implements Inode.NewFile.
-func (InodeNotDirectory) NewFile(context.Context, string, vfs.OpenOptions) (*Dentry, error) {
+func (InodeNotDirectory) NewFile(context.Context, string, vfs.OpenOptions) (Inode, error) {
panic("NewFile called on non-directory inode")
}
// NewDir implements Inode.NewDir.
-func (InodeNotDirectory) NewDir(context.Context, string, vfs.MkdirOptions) (*Dentry, error) {
+func (InodeNotDirectory) NewDir(context.Context, string, vfs.MkdirOptions) (Inode, error) {
panic("NewDir called on non-directory inode")
}
// NewLink implements Inode.NewLinkink.
-func (InodeNotDirectory) NewLink(context.Context, string, Inode) (*Dentry, error) {
+func (InodeNotDirectory) NewLink(context.Context, string, Inode) (Inode, error) {
panic("NewLink called on non-directory inode")
}
// NewSymlink implements Inode.NewSymlink.
-func (InodeNotDirectory) NewSymlink(context.Context, string, string) (*Dentry, error) {
+func (InodeNotDirectory) NewSymlink(context.Context, string, string) (Inode, error) {
panic("NewSymlink called on non-directory inode")
}
// NewNode implements Inode.NewNode.
-func (InodeNotDirectory) NewNode(context.Context, string, vfs.MknodOptions) (*Dentry, error) {
+func (InodeNotDirectory) NewNode(context.Context, string, vfs.MknodOptions) (Inode, error) {
panic("NewNode called on non-directory inode")
}
// Unlink implements Inode.Unlink.
-func (InodeNotDirectory) Unlink(context.Context, string, *Dentry) error {
+func (InodeNotDirectory) Unlink(context.Context, string, Inode) error {
panic("Unlink called on non-directory inode")
}
// RmDir implements Inode.RmDir.
-func (InodeNotDirectory) RmDir(context.Context, string, *Dentry) error {
+func (InodeNotDirectory) RmDir(context.Context, string, Inode) error {
panic("RmDir called on non-directory inode")
}
// Rename implements Inode.Rename.
-func (InodeNotDirectory) Rename(context.Context, string, string, *Dentry, *Dentry) (*Dentry, error) {
+func (InodeNotDirectory) Rename(context.Context, string, string, Inode, Inode) error {
panic("Rename called on non-directory inode")
}
// Lookup implements Inode.Lookup.
-func (InodeNotDirectory) Lookup(ctx context.Context, name string) (*Dentry, error) {
+func (InodeNotDirectory) Lookup(ctx context.Context, name string) (Inode, error) {
panic("Lookup called on non-directory inode")
}
@@ -145,35 +147,6 @@ func (InodeNotDirectory) IterDirents(ctx context.Context, callback vfs.IterDiren
panic("IterDirents called on non-directory inode")
}
-// Valid implements Inode.Valid.
-func (InodeNotDirectory) Valid(context.Context) bool {
- return true
-}
-
-// InodeNoDynamicLookup partially implements the Inode interface, specifically
-// the inodeDynamicLookup sub interface. Directory inodes that do not support
-// dymanic entries (i.e. entries that are not "hashed" into the
-// vfs.Dentry.children) can embed this to provide no-op implementations for
-// functions related to dynamic entries.
-//
-// +stateify savable
-type InodeNoDynamicLookup struct{}
-
-// Lookup implements Inode.Lookup.
-func (InodeNoDynamicLookup) Lookup(ctx context.Context, name string) (*Dentry, error) {
- return nil, syserror.ENOENT
-}
-
-// IterDirents implements Inode.IterDirents.
-func (InodeNoDynamicLookup) IterDirents(ctx context.Context, callback vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
- return offset, nil
-}
-
-// Valid implements Inode.Valid.
-func (InodeNoDynamicLookup) Valid(ctx context.Context) bool {
- return true
-}
-
// InodeNotSymlink partially implements the Inode interface, specifically the
// inodeSymlink sub interface. All inodes that are not symlinks may embed this
// to return the appropriate errors from symlink-related functions.
@@ -273,7 +246,7 @@ func (a *InodeAttrs) SetStat(ctx context.Context, fs *vfs.Filesystem, creds *aut
// SetInodeStat sets the corresponding attributes from opts to InodeAttrs.
// This function can be used by other kernfs-based filesystem implementation to
-// sets the unexported attributes into kernfs.InodeAttrs.
+// sets the unexported attributes into InodeAttrs.
func (a *InodeAttrs) SetInodeStat(ctx context.Context, fs *vfs.Filesystem, creds *auth.Credentials, opts vfs.SetStatOptions) error {
if opts.Stat.Mask == 0 {
return nil
@@ -344,8 +317,9 @@ func (a *InodeAttrs) DecLinks() {
// +stateify savable
type slot struct {
- Name string
- Dentry *Dentry
+ name string
+ inode Inode
+ static bool
slotEntry
}
@@ -361,10 +335,18 @@ type OrderedChildrenOptions struct {
}
// OrderedChildren partially implements the Inode interface. OrderedChildren can
-// be embedded in directory inodes to keep track of the children in the
+// be embedded in directory inodes to keep track of children in the
// directory, and can then be used to implement a generic directory FD -- see
-// GenericDirectoryFD. OrderedChildren is not compatible with dynamic
-// directories.
+// GenericDirectoryFD.
+//
+// OrderedChildren can represent a node in an Inode tree. The children inodes
+// might be directories themselves using OrderedChildren; hence extending the
+// tree. The parent inode (OrderedChildren user) holds a ref on all its static
+// children. This lets the static inodes outlive their associated dentry.
+// While the dentry might have to be regenerated via a Lookup() call, we can
+// keep reusing the same static inode. These static children inodes are finally
+// DecRef'd when this directory inode is being destroyed. This makes
+// OrderedChildren suitable for static directory entries as well.
//
// Must be initialize with Init before first use.
//
@@ -388,33 +370,63 @@ func (o *OrderedChildren) Init(opts OrderedChildrenOptions) {
// Destroy clears the children stored in o. It should be called by structs
// embedding OrderedChildren upon destruction, i.e. when their reference count
// reaches zero.
-func (o *OrderedChildren) Destroy() {
+func (o *OrderedChildren) Destroy(ctx context.Context) {
o.mu.Lock()
defer o.mu.Unlock()
+ // Drop the ref that o owns on the static inodes it holds.
+ for _, s := range o.set {
+ if s.static {
+ s.inode.DecRef(ctx)
+ }
+ }
o.order.Reset()
o.set = nil
}
-// Populate inserts children into this OrderedChildren, and d's dentry
-// cache. Populate returns the number of directories inserted, which the caller
+// Populate inserts static children into this OrderedChildren.
+// Populate returns the number of directories inserted, which the caller
// may use to update the link count for the parent directory.
//
-// Precondition: d must represent a directory inode. children must not contain
-// any conflicting entries already in o.
-func (o *OrderedChildren) Populate(d *Dentry, children map[string]*Dentry) uint32 {
+// Precondition:
+// * d must represent a directory inode.
+// * children must not contain any conflicting entries already in o.
+// * Caller must hold a reference on all inodes passed.
+//
+// Postcondition: Caller's references on inodes are transferred to o.
+func (o *OrderedChildren) Populate(children map[string]Inode) uint32 {
var links uint32
for name, child := range children {
- if child.isDir() {
+ if child.Mode().IsDir() {
links++
}
- if err := o.Insert(name, child); err != nil {
- panic(fmt.Sprintf("Collision when attempting to insert child %q (%+v) into %+v", name, child, d))
+ if err := o.insert(name, child, true); err != nil {
+ panic(fmt.Sprintf("Collision when attempting to insert child %q (%+v)", name, child))
}
- d.InsertChild(name, child)
}
return links
}
+// Lookup implements Inode.Lookup.
+func (o *OrderedChildren) Lookup(ctx context.Context, name string) (Inode, error) {
+ o.mu.RLock()
+ defer o.mu.RUnlock()
+
+ s, ok := o.set[name]
+ if !ok {
+ return nil, syserror.ENOENT
+ }
+
+ s.inode.IncRef() // This ref is passed to the dentry upon creation via Init.
+ return s.inode, nil
+}
+
+// IterDirents implements Inode.IterDirents.
+func (o *OrderedChildren) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (newOffset int64, err error) {
+ // All entries from OrderedChildren have already been handled in
+ // GenericDirectoryFD.IterDirents.
+ return offset, nil
+}
+
// HasChildren implements Inode.HasChildren.
func (o *OrderedChildren) HasChildren() bool {
o.mu.RLock()
@@ -422,17 +434,27 @@ func (o *OrderedChildren) HasChildren() bool {
return len(o.set) > 0
}
-// Insert inserts child into o. This ignores the writability of o, as this is
-// not part of the vfs.FilesystemImpl interface, and is a lower-level operation.
-func (o *OrderedChildren) Insert(name string, child *Dentry) error {
+// Insert inserts a dynamic child into o. This ignores the writability of o, as
+// this is not part of the vfs.FilesystemImpl interface, and is a lower-level operation.
+func (o *OrderedChildren) Insert(name string, child Inode) error {
+ return o.insert(name, child, false)
+}
+
+// insert inserts child into o.
+//
+// Precondition: Caller must be holding a ref on child if static is true.
+//
+// Postcondition: Caller's ref on child is transferred to o if static is true.
+func (o *OrderedChildren) insert(name string, child Inode, static bool) error {
o.mu.Lock()
defer o.mu.Unlock()
if _, ok := o.set[name]; ok {
return syserror.EEXIST
}
s := &slot{
- Name: name,
- Dentry: child,
+ name: name,
+ inode: child,
+ static: static,
}
o.order.PushBack(s)
o.set[name] = s
@@ -442,44 +464,49 @@ func (o *OrderedChildren) Insert(name string, child *Dentry) error {
// Precondition: caller must hold o.mu for writing.
func (o *OrderedChildren) removeLocked(name string) {
if s, ok := o.set[name]; ok {
+ if s.static {
+ panic(fmt.Sprintf("removeLocked called on a static inode: %v", s.inode))
+ }
delete(o.set, name)
o.order.Remove(s)
}
}
// Precondition: caller must hold o.mu for writing.
-func (o *OrderedChildren) replaceChildLocked(name string, new *Dentry) *Dentry {
+func (o *OrderedChildren) replaceChildLocked(ctx context.Context, name string, newI Inode) {
if s, ok := o.set[name]; ok {
+ if s.static {
+ panic(fmt.Sprintf("replacing a static inode: %v", s.inode))
+ }
+
// Existing slot with given name, simply replace the dentry.
- var old *Dentry
- old, s.Dentry = s.Dentry, new
- return old
+ s.inode = newI
}
// No existing slot with given name, create and hash new slot.
s := &slot{
- Name: name,
- Dentry: new,
+ name: name,
+ inode: newI,
+ static: false,
}
o.order.PushBack(s)
o.set[name] = s
- return nil
}
// Precondition: caller must hold o.mu for reading or writing.
-func (o *OrderedChildren) checkExistingLocked(name string, child *Dentry) error {
+func (o *OrderedChildren) checkExistingLocked(name string, child Inode) error {
s, ok := o.set[name]
if !ok {
return syserror.ENOENT
}
- if s.Dentry != child {
- panic(fmt.Sprintf("Dentry hashed into inode doesn't match what vfs thinks! OrderedChild: %+v, vfs: %+v", s.Dentry, child))
+ if s.inode != child {
+ panic(fmt.Sprintf("Inode doesn't match what kernfs thinks! OrderedChild: %+v, kernfs: %+v", s.inode, child))
}
return nil
}
// Unlink implements Inode.Unlink.
-func (o *OrderedChildren) Unlink(ctx context.Context, name string, child *Dentry) error {
+func (o *OrderedChildren) Unlink(ctx context.Context, name string, child Inode) error {
if !o.writable {
return syserror.EPERM
}
@@ -494,8 +521,8 @@ func (o *OrderedChildren) Unlink(ctx context.Context, name string, child *Dentry
return nil
}
-// Rmdir implements Inode.Rmdir.
-func (o *OrderedChildren) RmDir(ctx context.Context, name string, child *Dentry) error {
+// RmDir implements Inode.RmDir.
+func (o *OrderedChildren) RmDir(ctx context.Context, name string, child Inode) error {
// We're not responsible for checking that child is a directory, that it's
// empty, or updating any link counts; so this is the same as unlink.
return o.Unlink(ctx, name, child)
@@ -517,13 +544,13 @@ func (renameAcrossDifferentImplementationsError) Error() string {
// that will support Rename.
//
// Postcondition: reference on any replaced dentry transferred to caller.
-func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, child, dstDir *Dentry) (*Dentry, error) {
- dst, ok := dstDir.inode.(interface{}).(*OrderedChildren)
+func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, child, dstDir Inode) error {
+ dst, ok := dstDir.(interface{}).(*OrderedChildren)
if !ok {
- return nil, renameAcrossDifferentImplementationsError{}
+ return renameAcrossDifferentImplementationsError{}
}
if !o.writable || !dst.writable {
- return nil, syserror.EPERM
+ return syserror.EPERM
}
// Note: There's a potential deadlock below if concurrent calls to Rename
// refer to the same src and dst directories in reverse. We avoid any
@@ -536,12 +563,12 @@ func (o *OrderedChildren) Rename(ctx context.Context, oldname, newname string, c
defer dst.mu.Unlock()
}
if err := o.checkExistingLocked(oldname, child); err != nil {
- return nil, err
+ return err
}
// TODO(gvisor.dev/issue/3027): Check sticky bit before removing.
- replaced := dst.replaceChildLocked(newname, child)
- return replaced, nil
+ dst.replaceChildLocked(ctx, newname, child)
+ return nil
}
// nthLocked returns an iterator to the nth child tracked by this object. The
@@ -576,11 +603,12 @@ func (InodeSymlink) Open(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry,
//
// +stateify savable
type StaticDirectory struct {
+ InodeAlwaysValid
InodeAttrs
InodeDirectoryNoNewChildren
- InodeNoDynamicLookup
InodeNoStatFS
InodeNotSymlink
+ InodeTemporary
OrderedChildren
StaticDirectoryRefs
@@ -591,19 +619,16 @@ type StaticDirectory struct {
var _ Inode = (*StaticDirectory)(nil)
// NewStaticDir creates a new static directory and returns its dentry.
-func NewStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode, children map[string]*Dentry, fdOpts GenericDirectoryFDOptions) *Dentry {
+func NewStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode, children map[string]Inode, fdOpts GenericDirectoryFDOptions) Inode {
inode := &StaticDirectory{}
inode.Init(creds, devMajor, devMinor, ino, perm, fdOpts)
inode.EnableLeakCheck()
- dentry := &Dentry{}
- dentry.Init(inode)
-
inode.OrderedChildren.Init(OrderedChildrenOptions{})
- links := inode.OrderedChildren.Populate(dentry, children)
+ links := inode.OrderedChildren.Populate(children)
inode.IncLinks(links)
- return dentry
+ return inode
}
// Init initializes StaticDirectory.
@@ -615,7 +640,7 @@ func (s *StaticDirectory) Init(creds *auth.Credentials, devMajor, devMinor uint3
s.InodeAttrs.Init(creds, devMajor, devMinor, ino, linux.ModeDirectory|perm)
}
-// Open implements kernfs.Inode.Open.
+// Open implements Inode.Open.
func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, d *Dentry, opts vfs.OpenOptions) (*vfs.FileDescription, error) {
fd, err := NewGenericDirectoryFD(rp.Mount(), d, &s.OrderedChildren, &s.locks, &opts, s.fdOpts)
if err != nil {
@@ -624,26 +649,36 @@ func (s *StaticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath, d *De
return fd.VFSFileDescription(), nil
}
-// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
+// SetStat implements Inode.SetStat not allowing inode attributes to be changed.
func (*StaticDirectory) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, vfs.SetStatOptions) error {
return syserror.EPERM
}
-// DecRef implements kernfs.Inode.DecRef.
-func (s *StaticDirectory) DecRef(context.Context) {
- s.StaticDirectoryRefs.DecRef(s.Destroy)
+// DecRef implements Inode.DecRef.
+func (s *StaticDirectory) DecRef(ctx context.Context) {
+ s.StaticDirectoryRefs.DecRef(func() { s.Destroy(ctx) })
}
-// AlwaysValid partially implements kernfs.inodeDynamicLookup.
+// InodeAlwaysValid partially implements Inode.
//
// +stateify savable
-type AlwaysValid struct{}
+type InodeAlwaysValid struct{}
-// Valid implements kernfs.inodeDynamicLookup.Valid.
-func (*AlwaysValid) Valid(context.Context) bool {
+// Valid implements Inode.Valid.
+func (*InodeAlwaysValid) Valid(context.Context) bool {
return true
}
+// InodeTemporary partially implements Inode.
+//
+// +stateify savable
+type InodeTemporary struct{}
+
+// Keep implements Inode.Keep.
+func (*InodeTemporary) Keep() bool {
+ return false
+}
+
// InodeNoStatFS partially implements the Inode interface, where the client
// filesystem doesn't support statfs(2).
//
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs.go b/pkg/sentry/fsimpl/kernfs/kernfs.go
index 6d3d79333..606081e68 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs.go
@@ -29,12 +29,16 @@
//
// Reference Model:
//
-// Kernfs dentries represents named pointers to inodes. Dentries and inodes have
+// Kernfs dentries represents named pointers to inodes. Kernfs is solely
+// reponsible for maintaining and modifying its dentry tree; inode
+// implementations can not access the tree. Dentries and inodes have
// independent lifetimes and reference counts. A child dentry unconditionally
// holds a reference on its parent directory's dentry. A dentry also holds a
-// reference on the inode it points to. Multiple dentries can point to the same
-// inode (for example, in the case of hardlinks). File descriptors hold a
-// reference to the dentry they're opened on.
+// reference on the inode it points to (although that might not be the only
+// reference on the inode). Due to this inodes can outlive the dentries that
+// point to them. Multiple dentries can point to the same inode (for example,
+// in the case of hardlinks). File descriptors hold a reference to the dentry
+// they're opened on.
//
// Dentries are guaranteed to exist while holding Filesystem.mu for
// reading. Dropping dentries require holding Filesystem.mu for writing. To
@@ -47,8 +51,8 @@
// kernfs.Dentry.dirMu
// vfs.VirtualFilesystem.mountMu
// vfs.Dentry.mu
-// kernfs.Filesystem.droppedDentriesMu
// (inode implementation locks, if any)
+// kernfs.Filesystem.droppedDentriesMu
package kernfs
import (
@@ -60,7 +64,6 @@ import (
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/sentry/vfs"
"gvisor.dev/gvisor/pkg/sync"
- "gvisor.dev/gvisor/pkg/syserror"
)
// Filesystem mostly implements vfs.FilesystemImpl for a generic in-memory
@@ -95,7 +98,7 @@ type Filesystem struct {
// example:
//
// fs.mu.RLock()
- // fs.mu.processDeferredDecRefs()
+ // defer fs.processDeferredDecRefs()
// defer fs.mu.RUnlock()
// ...
// fs.deferDecRef(dentry)
@@ -108,8 +111,7 @@ type Filesystem struct {
// deferDecRef defers dropping a dentry ref until the next call to
// processDeferredDecRefs{,Locked}. See comment on Filesystem.mu.
-//
-// Precondition: d must not already be pending destruction.
+// This may be called while Filesystem.mu or Dentry.dirMu is locked.
func (fs *Filesystem) deferDecRef(d *Dentry) {
fs.droppedDentriesMu.Lock()
fs.droppedDentries = append(fs.droppedDentries, d)
@@ -118,17 +120,14 @@ func (fs *Filesystem) deferDecRef(d *Dentry) {
// processDeferredDecRefs calls vfs.Dentry.DecRef on all dentries in the
// droppedDentries list. See comment on Filesystem.mu.
+//
+// Precondition: Filesystem.mu or Dentry.dirMu must NOT be locked.
func (fs *Filesystem) processDeferredDecRefs(ctx context.Context) {
- fs.mu.Lock()
- fs.processDeferredDecRefsLocked(ctx)
- fs.mu.Unlock()
-}
-
-// Precondition: fs.mu must be held for writing.
-func (fs *Filesystem) processDeferredDecRefsLocked(ctx context.Context) {
fs.droppedDentriesMu.Lock()
for _, d := range fs.droppedDentries {
- d.DecRef(ctx)
+ // Defer the DecRef call so that we are not holding droppedDentriesMu
+ // when DecRef is called.
+ defer d.DecRef(ctx)
}
fs.droppedDentries = fs.droppedDentries[:0] // Keep slice memory for reuse.
fs.droppedDentriesMu.Unlock()
@@ -157,17 +156,19 @@ const (
//
// A kernfs dentry is similar to a dentry in a traditional filesystem: it's a
// named reference to an inode. A dentry generally lives as long as it's part of
-// a mounted filesystem tree. Kernfs doesn't cache dentries once all references
-// to them are removed. Dentries hold a single reference to the inode they point
+// a mounted filesystem tree. Kernfs drops dentries once all references to them
+// are dropped. Dentries hold a single reference to the inode they point
// to, and child dentries hold a reference on their parent.
//
// Must be initialized by Init prior to first use.
//
// +stateify savable
type Dentry struct {
+ vfsd vfs.Dentry
DentryRefs
- vfsd vfs.Dentry
+ // fs is the owning filesystem. fs is immutable.
+ fs *Filesystem
// flags caches useful information about the dentry from the inode. See the
// dflags* consts above. Must be accessed by atomic ops.
@@ -192,8 +193,9 @@ type Dentry struct {
// Precondition: Caller must hold a reference on inode.
//
// Postcondition: Caller's reference on inode is transferred to the dentry.
-func (d *Dentry) Init(inode Inode) {
+func (d *Dentry) Init(fs *Filesystem, inode Inode) {
d.vfsd.Init(d)
+ d.fs = fs
d.inode = inode
ftype := inode.Mode().FileType()
if ftype == linux.ModeDirectory {
@@ -222,14 +224,28 @@ func (d *Dentry) isSymlink() bool {
// DecRef implements vfs.DentryImpl.DecRef.
func (d *Dentry) DecRef(ctx context.Context) {
- // Before the destructor is called, Dentry must be removed from VFS' dentry cache.
+ decRefParent := false
+ d.fs.mu.Lock()
d.DentryRefs.DecRef(func() {
d.inode.DecRef(ctx) // IncRef from Init.
d.inode = nil
if d.parent != nil {
- d.parent.DecRef(ctx) // IncRef from Dentry.InsertChild.
+ // We will DecRef d.parent once all locks are dropped.
+ decRefParent = true
+ d.parent.dirMu.Lock()
+ // Remove d from parent.children. It might already have been
+ // removed due to invalidation.
+ if _, ok := d.parent.children[d.name]; ok {
+ delete(d.parent.children, d.name)
+ d.fs.VFSFilesystem().VirtualFilesystem().InvalidateDentry(ctx, d.VFSDentry())
+ }
+ d.parent.dirMu.Unlock()
}
})
+ d.fs.mu.Unlock()
+ if decRefParent {
+ d.parent.DecRef(ctx) // IncRef from Dentry.insertChild.
+ }
}
// InotifyWithParent implements vfs.DentryImpl.InotifyWithParent.
@@ -247,26 +263,26 @@ func (d *Dentry) Watches() *vfs.Watches {
// OnZeroWatches implements vfs.Dentry.OnZeroWatches.
func (d *Dentry) OnZeroWatches(context.Context) {}
-// InsertChild inserts child into the vfs dentry cache with the given name under
+// insertChild inserts child into the vfs dentry cache with the given name under
// this dentry. This does not update the directory inode, so calling this on its
// own isn't sufficient to insert a child into a directory.
//
// Precondition: d must represent a directory inode.
-func (d *Dentry) InsertChild(name string, child *Dentry) {
+func (d *Dentry) insertChild(name string, child *Dentry) {
d.dirMu.Lock()
- d.InsertChildLocked(name, child)
+ d.insertChildLocked(name, child)
d.dirMu.Unlock()
}
-// InsertChildLocked is equivalent to InsertChild, with additional
+// insertChildLocked is equivalent to insertChild, with additional
// preconditions.
//
// Preconditions:
// * d must represent a directory inode.
// * d.dirMu must be locked.
-func (d *Dentry) InsertChildLocked(name string, child *Dentry) {
+func (d *Dentry) insertChildLocked(name string, child *Dentry) {
if !d.isDir() {
- panic(fmt.Sprintf("InsertChildLocked called on non-directory Dentry: %+v.", d))
+ panic(fmt.Sprintf("insertChildLocked called on non-directory Dentry: %+v.", d))
}
d.IncRef() // DecRef in child's Dentry.destroy.
child.parent = d
@@ -277,36 +293,6 @@ func (d *Dentry) InsertChildLocked(name string, child *Dentry) {
d.children[name] = child
}
-// RemoveChild removes child from the vfs dentry cache. This does not update the
-// directory inode or modify the inode to be unlinked. So calling this on its own
-// isn't sufficient to remove a child from a directory.
-//
-// Precondition: d must represent a directory inode.
-func (d *Dentry) RemoveChild(name string, child *Dentry) error {
- d.dirMu.Lock()
- defer d.dirMu.Unlock()
- return d.RemoveChildLocked(name, child)
-}
-
-// RemoveChildLocked is equivalent to RemoveChild, with additional
-// preconditions.
-//
-// Precondition: d.dirMu must be locked.
-func (d *Dentry) RemoveChildLocked(name string, child *Dentry) error {
- if !d.isDir() {
- panic(fmt.Sprintf("RemoveChild called on non-directory Dentry: %+v.", d))
- }
- c, ok := d.children[name]
- if !ok {
- return syserror.ENOENT
- }
- if c != child {
- panic(fmt.Sprintf("Dentry hashed into inode doesn't match what vfs thinks! Child: %+v, vfs: %+v", c, child))
- }
- delete(d.children, name)
- return nil
-}
-
// Inode returns the dentry's inode.
func (d *Dentry) Inode() Inode {
return d.inode
@@ -348,11 +334,6 @@ type Inode interface {
// a blanket implementation for all non-directory inodes.
inodeDirectory
- // Method for inodes that represent dynamic directories and their
- // children. InodeNoDynamicLookup provides a blanket implementation for all
- // non-dynamic-directory inodes.
- inodeDynamicLookup
-
// Open creates a file description for the filesystem object represented by
// this inode. The returned file description should hold a reference on the
// dentry for its lifetime.
@@ -365,6 +346,14 @@ type Inode interface {
// corresponds to vfs.FilesystemImpl.StatFSAt. If the client filesystem
// doesn't support statfs(2), this should return ENOSYS.
StatFS(ctx context.Context, fs *vfs.Filesystem) (linux.Statfs, error)
+
+ // Keep indicates whether the dentry created after Inode.Lookup should be
+ // kept in the kernfs dentry tree.
+ Keep() bool
+
+ // Valid should return true if this inode is still valid, or needs to
+ // be resolved again by a call to Lookup.
+ Valid(ctx context.Context) bool
}
type inodeRefs interface {
@@ -397,8 +386,8 @@ type inodeMetadata interface {
// Precondition: All methods in this interface may only be called on directory
// inodes.
type inodeDirectory interface {
- // The New{File,Dir,Node,Symlink} methods below should return a new inode
- // hashed into this inode.
+ // The New{File,Dir,Node,Link,Symlink} methods below should return a new inode
+ // that will be hashed into the dentry tree.
//
// These inode constructors are inode-level operations rather than
// filesystem-level operations to allow client filesystems to mix different
@@ -409,60 +398,54 @@ type inodeDirectory interface {
HasChildren() bool
// NewFile creates a new regular file inode.
- NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*Dentry, error)
+ NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (Inode, error)
// NewDir creates a new directory inode.
- NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*Dentry, error)
+ NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (Inode, error)
// NewLink creates a new hardlink to a specified inode in this
// directory. Implementations should create a new kernfs Dentry pointing to
// target, and update target's link count.
- NewLink(ctx context.Context, name string, target Inode) (*Dentry, error)
+ NewLink(ctx context.Context, name string, target Inode) (Inode, error)
// NewSymlink creates a new symbolic link inode.
- NewSymlink(ctx context.Context, name, target string) (*Dentry, error)
+ NewSymlink(ctx context.Context, name, target string) (Inode, error)
// NewNode creates a new filesystem node for a mknod syscall.
- NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*Dentry, error)
+ NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (Inode, error)
// Unlink removes a child dentry from this directory inode.
- Unlink(ctx context.Context, name string, child *Dentry) error
+ Unlink(ctx context.Context, name string, child Inode) error
// RmDir removes an empty child directory from this directory
// inode. Implementations must update the parent directory's link count,
// if required. Implementations are not responsible for checking that child
// is a directory, checking for an empty directory.
- RmDir(ctx context.Context, name string, child *Dentry) error
+ RmDir(ctx context.Context, name string, child Inode) error
// Rename is called on the source directory containing an inode being
// renamed. child should point to the resolved child in the source
- // directory. If Rename replaces a dentry in the destination directory, it
- // should return the replaced dentry or nil otherwise.
+ // directory.
//
// Precondition: Caller must serialize concurrent calls to Rename.
- Rename(ctx context.Context, oldname, newname string, child, dstDir *Dentry) (replaced *Dentry, err error)
-}
+ Rename(ctx context.Context, oldname, newname string, child, dstDir Inode) error
-type inodeDynamicLookup interface {
- // Lookup should return an appropriate dentry if name should resolve to a
- // child of this dynamic directory inode. This gives the directory an
- // opportunity on every lookup to resolve additional entries that aren't
- // hashed into the directory. This is only called when the inode is a
- // directory. If the inode is not a directory, or if the directory only
- // contains a static set of children, the implementer can unconditionally
- // return an appropriate error (ENOTDIR and ENOENT respectively).
+ // Lookup should return an appropriate inode if name should resolve to a
+ // child of this directory inode. This gives the directory an opportunity
+ // on every lookup to resolve additional entries. This is only called when
+ // the inode is a directory.
//
- // The child returned by Lookup will be hashed into the VFS dentry tree. Its
- // lifetime can be controlled by the filesystem implementation with an
- // appropriate implementation of Valid.
+ // The child returned by Lookup will be hashed into the VFS dentry tree,
+ // atleast for the duration of the current FS operation.
//
- // Lookup returns the child with an extra reference and the caller owns this
- // reference.
- Lookup(ctx context.Context, name string) (*Dentry, error)
-
- // Valid should return true if this inode is still valid, or needs to
- // be resolved again by a call to Lookup.
- Valid(ctx context.Context) bool
+ // Lookup must return the child with an extra reference whose ownership is
+ // transferred to the dentry that is created to point to that inode. If
+ // Inode.Keep returns false, that new dentry will be dropped at the end of
+ // the current filesystem operation (before returning back to the VFS
+ // layer) if no other ref is picked on that dentry. If Inode.Keep returns
+ // true, then the dentry will be cached into the dentry tree until it is
+ // Unlink'd or RmDir'd.
+ Lookup(ctx context.Context, name string) (Inode, error)
// IterDirents is used to iterate over dynamically created entries. It invokes
// cb on each entry in the directory represented by the Inode.
diff --git a/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go b/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go
index 7d9420725..f87782ee1 100644
--- a/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go
+++ b/pkg/sentry/fsimpl/kernfs/kernfs_state_autogen.go
@@ -182,18 +182,22 @@ func (i *InodeNoopRefCount) StateTypeName() string {
}
func (i *InodeNoopRefCount) StateFields() []string {
- return []string{}
+ return []string{
+ "InodeTemporary",
+ }
}
func (i *InodeNoopRefCount) beforeSave() {}
func (i *InodeNoopRefCount) StateSave(stateSinkObject state.Sink) {
i.beforeSave()
+ stateSinkObject.Save(0, &i.InodeTemporary)
}
func (i *InodeNoopRefCount) afterLoad() {}
func (i *InodeNoopRefCount) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &i.InodeTemporary)
}
func (i *InodeDirectoryNoNewChildren) StateTypeName() string {
@@ -220,37 +224,22 @@ func (i *InodeNotDirectory) StateTypeName() string {
}
func (i *InodeNotDirectory) StateFields() []string {
- return []string{}
+ return []string{
+ "InodeAlwaysValid",
+ }
}
func (i *InodeNotDirectory) beforeSave() {}
func (i *InodeNotDirectory) StateSave(stateSinkObject state.Sink) {
i.beforeSave()
+ stateSinkObject.Save(0, &i.InodeAlwaysValid)
}
func (i *InodeNotDirectory) afterLoad() {}
func (i *InodeNotDirectory) StateLoad(stateSourceObject state.Source) {
-}
-
-func (i *InodeNoDynamicLookup) StateTypeName() string {
- return "pkg/sentry/fsimpl/kernfs.InodeNoDynamicLookup"
-}
-
-func (i *InodeNoDynamicLookup) StateFields() []string {
- return []string{}
-}
-
-func (i *InodeNoDynamicLookup) beforeSave() {}
-
-func (i *InodeNoDynamicLookup) StateSave(stateSinkObject state.Sink) {
- i.beforeSave()
-}
-
-func (i *InodeNoDynamicLookup) afterLoad() {}
-
-func (i *InodeNoDynamicLookup) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &i.InodeAlwaysValid)
}
func (i *InodeNotSymlink) StateTypeName() string {
@@ -319,8 +308,9 @@ func (s *slot) StateTypeName() string {
func (s *slot) StateFields() []string {
return []string{
- "Name",
- "Dentry",
+ "name",
+ "inode",
+ "static",
"slotEntry",
}
}
@@ -329,17 +319,19 @@ func (s *slot) beforeSave() {}
func (s *slot) StateSave(stateSinkObject state.Sink) {
s.beforeSave()
- stateSinkObject.Save(0, &s.Name)
- stateSinkObject.Save(1, &s.Dentry)
- stateSinkObject.Save(2, &s.slotEntry)
+ stateSinkObject.Save(0, &s.name)
+ stateSinkObject.Save(1, &s.inode)
+ stateSinkObject.Save(2, &s.static)
+ stateSinkObject.Save(3, &s.slotEntry)
}
func (s *slot) afterLoad() {}
func (s *slot) StateLoad(stateSourceObject state.Source) {
- stateSourceObject.Load(0, &s.Name)
- stateSourceObject.Load(1, &s.Dentry)
- stateSourceObject.Load(2, &s.slotEntry)
+ stateSourceObject.Load(0, &s.name)
+ stateSourceObject.Load(1, &s.inode)
+ stateSourceObject.Load(2, &s.static)
+ stateSourceObject.Load(3, &s.slotEntry)
}
func (o *OrderedChildrenOptions) StateTypeName() string {
@@ -442,11 +434,12 @@ func (s *StaticDirectory) StateTypeName() string {
func (s *StaticDirectory) StateFields() []string {
return []string{
+ "InodeAlwaysValid",
"InodeAttrs",
"InodeDirectoryNoNewChildren",
- "InodeNoDynamicLookup",
"InodeNoStatFS",
"InodeNotSymlink",
+ "InodeTemporary",
"OrderedChildren",
"StaticDirectoryRefs",
"locks",
@@ -458,48 +451,69 @@ func (s *StaticDirectory) beforeSave() {}
func (s *StaticDirectory) StateSave(stateSinkObject state.Sink) {
s.beforeSave()
- stateSinkObject.Save(0, &s.InodeAttrs)
- stateSinkObject.Save(1, &s.InodeDirectoryNoNewChildren)
- stateSinkObject.Save(2, &s.InodeNoDynamicLookup)
+ stateSinkObject.Save(0, &s.InodeAlwaysValid)
+ stateSinkObject.Save(1, &s.InodeAttrs)
+ stateSinkObject.Save(2, &s.InodeDirectoryNoNewChildren)
stateSinkObject.Save(3, &s.InodeNoStatFS)
stateSinkObject.Save(4, &s.InodeNotSymlink)
- stateSinkObject.Save(5, &s.OrderedChildren)
- stateSinkObject.Save(6, &s.StaticDirectoryRefs)
- stateSinkObject.Save(7, &s.locks)
- stateSinkObject.Save(8, &s.fdOpts)
+ stateSinkObject.Save(5, &s.InodeTemporary)
+ stateSinkObject.Save(6, &s.OrderedChildren)
+ stateSinkObject.Save(7, &s.StaticDirectoryRefs)
+ stateSinkObject.Save(8, &s.locks)
+ stateSinkObject.Save(9, &s.fdOpts)
}
func (s *StaticDirectory) afterLoad() {}
func (s *StaticDirectory) StateLoad(stateSourceObject state.Source) {
- stateSourceObject.Load(0, &s.InodeAttrs)
- stateSourceObject.Load(1, &s.InodeDirectoryNoNewChildren)
- stateSourceObject.Load(2, &s.InodeNoDynamicLookup)
+ stateSourceObject.Load(0, &s.InodeAlwaysValid)
+ stateSourceObject.Load(1, &s.InodeAttrs)
+ stateSourceObject.Load(2, &s.InodeDirectoryNoNewChildren)
stateSourceObject.Load(3, &s.InodeNoStatFS)
stateSourceObject.Load(4, &s.InodeNotSymlink)
- stateSourceObject.Load(5, &s.OrderedChildren)
- stateSourceObject.Load(6, &s.StaticDirectoryRefs)
- stateSourceObject.Load(7, &s.locks)
- stateSourceObject.Load(8, &s.fdOpts)
+ stateSourceObject.Load(5, &s.InodeTemporary)
+ stateSourceObject.Load(6, &s.OrderedChildren)
+ stateSourceObject.Load(7, &s.StaticDirectoryRefs)
+ stateSourceObject.Load(8, &s.locks)
+ stateSourceObject.Load(9, &s.fdOpts)
}
-func (a *AlwaysValid) StateTypeName() string {
- return "pkg/sentry/fsimpl/kernfs.AlwaysValid"
+func (i *InodeAlwaysValid) StateTypeName() string {
+ return "pkg/sentry/fsimpl/kernfs.InodeAlwaysValid"
}
-func (a *AlwaysValid) StateFields() []string {
+func (i *InodeAlwaysValid) StateFields() []string {
return []string{}
}
-func (a *AlwaysValid) beforeSave() {}
+func (i *InodeAlwaysValid) beforeSave() {}
-func (a *AlwaysValid) StateSave(stateSinkObject state.Sink) {
- a.beforeSave()
+func (i *InodeAlwaysValid) StateSave(stateSinkObject state.Sink) {
+ i.beforeSave()
}
-func (a *AlwaysValid) afterLoad() {}
+func (i *InodeAlwaysValid) afterLoad() {}
-func (a *AlwaysValid) StateLoad(stateSourceObject state.Source) {
+func (i *InodeAlwaysValid) StateLoad(stateSourceObject state.Source) {
+}
+
+func (i *InodeTemporary) StateTypeName() string {
+ return "pkg/sentry/fsimpl/kernfs.InodeTemporary"
+}
+
+func (i *InodeTemporary) StateFields() []string {
+ return []string{}
+}
+
+func (i *InodeTemporary) beforeSave() {}
+
+func (i *InodeTemporary) StateSave(stateSinkObject state.Sink) {
+ i.beforeSave()
+}
+
+func (i *InodeTemporary) afterLoad() {}
+
+func (i *InodeTemporary) StateLoad(stateSourceObject state.Source) {
}
func (i *InodeNoStatFS) StateTypeName() string {
@@ -556,8 +570,9 @@ func (d *Dentry) StateTypeName() string {
func (d *Dentry) StateFields() []string {
return []string{
- "DentryRefs",
"vfsd",
+ "DentryRefs",
+ "fs",
"flags",
"parent",
"name",
@@ -570,25 +585,27 @@ func (d *Dentry) beforeSave() {}
func (d *Dentry) StateSave(stateSinkObject state.Sink) {
d.beforeSave()
- stateSinkObject.Save(0, &d.DentryRefs)
- stateSinkObject.Save(1, &d.vfsd)
- stateSinkObject.Save(2, &d.flags)
- stateSinkObject.Save(3, &d.parent)
- stateSinkObject.Save(4, &d.name)
- stateSinkObject.Save(5, &d.children)
- stateSinkObject.Save(6, &d.inode)
+ stateSinkObject.Save(0, &d.vfsd)
+ stateSinkObject.Save(1, &d.DentryRefs)
+ stateSinkObject.Save(2, &d.fs)
+ stateSinkObject.Save(3, &d.flags)
+ stateSinkObject.Save(4, &d.parent)
+ stateSinkObject.Save(5, &d.name)
+ stateSinkObject.Save(6, &d.children)
+ stateSinkObject.Save(7, &d.inode)
}
func (d *Dentry) afterLoad() {}
func (d *Dentry) StateLoad(stateSourceObject state.Source) {
- stateSourceObject.Load(0, &d.DentryRefs)
- stateSourceObject.Load(1, &d.vfsd)
- stateSourceObject.Load(2, &d.flags)
- stateSourceObject.Load(3, &d.parent)
- stateSourceObject.Load(4, &d.name)
- stateSourceObject.Load(5, &d.children)
- stateSourceObject.Load(6, &d.inode)
+ stateSourceObject.Load(0, &d.vfsd)
+ stateSourceObject.Load(1, &d.DentryRefs)
+ stateSourceObject.Load(2, &d.fs)
+ stateSourceObject.Load(3, &d.flags)
+ stateSourceObject.Load(4, &d.parent)
+ stateSourceObject.Load(5, &d.name)
+ stateSourceObject.Load(6, &d.children)
+ stateSourceObject.Load(7, &d.inode)
}
func (l *slotList) StateTypeName() string {
@@ -707,12 +724,12 @@ func (dir *syntheticDirectory) StateTypeName() string {
func (dir *syntheticDirectory) StateFields() []string {
return []string{
+ "InodeAlwaysValid",
"InodeAttrs",
"InodeNoStatFS",
- "InodeNoopRefCount",
- "InodeNoDynamicLookup",
"InodeNotSymlink",
"OrderedChildren",
+ "syntheticDirectoryRefs",
"locks",
}
}
@@ -721,27 +738,50 @@ func (dir *syntheticDirectory) beforeSave() {}
func (dir *syntheticDirectory) StateSave(stateSinkObject state.Sink) {
dir.beforeSave()
- stateSinkObject.Save(0, &dir.InodeAttrs)
- stateSinkObject.Save(1, &dir.InodeNoStatFS)
- stateSinkObject.Save(2, &dir.InodeNoopRefCount)
- stateSinkObject.Save(3, &dir.InodeNoDynamicLookup)
- stateSinkObject.Save(4, &dir.InodeNotSymlink)
- stateSinkObject.Save(5, &dir.OrderedChildren)
+ stateSinkObject.Save(0, &dir.InodeAlwaysValid)
+ stateSinkObject.Save(1, &dir.InodeAttrs)
+ stateSinkObject.Save(2, &dir.InodeNoStatFS)
+ stateSinkObject.Save(3, &dir.InodeNotSymlink)
+ stateSinkObject.Save(4, &dir.OrderedChildren)
+ stateSinkObject.Save(5, &dir.syntheticDirectoryRefs)
stateSinkObject.Save(6, &dir.locks)
}
func (dir *syntheticDirectory) afterLoad() {}
func (dir *syntheticDirectory) StateLoad(stateSourceObject state.Source) {
- stateSourceObject.Load(0, &dir.InodeAttrs)
- stateSourceObject.Load(1, &dir.InodeNoStatFS)
- stateSourceObject.Load(2, &dir.InodeNoopRefCount)
- stateSourceObject.Load(3, &dir.InodeNoDynamicLookup)
- stateSourceObject.Load(4, &dir.InodeNotSymlink)
- stateSourceObject.Load(5, &dir.OrderedChildren)
+ stateSourceObject.Load(0, &dir.InodeAlwaysValid)
+ stateSourceObject.Load(1, &dir.InodeAttrs)
+ stateSourceObject.Load(2, &dir.InodeNoStatFS)
+ stateSourceObject.Load(3, &dir.InodeNotSymlink)
+ stateSourceObject.Load(4, &dir.OrderedChildren)
+ stateSourceObject.Load(5, &dir.syntheticDirectoryRefs)
stateSourceObject.Load(6, &dir.locks)
}
+func (r *syntheticDirectoryRefs) StateTypeName() string {
+ return "pkg/sentry/fsimpl/kernfs.syntheticDirectoryRefs"
+}
+
+func (r *syntheticDirectoryRefs) StateFields() []string {
+ return []string{
+ "refCount",
+ }
+}
+
+func (r *syntheticDirectoryRefs) beforeSave() {}
+
+func (r *syntheticDirectoryRefs) StateSave(stateSinkObject state.Sink) {
+ r.beforeSave()
+ stateSinkObject.Save(0, &r.refCount)
+}
+
+func (r *syntheticDirectoryRefs) afterLoad() {}
+
+func (r *syntheticDirectoryRefs) StateLoad(stateSourceObject state.Source) {
+ stateSourceObject.Load(0, &r.refCount)
+}
+
func init() {
state.Register((*DentryRefs)(nil))
state.Register((*DynamicBytesFile)(nil))
@@ -752,7 +792,6 @@ func init() {
state.Register((*InodeNoopRefCount)(nil))
state.Register((*InodeDirectoryNoNewChildren)(nil))
state.Register((*InodeNotDirectory)(nil))
- state.Register((*InodeNoDynamicLookup)(nil))
state.Register((*InodeNotSymlink)(nil))
state.Register((*InodeAttrs)(nil))
state.Register((*slot)(nil))
@@ -761,7 +800,8 @@ func init() {
state.Register((*renameAcrossDifferentImplementationsError)(nil))
state.Register((*InodeSymlink)(nil))
state.Register((*StaticDirectory)(nil))
- state.Register((*AlwaysValid)(nil))
+ state.Register((*InodeAlwaysValid)(nil))
+ state.Register((*InodeTemporary)(nil))
state.Register((*InodeNoStatFS)(nil))
state.Register((*Filesystem)(nil))
state.Register((*Dentry)(nil))
@@ -770,4 +810,5 @@ func init() {
state.Register((*StaticDirectoryRefs)(nil))
state.Register((*StaticSymlink)(nil))
state.Register((*syntheticDirectory)(nil))
+ state.Register((*syntheticDirectoryRefs)(nil))
}
diff --git a/pkg/sentry/fsimpl/kernfs/symlink.go b/pkg/sentry/fsimpl/kernfs/symlink.go
index 58a93eaac..934cc6c9e 100644
--- a/pkg/sentry/fsimpl/kernfs/symlink.go
+++ b/pkg/sentry/fsimpl/kernfs/symlink.go
@@ -38,13 +38,10 @@ type StaticSymlink struct {
var _ Inode = (*StaticSymlink)(nil)
// NewStaticSymlink creates a new symlink file pointing to 'target'.
-func NewStaticSymlink(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, target string) *Dentry {
+func NewStaticSymlink(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, target string) Inode {
inode := &StaticSymlink{}
inode.Init(creds, devMajor, devMinor, ino, target)
-
- d := &Dentry{}
- d.Init(inode)
- return d
+ return inode
}
// Init initializes the instance.
diff --git a/pkg/sentry/fsimpl/kernfs/synthetic_directory.go b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go
index ea7f073eb..d0ed17b18 100644
--- a/pkg/sentry/fsimpl/kernfs/synthetic_directory.go
+++ b/pkg/sentry/fsimpl/kernfs/synthetic_directory.go
@@ -29,24 +29,22 @@ import (
//
// +stateify savable
type syntheticDirectory struct {
+ InodeAlwaysValid
InodeAttrs
InodeNoStatFS
- InodeNoopRefCount
- InodeNoDynamicLookup
InodeNotSymlink
OrderedChildren
+ syntheticDirectoryRefs
locks vfs.FileLocks
}
var _ Inode = (*syntheticDirectory)(nil)
-func newSyntheticDirectory(creds *auth.Credentials, perm linux.FileMode) *Dentry {
+func newSyntheticDirectory(creds *auth.Credentials, perm linux.FileMode) Inode {
inode := &syntheticDirectory{}
inode.Init(creds, 0 /* devMajor */, 0 /* devMinor */, 0 /* ino */, perm)
- d := &Dentry{}
- d.Init(inode)
- return d
+ return inode
}
func (dir *syntheticDirectory) Init(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode) {
@@ -69,34 +67,46 @@ func (dir *syntheticDirectory) Open(ctx context.Context, rp *vfs.ResolvingPath,
}
// NewFile implements Inode.NewFile.
-func (dir *syntheticDirectory) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (*Dentry, error) {
+func (dir *syntheticDirectory) NewFile(ctx context.Context, name string, opts vfs.OpenOptions) (Inode, error) {
return nil, syserror.EPERM
}
// NewDir implements Inode.NewDir.
-func (dir *syntheticDirectory) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (*Dentry, error) {
+func (dir *syntheticDirectory) NewDir(ctx context.Context, name string, opts vfs.MkdirOptions) (Inode, error) {
if !opts.ForSyntheticMountpoint {
return nil, syserror.EPERM
}
- subdird := newSyntheticDirectory(auth.CredentialsFromContext(ctx), opts.Mode&linux.PermissionsMask)
- if err := dir.OrderedChildren.Insert(name, subdird); err != nil {
- subdird.DecRef(ctx)
+ subdirI := newSyntheticDirectory(auth.CredentialsFromContext(ctx), opts.Mode&linux.PermissionsMask)
+ if err := dir.OrderedChildren.Insert(name, subdirI); err != nil {
+ subdirI.DecRef(ctx)
return nil, err
}
- return subdird, nil
+ return subdirI, nil
}
// NewLink implements Inode.NewLink.
-func (dir *syntheticDirectory) NewLink(ctx context.Context, name string, target Inode) (*Dentry, error) {
+func (dir *syntheticDirectory) NewLink(ctx context.Context, name string, target Inode) (Inode, error) {
return nil, syserror.EPERM
}
// NewSymlink implements Inode.NewSymlink.
-func (dir *syntheticDirectory) NewSymlink(ctx context.Context, name, target string) (*Dentry, error) {
+func (dir *syntheticDirectory) NewSymlink(ctx context.Context, name, target string) (Inode, error) {
return nil, syserror.EPERM
}
// NewNode implements Inode.NewNode.
-func (dir *syntheticDirectory) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (*Dentry, error) {
+func (dir *syntheticDirectory) NewNode(ctx context.Context, name string, opts vfs.MknodOptions) (Inode, error) {
return nil, syserror.EPERM
}
+
+// DecRef implements Inode.DecRef.
+func (dir *syntheticDirectory) DecRef(ctx context.Context) {
+ dir.syntheticDirectoryRefs.DecRef(func() { dir.Destroy(ctx) })
+}
+
+// Keep implements Inode.Keep. This is redundant because inodes will never be
+// created via Lookup and inodes are always valid. Makes sense to return true
+// because these inodes are not temporary and should only be removed on RmDir.
+func (dir *syntheticDirectory) Keep() bool {
+ return true
+}
diff --git a/pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go b/pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go
new file mode 100644
index 000000000..28d556b42
--- /dev/null
+++ b/pkg/sentry/fsimpl/kernfs/synthetic_directory_refs.go
@@ -0,0 +1,118 @@
+package kernfs
+
+import (
+ "fmt"
+ "runtime"
+ "sync/atomic"
+
+ "gvisor.dev/gvisor/pkg/log"
+ refs_vfs1 "gvisor.dev/gvisor/pkg/refs"
+)
+
+// ownerType is used to customize logging. Note that we use a pointer to T so
+// that we do not copy the entire object when passed as a format parameter.
+var syntheticDirectoryownerType *syntheticDirectory
+
+// Refs implements refs.RefCounter. It keeps a reference count using atomic
+// operations and calls the destructor when the count reaches zero.
+//
+// Note that the number of references is actually refCount + 1 so that a default
+// zero-value Refs object contains one reference.
+//
+// TODO(gvisor.dev/issue/1486): Store stack traces when leak check is enabled in
+// a map with 16-bit hashes, and store the hash in the top 16 bits of refCount.
+// This will allow us to add stack trace information to the leak messages
+// without growing the size of Refs.
+//
+// +stateify savable
+type syntheticDirectoryRefs struct {
+ // refCount is composed of two fields:
+ //
+ // [32-bit speculative references]:[32-bit real references]
+ //
+ // Speculative references are used for TryIncRef, to avoid a CompareAndSwap
+ // loop. See IncRef, DecRef and TryIncRef for details of how these fields are
+ // used.
+ refCount int64
+}
+
+func (r *syntheticDirectoryRefs) finalize() {
+ var note string
+ switch refs_vfs1.GetLeakMode() {
+ case refs_vfs1.NoLeakChecking:
+ return
+ case refs_vfs1.UninitializedLeakChecking:
+ note = "(Leak checker uninitialized): "
+ }
+ if n := r.ReadRefs(); n != 0 {
+ log.Warningf("%sRefs %p owned by %T garbage collected with ref count of %d (want 0)", note, r, syntheticDirectoryownerType, n)
+ }
+}
+
+// EnableLeakCheck checks for reference leaks when Refs gets garbage collected.
+func (r *syntheticDirectoryRefs) EnableLeakCheck() {
+ if refs_vfs1.GetLeakMode() != refs_vfs1.NoLeakChecking {
+ runtime.SetFinalizer(r, (*syntheticDirectoryRefs).finalize)
+ }
+}
+
+// ReadRefs returns the current number of references. The returned count is
+// inherently racy and is unsafe to use without external synchronization.
+func (r *syntheticDirectoryRefs) ReadRefs() int64 {
+
+ return atomic.LoadInt64(&r.refCount) + 1
+}
+
+// IncRef implements refs.RefCounter.IncRef.
+//
+//go:nosplit
+func (r *syntheticDirectoryRefs) IncRef() {
+ if v := atomic.AddInt64(&r.refCount, 1); v <= 0 {
+ panic(fmt.Sprintf("Incrementing non-positive ref count %p owned by %T", r, syntheticDirectoryownerType))
+ }
+}
+
+// TryIncRef implements refs.RefCounter.TryIncRef.
+//
+// To do this safely without a loop, a speculative reference is first acquired
+// on the object. This allows multiple concurrent TryIncRef calls to distinguish
+// other TryIncRef calls from genuine references held.
+//
+//go:nosplit
+func (r *syntheticDirectoryRefs) TryIncRef() bool {
+ const speculativeRef = 1 << 32
+ v := atomic.AddInt64(&r.refCount, speculativeRef)
+ if int32(v) < 0 {
+
+ atomic.AddInt64(&r.refCount, -speculativeRef)
+ return false
+ }
+
+ atomic.AddInt64(&r.refCount, -speculativeRef+1)
+ return true
+}
+
+// DecRef implements refs.RefCounter.DecRef.
+//
+// Note that speculative references are counted here. Since they were added
+// prior to real references reaching zero, they will successfully convert to
+// real references. In other words, we see speculative references only in the
+// following case:
+//
+// A: TryIncRef [speculative increase => sees non-negative references]
+// B: DecRef [real decrease]
+// A: TryIncRef [transform speculative to real]
+//
+//go:nosplit
+func (r *syntheticDirectoryRefs) DecRef(destroy func()) {
+ switch v := atomic.AddInt64(&r.refCount, -1); {
+ case v < -1:
+ panic(fmt.Sprintf("Decrementing non-positive ref count %p, owned by %T", r, syntheticDirectoryownerType))
+
+ case v == -1:
+
+ if destroy != nil {
+ destroy()
+ }
+ }
+}
diff --git a/pkg/sentry/fsimpl/pipefs/pipefs.go b/pkg/sentry/fsimpl/pipefs/pipefs.go
index 4e2da4810..903bd8cdf 100644
--- a/pkg/sentry/fsimpl/pipefs/pipefs.go
+++ b/pkg/sentry/fsimpl/pipefs/pipefs.go
@@ -165,7 +165,7 @@ func NewConnectedPipeFDs(ctx context.Context, mnt *vfs.Mount, flags uint32) (*vf
fs := mnt.Filesystem().Impl().(*filesystem)
inode := newInode(ctx, fs)
var d kernfs.Dentry
- d.Init(inode)
+ d.Init(&fs.Filesystem, inode)
defer d.DecRef(ctx)
return inode.pipe.ReaderWriterPair(mnt, d.VFSDentry(), flags)
}
diff --git a/pkg/sentry/fsimpl/proc/filesystem.go b/pkg/sentry/fsimpl/proc/filesystem.go
index 05d7948ea..bea669906 100644
--- a/pkg/sentry/fsimpl/proc/filesystem.go
+++ b/pkg/sentry/fsimpl/proc/filesystem.go
@@ -73,7 +73,9 @@ func (ft FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualF
cgroups = data.Cgroups
}
- _, dentry := procfs.newTasksInode(k, pidns, cgroups)
+ inode := procfs.newTasksInode(k, pidns, cgroups)
+ var dentry kernfs.Dentry
+ dentry.Init(&procfs.Filesystem, inode)
return procfs.VFSFilesystem(), dentry.VFSDentry(), nil
}
@@ -94,12 +96,9 @@ type dynamicInode interface {
Init(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, data vfs.DynamicBytesSource, perm linux.FileMode)
}
-func (fs *filesystem) newDentry(creds *auth.Credentials, ino uint64, perm linux.FileMode, inode dynamicInode) *kernfs.Dentry {
- inode.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, ino, inode, perm)
-
- d := &kernfs.Dentry{}
- d.Init(inode)
- return d
+func (fs *filesystem) newInode(creds *auth.Credentials, perm linux.FileMode, inode dynamicInode) dynamicInode {
+ inode.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), inode, perm)
+ return inode
}
// +stateify savable
@@ -114,8 +113,8 @@ func newStaticFile(data string) *staticFile {
return &staticFile{StaticData: vfs.StaticData{Data: data}}
}
-func newStaticDir(creds *auth.Credentials, devMajor, devMinor uint32, ino uint64, perm linux.FileMode, children map[string]*kernfs.Dentry) *kernfs.Dentry {
- return kernfs.NewStaticDir(creds, devMajor, devMinor, ino, perm, children, kernfs.GenericDirectoryFDOptions{
+func (fs *filesystem) newStaticDir(creds *auth.Credentials, children map[string]kernfs.Inode) kernfs.Inode {
+ return kernfs.NewStaticDir(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, children, kernfs.GenericDirectoryFDOptions{
SeekEnd: kernfs.SeekEndZero,
})
}
diff --git a/pkg/sentry/fsimpl/proc/proc_state_autogen.go b/pkg/sentry/fsimpl/proc/proc_state_autogen.go
index 0bbbd5761..e17a2a13c 100644
--- a/pkg/sentry/fsimpl/proc/proc_state_autogen.go
+++ b/pkg/sentry/fsimpl/proc/proc_state_autogen.go
@@ -172,10 +172,11 @@ func (i *subtasksInode) StateTypeName() string {
func (i *subtasksInode) StateFields() []string {
return []string{
"implStatFS",
- "AlwaysValid",
+ "InodeAlwaysValid",
"InodeAttrs",
"InodeDirectoryNoNewChildren",
"InodeNotSymlink",
+ "InodeTemporary",
"OrderedChildren",
"subtasksInodeRefs",
"locks",
@@ -191,34 +192,36 @@ func (i *subtasksInode) beforeSave() {}
func (i *subtasksInode) StateSave(stateSinkObject state.Sink) {
i.beforeSave()
stateSinkObject.Save(0, &i.implStatFS)
- stateSinkObject.Save(1, &i.AlwaysValid)
+ stateSinkObject.Save(1, &i.InodeAlwaysValid)
stateSinkObject.Save(2, &i.InodeAttrs)
stateSinkObject.Save(3, &i.InodeDirectoryNoNewChildren)
stateSinkObject.Save(4, &i.InodeNotSymlink)
- stateSinkObject.Save(5, &i.OrderedChildren)
- stateSinkObject.Save(6, &i.subtasksInodeRefs)
- stateSinkObject.Save(7, &i.locks)
- stateSinkObject.Save(8, &i.fs)
- stateSinkObject.Save(9, &i.task)
- stateSinkObject.Save(10, &i.pidns)
- stateSinkObject.Save(11, &i.cgroupControllers)
+ stateSinkObject.Save(5, &i.InodeTemporary)
+ stateSinkObject.Save(6, &i.OrderedChildren)
+ stateSinkObject.Save(7, &i.subtasksInodeRefs)
+ stateSinkObject.Save(8, &i.locks)
+ stateSinkObject.Save(9, &i.fs)
+ stateSinkObject.Save(10, &i.task)
+ stateSinkObject.Save(11, &i.pidns)
+ stateSinkObject.Save(12, &i.cgroupControllers)
}
func (i *subtasksInode) afterLoad() {}
func (i *subtasksInode) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(0, &i.implStatFS)
- stateSourceObject.Load(1, &i.AlwaysValid)
+ stateSourceObject.Load(1, &i.InodeAlwaysValid)
stateSourceObject.Load(2, &i.InodeAttrs)
stateSourceObject.Load(3, &i.InodeDirectoryNoNewChildren)
stateSourceObject.Load(4, &i.InodeNotSymlink)
- stateSourceObject.Load(5, &i.OrderedChildren)
- stateSourceObject.Load(6, &i.subtasksInodeRefs)
- stateSourceObject.Load(7, &i.locks)
- stateSourceObject.Load(8, &i.fs)
- stateSourceObject.Load(9, &i.task)
- stateSourceObject.Load(10, &i.pidns)
- stateSourceObject.Load(11, &i.cgroupControllers)
+ stateSourceObject.Load(5, &i.InodeTemporary)
+ stateSourceObject.Load(6, &i.OrderedChildren)
+ stateSourceObject.Load(7, &i.subtasksInodeRefs)
+ stateSourceObject.Load(8, &i.locks)
+ stateSourceObject.Load(9, &i.fs)
+ stateSourceObject.Load(10, &i.task)
+ stateSourceObject.Load(11, &i.pidns)
+ stateSourceObject.Load(12, &i.cgroupControllers)
}
func (fd *subtasksFD) StateTypeName() string {
@@ -279,8 +282,8 @@ func (i *taskInode) StateFields() []string {
"implStatFS",
"InodeAttrs",
"InodeDirectoryNoNewChildren",
- "InodeNoDynamicLookup",
"InodeNotSymlink",
+ "InodeTemporary",
"OrderedChildren",
"taskInodeRefs",
"locks",
@@ -295,8 +298,8 @@ func (i *taskInode) StateSave(stateSinkObject state.Sink) {
stateSinkObject.Save(0, &i.implStatFS)
stateSinkObject.Save(1, &i.InodeAttrs)
stateSinkObject.Save(2, &i.InodeDirectoryNoNewChildren)
- stateSinkObject.Save(3, &i.InodeNoDynamicLookup)
- stateSinkObject.Save(4, &i.InodeNotSymlink)
+ stateSinkObject.Save(3, &i.InodeNotSymlink)
+ stateSinkObject.Save(4, &i.InodeTemporary)
stateSinkObject.Save(5, &i.OrderedChildren)
stateSinkObject.Save(6, &i.taskInodeRefs)
stateSinkObject.Save(7, &i.locks)
@@ -309,8 +312,8 @@ func (i *taskInode) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(0, &i.implStatFS)
stateSourceObject.Load(1, &i.InodeAttrs)
stateSourceObject.Load(2, &i.InodeDirectoryNoNewChildren)
- stateSourceObject.Load(3, &i.InodeNoDynamicLookup)
- stateSourceObject.Load(4, &i.InodeNotSymlink)
+ stateSourceObject.Load(3, &i.InodeNotSymlink)
+ stateSourceObject.Load(4, &i.InodeTemporary)
stateSourceObject.Load(5, &i.OrderedChildren)
stateSourceObject.Load(6, &i.taskInodeRefs)
stateSourceObject.Load(7, &i.locks)
@@ -384,10 +387,11 @@ func (i *fdDirInode) StateFields() []string {
"fdDir",
"fdDirInodeRefs",
"implStatFS",
- "AlwaysValid",
+ "InodeAlwaysValid",
"InodeAttrs",
"InodeDirectoryNoNewChildren",
"InodeNotSymlink",
+ "InodeTemporary",
"OrderedChildren",
}
}
@@ -399,11 +403,12 @@ func (i *fdDirInode) StateSave(stateSinkObject state.Sink) {
stateSinkObject.Save(0, &i.fdDir)
stateSinkObject.Save(1, &i.fdDirInodeRefs)
stateSinkObject.Save(2, &i.implStatFS)
- stateSinkObject.Save(3, &i.AlwaysValid)
+ stateSinkObject.Save(3, &i.InodeAlwaysValid)
stateSinkObject.Save(4, &i.InodeAttrs)
stateSinkObject.Save(5, &i.InodeDirectoryNoNewChildren)
stateSinkObject.Save(6, &i.InodeNotSymlink)
- stateSinkObject.Save(7, &i.OrderedChildren)
+ stateSinkObject.Save(7, &i.InodeTemporary)
+ stateSinkObject.Save(8, &i.OrderedChildren)
}
func (i *fdDirInode) afterLoad() {}
@@ -412,11 +417,12 @@ func (i *fdDirInode) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(0, &i.fdDir)
stateSourceObject.Load(1, &i.fdDirInodeRefs)
stateSourceObject.Load(2, &i.implStatFS)
- stateSourceObject.Load(3, &i.AlwaysValid)
+ stateSourceObject.Load(3, &i.InodeAlwaysValid)
stateSourceObject.Load(4, &i.InodeAttrs)
stateSourceObject.Load(5, &i.InodeDirectoryNoNewChildren)
stateSourceObject.Load(6, &i.InodeNotSymlink)
- stateSourceObject.Load(7, &i.OrderedChildren)
+ stateSourceObject.Load(7, &i.InodeTemporary)
+ stateSourceObject.Load(8, &i.OrderedChildren)
}
func (s *fdSymlink) StateTypeName() string {
@@ -466,10 +472,11 @@ func (i *fdInfoDirInode) StateFields() []string {
"fdDir",
"fdInfoDirInodeRefs",
"implStatFS",
- "AlwaysValid",
+ "InodeAlwaysValid",
"InodeAttrs",
"InodeDirectoryNoNewChildren",
"InodeNotSymlink",
+ "InodeTemporary",
"OrderedChildren",
}
}
@@ -481,11 +488,12 @@ func (i *fdInfoDirInode) StateSave(stateSinkObject state.Sink) {
stateSinkObject.Save(0, &i.fdDir)
stateSinkObject.Save(1, &i.fdInfoDirInodeRefs)
stateSinkObject.Save(2, &i.implStatFS)
- stateSinkObject.Save(3, &i.AlwaysValid)
+ stateSinkObject.Save(3, &i.InodeAlwaysValid)
stateSinkObject.Save(4, &i.InodeAttrs)
stateSinkObject.Save(5, &i.InodeDirectoryNoNewChildren)
stateSinkObject.Save(6, &i.InodeNotSymlink)
- stateSinkObject.Save(7, &i.OrderedChildren)
+ stateSinkObject.Save(7, &i.InodeTemporary)
+ stateSinkObject.Save(8, &i.OrderedChildren)
}
func (i *fdInfoDirInode) afterLoad() {}
@@ -494,11 +502,12 @@ func (i *fdInfoDirInode) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(0, &i.fdDir)
stateSourceObject.Load(1, &i.fdInfoDirInodeRefs)
stateSourceObject.Load(2, &i.implStatFS)
- stateSourceObject.Load(3, &i.AlwaysValid)
+ stateSourceObject.Load(3, &i.InodeAlwaysValid)
stateSourceObject.Load(4, &i.InodeAttrs)
stateSourceObject.Load(5, &i.InodeDirectoryNoNewChildren)
stateSourceObject.Load(6, &i.InodeNotSymlink)
- stateSourceObject.Load(7, &i.OrderedChildren)
+ stateSourceObject.Load(7, &i.InodeTemporary)
+ stateSourceObject.Load(8, &i.OrderedChildren)
}
func (d *fdInfoData) StateTypeName() string {
@@ -1365,17 +1374,16 @@ func (i *tasksInode) StateTypeName() string {
func (i *tasksInode) StateFields() []string {
return []string{
"implStatFS",
- "AlwaysValid",
+ "InodeAlwaysValid",
"InodeAttrs",
"InodeDirectoryNoNewChildren",
"InodeNotSymlink",
+ "InodeTemporary",
"OrderedChildren",
"tasksInodeRefs",
"locks",
"fs",
"pidns",
- "selfSymlink",
- "threadSelfSymlink",
"cgroupControllers",
}
}
@@ -1385,36 +1393,34 @@ func (i *tasksInode) beforeSave() {}
func (i *tasksInode) StateSave(stateSinkObject state.Sink) {
i.beforeSave()
stateSinkObject.Save(0, &i.implStatFS)
- stateSinkObject.Save(1, &i.AlwaysValid)
+ stateSinkObject.Save(1, &i.InodeAlwaysValid)
stateSinkObject.Save(2, &i.InodeAttrs)
stateSinkObject.Save(3, &i.InodeDirectoryNoNewChildren)
stateSinkObject.Save(4, &i.InodeNotSymlink)
- stateSinkObject.Save(5, &i.OrderedChildren)
- stateSinkObject.Save(6, &i.tasksInodeRefs)
- stateSinkObject.Save(7, &i.locks)
- stateSinkObject.Save(8, &i.fs)
- stateSinkObject.Save(9, &i.pidns)
- stateSinkObject.Save(10, &i.selfSymlink)
- stateSinkObject.Save(11, &i.threadSelfSymlink)
- stateSinkObject.Save(12, &i.cgroupControllers)
+ stateSinkObject.Save(5, &i.InodeTemporary)
+ stateSinkObject.Save(6, &i.OrderedChildren)
+ stateSinkObject.Save(7, &i.tasksInodeRefs)
+ stateSinkObject.Save(8, &i.locks)
+ stateSinkObject.Save(9, &i.fs)
+ stateSinkObject.Save(10, &i.pidns)
+ stateSinkObject.Save(11, &i.cgroupControllers)
}
func (i *tasksInode) afterLoad() {}
func (i *tasksInode) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(0, &i.implStatFS)
- stateSourceObject.Load(1, &i.AlwaysValid)
+ stateSourceObject.Load(1, &i.InodeAlwaysValid)
stateSourceObject.Load(2, &i.InodeAttrs)
stateSourceObject.Load(3, &i.InodeDirectoryNoNewChildren)
stateSourceObject.Load(4, &i.InodeNotSymlink)
- stateSourceObject.Load(5, &i.OrderedChildren)
- stateSourceObject.Load(6, &i.tasksInodeRefs)
- stateSourceObject.Load(7, &i.locks)
- stateSourceObject.Load(8, &i.fs)
- stateSourceObject.Load(9, &i.pidns)
- stateSourceObject.Load(10, &i.selfSymlink)
- stateSourceObject.Load(11, &i.threadSelfSymlink)
- stateSourceObject.Load(12, &i.cgroupControllers)
+ stateSourceObject.Load(5, &i.InodeTemporary)
+ stateSourceObject.Load(6, &i.OrderedChildren)
+ stateSourceObject.Load(7, &i.tasksInodeRefs)
+ stateSourceObject.Load(8, &i.locks)
+ stateSourceObject.Load(9, &i.fs)
+ stateSourceObject.Load(10, &i.pidns)
+ stateSourceObject.Load(11, &i.cgroupControllers)
}
func (s *staticFileSetStat) StateTypeName() string {
diff --git a/pkg/sentry/fsimpl/proc/subtasks.go b/pkg/sentry/fsimpl/proc/subtasks.go
index 47ecd941c..bad2fab4f 100644
--- a/pkg/sentry/fsimpl/proc/subtasks.go
+++ b/pkg/sentry/fsimpl/proc/subtasks.go
@@ -32,10 +32,11 @@ import (
// +stateify savable
type subtasksInode struct {
implStatFS
- kernfs.AlwaysValid
+ kernfs.InodeAlwaysValid
kernfs.InodeAttrs
kernfs.InodeDirectoryNoNewChildren
kernfs.InodeNotSymlink
+ kernfs.InodeTemporary
kernfs.OrderedChildren
subtasksInodeRefs
@@ -49,7 +50,7 @@ type subtasksInode struct {
var _ kernfs.Inode = (*subtasksInode)(nil)
-func (fs *filesystem) newSubtasks(task *kernel.Task, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) *kernfs.Dentry {
+func (fs *filesystem) newSubtasks(task *kernel.Task, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) kernfs.Inode {
subInode := &subtasksInode{
fs: fs,
task: task,
@@ -62,14 +63,11 @@ func (fs *filesystem) newSubtasks(task *kernel.Task, pidns *kernel.PIDNamespace,
subInode.EnableLeakCheck()
inode := &taskOwnedInode{Inode: subInode, owner: task}
- dentry := &kernfs.Dentry{}
- dentry.Init(inode)
-
- return dentry
+ return inode
}
-// Lookup implements kernfs.inodeDynamicLookup.Lookup.
-func (i *subtasksInode) Lookup(ctx context.Context, name string) (*kernfs.Dentry, error) {
+// Lookup implements kernfs.inodeDirectory.Lookup.
+func (i *subtasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
tid, err := strconv.ParseUint(name, 10, 32)
if err != nil {
return nil, syserror.ENOENT
@@ -82,10 +80,10 @@ func (i *subtasksInode) Lookup(ctx context.Context, name string) (*kernfs.Dentry
if subTask.ThreadGroup() != i.task.ThreadGroup() {
return nil, syserror.ENOENT
}
- return i.fs.newTaskInode(subTask, i.pidns, false, i.cgroupControllers), nil
+ return i.fs.newTaskInode(subTask, i.pidns, false, i.cgroupControllers)
}
-// IterDirents implements kernfs.inodeDynamicLookup.IterDirents.
+// IterDirents implements kernfs.inodeDirectory.IterDirents.
func (i *subtasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
tasks := i.task.ThreadGroup().MemberIDs(i.pidns)
if len(tasks) == 0 {
@@ -186,6 +184,6 @@ func (*subtasksInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credential
}
// DecRef implements kernfs.Inode.DecRef.
-func (i *subtasksInode) DecRef(context.Context) {
- i.subtasksInodeRefs.DecRef(i.Destroy)
+func (i *subtasksInode) DecRef(ctx context.Context) {
+ i.subtasksInodeRefs.DecRef(func() { i.Destroy(ctx) })
}
diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go
index a7cd6f57e..b63a4eca0 100644
--- a/pkg/sentry/fsimpl/proc/task.go
+++ b/pkg/sentry/fsimpl/proc/task.go
@@ -35,8 +35,8 @@ type taskInode struct {
implStatFS
kernfs.InodeAttrs
kernfs.InodeDirectoryNoNewChildren
- kernfs.InodeNoDynamicLookup
kernfs.InodeNotSymlink
+ kernfs.InodeTemporary
kernfs.OrderedChildren
taskInodeRefs
@@ -47,41 +47,44 @@ type taskInode struct {
var _ kernfs.Inode = (*taskInode)(nil)
-func (fs *filesystem) newTaskInode(task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, cgroupControllers map[string]string) *kernfs.Dentry {
- // TODO(gvisor.dev/issue/164): Fail with ESRCH if task exited.
- contents := map[string]*kernfs.Dentry{
- "auxv": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &auxvData{task: task}),
- "cmdline": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &cmdlineData{task: task, arg: cmdlineDataArg}),
+func (fs *filesystem) newTaskInode(task *kernel.Task, pidns *kernel.PIDNamespace, isThreadGroup bool, cgroupControllers map[string]string) (kernfs.Inode, error) {
+ if task.ExitState() == kernel.TaskExitDead {
+ return nil, syserror.ESRCH
+ }
+
+ contents := map[string]kernfs.Inode{
+ "auxv": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &auxvData{task: task}),
+ "cmdline": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &cmdlineData{task: task, arg: cmdlineDataArg}),
"comm": fs.newComm(task, fs.NextIno(), 0444),
"cwd": fs.newCwdSymlink(task, fs.NextIno()),
- "environ": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &cmdlineData{task: task, arg: environDataArg}),
+ "environ": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &cmdlineData{task: task, arg: environDataArg}),
"exe": fs.newExeSymlink(task, fs.NextIno()),
"fd": fs.newFDDirInode(task),
"fdinfo": fs.newFDInfoDirInode(task),
- "gid_map": fs.newTaskOwnedFile(task, fs.NextIno(), 0644, &idMapData{task: task, gids: true}),
- "io": fs.newTaskOwnedFile(task, fs.NextIno(), 0400, newIO(task, isThreadGroup)),
- "maps": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &mapsData{task: task}),
- "mountinfo": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &mountInfoData{task: task}),
- "mounts": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &mountsData{task: task}),
+ "gid_map": fs.newTaskOwnedInode(task, fs.NextIno(), 0644, &idMapData{task: task, gids: true}),
+ "io": fs.newTaskOwnedInode(task, fs.NextIno(), 0400, newIO(task, isThreadGroup)),
+ "maps": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &mapsData{task: task}),
+ "mountinfo": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &mountInfoData{task: task}),
+ "mounts": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &mountsData{task: task}),
"net": fs.newTaskNetDir(task),
- "ns": fs.newTaskOwnedDir(task, fs.NextIno(), 0511, map[string]*kernfs.Dentry{
+ "ns": fs.newTaskOwnedDir(task, fs.NextIno(), 0511, map[string]kernfs.Inode{
"net": fs.newNamespaceSymlink(task, fs.NextIno(), "net"),
"pid": fs.newNamespaceSymlink(task, fs.NextIno(), "pid"),
"user": fs.newNamespaceSymlink(task, fs.NextIno(), "user"),
}),
- "oom_score": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, newStaticFile("0\n")),
- "oom_score_adj": fs.newTaskOwnedFile(task, fs.NextIno(), 0644, &oomScoreAdj{task: task}),
- "smaps": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &smapsData{task: task}),
- "stat": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}),
- "statm": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &statmData{task: task}),
- "status": fs.newTaskOwnedFile(task, fs.NextIno(), 0444, &statusData{task: task, pidns: pidns}),
- "uid_map": fs.newTaskOwnedFile(task, fs.NextIno(), 0644, &idMapData{task: task, gids: false}),
+ "oom_score": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, newStaticFile("0\n")),
+ "oom_score_adj": fs.newTaskOwnedInode(task, fs.NextIno(), 0644, &oomScoreAdj{task: task}),
+ "smaps": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &smapsData{task: task}),
+ "stat": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}),
+ "statm": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &statmData{task: task}),
+ "status": fs.newTaskOwnedInode(task, fs.NextIno(), 0444, &statusData{task: task, pidns: pidns}),
+ "uid_map": fs.newTaskOwnedInode(task, fs.NextIno(), 0644, &idMapData{task: task, gids: false}),
}
if isThreadGroup {
contents["task"] = fs.newSubtasks(task, pidns, cgroupControllers)
}
if len(cgroupControllers) > 0 {
- contents["cgroup"] = fs.newTaskOwnedFile(task, fs.NextIno(), 0444, newCgroupData(cgroupControllers))
+ contents["cgroup"] = fs.newTaskOwnedInode(task, fs.NextIno(), 0444, newCgroupData(cgroupControllers))
}
taskInode := &taskInode{task: task}
@@ -90,17 +93,15 @@ func (fs *filesystem) newTaskInode(task *kernel.Task, pidns *kernel.PIDNamespace
taskInode.EnableLeakCheck()
inode := &taskOwnedInode{Inode: taskInode, owner: task}
- dentry := &kernfs.Dentry{}
- dentry.Init(inode)
taskInode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
- links := taskInode.OrderedChildren.Populate(dentry, contents)
+ links := taskInode.OrderedChildren.Populate(contents)
taskInode.IncLinks(links)
- return dentry
+ return inode, nil
}
-// Valid implements kernfs.inodeDynamicLookup. This inode remains valid as long
+// Valid implements kernfs.Inode.Valid. This inode remains valid as long
// as the task is still running. When it's dead, another tasks with the same
// PID could replace it.
func (i *taskInode) Valid(ctx context.Context) bool {
@@ -124,8 +125,8 @@ func (*taskInode) SetStat(context.Context, *vfs.Filesystem, *auth.Credentials, v
}
// DecRef implements kernfs.Inode.DecRef.
-func (i *taskInode) DecRef(context.Context) {
- i.taskInodeRefs.DecRef(i.Destroy)
+func (i *taskInode) DecRef(ctx context.Context) {
+ i.taskInodeRefs.DecRef(func() { i.Destroy(ctx) })
}
// taskOwnedInode implements kernfs.Inode and overrides inode owner with task
@@ -141,34 +142,23 @@ type taskOwnedInode struct {
var _ kernfs.Inode = (*taskOwnedInode)(nil)
-func (fs *filesystem) newTaskOwnedFile(task *kernel.Task, ino uint64, perm linux.FileMode, inode dynamicInode) *kernfs.Dentry {
+func (fs *filesystem) newTaskOwnedInode(task *kernel.Task, ino uint64, perm linux.FileMode, inode dynamicInode) kernfs.Inode {
// Note: credentials are overridden by taskOwnedInode.
inode.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, inode, perm)
- taskInode := &taskOwnedInode{Inode: inode, owner: task}
- d := &kernfs.Dentry{}
- d.Init(taskInode)
- return d
+ return &taskOwnedInode{Inode: inode, owner: task}
}
-func (fs *filesystem) newTaskOwnedDir(task *kernel.Task, ino uint64, perm linux.FileMode, children map[string]*kernfs.Dentry) *kernfs.Dentry {
- dir := &kernfs.StaticDirectory{}
-
+func (fs *filesystem) newTaskOwnedDir(task *kernel.Task, ino uint64, perm linux.FileMode, children map[string]kernfs.Inode) kernfs.Inode {
// Note: credentials are overridden by taskOwnedInode.
- dir.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, perm, kernfs.GenericDirectoryFDOptions{
- SeekEnd: kernfs.SeekEndZero,
- })
- dir.EnableLeakCheck()
-
- inode := &taskOwnedInode{Inode: dir, owner: task}
- d := &kernfs.Dentry{}
- d.Init(inode)
+ fdOpts := kernfs.GenericDirectoryFDOptions{SeekEnd: kernfs.SeekEndZero}
+ dir := kernfs.NewStaticDir(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, perm, children, fdOpts)
- dir.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
- links := dir.OrderedChildren.Populate(d, children)
- dir.IncLinks(links)
+ return &taskOwnedInode{Inode: dir, owner: task}
+}
- return d
+func (i *taskOwnedInode) Valid(ctx context.Context) bool {
+ return i.owner.ExitState() != kernel.TaskExitDead && i.Inode.Valid(ctx)
}
// Stat implements kernfs.Inode.Stat.
diff --git a/pkg/sentry/fsimpl/proc/task_fds.go b/pkg/sentry/fsimpl/proc/task_fds.go
index 0866cea2b..2c80ac5c2 100644
--- a/pkg/sentry/fsimpl/proc/task_fds.go
+++ b/pkg/sentry/fsimpl/proc/task_fds.go
@@ -63,7 +63,7 @@ type fdDir struct {
produceSymlink bool
}
-// IterDirents implements kernfs.inodeDynamicLookup.IterDirents.
+// IterDirents implements kernfs.inodeDirectory.IterDirents.
func (i *fdDir) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
var fds []int32
i.task.WithMuLocked(func(t *kernel.Task) {
@@ -109,16 +109,17 @@ type fdDirInode struct {
fdDir
fdDirInodeRefs
implStatFS
- kernfs.AlwaysValid
+ kernfs.InodeAlwaysValid
kernfs.InodeAttrs
kernfs.InodeDirectoryNoNewChildren
kernfs.InodeNotSymlink
+ kernfs.InodeTemporary
kernfs.OrderedChildren
}
var _ kernfs.Inode = (*fdDirInode)(nil)
-func (fs *filesystem) newFDDirInode(task *kernel.Task) *kernfs.Dentry {
+func (fs *filesystem) newFDDirInode(task *kernel.Task) kernfs.Inode {
inode := &fdDirInode{
fdDir: fdDir{
fs: fs,
@@ -128,16 +129,17 @@ func (fs *filesystem) newFDDirInode(task *kernel.Task) *kernfs.Dentry {
}
inode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
inode.EnableLeakCheck()
-
- dentry := &kernfs.Dentry{}
- dentry.Init(inode)
inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
+ return inode
+}
- return dentry
+// IterDirents implements kernfs.inodeDirectory.IterDirents.
+func (i *fdDirInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (int64, error) {
+ return i.fdDir.IterDirents(ctx, cb, offset, relOffset)
}
-// Lookup implements kernfs.inodeDynamicLookup.Lookup.
-func (i *fdDirInode) Lookup(ctx context.Context, name string) (*kernfs.Dentry, error) {
+// Lookup implements kernfs.inodeDirectory.Lookup.
+func (i *fdDirInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
fdInt, err := strconv.ParseInt(name, 10, 32)
if err != nil {
return nil, syserror.ENOENT
@@ -183,8 +185,8 @@ func (i *fdDirInode) CheckPermissions(ctx context.Context, creds *auth.Credentia
}
// DecRef implements kernfs.Inode.DecRef.
-func (i *fdDirInode) DecRef(context.Context) {
- i.fdDirInodeRefs.DecRef(i.Destroy)
+func (i *fdDirInode) DecRef(ctx context.Context) {
+ i.fdDirInodeRefs.DecRef(func() { i.Destroy(ctx) })
}
// fdSymlink is an symlink for the /proc/[pid]/fd/[fd] file.
@@ -202,16 +204,13 @@ type fdSymlink struct {
var _ kernfs.Inode = (*fdSymlink)(nil)
-func (fs *filesystem) newFDSymlink(task *kernel.Task, fd int32, ino uint64) *kernfs.Dentry {
+func (fs *filesystem) newFDSymlink(task *kernel.Task, fd int32, ino uint64) kernfs.Inode {
inode := &fdSymlink{
task: task,
fd: fd,
}
inode.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777)
-
- d := &kernfs.Dentry{}
- d.Init(inode)
- return d
+ return inode
}
func (s *fdSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) {
@@ -236,6 +235,11 @@ func (s *fdSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.VirtualDen
return vd, "", nil
}
+// Valid implements kernfs.Inode.Valid.
+func (s *fdSymlink) Valid(ctx context.Context) bool {
+ return taskFDExists(ctx, s.task, s.fd)
+}
+
// fdInfoDirInode represents the inode for /proc/[pid]/fdinfo directory.
//
// +stateify savable
@@ -243,16 +247,17 @@ type fdInfoDirInode struct {
fdDir
fdInfoDirInodeRefs
implStatFS
- kernfs.AlwaysValid
+ kernfs.InodeAlwaysValid
kernfs.InodeAttrs
kernfs.InodeDirectoryNoNewChildren
kernfs.InodeNotSymlink
+ kernfs.InodeTemporary
kernfs.OrderedChildren
}
var _ kernfs.Inode = (*fdInfoDirInode)(nil)
-func (fs *filesystem) newFDInfoDirInode(task *kernel.Task) *kernfs.Dentry {
+func (fs *filesystem) newFDInfoDirInode(task *kernel.Task) kernfs.Inode {
inode := &fdInfoDirInode{
fdDir: fdDir{
fs: fs,
@@ -261,16 +266,12 @@ func (fs *filesystem) newFDInfoDirInode(task *kernel.Task) *kernfs.Dentry {
}
inode.InodeAttrs.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
inode.EnableLeakCheck()
-
- dentry := &kernfs.Dentry{}
- dentry.Init(inode)
inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
-
- return dentry
+ return inode
}
-// Lookup implements kernfs.inodeDynamicLookup.Lookup.
-func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (*kernfs.Dentry, error) {
+// Lookup implements kernfs.inodeDirectory.Lookup.
+func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
fdInt, err := strconv.ParseInt(name, 10, 32)
if err != nil {
return nil, syserror.ENOENT
@@ -283,7 +284,12 @@ func (i *fdInfoDirInode) Lookup(ctx context.Context, name string) (*kernfs.Dentr
task: i.task,
fd: fd,
}
- return i.fs.newTaskOwnedFile(i.task, i.fs.NextIno(), 0444, data), nil
+ return i.fs.newTaskOwnedInode(i.task, i.fs.NextIno(), 0444, data), nil
+}
+
+// IterDirents implements Inode.IterDirents.
+func (i *fdInfoDirInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, relOffset int64) (newOffset int64, err error) {
+ return i.fdDir.IterDirents(ctx, cb, offset, relOffset)
}
// Open implements kernfs.Inode.Open.
@@ -298,8 +304,8 @@ func (i *fdInfoDirInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *ker
}
// DecRef implements kernfs.Inode.DecRef.
-func (i *fdInfoDirInode) DecRef(context.Context) {
- i.fdInfoDirInodeRefs.DecRef(i.Destroy)
+func (i *fdInfoDirInode) DecRef(ctx context.Context) {
+ i.fdInfoDirInodeRefs.DecRef(func() { i.Destroy(ctx) })
}
// fdInfoData implements vfs.DynamicBytesSource for /proc/[pid]/fdinfo/[fd].
@@ -328,3 +334,8 @@ func (d *fdInfoData) Generate(ctx context.Context, buf *bytes.Buffer) error {
fmt.Fprintf(buf, "flags:\t0%o\n", flags)
return nil
}
+
+// Valid implements kernfs.Inode.Valid.
+func (d *fdInfoData) Valid(ctx context.Context) bool {
+ return taskFDExists(ctx, d.task, d.fd)
+}
diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go
index 3fbf081a6..79f8b7e9f 100644
--- a/pkg/sentry/fsimpl/proc/task_files.go
+++ b/pkg/sentry/fsimpl/proc/task_files.go
@@ -247,13 +247,10 @@ type commInode struct {
task *kernel.Task
}
-func (fs *filesystem) newComm(task *kernel.Task, ino uint64, perm linux.FileMode) *kernfs.Dentry {
+func (fs *filesystem) newComm(task *kernel.Task, ino uint64, perm linux.FileMode) kernfs.Inode {
inode := &commInode{task: task}
inode.DynamicBytesFile.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, &commData{task: task}, perm)
-
- d := &kernfs.Dentry{}
- d.Init(inode)
- return d
+ return inode
}
func (i *commInode) CheckPermissions(ctx context.Context, creds *auth.Credentials, ats vfs.AccessTypes) error {
@@ -658,13 +655,10 @@ type exeSymlink struct {
var _ kernfs.Inode = (*exeSymlink)(nil)
-func (fs *filesystem) newExeSymlink(task *kernel.Task, ino uint64) *kernfs.Dentry {
+func (fs *filesystem) newExeSymlink(task *kernel.Task, ino uint64) kernfs.Inode {
inode := &exeSymlink{task: task}
inode.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777)
-
- d := &kernfs.Dentry{}
- d.Init(inode)
- return d
+ return inode
}
// Readlink implements kernfs.Inode.Readlink.
@@ -737,13 +731,10 @@ type cwdSymlink struct {
var _ kernfs.Inode = (*cwdSymlink)(nil)
-func (fs *filesystem) newCwdSymlink(task *kernel.Task, ino uint64) *kernfs.Dentry {
+func (fs *filesystem) newCwdSymlink(task *kernel.Task, ino uint64) kernfs.Inode {
inode := &cwdSymlink{task: task}
inode.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777)
-
- d := &kernfs.Dentry{}
- d.Init(inode)
- return d
+ return inode
}
// Readlink implements kernfs.Inode.Readlink.
@@ -851,7 +842,7 @@ type namespaceSymlink struct {
task *kernel.Task
}
-func (fs *filesystem) newNamespaceSymlink(task *kernel.Task, ino uint64, ns string) *kernfs.Dentry {
+func (fs *filesystem) newNamespaceSymlink(task *kernel.Task, ino uint64, ns string) kernfs.Inode {
// Namespace symlinks should contain the namespace name and the inode number
// for the namespace instance, so for example user:[123456]. We currently fake
// the inode number by sticking the symlink inode in its place.
@@ -862,9 +853,7 @@ func (fs *filesystem) newNamespaceSymlink(task *kernel.Task, ino uint64, ns stri
inode.Init(task.Credentials(), linux.UNNAMED_MAJOR, fs.devMinor, ino, target)
taskInode := &taskOwnedInode{Inode: inode, owner: task}
- d := &kernfs.Dentry{}
- d.Init(taskInode)
- return d
+ return taskInode
}
// Readlink implements kernfs.Inode.Readlink.
@@ -882,11 +871,12 @@ func (s *namespaceSymlink) Getlink(ctx context.Context, mnt *vfs.Mount) (vfs.Vir
}
// Create a synthetic inode to represent the namespace.
+ fs := mnt.Filesystem().Impl().(*filesystem)
dentry := &kernfs.Dentry{}
- dentry.Init(&namespaceInode{})
+ dentry.Init(&fs.Filesystem, &namespaceInode{})
vd := vfs.MakeVirtualDentry(mnt, dentry.VFSDentry())
- vd.IncRef()
- dentry.DecRef(ctx)
+ // Only IncRef vd.Mount() because vd.Dentry() already holds a ref of 1.
+ mnt.IncRef()
return vd, "", nil
}
diff --git a/pkg/sentry/fsimpl/proc/task_net.go b/pkg/sentry/fsimpl/proc/task_net.go
index e7f748655..3425e8698 100644
--- a/pkg/sentry/fsimpl/proc/task_net.go
+++ b/pkg/sentry/fsimpl/proc/task_net.go
@@ -37,12 +37,12 @@ import (
"gvisor.dev/gvisor/pkg/usermem"
)
-func (fs *filesystem) newTaskNetDir(task *kernel.Task) *kernfs.Dentry {
+func (fs *filesystem) newTaskNetDir(task *kernel.Task) kernfs.Inode {
k := task.Kernel()
pidns := task.PIDNamespace()
root := auth.NewRootCredentials(pidns.UserNamespace())
- var contents map[string]*kernfs.Dentry
+ var contents map[string]kernfs.Inode
if stack := task.NetworkNamespace().Stack(); stack != nil {
const (
arp = "IP address HW type Flags HW address Mask Device\n"
@@ -56,34 +56,34 @@ func (fs *filesystem) newTaskNetDir(task *kernel.Task) *kernfs.Dentry {
// TODO(gvisor.dev/issue/1833): Make sure file contents reflect the task
// network namespace.
- contents = map[string]*kernfs.Dentry{
- "dev": fs.newDentry(root, fs.NextIno(), 0444, &netDevData{stack: stack}),
- "snmp": fs.newDentry(root, fs.NextIno(), 0444, &netSnmpData{stack: stack}),
+ contents = map[string]kernfs.Inode{
+ "dev": fs.newInode(root, 0444, &netDevData{stack: stack}),
+ "snmp": fs.newInode(root, 0444, &netSnmpData{stack: stack}),
// The following files are simple stubs until they are implemented in
// netstack, if the file contains a header the stub is just the header
// otherwise it is an empty file.
- "arp": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(arp)),
- "netlink": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(netlink)),
- "netstat": fs.newDentry(root, fs.NextIno(), 0444, &netStatData{}),
- "packet": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(packet)),
- "protocols": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(protocols)),
+ "arp": fs.newInode(root, 0444, newStaticFile(arp)),
+ "netlink": fs.newInode(root, 0444, newStaticFile(netlink)),
+ "netstat": fs.newInode(root, 0444, &netStatData{}),
+ "packet": fs.newInode(root, 0444, newStaticFile(packet)),
+ "protocols": fs.newInode(root, 0444, newStaticFile(protocols)),
// Linux sets psched values to: nsec per usec, psched tick in ns, 1000000,
// high res timer ticks per sec (ClockGetres returns 1ns resolution).
- "psched": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(psched)),
- "ptype": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(ptype)),
- "route": fs.newDentry(root, fs.NextIno(), 0444, &netRouteData{stack: stack}),
- "tcp": fs.newDentry(root, fs.NextIno(), 0444, &netTCPData{kernel: k}),
- "udp": fs.newDentry(root, fs.NextIno(), 0444, &netUDPData{kernel: k}),
- "unix": fs.newDentry(root, fs.NextIno(), 0444, &netUnixData{kernel: k}),
+ "psched": fs.newInode(root, 0444, newStaticFile(psched)),
+ "ptype": fs.newInode(root, 0444, newStaticFile(ptype)),
+ "route": fs.newInode(root, 0444, &netRouteData{stack: stack}),
+ "tcp": fs.newInode(root, 0444, &netTCPData{kernel: k}),
+ "udp": fs.newInode(root, 0444, &netUDPData{kernel: k}),
+ "unix": fs.newInode(root, 0444, &netUnixData{kernel: k}),
}
if stack.SupportsIPv6() {
- contents["if_inet6"] = fs.newDentry(root, fs.NextIno(), 0444, &ifinet6{stack: stack})
- contents["ipv6_route"] = fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(""))
- contents["tcp6"] = fs.newDentry(root, fs.NextIno(), 0444, &netTCP6Data{kernel: k})
- contents["udp6"] = fs.newDentry(root, fs.NextIno(), 0444, newStaticFile(upd6))
+ contents["if_inet6"] = fs.newInode(root, 0444, &ifinet6{stack: stack})
+ contents["ipv6_route"] = fs.newInode(root, 0444, newStaticFile(""))
+ contents["tcp6"] = fs.newInode(root, 0444, &netTCP6Data{kernel: k})
+ contents["udp6"] = fs.newInode(root, 0444, newStaticFile(upd6))
}
}
diff --git a/pkg/sentry/fsimpl/proc/tasks.go b/pkg/sentry/fsimpl/proc/tasks.go
index d8f5dd509..3259c3732 100644
--- a/pkg/sentry/fsimpl/proc/tasks.go
+++ b/pkg/sentry/fsimpl/proc/tasks.go
@@ -38,10 +38,11 @@ const (
// +stateify savable
type tasksInode struct {
implStatFS
- kernfs.AlwaysValid
+ kernfs.InodeAlwaysValid
kernfs.InodeAttrs
kernfs.InodeDirectoryNoNewChildren
kernfs.InodeNotSymlink
+ kernfs.InodeTemporary // This holds no meaning as this inode can't be Looked up and is always valid.
kernfs.OrderedChildren
tasksInodeRefs
@@ -52,8 +53,6 @@ type tasksInode struct {
// '/proc/self' and '/proc/thread-self' have custom directory offsets in
// Linux. So handle them outside of OrderedChildren.
- selfSymlink *kernfs.Dentry
- threadSelfSymlink *kernfs.Dentry
// cgroupControllers is a map of controller name to directory in the
// cgroup hierarchy. These controllers are immutable and will be listed
@@ -63,52 +62,53 @@ type tasksInode struct {
var _ kernfs.Inode = (*tasksInode)(nil)
-func (fs *filesystem) newTasksInode(k *kernel.Kernel, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) (*tasksInode, *kernfs.Dentry) {
+func (fs *filesystem) newTasksInode(k *kernel.Kernel, pidns *kernel.PIDNamespace, cgroupControllers map[string]string) *tasksInode {
root := auth.NewRootCredentials(pidns.UserNamespace())
- contents := map[string]*kernfs.Dentry{
- "cpuinfo": fs.newDentry(root, fs.NextIno(), 0444, newStaticFileSetStat(cpuInfoData(k))),
- "filesystems": fs.newDentry(root, fs.NextIno(), 0444, &filesystemsData{}),
- "loadavg": fs.newDentry(root, fs.NextIno(), 0444, &loadavgData{}),
+ contents := map[string]kernfs.Inode{
+ "cpuinfo": fs.newInode(root, 0444, newStaticFileSetStat(cpuInfoData(k))),
+ "filesystems": fs.newInode(root, 0444, &filesystemsData{}),
+ "loadavg": fs.newInode(root, 0444, &loadavgData{}),
"sys": fs.newSysDir(root, k),
- "meminfo": fs.newDentry(root, fs.NextIno(), 0444, &meminfoData{}),
+ "meminfo": fs.newInode(root, 0444, &meminfoData{}),
"mounts": kernfs.NewStaticSymlink(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), "self/mounts"),
"net": kernfs.NewStaticSymlink(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), "self/net"),
- "stat": fs.newDentry(root, fs.NextIno(), 0444, &statData{}),
- "uptime": fs.newDentry(root, fs.NextIno(), 0444, &uptimeData{}),
- "version": fs.newDentry(root, fs.NextIno(), 0444, &versionData{}),
+ "stat": fs.newInode(root, 0444, &statData{}),
+ "uptime": fs.newInode(root, 0444, &uptimeData{}),
+ "version": fs.newInode(root, 0444, &versionData{}),
}
inode := &tasksInode{
pidns: pidns,
fs: fs,
- selfSymlink: fs.newSelfSymlink(root, fs.NextIno(), pidns),
- threadSelfSymlink: fs.newThreadSelfSymlink(root, fs.NextIno(), pidns),
cgroupControllers: cgroupControllers,
}
inode.InodeAttrs.Init(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0555)
inode.EnableLeakCheck()
- dentry := &kernfs.Dentry{}
- dentry.Init(inode)
-
inode.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
- links := inode.OrderedChildren.Populate(dentry, contents)
+ links := inode.OrderedChildren.Populate(contents)
inode.IncLinks(links)
- return inode, dentry
+ return inode
}
-// Lookup implements kernfs.inodeDynamicLookup.Lookup.
-func (i *tasksInode) Lookup(ctx context.Context, name string) (*kernfs.Dentry, error) {
- // Try to lookup a corresponding task.
+// Lookup implements kernfs.inodeDirectory.Lookup.
+func (i *tasksInode) Lookup(ctx context.Context, name string) (kernfs.Inode, error) {
+ // Check if a static entry was looked up.
+ if d, err := i.OrderedChildren.Lookup(ctx, name); err == nil {
+ return d, nil
+ }
+
+ // Not a static entry. Try to lookup a corresponding task.
tid, err := strconv.ParseUint(name, 10, 64)
if err != nil {
+ root := auth.NewRootCredentials(i.pidns.UserNamespace())
// If it failed to parse, check if it's one of the special handled files.
switch name {
case selfName:
- return i.selfSymlink, nil
+ return i.newSelfSymlink(root), nil
case threadSelfName:
- return i.threadSelfSymlink, nil
+ return i.newThreadSelfSymlink(root), nil
}
return nil, syserror.ENOENT
}
@@ -118,10 +118,10 @@ func (i *tasksInode) Lookup(ctx context.Context, name string) (*kernfs.Dentry, e
return nil, syserror.ENOENT
}
- return i.fs.newTaskInode(task, i.pidns, true, i.cgroupControllers), nil
+ return i.fs.newTaskInode(task, i.pidns, true, i.cgroupControllers)
}
-// IterDirents implements kernfs.inodeDynamicLookup.IterDirents.
+// IterDirents implements kernfs.inodeDirectory.IterDirents.
func (i *tasksInode) IterDirents(ctx context.Context, cb vfs.IterDirentsCallback, offset, _ int64) (int64, error) {
// fs/proc/internal.h: #define FIRST_PROCESS_ENTRY 256
const FIRST_PROCESS_ENTRY = 256
@@ -229,8 +229,8 @@ func (i *tasksInode) Stat(ctx context.Context, vsfs *vfs.Filesystem, opts vfs.St
}
// DecRef implements kernfs.Inode.DecRef.
-func (i *tasksInode) DecRef(context.Context) {
- i.tasksInodeRefs.DecRef(i.Destroy)
+func (i *tasksInode) DecRef(ctx context.Context) {
+ i.tasksInodeRefs.DecRef(func() { i.Destroy(ctx) })
}
// staticFileSetStat implements a special static file that allows inode
diff --git a/pkg/sentry/fsimpl/proc/tasks_files.go b/pkg/sentry/fsimpl/proc/tasks_files.go
index f268c59b0..07c27cdd9 100644
--- a/pkg/sentry/fsimpl/proc/tasks_files.go
+++ b/pkg/sentry/fsimpl/proc/tasks_files.go
@@ -43,13 +43,10 @@ type selfSymlink struct {
var _ kernfs.Inode = (*selfSymlink)(nil)
-func (fs *filesystem) newSelfSymlink(creds *auth.Credentials, ino uint64, pidns *kernel.PIDNamespace) *kernfs.Dentry {
- inode := &selfSymlink{pidns: pidns}
- inode.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777)
-
- d := &kernfs.Dentry{}
- d.Init(inode)
- return d
+func (i *tasksInode) newSelfSymlink(creds *auth.Credentials) kernfs.Inode {
+ inode := &selfSymlink{pidns: i.pidns}
+ inode.Init(creds, linux.UNNAMED_MAJOR, i.fs.devMinor, i.fs.NextIno(), linux.ModeSymlink|0777)
+ return inode
}
func (s *selfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) {
@@ -87,13 +84,10 @@ type threadSelfSymlink struct {
var _ kernfs.Inode = (*threadSelfSymlink)(nil)
-func (fs *filesystem) newThreadSelfSymlink(creds *auth.Credentials, ino uint64, pidns *kernel.PIDNamespace) *kernfs.Dentry {
- inode := &threadSelfSymlink{pidns: pidns}
- inode.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, ino, linux.ModeSymlink|0777)
-
- d := &kernfs.Dentry{}
- d.Init(inode)
- return d
+func (i *tasksInode) newThreadSelfSymlink(creds *auth.Credentials) kernfs.Inode {
+ inode := &threadSelfSymlink{pidns: i.pidns}
+ inode.Init(creds, linux.UNNAMED_MAJOR, i.fs.devMinor, i.fs.NextIno(), linux.ModeSymlink|0777)
+ return inode
}
func (s *threadSelfSymlink) Readlink(ctx context.Context, _ *vfs.Mount) (string, error) {
diff --git a/pkg/sentry/fsimpl/proc/tasks_sys.go b/pkg/sentry/fsimpl/proc/tasks_sys.go
index 3312b0418..95420368d 100644
--- a/pkg/sentry/fsimpl/proc/tasks_sys.go
+++ b/pkg/sentry/fsimpl/proc/tasks_sys.go
@@ -40,93 +40,93 @@ const (
)
// newSysDir returns the dentry corresponding to /proc/sys directory.
-func (fs *filesystem) newSysDir(root *auth.Credentials, k *kernel.Kernel) *kernfs.Dentry {
- return newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
- "kernel": newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
- "hostname": fs.newDentry(root, fs.NextIno(), 0444, &hostnameData{}),
- "shmall": fs.newDentry(root, fs.NextIno(), 0444, shmData(linux.SHMALL)),
- "shmmax": fs.newDentry(root, fs.NextIno(), 0444, shmData(linux.SHMMAX)),
- "shmmni": fs.newDentry(root, fs.NextIno(), 0444, shmData(linux.SHMMNI)),
+func (fs *filesystem) newSysDir(root *auth.Credentials, k *kernel.Kernel) kernfs.Inode {
+ return fs.newStaticDir(root, map[string]kernfs.Inode{
+ "kernel": fs.newStaticDir(root, map[string]kernfs.Inode{
+ "hostname": fs.newInode(root, 0444, &hostnameData{}),
+ "shmall": fs.newInode(root, 0444, shmData(linux.SHMALL)),
+ "shmmax": fs.newInode(root, 0444, shmData(linux.SHMMAX)),
+ "shmmni": fs.newInode(root, 0444, shmData(linux.SHMMNI)),
}),
- "vm": newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
- "mmap_min_addr": fs.newDentry(root, fs.NextIno(), 0444, &mmapMinAddrData{k: k}),
- "overcommit_memory": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0\n")),
+ "vm": fs.newStaticDir(root, map[string]kernfs.Inode{
+ "mmap_min_addr": fs.newInode(root, 0444, &mmapMinAddrData{k: k}),
+ "overcommit_memory": fs.newInode(root, 0444, newStaticFile("0\n")),
}),
"net": fs.newSysNetDir(root, k),
})
}
// newSysNetDir returns the dentry corresponding to /proc/sys/net directory.
-func (fs *filesystem) newSysNetDir(root *auth.Credentials, k *kernel.Kernel) *kernfs.Dentry {
- var contents map[string]*kernfs.Dentry
+func (fs *filesystem) newSysNetDir(root *auth.Credentials, k *kernel.Kernel) kernfs.Inode {
+ var contents map[string]kernfs.Inode
// TODO(gvisor.dev/issue/1833): Support for using the network stack in the
// network namespace of the calling process.
if stack := k.RootNetworkNamespace().Stack(); stack != nil {
- contents = map[string]*kernfs.Dentry{
- "ipv4": newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
- "tcp_recovery": fs.newDentry(root, fs.NextIno(), 0644, &tcpRecoveryData{stack: stack}),
- "tcp_rmem": fs.newDentry(root, fs.NextIno(), 0644, &tcpMemData{stack: stack, dir: tcpRMem}),
- "tcp_sack": fs.newDentry(root, fs.NextIno(), 0644, &tcpSackData{stack: stack}),
- "tcp_wmem": fs.newDentry(root, fs.NextIno(), 0644, &tcpMemData{stack: stack, dir: tcpWMem}),
- "ip_forward": fs.newDentry(root, fs.NextIno(), 0444, &ipForwarding{stack: stack}),
+ contents = map[string]kernfs.Inode{
+ "ipv4": fs.newStaticDir(root, map[string]kernfs.Inode{
+ "tcp_recovery": fs.newInode(root, 0644, &tcpRecoveryData{stack: stack}),
+ "tcp_rmem": fs.newInode(root, 0644, &tcpMemData{stack: stack, dir: tcpRMem}),
+ "tcp_sack": fs.newInode(root, 0644, &tcpSackData{stack: stack}),
+ "tcp_wmem": fs.newInode(root, 0644, &tcpMemData{stack: stack, dir: tcpWMem}),
+ "ip_forward": fs.newInode(root, 0444, &ipForwarding{stack: stack}),
// The following files are simple stubs until they are implemented in
// netstack, most of these files are configuration related. We use the
// value closest to the actual netstack behavior or any empty file, all
// of these files will have mode 0444 (read-only for all users).
- "ip_local_port_range": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("16000 65535")),
- "ip_local_reserved_ports": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("")),
- "ipfrag_time": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("30")),
- "ip_nonlocal_bind": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
- "ip_no_pmtu_disc": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1")),
+ "ip_local_port_range": fs.newInode(root, 0444, newStaticFile("16000 65535")),
+ "ip_local_reserved_ports": fs.newInode(root, 0444, newStaticFile("")),
+ "ipfrag_time": fs.newInode(root, 0444, newStaticFile("30")),
+ "ip_nonlocal_bind": fs.newInode(root, 0444, newStaticFile("0")),
+ "ip_no_pmtu_disc": fs.newInode(root, 0444, newStaticFile("1")),
// tcp_allowed_congestion_control tell the user what they are able to
// do as an unprivledged process so we leave it empty.
- "tcp_allowed_congestion_control": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("")),
- "tcp_available_congestion_control": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("reno")),
- "tcp_congestion_control": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("reno")),
+ "tcp_allowed_congestion_control": fs.newInode(root, 0444, newStaticFile("")),
+ "tcp_available_congestion_control": fs.newInode(root, 0444, newStaticFile("reno")),
+ "tcp_congestion_control": fs.newInode(root, 0444, newStaticFile("reno")),
// Many of the following stub files are features netstack doesn't
// support. The unsupported features return "0" to indicate they are
// disabled.
- "tcp_base_mss": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1280")),
- "tcp_dsack": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
- "tcp_early_retrans": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
- "tcp_fack": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
- "tcp_fastopen": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
- "tcp_fastopen_key": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("")),
- "tcp_invalid_ratelimit": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
- "tcp_keepalive_intvl": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
- "tcp_keepalive_probes": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
- "tcp_keepalive_time": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("7200")),
- "tcp_mtu_probing": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
- "tcp_no_metrics_save": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1")),
- "tcp_probe_interval": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
- "tcp_probe_threshold": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
- "tcp_retries1": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("3")),
- "tcp_retries2": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("15")),
- "tcp_rfc1337": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1")),
- "tcp_slow_start_after_idle": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1")),
- "tcp_synack_retries": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("5")),
- "tcp_syn_retries": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("3")),
- "tcp_timestamps": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("1")),
+ "tcp_base_mss": fs.newInode(root, 0444, newStaticFile("1280")),
+ "tcp_dsack": fs.newInode(root, 0444, newStaticFile("0")),
+ "tcp_early_retrans": fs.newInode(root, 0444, newStaticFile("0")),
+ "tcp_fack": fs.newInode(root, 0444, newStaticFile("0")),
+ "tcp_fastopen": fs.newInode(root, 0444, newStaticFile("0")),
+ "tcp_fastopen_key": fs.newInode(root, 0444, newStaticFile("")),
+ "tcp_invalid_ratelimit": fs.newInode(root, 0444, newStaticFile("0")),
+ "tcp_keepalive_intvl": fs.newInode(root, 0444, newStaticFile("0")),
+ "tcp_keepalive_probes": fs.newInode(root, 0444, newStaticFile("0")),
+ "tcp_keepalive_time": fs.newInode(root, 0444, newStaticFile("7200")),
+ "tcp_mtu_probing": fs.newInode(root, 0444, newStaticFile("0")),
+ "tcp_no_metrics_save": fs.newInode(root, 0444, newStaticFile("1")),
+ "tcp_probe_interval": fs.newInode(root, 0444, newStaticFile("0")),
+ "tcp_probe_threshold": fs.newInode(root, 0444, newStaticFile("0")),
+ "tcp_retries1": fs.newInode(root, 0444, newStaticFile("3")),
+ "tcp_retries2": fs.newInode(root, 0444, newStaticFile("15")),
+ "tcp_rfc1337": fs.newInode(root, 0444, newStaticFile("1")),
+ "tcp_slow_start_after_idle": fs.newInode(root, 0444, newStaticFile("1")),
+ "tcp_synack_retries": fs.newInode(root, 0444, newStaticFile("5")),
+ "tcp_syn_retries": fs.newInode(root, 0444, newStaticFile("3")),
+ "tcp_timestamps": fs.newInode(root, 0444, newStaticFile("1")),
}),
- "core": newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, map[string]*kernfs.Dentry{
- "default_qdisc": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("pfifo_fast")),
- "message_burst": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("10")),
- "message_cost": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("5")),
- "optmem_max": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("0")),
- "rmem_default": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("212992")),
- "rmem_max": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("212992")),
- "somaxconn": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("128")),
- "wmem_default": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("212992")),
- "wmem_max": fs.newDentry(root, fs.NextIno(), 0444, newStaticFile("212992")),
+ "core": fs.newStaticDir(root, map[string]kernfs.Inode{
+ "default_qdisc": fs.newInode(root, 0444, newStaticFile("pfifo_fast")),
+ "message_burst": fs.newInode(root, 0444, newStaticFile("10")),
+ "message_cost": fs.newInode(root, 0444, newStaticFile("5")),
+ "optmem_max": fs.newInode(root, 0444, newStaticFile("0")),
+ "rmem_default": fs.newInode(root, 0444, newStaticFile("212992")),
+ "rmem_max": fs.newInode(root, 0444, newStaticFile("212992")),
+ "somaxconn": fs.newInode(root, 0444, newStaticFile("128")),
+ "wmem_default": fs.newInode(root, 0444, newStaticFile("212992")),
+ "wmem_max": fs.newInode(root, 0444, newStaticFile("212992")),
}),
}
}
- return newStaticDir(root, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), 0555, contents)
+ return fs.newStaticDir(root, contents)
}
// mmapMinAddrData implements vfs.DynamicBytesSource for
diff --git a/pkg/sentry/fsimpl/sockfs/sockfs.go b/pkg/sentry/fsimpl/sockfs/sockfs.go
index 29e5371d6..9eef16cc6 100644
--- a/pkg/sentry/fsimpl/sockfs/sockfs.go
+++ b/pkg/sentry/fsimpl/sockfs/sockfs.go
@@ -114,6 +114,6 @@ func NewDentry(creds *auth.Credentials, mnt *vfs.Mount) *vfs.Dentry {
i.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.Filesystem.NextIno(), filemode)
d := &kernfs.Dentry{}
- d.Init(i)
+ d.Init(&fs.Filesystem, i)
return d.VFSDentry()
}
diff --git a/pkg/sentry/fsimpl/sys/kcov.go b/pkg/sentry/fsimpl/sys/kcov.go
index 1a6749e53..94366d429 100644
--- a/pkg/sentry/fsimpl/sys/kcov.go
+++ b/pkg/sentry/fsimpl/sys/kcov.go
@@ -27,12 +27,10 @@ import (
"gvisor.dev/gvisor/pkg/usermem"
)
-func (fs *filesystem) newKcovFile(ctx context.Context, creds *auth.Credentials) *kernfs.Dentry {
+func (fs *filesystem) newKcovFile(ctx context.Context, creds *auth.Credentials) kernfs.Inode {
k := &kcovInode{}
k.InodeAttrs.Init(creds, 0, 0, fs.NextIno(), linux.S_IFREG|0600)
- d := &kernfs.Dentry{}
- d.Init(k)
- return d
+ return k
}
// kcovInode implements kernfs.Inode.
diff --git a/pkg/sentry/fsimpl/sys/sys.go b/pkg/sentry/fsimpl/sys/sys.go
index 1568c581f..5a06f4e1c 100644
--- a/pkg/sentry/fsimpl/sys/sys.go
+++ b/pkg/sentry/fsimpl/sys/sys.go
@@ -64,15 +64,15 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
}
fs.VFSFilesystem().Init(vfsObj, &fsType, fs)
- root := fs.newDir(creds, defaultSysDirMode, map[string]*kernfs.Dentry{
+ root := fs.newDir(creds, defaultSysDirMode, map[string]kernfs.Inode{
"block": fs.newDir(creds, defaultSysDirMode, nil),
"bus": fs.newDir(creds, defaultSysDirMode, nil),
- "class": fs.newDir(creds, defaultSysDirMode, map[string]*kernfs.Dentry{
+ "class": fs.newDir(creds, defaultSysDirMode, map[string]kernfs.Inode{
"power_supply": fs.newDir(creds, defaultSysDirMode, nil),
}),
"dev": fs.newDir(creds, defaultSysDirMode, nil),
- "devices": fs.newDir(creds, defaultSysDirMode, map[string]*kernfs.Dentry{
- "system": fs.newDir(creds, defaultSysDirMode, map[string]*kernfs.Dentry{
+ "devices": fs.newDir(creds, defaultSysDirMode, map[string]kernfs.Inode{
+ "system": fs.newDir(creds, defaultSysDirMode, map[string]kernfs.Inode{
"cpu": cpuDir(ctx, fs, creds),
}),
}),
@@ -82,13 +82,15 @@ func (fsType FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
"module": fs.newDir(creds, defaultSysDirMode, nil),
"power": fs.newDir(creds, defaultSysDirMode, nil),
})
- return fs.VFSFilesystem(), root.VFSDentry(), nil
+ var rootD kernfs.Dentry
+ rootD.Init(&fs.Filesystem, root)
+ return fs.VFSFilesystem(), rootD.VFSDentry(), nil
}
-func cpuDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) *kernfs.Dentry {
+func cpuDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) kernfs.Inode {
k := kernel.KernelFromContext(ctx)
maxCPUCores := k.ApplicationCores()
- children := map[string]*kernfs.Dentry{
+ children := map[string]kernfs.Inode{
"online": fs.newCPUFile(creds, maxCPUCores, linux.FileMode(0444)),
"possible": fs.newCPUFile(creds, maxCPUCores, linux.FileMode(0444)),
"present": fs.newCPUFile(creds, maxCPUCores, linux.FileMode(0444)),
@@ -99,14 +101,14 @@ func cpuDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) *kernf
return fs.newDir(creds, defaultSysDirMode, children)
}
-func kernelDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) *kernfs.Dentry {
+func kernelDir(ctx context.Context, fs *filesystem, creds *auth.Credentials) kernfs.Inode {
// If kcov is available, set up /sys/kernel/debug/kcov. Technically, debugfs
// should be mounted at debug/, but for our purposes, it is sufficient to
// keep it in sys.
- var children map[string]*kernfs.Dentry
+ var children map[string]kernfs.Inode
if coverage.KcovAvailable() {
- children = map[string]*kernfs.Dentry{
- "debug": fs.newDir(creds, linux.FileMode(0700), map[string]*kernfs.Dentry{
+ children = map[string]kernfs.Inode{
+ "debug": fs.newDir(creds, linux.FileMode(0700), map[string]kernfs.Inode{
"kcov": fs.newKcovFile(ctx, creds),
}),
}
@@ -125,27 +127,23 @@ func (fs *filesystem) Release(ctx context.Context) {
// +stateify savable
type dir struct {
dirRefs
+ kernfs.InodeAlwaysValid
kernfs.InodeAttrs
- kernfs.InodeNoDynamicLookup
kernfs.InodeNotSymlink
kernfs.InodeDirectoryNoNewChildren
+ kernfs.InodeTemporary
kernfs.OrderedChildren
locks vfs.FileLocks
-
- dentry kernfs.Dentry
}
-func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, contents map[string]*kernfs.Dentry) *kernfs.Dentry {
+func (fs *filesystem) newDir(creds *auth.Credentials, mode linux.FileMode, contents map[string]kernfs.Inode) kernfs.Inode {
d := &dir{}
d.InodeAttrs.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linux.ModeDirectory|0755)
d.OrderedChildren.Init(kernfs.OrderedChildrenOptions{})
d.EnableLeakCheck()
- d.dentry.Init(d)
-
- d.IncLinks(d.OrderedChildren.Populate(&d.dentry, contents))
-
- return &d.dentry
+ d.IncLinks(d.OrderedChildren.Populate(contents))
+ return d
}
// SetStat implements kernfs.Inode.SetStat not allowing inode attributes to be changed.
@@ -165,8 +163,8 @@ func (d *dir) Open(ctx context.Context, rp *vfs.ResolvingPath, kd *kernfs.Dentry
}
// DecRef implements kernfs.Inode.DecRef.
-func (d *dir) DecRef(context.Context) {
- d.dirRefs.DecRef(d.Destroy)
+func (d *dir) DecRef(ctx context.Context) {
+ d.dirRefs.DecRef(func() { d.Destroy(ctx) })
}
// StatFS implements kernfs.Inode.StatFS.
@@ -190,12 +188,10 @@ func (c *cpuFile) Generate(ctx context.Context, buf *bytes.Buffer) error {
return nil
}
-func (fs *filesystem) newCPUFile(creds *auth.Credentials, maxCores uint, mode linux.FileMode) *kernfs.Dentry {
+func (fs *filesystem) newCPUFile(creds *auth.Credentials, maxCores uint, mode linux.FileMode) kernfs.Inode {
c := &cpuFile{maxCores: maxCores}
c.DynamicBytesFile.Init(creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), c, mode)
- d := &kernfs.Dentry{}
- d.Init(c)
- return d
+ return c
}
// +stateify savable
diff --git a/pkg/sentry/fsimpl/sys/sys_state_autogen.go b/pkg/sentry/fsimpl/sys/sys_state_autogen.go
index 410e5a7d0..64c9c9d1f 100644
--- a/pkg/sentry/fsimpl/sys/sys_state_autogen.go
+++ b/pkg/sentry/fsimpl/sys/sys_state_autogen.go
@@ -151,13 +151,13 @@ func (d *dir) StateTypeName() string {
func (d *dir) StateFields() []string {
return []string{
"dirRefs",
+ "InodeAlwaysValid",
"InodeAttrs",
- "InodeNoDynamicLookup",
"InodeNotSymlink",
"InodeDirectoryNoNewChildren",
+ "InodeTemporary",
"OrderedChildren",
"locks",
- "dentry",
}
}
@@ -166,26 +166,26 @@ func (d *dir) beforeSave() {}
func (d *dir) StateSave(stateSinkObject state.Sink) {
d.beforeSave()
stateSinkObject.Save(0, &d.dirRefs)
- stateSinkObject.Save(1, &d.InodeAttrs)
- stateSinkObject.Save(2, &d.InodeNoDynamicLookup)
+ stateSinkObject.Save(1, &d.InodeAlwaysValid)
+ stateSinkObject.Save(2, &d.InodeAttrs)
stateSinkObject.Save(3, &d.InodeNotSymlink)
stateSinkObject.Save(4, &d.InodeDirectoryNoNewChildren)
- stateSinkObject.Save(5, &d.OrderedChildren)
- stateSinkObject.Save(6, &d.locks)
- stateSinkObject.Save(7, &d.dentry)
+ stateSinkObject.Save(5, &d.InodeTemporary)
+ stateSinkObject.Save(6, &d.OrderedChildren)
+ stateSinkObject.Save(7, &d.locks)
}
func (d *dir) afterLoad() {}
func (d *dir) StateLoad(stateSourceObject state.Source) {
stateSourceObject.Load(0, &d.dirRefs)
- stateSourceObject.Load(1, &d.InodeAttrs)
- stateSourceObject.Load(2, &d.InodeNoDynamicLookup)
+ stateSourceObject.Load(1, &d.InodeAlwaysValid)
+ stateSourceObject.Load(2, &d.InodeAttrs)
stateSourceObject.Load(3, &d.InodeNotSymlink)
stateSourceObject.Load(4, &d.InodeDirectoryNoNewChildren)
- stateSourceObject.Load(5, &d.OrderedChildren)
- stateSourceObject.Load(6, &d.locks)
- stateSourceObject.Load(7, &d.dentry)
+ stateSourceObject.Load(5, &d.InodeTemporary)
+ stateSourceObject.Load(6, &d.OrderedChildren)
+ stateSourceObject.Load(7, &d.locks)
}
func (c *cpuFile) StateTypeName() string {